test/convolution_1e9+7.test.cpp

View this file on GitHub
Last update: 2025-01-16 19:25:04+08:00
Problem: https://judge.yosupo.jp/problem/convolution_mod_1000000007

Depends on

Code

#define PROBLEM "https://judge.yosupo.jp/problem/convolution_mod_1000000007"

#include "../default/t.cpp"
#include "../modint/MontgomeryModInt.cpp"
#include "../poly/NTTmint.cpp"
#include "../poly/NTTanymod.cpp"

using Mint = MontgomeryModInt<1000000007>;

signed main() {
  ios::sync_with_stdio(false), cin.tie(NULL);

  int n, m; cin >> n >> m;
  vector<Mint> a(n), b(m);
  for(Mint &x : a)
    cin >> x;
  for(Mint &x : b)
    cin >> x;

  cout << convAnyMod(a, b) << '\n';

  return 0;
}

#line 1 "test/convolution_1e9+7.test.cpp"
#define PROBLEM "https://judge.yosupo.jp/problem/convolution_mod_1000000007"

#line 1 "default/t.cpp"
#include <algorithm>
#include <array>
#include <bitset>
#include <cassert>
#include <cctype>
#include <cfenv>
#include <cfloat>
#include <chrono>
#include <cinttypes>
#include <climits>
#include <cmath>
#include <complex>
#include <cstdarg>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <deque>
#include <fstream>
#include <functional>
#include <initializer_list>
#include <iomanip>
#include <ios>
#include <iostream>
#include <istream>
#include <iterator>
#include <limits>
#include <list>
#include <map>
#include <memory>
#include <new>
#include <numeric>
#include <ostream>
#include <queue>
#include <random>
#include <set>
#include <sstream>
#include <stack>
#include <streambuf>
#include <string>
#include <tuple>
#include <type_traits>
#include <variant>
#include <bit>
#include <compare>
#include <concepts>
#include <numbers>
#include <ranges>
#include <span>

// Largest value of the compiler-provided signed __int128: the unsigned value
// with only the top bit set, minus one, cast back to the signed type.
#define INT128_MAX (__int128)(((unsigned __int128) 1 << ((sizeof(__int128) * __CHAR_BIT__) - 1)) - 1)
// Smallest value of signed __int128, derived from INT128_MAX.
#define INT128_MIN (-INT128_MAX - 1)

// NOTE: from here on every `clock` token expands to the current
// steady_clock tick count, so the C library function of the same name can no
// longer be spelled as `clock`.
#define clock chrono::steady_clock::now().time_since_epoch().count()

using namespace std;

// Prints a pair as "first second" (single space, no trailing newline).
// The pair is taken by const reference so printing never copies its members.
template<class T1, class T2>
ostream& operator<<(ostream& os, const pair<T1, T2> &pr) {
  return os << pr.first << ' ' << pr.second;
}
// Prints the elements of a std::array separated by single spaces, with no
// trailing separator. Elements are visited by const reference, so printing
// does not copy them.
template<class T, size_t N>
ostream& operator<<(ostream& os, const array<T, N> &arr) {
  size_t i = 0;
  for(const T &x : arr) {
    os << x;
    if (++i != N) os << ' ';  // separator between elements only
  }
  return os;
}
// Prints the elements of a vector separated by single spaces, with no
// trailing separator; an empty vector prints nothing. Elements are visited by
// const reference, so printing does not copy them.
template<class T>
ostream& operator<<(ostream& os, const vector<T> &vec) {
  size_t i = 0;
  for(const T &x : vec) {
    os << x;
    if (++i != vec.size()) os << ' ';  // separator between elements only
  }
  return os;
}
// Prints the elements of a set in iteration order, separated by single
// spaces, with no trailing separator. Elements are visited by const
// reference, so printing does not copy them.
template<class T>
ostream& operator<<(ostream& os, const set<T> &s) {
  size_t i = 0;
  for(const T &x : s) {
    os << x;
    if (++i != s.size()) os << ' ';  // separator between elements only
  }
  return os;
}
// Prints every entry of a map as "key value", entries separated by single
// spaces, with no trailing separator. Entries are visited by const reference
// and printed field by field, so no key/value pair is copied.
template<class T1, class T2>
ostream& operator<<(ostream& os, const map<T1, T2> &m) {
  size_t i = 0;
  for(const auto &entry : m) {
    os << entry.first << ' ' << entry.second;
    if (++i != m.size()) os << ' ';  // separator between entries only
  }
  return os;
}

#ifdef DEBUG
// dbg(a, b, ...) writes "(a, b, ...) = <value of a>, <value of b>, ..." and a
// newline to cerr; the left side is the stringified argument list.
#define dbg(...) cerr << '(', _do(#__VA_ARGS__), cerr << ") = ", _do2(__VA_ARGS__)
// Writes its arguments to cerr separated by ", " (no line ending).
template<typename T, typename ...S> void _do(T &&x, S&&...y) {
  cerr << x;
  if constexpr (sizeof...(S) > 0) {
    cerr << ", ";
    _do(y...);
  }
}
// Same as _do, but ends the line (and flushes) after the last argument.
template<typename T, typename ...S> void _do2(T &&x, S&&...y) {
  cerr << x;
  if constexpr (sizeof...(S) > 0) {
    cerr << ", ";
    _do2(y...);
  } else {
    cerr << endl;
  }
}
#else
// Without DEBUG the macro expands to nothing.
#define dbg(...)
#endif

// Short names for frequently used arithmetic and pair types.
using ll = long long;
using ull = unsigned long long;
using ldb = long double;
using pii = pair<int, int>;
using pll = pair<ll, ll>;

// priority_queue ordered with greater<T>, so top() is the smallest element.
template<typename T> using min_heap = priority_queue<T, vector<T>, greater<T>>;
// priority_queue with its default ordering, so top() is the largest element.
template<typename T> using max_heap = priority_queue<T>;

// Replaces v in place by its running accumulation under OP (default: plus),
// i.e. v[i] becomes OP(...OP(OP(v[0], v[1]), v[2])..., v[i]).
// An empty range is left untouched.
template<ranges::forward_range rng, class T = ranges::range_value_t<rng>, class OP = plus<T>>
void pSum(rng &v) {
  if (v.empty())
    return;
  T acc = v[0];
  for(auto it = next(ranges::begin(v)); it != ranges::end(v); ++it)
    *it = acc = OP()(acc, *it);
}
// Replaces v in place by its running accumulation under the caller-supplied
// binary operation op, called as op(accumulated, current).
// An empty range is left untouched.
template<ranges::forward_range rng, class T = ranges::range_value_t<rng>, class OP>
void pSum(rng &v, OP op) {
  if (v.empty())
    return;
  T acc = v[0];
  for(auto it = next(ranges::begin(v)); it != ranges::end(v); ++it)
    *it = acc = op(acc, *it);
}

// Sorts v and removes duplicate values, leaving each distinct value once in
// ascending order.
template<ranges::forward_range rng>
void Unique(rng &v) {
  ranges::sort(v);
  const auto tail = unique(v.begin(), v.end());
  const auto kept = tail - v.begin();  // number of distinct values
  v.resize(kept);
}

// Returns q with q[p[i]] = i for every index i of p.
// The result starts as a copy of p, so it has the same size and type.
template<ranges::random_access_range rng>
rng invPerm(rng p) {
  rng inverse = p;
  int idx = 0;
  for(const auto &target : p)
    inverse[target] = idx++;
  return inverse;
}

// Returns a copy of v in which, for every index i of p, the element v[i] has
// been written to position p[i].
template<ranges::random_access_range rng, ranges::random_access_range rng2>
rng Permute(rng v, rng2 p) {
  rng placed = v;
  int src = 0;
  for(const auto &dst : p)
    placed[dst] = v[src++];
  return placed;
}

// Reads m edges "u v" from cin and returns the adjacency lists of a graph
// with n vertices. `base` is subtracted from both endpoints before they are
// used as indices. When `directed` is false each edge is stored in both
// directions.
template<bool directed>
vector<vector<int>> readGraph(int n, int m, int base) {
  vector<vector<int>> adj(n);
  while(m-- > 0) {
    int from, to;
    cin >> from >> to;
    from -= base;
    to -= base;
    adj[from].emplace_back(to);
    if constexpr (!directed)
      adj[to].emplace_back(from);
  }
  return adj;
}

// Sets bit number `bit` of msk to x, leaving every other bit unchanged.
template<class T>
void setBit(T &msk, int bit, bool x) {
  if (x)
    msk = msk | (T(1) << bit);
  else
    msk = msk & ~(T(1) << bit);
}
// Toggles bit number `bit` of msk.
template<class T>
void flipBit(T &msk, int bit) {
  const T mask = T(1) << bit;
  msk = msk ^ mask;
}
// Returns whether bit number `bit` of msk is set.
template<class T>
bool getBit(T msk, int bit) {
  const T shifted = msk >> bit;
  return shifted & T(1);
}

// Returns a / b rounded toward negative infinity.
template<class T>
T floorDiv(T a, T b) {
  // Normalise so that the divisor is positive.
  if (b < 0) {
    a *= -1;
    b *= -1;
  }
  if (a >= 0)
    return a / b;
  // Negative dividend: shift before the truncating division so it rounds down.
  return (a - b + 1) / b;
}
// Returns a / b rounded toward positive infinity.
template<class T>
T ceilDiv(T a, T b) {
  // Normalise so that the divisor is positive.
  if (b < 0) {
    a *= -1;
    b *= -1;
  }
  if (a >= 0)
    return (a + b - 1) / b;
  // Negative dividend: the truncating division already rounds up.
  return a / b;
}

// Lowers a to b when b is smaller; returns whether a was changed.
template<class T>
bool chmin(T &a, T b) {
  if (a > b) {
    a = b;
    return true;
  }
  return false;
}
// Raises a to b when b is larger; returns whether a was changed.
template<class T>
bool chmax(T &a, T b) {
  if (a < b) {
    a = b;
    return true;
  }
  return false;
}
#line 1 "modint/MontgomeryModInt.cpp"
//reference: https://github.com/NyaanNyaan/library/blob/master/modint/montgomery-modint.hpp#L10
//note: mod should be an odd prime less than 2^30.

// Integer modulo the compile-time constant `mod`, stored in Montgomery form
// so that multiplication needs no division. See the note above: mod is
// expected to be an odd prime below 2^30 (not checked here).
template<uint32_t mod>
struct MontgomeryModInt {
  using mint = MontgomeryModInt;
  using i32 = int32_t;
  using u32 = uint32_t;
  using u64 = uint64_t;

  // Multiplies mod^(2^i) for i = 0..30 in wrapping 32-bit arithmetic, i.e.
  // mod^(2^31 - 1), and returns the negation. For odd mod that power is the
  // inverse of mod modulo 2^32, so the result is -mod^{-1} mod 2^32.
  static constexpr u32 get_r() {
    u32 res = 1, base = mod;
    for(i32 i = 0; i < 31; i++)
      res *= base, base *= base;
    return -res;
  }

  // Returns the modulus.
  static constexpr u32 get_mod() {
    return mod;
  }

  static constexpr u32 n2 = -u64(mod) % mod; //2^64 % mod
  static constexpr u32 r = get_r(); //-P^{-1} % 2^32

  // Stored representative (Montgomery form). It is not always fully reduced:
  // get() and the comparison operators subtract mod once when a >= mod.
  u32 a;

  // Montgomery reduction: adds the multiple of mod that clears the low 32
  // bits of b, then drops those bits.
  static u32 reduce(const u64 &b) {
    return (b + u64(u32(b) * r) * mod) >> 32;
  }

  // Converts a plain value into Montgomery form by multiplying with
  // 2^64 % mod and reducing.
  static u32 transform(const u64 &b) {
    return reduce(u64(b) * n2);
  }

  // Zero.
  MontgomeryModInt() : a(0) {}
  // Constructs from any signed 64-bit value; `b % mod + mod` is positive even
  // for negative b, so negative inputs are accepted.
  MontgomeryModInt(const int64_t &b) 
    : a(transform(b % mod + mod)) {}

  // Returns *this raised to the k-th power by binary exponentiation.
  mint pow(u64 k) const {
    mint res(1), base(*this);
    while(k) {
      if (k & 1) 
        res *= base;
      base *= base, k >>= 1;
    }
    return res;
  }

  // Returns *this raised to the power mod - 2, which is the multiplicative
  // inverse when mod is prime and the value is non-zero.
  mint inverse() const { return (*this).pow(mod - 2); }

  // Returns the value as an ordinary integer in [0, mod).
  u32 get() const {
    u32 res = reduce(a);
    return res >= mod ? res - mod : res;
  }

  // Adds b; the sum is shifted down by 2 * mod and shifted back if that made
  // it negative, keeping the stored value below 2 * mod.
  mint& operator+=(const mint &b) {
    if (i32(a += b.a - 2 * mod) < 0) a += 2 * mod;
    return *this;
  }

  // Subtracts b, adding 2 * mod back when the difference went negative.
  mint& operator-=(const mint &b) {
    if (i32(a -= b.a) < 0) a += 2 * mod;
    return *this;
  }

  // Multiplies by b (one Montgomery reduction).
  mint& operator*=(const mint &b) {
    a = reduce(u64(a) * b.a);
    return *this;
  }

  // Divides by b, i.e. multiplies by b.inverse().
  mint& operator/=(const mint &b) {
    a = reduce(u64(a) * b.inverse().a);
    return *this;
  }

  // Additive inverse. Declared const so that const values can be negated.
  mint operator-() const { return mint() - mint(*this); }
  // Equality of the represented residues; both stored values are normalised
  // first because a and a + mod denote the same residue.
  bool operator==(mint b) const {
    return (a >= mod ? a - mod : a) == (b.a >= mod ? b.a - mod : b.a);
  }
  bool operator!=(mint b) const {
    return (a >= mod ? a - mod : a) != (b.a >= mod ? b.a - mod : b.a);
  }

  friend mint operator+(mint c, mint d) { return c += d; }
  friend mint operator-(mint c, mint d) { return c -= d; }
  friend mint operator*(mint c, mint d) { return c *= d; }
  friend mint operator/(mint c, mint d) { return c /= d; }

  // Writes the value as an ordinary integer in [0, mod).
  friend ostream& operator<<(ostream& os, const mint& b) {
    return os << b.get();
  }
  // Reads a signed 64-bit integer and stores it reduced modulo mod.
  friend istream& operator>>(istream& is, mint& b) {
    int64_t val;
    is >> val;
    b = mint(val);
    return is;
  }
};

using mint = MontgomeryModInt<998244353>;
#line 1 "poly/NTTmint.cpp"
//reference: https://judge.yosupo.jp/submission/69896
//remark: MOD = 2^K * C + 1, R is a primitive root modulo MOD
//remark: a.size() <= 2^K must be satisfied
//some common modulo: 998244353  = 2^23 * 119 + 1, R = 3
//                    469762049  = 2^26 * 7   + 1, R = 3
//                    1224736769 = 2^24 * 73  + 1, R = 3

// Number-theoretic transform and convolution over the modulus 2^k * c + 1.
//   k, c : describe the modulus as 2^k * c + 1
//   r    : not referenced inside this struct (the remark above calls R a
//          primitive root of the modulus)
//   Mint : modular integer type the transform works on; presumably it must use
//          the same modulus as k and c describe -- this is not checked here
template<int32_t k = 23, int32_t c = 119, int32_t r = 3, class Mint = MontgomeryModInt<998244353>>
struct NTT {

  using u32 = uint32_t;
  static constexpr u32 mod = (1 << k) * c + 1;
  // Returns the modulus 2^k * c + 1.
  static constexpr u32 get_mod() { return mod; }

  // Transforms a in place. a.size() is used as the transform length and is
  // expected to be a power of two (conv() guarantees this). With
  // inverse == false the butterfly span m shrinks from n / 2 down to 1; with
  // inverse == true it grows from 1 upward and the result is scaled by 1 / n.
  static void ntt(vector<Mint> &a, bool inverse) {
    // Twiddle increments and their inverses, filled on the first call
    // (w[0] is still zero until then).
    static array<Mint, 30> w, w_inv;
    if (w[0] == 0) {
      // Smallest value >= 2 whose (mod - 1) / 2-th power is not 1.
      Mint root = 2;
      while(root.pow((mod - 1) / 2) == 1) root += 1;
      for(int i = 0; i < 30; i++)
        w[i] = -(root.pow((mod - 1) >> (i + 2))), w_inv[i] = 1 / w[i];
    }
    int n = ssize(a);
    if (not inverse) {
      for(int m = n; m >>= 1; ) {
        Mint ww = 1;
        for(int s = 0, l = 0; s < n; s += 2 * m) {
          for(int i = s, j = s + m; i < s + m; i++, j++) {
            Mint x = a[i], y = a[j] * ww;
            a[i] = x + y, a[j] = x - y;
          }
          // Advance the twiddle using the number of trailing zero bits of
          // the 1-based block counter.
          ww *= w[__builtin_ctz(++l)];
        }
      }
    } else {
      for(int m = 1; m < n; m *= 2) {
        Mint ww = 1;
        for(int s = 0, l = 0; s < n; s += 2 * m) {
          for(int i = s, j = s + m; i < s + m; i++, j++) {
            Mint x = a[i], y = a[j];
            a[i] = x + y, a[j] = (x - y) * ww;
          }
          ww *= w_inv[__builtin_ctz(++l)];
        }
      }
      Mint inv = 1 / Mint(n);
      for(Mint &x : a) x *= inv;
    }
  }

  // Returns the convolution of a and b, of length a.size() + b.size() - 1.
  // If either operand is empty the result is empty.
  static vector<Mint> conv(vector<Mint> a, vector<Mint> b) {
    // Without this guard an empty operand would give a result of the wrong
    // length, and two empty operands would pass 2^32 - 1 to bit_ceil.
    if (a.empty() || b.empty())
      return {};

    int sz = ssize(a) + ssize(b) - 1;
    int n = bit_ceil((u32)sz);  // transform length: power of two >= sz

    a.resize(n, 0);
    ntt(a, false);
    b.resize(n, 0);
    ntt(b, false);

    // Pointwise product in the transformed domain.
    for(int i = 0; i < n; i++)
      a[i] *= b[i];

    ntt(a, true);

    a.resize(sz);

    return a;
  }
};
#line 1 "poly/NTTanymod.cpp"
//reference: https://math314.hateblo.jp/entry/2015/05/07/014908
//reference: https://judge.yosupo.jp/submission/15581
//remark: n * mod^2 < product of the three NTT moduli (998244353 * 469762049 * 167772161 ~= 7.9e25) should be satisfied

// Convolution of a and b for an arbitrary modulus (the modulus of Mint).
// The inputs are convolved separately modulo three NTT-friendly primes and
// every output coefficient is then rebuilt from its three residues in mixed
// radix form y0 + m0 * y1 + m0 * m1 * y2, evaluated in Mint.
// Returns a.size() + b.size() - 1 coefficients, or an empty vector when
// either input is empty.
template<class Mint>
vector<Mint> convAnyMod(vector<Mint> a, vector<Mint> b) {
  // An empty operand has an empty convolution; returning early also keeps the
  // size computation inside NTT::conv away from non-positive lengths.
  if (a.empty() || b.empty())
    return {};

  using Mint0 = MontgomeryModInt<998244353>;
  using Mint1 = MontgomeryModInt<469762049>;
  using Mint2 = MontgomeryModInt<167772161>;
  NTT<23, 119, 3, Mint0> ntt0;
  NTT<26, 7, 3, Mint1> ntt1;
  NTT<25, 5, 3, Mint2> ntt2;
  vector<Mint0> a0(ssize(a)), b0(ssize(b));
  vector<Mint1> a1(ssize(a)), b1(ssize(b));
  vector<Mint2> a2(ssize(a)), b2(ssize(b));
  // Re-express every coefficient modulo each of the three primes.
  for(int i = 0; i < ssize(a); i++)
    a0[i] = a[i].get(), a1[i] = a[i].get(), a2[i] = a[i].get();
  for(int i = 0; i < ssize(b); i++)
    b0[i] = b[i].get(), b1[i] = b[i].get(), b2[i] = b[i].get();
  vector<Mint0> x = ntt0.conv(a0, b0);
  vector<Mint1> y = ntt1.conv(a1, b1);
  vector<Mint2> z = ntt2.conv(a2, b2);
  vector<Mint> res(ssize(x));
  constexpr uint32_t mod0 = ntt0.get_mod(), mod1 = ntt1.get_mod();
  // Inverses used by the reconstruction, computed once per instantiation.
  static const Mint1 im0 = 1 / Mint1(mod0);
  static const Mint2 im1 = 1 / Mint2(mod1), im0m1 = im1 / mod0;
  static const Mint m0 = mod0, m0m1 = m0 * mod1;
  for(int i = 0; i < ssize(x); i++) {
    // y0, y1, y2 are the mixed-radix digits of the exact coefficient.
    int y0 = x[i].get();
    int y1 = (im0 * (y[i] - y0)).get();
    int y2 = (im0m1 * (z[i] - y0) - im1 * y1).get();
    res[i] = y0 + m0 * y1 + m0m1 * y2;
  }

  return res;
}
#line 7 "test/convolution_1e9+7.test.cpp"

using Mint = MontgomeryModInt<1000000007>;

signed main() {
  ios::sync_with_stdio(false), cin.tie(NULL);

  int n, m; cin >> n >> m;
  vector<Mint> a(n), b(m);
  for(Mint &x : a)
    cin >> x;
  for(Mint &x : b)
    cin >> x;

  cout << convAnyMod(a, b) << '\n';

  return 0;
}