Outline

Convex hull trick (CHT)

Introduction

This post on Codeforces explained how CHT works thorough. I’ll focus on when to use CHT here.

To solve problems using CHT, you need to transform the original problem to forms like $\max_{k} \left\{ a_k x + b_k \right\}$ ( or $\min_{k} \left\{ a_k x + b_k \right\}$, of course). Sometimes, the problem will give you the “lines” explicity. However, sometimes the “lines” might be complicated and needs some observations. Usually this kind of problems are wrapped into a DP problem (that’s why the title mentioned DP optimization). Let’s go to the examples to see how it works.

Note that usually CHT can be replaced with a special kind of segment tree called Li-Chao segmemt tree. I’ve written a post about it (link). Futhermore, if the problem doesn’t require us to solve it online, we usually can use a technique called CDQ divide and conquer to solve it. I also have written a post about it (link).

Implementation

Two kinds of implementation are given here.

The first one is found in the KTH notebook, called “LineContainer” (ref).

code
struct Line {
  mutable ll m, b, p;
  bool operator<(const Line& o) const { return m < o.m; }
  bool operator<(ll x) const { return p < x; }
};

struct LineContainer : multiset<Line, less<>> {
  // (for doubles, use inf = 1/.0, div(a,b) = a/b)
  const ll inf = LLONG_MAX;
  ll div(ll a, ll b) { // floored division
    return a / b - ((a ^ b) < 0 && a % b); 
  }
  bool isect(iterator x, iterator y) {
    if (y == end()) { x->p = inf; return false; }
    if (x->m == y->m) x->p = x->b > y->b ? inf : -inf;
    else x->p = div(y->b - x->b, x->m - y->m);
    return x->p >= y->p;
  }
  void add(ll m, ll b) {
    auto z = insert({m, b, 0}), y = z++, x = y;
    while (isect(y, z)) z = erase(z);
    if (x != begin() && isect(--x, y)) isect(x, y = erase(y));
    while ((y = x) != begin() && (--x)->p >= y->p) isect(x, erase(y));
  }
  ll query(ll x) {
    assert(!empty());
    auto l = *lower_bound(x);
    return l.m * x + l.b;
  }
};

The second one is called “HullDynamic” (ref).

code
const ll is_query = -(1LL<<62);
struct Line {
  ll m, b;
  mutable function<const Line*()> succ;
  bool operator<(const Line& rhs) const {
    if (rhs.b != is_query) return m < rhs.m;
    const Line* s = succ();
    return s ? b - s->b < (s->m - m) * rhs.m : 0;
  }
}; // maintain upper hull for maximum
struct HullDynamic : public multiset<Line> {
  bool bad(iterator y) {
    auto z = next(y);
    if (y == begin()) {
      if (z == end()) return 0;
      return y->m == z->m && y->b <= z->b;
    }
    auto x = prev(y);
    if(z==end())return y->m==x->m&&y->b<=x->b;
    return (x->b-y->b)*(z->m-y->m)>=
            (y->b-z->b)*(y->m-x->m);
  }
  void insert_line(ll m, ll b) {
    auto y = insert({m, b});
    y->succ = [=]{return next(y)==end()?0:&*next(y);};
    if(bad(y)) {erase(y); return; }
    while(next(y)!=end()&&bad(next(y)))erase(next(y));
    while(y!=begin()&&bad(prev(y)))erase(prev(y));
  }
  ll eval(ll x) {
    auto l = *lower_bound((Line) {x, is_query});
    return l.m * x + l.b;
  }
};

According to this comment on Codeforces, the first implementation is faster and simpler than other implementations. However, it needs std=c++14 too work properly, while the second implementation only needs std=c++11.

Example problems

CF631E - Product Sum

Problem description

You’re given an array $a$ of length $N$. The characteristic value of the array is $C=\sum_{i=1}^{N}a_i\cdot i$. Now, you can perform the following operation at most one: pick some element in the array, remove it, and insert it to another (or same) position. Please calculate the maximum $C$ you can achieve after performing at most one operations.

$2\le N\le 2\times 10^5, |a_i| \le 10^6$

Problem analysis

Suppose we move the $i^{th}$ element to the $j^{th}$ position $(j \neq i)$. How much does the characteristic value changes? Elements before $i$ and elements after $j$ won’t change, only elements $a_{i+1}, a_{i+2}, \dots, a_j$ and $a_i$ will change the characteristic value. How much will it change? $$ a_i\cdot j - a_i\cdot i + \left(\sum_{k=i+1}^{j}a_k\cdot (k \pm 1)\right) - \left(\sum_{k=i+1}^{j}a_k \cdot k \right)= \pm\left(\sum_{k=i+1}^{j}a_k\right) + a_i\cdot j - a_i\cdot i $$ , where the $\pm$ sign is decided by relation bewteen $i$ and $j$. Observe that the summation term in the last equation can be handled with prefix sums ($sum_j-sum_i$), so we can calculate the cost in $O(1)$.

Let $C_0$ be the characteristic value of $a$ before any operations. Now, let’s try to solve it with DP:

  • DP state : $dp_i$ represents the maximum $C$ you can achieve by moving $a_i$ to some position $1\le j\lt i$
  • DP transition : $dp_i = \max_j \left\{ C_0 + change(i, j) \right\}$, where $change(i, j)$ is mentioned above.
  • Final answer : $\max_i \left\{ dp_i \right\}$

Now, if we calculate it naively, the complexity is $O(N^2)$. However, observe that $$ \begin{align*} C_0 + change(i, j) &= C_0 \pm(sum_j - sum_i) + a_i \cdot j - a_i \cdot i \\ &= C_0 \pm sum_j \mp sum_i + a_i \cdot j - a_i \cdot i \\ &= C_0 \mp sum_i - a_i\cdot i + j \cdot a_i \pm sum_j \\ \end{align*} $$ If we let $m_j=j$, $x=a_i$, and $b_j=sum_j$, then we have a line $mx\pm b$! Therefore, it can be solved with CHT and improve the time complexity to $O(N\log N)$.

Problem solution

Be careful that $i<j$ and $i>j$ need to be calculate seperately (at least in my implementation).

code
#include <bits/stdc++.h>
using namespace std;
typedef long long ll;

const int N = (int)2e5 + 5;

struct Line {
  mutable ll m, b, p;
  bool operator<(const Line& o) const { return m < o.m; }
  bool operator<(ll x) const { return p < x; }
};

struct LineContainer : multiset<Line, less<>> {
  // (for doubles, use inf = 1/.0, div(a,b) = a/b)
  const ll inf = LLONG_MAX;
  ll div(ll a, ll b) { // floored division
    return a / b - ((a ^ b) < 0 && a % b);
  }
  bool isect(iterator x, iterator y) {
    if (y == end()) { x->p = inf; return false; }
    if (x->m == y->m) x->p = x->b > y->b ? inf : -inf;
    else x->p = div(y->b - x->b, x->m - y->m);
    return x->p >= y->p;
  }
  void add(ll m, ll b) {
    auto z = insert({m, b, 0}), y = z++, x = y;
    while (isect(y, z)) z = erase(z);
    if (x != begin() && isect(--x, y)) isect(x, y = erase(y));
    while ((y = x) != begin() && (--x)->p >= y->p) isect(x, erase(y));
  }
  ll query(ll x) {
    assert(!empty());
    auto l = *lower_bound(x);
    return l.m * x + l.b;
  }
} cht;

ll n, a[N], sum[N], pre[N], dp[N];

void init() {
  cin >> n;
  for(int i = 1 ; i <= n ; ++i) {
    cin >> a[i];
    sum[i] = sum[i - 1] + a[i];
    pre[i] = pre[i - 1] + a[i] * i;
  }
}
void solve() {
  ll ans = pre[n];
  for(int i = 2 ; i <= n ; ++i) {
    // dp[i] = max(dp[i], pre[n] + sum[i - 1] - sum[j - 1] - a[i] * i + a[i] * j);
    cht.add(i - 1, -sum[i - 2]);
    dp[i] = pre[n] + sum[i - 1] - a[i] * i + cht.query(a[i]);
    ans = max(ans, dp[i]);
  }
  cht.clear();
  for(int i = n - 1 ; i >= 1 ; --i) {
    // dp[i] = max(dp[i], pre[n] - sum[j] + sum[i] - a[i] * i + a[i] * j);
    cht.add(i + 1, -sum[i + 1]);
    dp[i] = pre[n] + sum[i] - a[i] * i + cht.query(a[i]);
    ans = max(ans, dp[i]);
  }
  cout << ans << '\n';
}

int main() {
  ios_base::sync_with_stdio(0), cin.tie(0);
  init();
  solve();
}

CF660F - Bear and Bowling 4

Problem description

You are given an array $a$ with length $N$. For a subarray $a[i..j]$, we define its score as $\sum_{k=i}^{j} (k - i + 1) \cdot a_k$. Please find the subarray with the largest score and ouput the score.

$1\le N\le 2\times 10^5, |a_i| \le 10^7$

Problem analysis

Similar to the previous problem, we can get a formula to calculate the score for subarray $a[i..j]$ in $O(1)$: $$ score(i, j) = \sum_{k=i}^{j} (k - i + 1) \cdot a_k = \left( \sum_{k=i}^{j} k\cdot a_k \right) - (i - 1)\left( \sum_{k=i}^{j} a_k \right) $$ The two summation can also be calculated with prefix sums, so we can get the value in $O(1)$.

After we have the formula above, this problem is just like the previous problem:

  • DP state : $dp_i$ represents the maximum subarray ending at $i$
  • DP transition : $dp_i=\max_{1\le j\le i} \{ score(j, i) \}$
  • Final answer : $\max_{1\le i\le N} dp_i$

The $score(j, i)$ term can be rewritten to $$ \begin{align*} score(j, i) &= pre_i - pre_{j-1} - (j-1)sum_{i} + (j - 1)sum_{j-1} \\ &= pre_i - pre_{j-1} - j\cdot sum_{i} + (j - 1)sum_{j-1} \\ &= pre_i + (-j\cdot sum_{i} + (j-1)sum_{j-1}-pre_{j-1}) \\ \end{align*} $$ Let $m_j=-j, x=sum_i, b_j=(j-1)sum_{j-1}-pre_{j-1}$, then we have a line $m_jx+b_j$! So CHT can be used here.

Problem solution

Note that I didn’t explicitly declare the dp arrays (it may get MLE result). See code for more details.

code
#include <bits/stdc++.h>
using namespace std;
typedef long long ll;

const ll inf = (ll)1e18;

struct Line {
  mutable ll m, b, p;
  bool operator<(const Line& o) const { return m < o.m; }
  bool operator<(ll x) const { return p < x; }
};

struct LineContainer : multiset<Line, less<>> {
  // (for doubles, use inf = 1/.0, div(a,b) = a/b)
  const ll inf = LLONG_MAX;
  ll div(ll a, ll b) { // floored division
    return a / b - ((a ^ b) < 0 && a % b); }
  bool isect(iterator x, iterator y) {
    if (y == end()) { x->p = inf; return false; }
    if (x->m == y->m) x->p = x->b > y->b ? inf : -inf;
    else x->p = div(y->b - x->b, x->m - y->m);
    return x->p >= y->p;
  }
  void add(ll m, ll b) {
    auto z = insert({m, b, 0}), y = z++, x = y;
    while (isect(y, z)) z = erase(z);
    if (x != begin() && isect(--x, y)) isect(x, y = erase(y));
    while ((y = x) != begin() && (--x)->p >= y->p)
      isect(x, erase(y));
  }
  ll query(ll x) {
    assert(!empty());
    auto l = *lower_bound(x);
    return l.m * x + l.b;
  }
} cht;

ll n, a, pre, sum;

void solve() {
  ll ans = 0;
  cin >> n;
  cht.add(0, 0);
  for(int i = 1 ; i <= n ; ++i) {
    cin >> a; sum += a, pre += a * i;
    ans = max(ans, pre + cht.query(sum));
    cht.add(-i, sum * i - pre);
  }
  cout << ans << '\n';
}

int main() {
  ios_base::sync_with_stdio(0), cin.tie(0);
  solve();
}

CF311B - Cats Transport

Problem description

Zxr960115 is owner of a large farm. He feeds $M$ cute cats and employs $P$ feeders. There’s a straight road across the farm and $N$ hills along the road, numbered from $1$ to $N$ from left to right. The distance between hill $i$ and $(i - 1)$ is $d_i$ meters. The feeders live in hill $1$.

One day, the cats went out to play. Cat $i$ went on a trip to hill $h_i$, finished its trip at time $t_i$, and then waited at hill $h_i$ for a feeder. The feeders must take all the cats. Each feeder goes straightly from hill $1$ to $N$ without waiting at a hill and takes all the waiting cats at each hill away. Feeders walk at a speed of $1$ meter per unit time and are strong enough to take as many cats as they want.

Your task is to schedule the time leaving from hill $1$ for each feeder so that the sum of the waiting time of all cats is minimized. Note that the time leaving from the hill can be negetive (time travelling???).

$2\le N\le 10^5, 1\le M\le 10^5, 1\le P\le 100, 1\le d_i\lt 10^4, 1\le h_i\le N, 0\le t_i\le 10^9$

Problem analysis

For each cat, we can first calculate the earliest time that the feeder can leave hill $1$. Let it be $et_i$. Then $et_i=t_i+\sum_{j=2}^{h_i} d_j$, as the feeder must walk pass hill $2, 3, \dots, h_i$ before he collect it. Then, we sort the cats according to $et_i$ from small to large. Observe that the feeder will always collect a continus segment (in the sorted array) of cats.

Now, suppose the feeder wants to collect cats from $[l, r]$ and leave hill $1$ at time $x$. Observe that $x\ge et_r$ in order to collect all cats from $[l, r]$. How long will the $i^{th}$ cat wait? It will wait exactly $x - et_i$! Therefore we can calculate the time in $O(1)$ by prefix sums.

Finally, we can write our DP equations:

  • DP state : $dp_{i, j}$ : smallest waiting time for $j$ feeders to collect cats in $[i, l]$
  • DP transition : $dp_{i, j} = \min_{0\le k\lt i} \left\{ dp_{k, j} + cost(k + 1, i) \right\}$
  • Final answer : $dp_{N, M}$

Calculating it directly take $O(N^2K)$. However, we can transform the DP transition into lines! $$ \begin{align*} dp_{k, j} + cost(k+1,i) &= dp_{k, j} + \left( \sum_{l=k+1}^{i} et_i - et_l \right) \\ &= dp_{k, j} + et_i(i-k) - \left( \sum_{l=k+1}^{i} et_l \right) \\ &= dp_{k, j} + i\cdot et_i - k\cdot et_i - sum_i + sum_k \\ &= i\cdot et_i -sum_i + (-k\cdot et_i + sum_k + dp_{k, j}) \\ \end{align*} $$ Let $m_k=-k, x=et_i, b_k=sum_k + dp_{k, j}$, then we get a line $m_ix+b_i$. Thus the time complexity is improved to $O(NK\log N)$.

Problem solution

I use rolling array techniques to optimize space complexity. Also, as the slope of the lines we insert are increasing and our queries are increasing too, we don’t need to implement the complete version of CHT. Only a deque is needed here. More details in code.

code
#include <bits/stdc++.h>
#define F first
#define S second
using namespace std;
typedef long long ll;

const int N = (int)1e5 + 5;
const ll inf = (ll)1e18;

int n, m, p, d[N];
ll pre[N], preD[N], dp[2][N];
struct Cat { int h, t; ll et; } cats[N];

ll value(pair<ll, ll> line, ll x) {
  return line.F * x + line.S;
}
bool better(pair<ll, ll> L, pair<ll, ll> L1, pair<ll, ll> L2) {
  return (L1.S - L.S) * (L1.F - L2.F) <= (L2.S - L1.S) * (L.F - L1.F);
}

void init() {
  cin >> n >> m >> p;
  for(int i = 2 ; i <= n ; ++i) cin >> d[i], preD[i] = preD[i - 1] + d[i];
  for(int i = 1 ; i <= m ; ++i) {
    cin >> cats[i].h >> cats[i].t;
    cats[i].et = cats[i].t - preD[cats[i].h]; // assume >= 0
  }
  sort(cats + 1, cats + 1 + m, [&](auto a, auto b) { return a.et < b.et; });
  for(int i = 1 ; i <= m ; ++i) pre[i] = pre[i - 1] + cats[i].et;
}
void solve() {
  fill(dp[0], dp[0] + N, inf);
  fill(dp[1], dp[1] + N, inf);
  int cur = 1;
  ll ans = inf;
  for(int j = 1 ; j <= p ; ++j) {
    deque<pair<ll, ll> > dq; // <a, b>: ax + b
    dq.push_back({0, 0});
    for(int i = 1 ; i <= m ; ++i) {
      while(dq.size() >= 2 && value(dq[0], cats[i].et) > value(dq[1], cats[i].et)) dq.pop_front();
      dp[cur][i] = i * cats[i].et - pre[i] + value(dq.front(), cats[i].et);
      pair<ll, ll> line = {-i, dp[cur ^ 1][i] + pre[i]};
      while(dq.size() >= 2 && better(line, dq[dq.size() - 1], dq[dq.size() - 2])) dq.pop_back();
      dq.push_back(line);
    }
    ans = min(ans, dp[cur][m]);
    cur ^= 1;
  }
  cout << ans << '\n';
}

int main() {
  ios_base::sync_with_stdio(0), cin.tie(0);
  init();
  solve();
}

CF673E - Levels and Regions

Problem description

You are given a game with $N$ levels, the hardness of the $i^{th}$ level is $t_i$. Now you need to parition the $N$ levels into $K$ groups. Each groups contains some positive number of consecutive levels.

The game repeats the the following process:

  1. Choose the first group with at least one non-beaten level. Denote it as $X$. (If all is beaten, you’re done)
  2. Suppose that $X=\left\{x_1, x_1 + 1, \dots, x_i,\dots\right\}$ and you’ve beat level $x_1, x_2 + 1, \dots x_i - 1$. You now have only $\frac{t_{x_i}}{t_{x_1} + t_{x_1+1} + \dots + t_{x_i}}$ chance to choose the level $x_i$. If you failed to choose $x_i$, you’ll waste one hour playing some level you’ve beaten already. Otherwise, you’ll spent one hour to beat level $x_i$.

Now, please output the minimum possible expected number of hours required to finish the game.

Problem analysis

First, observe that we must play the game from level $1$, $2$ until $N$. That is, we must play it in order.

Then, we need to calculate the expected time to finish (choose) some level $i$. Suppose we have $p$ probability to choose level $i$. If $x$ is the expected time to finish it, then $$ x=p\text{ (choose level i) }+(1-p)(x+1) \text{ (fail to choose level i) }\implies x=\frac{1}{p} $$ Or you can calculate directly ($x=1\cdot p + 2\cdot p(p-1) + 3\cdot p(p-1)^2\dots$).

Now, for a group $X=\left\{x_1, x_1 + 1, \dots, x_k\right\}$, the expected time to pass all levels in this group is $$ \begin{align*} \sum_{i=x_1}^{x_k} \frac{\sum_{j=x_1}^{i}t_j}{t_i} &= \sum_{i=x_1}^{x_k} \left( \frac{\sum_{j=1}^{i}t_j}{t_i} - \frac{\sum_{j=1}^{x_1-1}t_j}{t_i}\right) \\ &= \sum_{i=x_1}^{x_k} \left(\frac{\sum_{j=1}^{i}t_j}{t_i}\right) - \sum_{j=1}^{x_1-1}t_j\left(\frac{1}{t_{x_1}} + \frac{1}{t_{x_1+1}} + \dots + \frac{1}{t_{x_k}} \right) \\ \end{align*} $$ , which can be calculate in $O(1)$ with prefix sum (this one is tricky, in my opinion).

Finally, we can write DP equations and solve it with CHT:

  • DP state : $dp_{i, j}$ represents the minimum expected value if we partition the first $i$ levels into $j$ groups
  • DP transition : $dp_{i, j}=\max_{0\le k\lt i} \left\{ dp_{k, j - 1} + cost(k + 1, i) \right\}$
  • Final answer : $dp_{N, K}$

We can use CHT to optimize it from $O(N^2K)$ to $O(NK\log N)$.

Problem solution

Observe that both the slope and queries are increasing.

code
#include <bits/stdc++.h>
#define F first
#define S second
using namespace std;
typedef long long ll;
typedef long double ld;

const int N = (int)2e5 + 5;
const int K = 50 + 5;
const ld inf = (ld)1e18;

struct CHT {
  deque<pair<ld, ld> > dq;

  ld value(pair<ld, ld> l, ld x) {
    return l.F * x + l.S;
  }
  bool better(pair<ld, ld> L, pair<ld, ld> L1, pair<ld, ld> L2) {
    return (L2.S - L.S) * (L1.F - L2.F) <= (L.F - L2.F) * (L2.S - L1.S);
  }
  void init() {
    dq.clear();
    dq.push_back({0, 0});
  }
  void add(ld m, ld b) {
    while((int)dq.size() >= 2 && better({m, b}, dq[dq.size() - 1], dq[dq.size() - 2])) dq.pop_back();
    dq.push_back({m, b});
  }
  ld query(ld x) {
    while((int)dq.size() >= 2 && value(dq[0], x) < value(dq[1], x)) dq.pop_front();
    return value(dq.front(), x);
  }
} cht;

int n, k, t[N];
ll sum[N];
ld dp[2][N], fsum[N], psum[N];

ld calc(int i, int j) {
  return psum[j] - psum[i - 1] - (ld)sum[i - 1] * (fsum[j] - fsum[i - 1]);
}

void init() {
  cin >> n >> k;
  for(int i = 1 ; i <= n ; ++i) {
    cin >> t[i];
    sum[i] = sum[i - 1] + t[i];
    fsum[i]= fsum[i - 1] + 1.0 / t[i];
    psum[i] = psum[i - 1] + (ld)sum[i] / t[i];
  }
}
void solve() {
  for(int i = 0 ; i < 2 ; ++i) fill(dp[i], dp[i] + n + 1, inf);
  dp[0][0] = 0;
  int cur = 1;
  for(int j = 1 ; j <= k ; ++j) {
    cht.init();
    for(int i = 1 ; i <= n ; ++i) {
      dp[cur][i] = psum[i] - cht.query(fsum[i]);
      cht.add(sum[i], -fsum[i] * sum[i] + psum[i] - dp[cur ^ 1][i]);
    }
    cur ^= 1;
  }
  cout << fixed << setprecision(10) << dp[cur ^ 1][n] << '\n';
}

int main() {
  ios_base::sync_with_stdio(0), cin.tie(0);
  init();
  solve();
}

CF1179D - Fedor Runs for President

Problem description

You’re given a tree of size $N$, and you can add an extra edge to that tree. Please maximize the number of different simple paths. A simple path is a path which goes through every vertex no more than once. Two simple paths are named distinct if sets of their edges are distinct.

$2\le N\le 5\times 10^5$

Problem description

Problem solution

code
#include <bits/stdc++.h>
#define F first
#define S second
using namespace std;
typedef long long ll;
typedef long double ld;

const int N = (int)5e5 + 5;
const ll inf = (ll)1e18;

vector<int> G[N];
ll dp[N], sz[N], n;

struct CHT {
  deque<pair<ll, ll> > dq;

  void init() {
    dq.clear();
    dq.push_back({0, inf});
  }
  inline ll value(pair<ll, ll> l, ll x) { return l.F * x + l.S; }
  inline bool better(pair<ll, ll> L, pair<ll, ll> L1, pair<ll, ll> L2) {
    return (L2.S - L.S) * (L1.F - L2.F) >= (L.F - L2.F) * (L2.S - L1.S);
  }
  ll query(ll x) {
    while(dq.size() >= 2 && value(dq[dq.size() - 1], x) > value(dq[dq.size() - 2], x)) dq.pop_back();
    return value(dq.back(), x);
  }
  void insert(pair<ll, ll> line) {
    while(dq.size() >= 2 && better(line, dq[dq.size() - 1], dq[dq.size() - 2])) dq.pop_back();
    dq.push_back(line);
  }
} cht;

inline ll square(ll x) { return x * (x - 1); }
int dfs(int u, int p) {
  sz[u] = 1;
  for(auto v : G[u]) if(v != p) sz[u] += dfs(v, u);
  return sz[u];
}
ll DP(int u, int p) {
  dp[u] = inf;
  for(auto v : G[u]) if(v != p) dp[u] = min(dp[u], DP(v, u) + square(sz[u] - sz[v]));
  if(p != 0 && (int)G[u].size() == 1) dp[u] = 0;
  return dp[u];
}
ll dfs2(int u, int p) {
  ll ans = inf;
  sort(G[u].begin(), G[u].end(), [&](auto a, auto b) { return sz[a] < sz[b]; });
  for(auto v : G[u]) if(v != p) {
    ans = min(ans, dfs2(v, u));
  }
  cht.init();
  for(auto v : G[u]) if(v != p) {
    ans = min(ans, n * n - 2LL * sz[v] * n + dp[v] + sz[v] * sz[v] + sz[v] - n + cht.query(sz[v]));
    cht.insert({2LL * sz[v], sz[v] * sz[v] + sz[v] + dp[v] - 2LL * sz[v] * n});
  }
  return ans;
}

void init() {
  cin >> n;
  for(int i = 1 ; i < n ; ++i) {
    int a, b; cin >> a >> b;
    G[a].push_back(b);
    G[b].push_back(a);
  }
}
void solve() {
  if(n == 2) {
    cout << 2 << '\n';
    return;
  }
  int root = 1;
  for(int i = 1 ; i <= n ; ++i) if((int)G[i].size() > 1) { root = i; break; }
  dfs(root, 0);
  DP(root, 0);
  cout << (ll)n * (n - 1) - dfs2(root, 0) / 2 << '\n';
}

int main() {
  ios_base::sync_with_stdio(0), cin.tie(0);
  init();
  solve();
}

CF455E - Function

Problem description

You’re given a function an array $a$ with length $N$ and a function $f$ which is defined recursively:

$$ f(i, j)= a[j] + \begin{cases} 0 &, i=1 \\ \min \left\{ f(i-1, j), f(i-1, j-1) \right\} &, 2\le i\le N \end{cases} $$

Now, you’re given $Q$ queries $(x_i, y_i), x_i \le y_i$. Please calculate $f(x_i, y_i)$.

$1\le N\le 10^5, 0\le a_i\le 10^4, 1\le Q\le 10^5, 1\le x_i\le y_i \le N$

Problem analysis

First, the function can be transform into another problem: A query $(i, j)$ means that you are standing at $j$ initially and need to make $i$ moves. In every move, you can choose to stay at the same place or move one step left, and pay cost corresponding to the position you stand. You need to minimize your cost.

After this transformation, the function can be calculate by $f(i, j)=\min_{j-i+1 \le k \le j} \left(\sum_{l=k}^{j} a_l\cdot cnt_l\right)$, where $cnt_l$ represents how many times we visit $a_l$. If you look carefully at this function, then you might observe that if we’re ending at some $k$, then $a_k$ must equals to $\min_{k\le l\le j} a_l$ otherwise we can stop eariler to obtain a better cost. This means that we can get the best cost the we always walk to the end directly and stay there till the end. The corresponding cost will be $sum_j - sum_k + a_k\cdot(i-j+k)$, where $sum_i$ is the prefix sum of $a$.

Now, let’s try to rewrite the cost: $$ \begin{align*} sum_j-sum_k+a_k\cdot(i-j+k) &= sum_j + (a_k\cdot (i - j) + a_k\cdot k - sum_k) \end{align*} $$ If we let $m_k=a_k, x=(i-j), b_k=a_k\cdot k - sum_k$, then we got a line $m_kx+b_k$ (again!). Note that $f(i, j)$ can be written as: $$ f(i, j)=\min_{j-i+1 \le k \le j} m_k(i-j)+b_k $$ Therefore, we can solve it by CHT.

Problem solution

You might wonder: “The length of query depends on $i$ and $j$, how do I handle this?”. This can be solved with segment trees. Specifically, we open a CHT on every node of the segment tree, which contains lines that belongs to that segment ($l\le k\lt r$). See code for more details. The time complexity is $O(N\log^2 N)$.

code
#include <bits/stdc++.h>
using namespace std;
typedef long long ll;

const int N = (int)1e5 + 5;

namespace segtree {
  struct Line {
    mutable ll k, m, p;
    bool operator<(const Line& o) const { return k < o.k; }
    bool operator<(ll x) const { return p < x; }
  };
  struct LineContainer : multiset<Line, less<>> {
    // (for doubles, use inf = 1/.0, div(a,b) = a/b)
    const ll inf = LLONG_MAX;
    ll div(ll a, ll b) { // floored division
      return a / b - ((a ^ b) < 0 && a % b); }
    bool isect(iterator x, iterator y) {
      if (y == end()) { x->p = inf; return false; }
      if (x->k == y->k) x->p = x->m > y->m ? inf : -inf;
      else x->p = div(y->m - x->m, x->k - y->k);
      return x->p >= y->p;
    }
    void add(ll k, ll m) {
      auto z = insert({k, m, 0}), y = z++, x = y;
      while (isect(y, z)) z = erase(z);
      if (x != begin() && isect(--x, y)) isect(x, y = erase(y));
      while ((y = x) != begin() && (--x)->p >= y->p)
        isect(x, erase(y));
    }
    ll query(ll x) {
      assert(!empty());
      auto l = *lower_bound(x);
      return l.k * x + l.m;
    }
  } cht[N * 4];

  void insert(int l, int r, int q, ll m, ll b, int o=0) {
    cht[o].add(-m, -b);
    if(l + 1 == r) return;
    int mid = (l + r) >> 1, lson = o * 2 + 1, rson = o * 2 + 2;
    if(q < mid) insert(l, mid, q, m, b, lson);
    else insert(mid, r, q, m, b, rson);
  }
  ll query(int l, int r, int ql, int qr, ll x, int o=0) {
    if(ql <= l && r <= qr) {
      return cht[o].query(x);
    }
    int mid = (l + r) >> 1, lson = o * 2 + 1, rson = o * 2 + 2;
    if(qr <= mid) return query(l, mid, ql, qr, x, lson);
    else if(mid <= ql) return query(mid, r, ql, qr, x, rson);
    else return max(query(l, mid, ql, mid, x, lson), query(mid, r, mid, qr, x, rson));
  }
}

ll n, a[N], sum[N];

void init() {
  cin >> n;
  for(int i = 1 ; i <= n ; ++i) {
    cin >> a[i], sum[i] = sum[i - 1] + a[i];
    segtree::insert(1, n + 1, i, a[i], a[i] * i - sum[i]);
  }
}
void solve() {
  int q; cin >> q;
  while(q--) {
    ll x, y; cin >> x >> y;
    cout << sum[y] - segtree::query(1, n + 1, y - x + 1, y + 1, x - y) << '\n';
  }
}

int main() {
  ios_base::sync_with_stdio(0), cin.tie(0);
  init();
  solve();
}

More problems

https://codeforces.com/blog/entry/63823