Blackops

初心易得,始终难守

0%

HDU 4117 GRE Words(AC自动机构造fail树+线段树|后缀数组+DP)

GRE Words

Time Limit: 30000/15000 MS (Java/Others) Memory Limit: 32768/32768 K (Java/Others)
Total Submission(s): 4539 Accepted Submission(s): 446

Problem Description
Recently George is preparing for the Graduate Record Examinations (GRE for short). Obviously the most important thing is reciting the words.
Now George is working on a word list containing N words.
He has so poor a memory that it is too hard for him to remember all of the words on the list. But he does find a way to help him to remember. He finds that if a sequence of words has a property that for all pairs of neighboring words, the previous one is a substring of the next one, then the sequence of words is easy to remember.
So he decides to eliminate some words from the word list first to make the list easier for him. Meantime, he doesn’t want to miss the important words. He gives each word an importance, which is represented by an integer ranging from -1000 to 1000, then he wants to know which words to eliminate to maximize the sum of the importance of remaining words. Negative importance just means that George thought it useless and is a waste of time to recite the word.
Note that although he can eliminate any number of words from the word list, he can never change the order between words. In another word, the order of words appeared on the word list is consistent with the order in the input. In addition, a word may have different meanings, so it can appear on the list more than once, and it may have different importance in each occurrence.

Input
The first line contains an integer T(1 <= T <= 50), indicating the number of test cases.
Each test case contains several lines.
The first line contains an integer N ($1 \le N \le 2 \times 10^4$), indicating the number of words.
Then N lines follows, each contains a string Si and an integer Wi, representing the word and its importance. Si contains only lowercase letters.
You can assume that the total length of all words will not exceed $3 \times 10^5$.

Output
For each test case in the input, print one line: “Case #X: Y”, where X is the test case number (starting with 1) and Y is the largest importance of the remaining sequence of words.

Sample Input
1
5
a 1
ab 2
abb 3
baba 5
abbab 8

Sample Output
Case #1: 14

题目链接:HDU 4117
后缀数组:据说题目是随机数据,后缀数组是碰巧能过,否则人造数据肯定T;
还是用AC自动机比较的好(这题AC自动机简直是噩梦,debug了一天,最后发现是各种小错误引起的不是ME就是WA,果然还是太太太蒻了)
题意就是类似于带权的最长上升子序列,而这里的最长上升子序列的条件是前者是后者的子串,用$dp[i]$表示以第$i$个字符串结尾的最大权值;
由于可以利用区间最小值与区间长度之间的单调性关系优化一点复杂度,从小到大枚举当前位置的$rank_i$,然后在$1 \sim rank_i-1$与$rank_i+1 \sim rank_{len}$之间找到顺序在$i$之后的字符串$str_j$,判断$str_i$是否是$str_j$的子串,当$LCP(str_i,str_j)<Len[i]$时说明当前字符串已经不能作为子串了,直接break,否则就有:
$$dp[j]=\max(dp[j],\ dp[i]+val[j])$$

说说AC自动机吧,没做这道题之前还不知道fail指针还有这种用法,先想想fail指针指向的两头有什么关系?记根节点到某一个点$x$的路径所形成的字符串为$str_x$,显然如果节点$u$的fail指针指向$v$,那么$str_v$是$str_u$在Trie中出现的最长真后缀;那如果把fail指针反一下(得到fail树),可以发现$str_v$是所有指向它的$str_u$的后缀,即$v$的子树都是它可以成为子串的目标,那如果当$v$是结束点时,它就是所指向的$str_u$的子串了,那么状态可以这样转移:从成为结尾节点的$str_v$转移到$str_u$,但是这样会漏掉重复字符串的情况,即路径上可能有多个结束节点,那么就要把整个路径遍历一遍,然后把它的价值转移下去,遍历某一个字符串的时候维护从根节点到当前节点$u$的路径上的最大价值,最后结尾时更新到结尾节点的子树里
后缀数组代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
#include <stdio.h>
#include <iostream>
#include <algorithm>
#include <cstdlib>
#include <cstring>
#include <bitset>
#include <string>
#include <stack>
#include <cmath>
#include <queue>
#include <set>
#include <map>
using namespace std;
// "Infinity" sentinel; used below as the initial value of a running minimum.
#define INF 0x3f3f3f3f
// Helper macros. LC/RC/MID do not parenthesise their arguments, so pass only
// simple identifiers. LC, RC, MID, fin, fout, CLR and FAST_IO are not
// referenced by the code visible in this listing.
#define LC(x) (x<<1)
#define RC(x) ((x<<1)+1)
#define MID(x,y) ((x+y)>>1)
#define fin(name) freopen(name,"r",stdin)
#define fout(name) freopen(name,"w",stdout)
#define CLR(arr,val) memset(arr,val,sizeof(arr))
#define FAST_IO ios::sync_with_stdio(false);cin.tie(0);
typedef pair<int, int> pii;
typedef long long LL;
const double PI = acos(-1.0);
// Capacity of the concatenated text and of every per-position array.
// Presumably 3e5 characters of words plus one separator per word (2e4),
// with a little slack -- confirm against the problem limits.
const int N = 3e5 + 7 + 2e4;
// Capacity of the per-word arrays.
const int M = 2e4 + 7;
// All words of one test case, each followed by '$', terminated by '\0'.
char s[N];
// wa/wb: rank and scratch buffers of DA(); sa: suffix array; cnt: counting-sort buckets.
int wa[N], wb[N], sa[N], cnt[N];
// ran[p]: index of the suffix starting at p inside sa[]; height[r]: common
// prefix length of the suffixes sa[r-1] and sa[r]; ID[p]: index of the word
// that covers text position p (its trailing '$' included).
int ran[N], height[N], ID[N];
// Per word: start offset in s, importance, length.
int st[M], val[M], Len[M];
// dp[i]: best total importance of a chain that ends with word i.
int dp[M];

// Tells whether positions a and b carry the same key pair in r:
// (r[a], r[a + d]) versus (r[b], r[b + d]).
// No bounds checking is done; the caller must keep a + d and b + d valid
// whenever the first components are equal.
inline bool cmp(int r[], int a, int b, int d)
{
    if (r[a] != r[b])
        return false;
    return r[a + d] == r[b + d];
}
void DA(int n, int m)
{
int i, *x = wa, *y = wb;
for (i = 0; i < m; ++i)
cnt[i] = 0;
for (i = 0; i < n; ++i)
++cnt[x[i] = s[i]];
for (i = 1; i < m; ++i)
cnt[i] += cnt[i - 1];
for (i = n - 1; i >= 0; --i)
sa[--cnt[x[i]]] = i;
for (int k = 1; k <= n; k <<= 1)
{
int p = 0;
for (i = n - k; i < n; ++i)
y[p++] = i;
for (i = 0; i < n; ++i)
if (sa[i] >= k)
y[p++] = sa[i] - k;
for (i = 0; i < m; ++i)
cnt[i] = 0;
for (i = 0; i < n; ++i)
++cnt[x[y[i]]];
for (i = 1; i < m; ++i)
cnt[i] += cnt[i - 1];
for (i = n - 1; i >= 0; --i)
sa[--cnt[x[y[i]]]] = y[i];
swap(x, y);
x[sa[0]] = 0;
p = 1;
for (i = 1; i < n; ++i)
x[sa[i]] = cmp(y, sa[i - 1], sa[i], k) ? p - 1 : p++;
m = p;
if (m >= n)
break;
}
}
// Fills ran[] and height[] from the suffix array sa[0..n].
//   ran[p]    - index r such that sa[r] == p, for r in 1..n
//   height[r] - length of the common prefix of suffixes sa[r - 1] and sa[r]
// Text positions are visited in increasing order and the previous match
// length minus one is reused as the starting point of the next comparison.
// The character comparison has no explicit bound: it stops at the first
// mismatch in s.
void gethgt(int n)
{
    for (int r = 1; r <= n; ++r)
        ran[sa[r]] = r;

    int lcp = 0;
    for (int pos = 0; pos < n; ++pos)
    {
        if (lcp != 0)
            lcp -= 1;
        const int neighbour = sa[ran[pos] - 1];
        for (; s[neighbour + lcp] == s[pos + lcp]; ++lcp)
        {
        }
        height[ran[pos]] = lcp;
    }
}
int main(void)
{
int T, n, i, j;
scanf("%d", &T);
for (int q = 1; q <= T; ++q)
{
scanf("%d", &n);
int sum = 0;
for (i = 0; i < n; ++i)
{
scanf("%s%d", s + sum, &val[i]);
dp[i] = val[i];
Len[i] = strlen(s + sum);
st[i] = sum;
s[sum + Len[i]] = '$';
int ed = sum + Len[i];
for (j = sum; j <= ed; ++j)
ID[j] = i;
sum += Len[i] + 1;
}
s[sum] = '\0';
DA(sum + 1, 'z' + 1);
gethgt(sum);
for (i = 0; i < n; ++i)
{
int mh = INF;
for (j = ran[st[i]] - 1; j >= 1; --j)
{
mh = min(mh, height[j + 1]);
if (mh < Len[i])
break;
int v = ID[sa[j]];
if (v > i)
dp[v] = max(dp[v], dp[i] + val[v]);
}
mh = INF;
for (j = ran[st[i]] + 1; j <= sum; ++j)
{
mh = min(mh, height[j]);
if (mh < Len[i])
break;
int v = ID[sa[j]];
if (v > i)
dp[v] = max(dp[v], dp[i] + val[v]);
}
}
int ans = 0;
for (i = 0; i < n; ++i)
ans = max(ans, dp[i]);
printf("Case #%d: %d\n", q, ans);
}
return 0;
}

AC自动机代码(注意pushdown不要写错):

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
#include <stdio.h>
#include <iostream>
#include <algorithm>
#include <cstdlib>
#include <cstring>
#include <bitset>
#include <string>
#include <stack>
#include <cmath>
#include <queue>
#include <set>
#include <map>
using namespace std;
// "Infinity" sentinel; not referenced by the code visible in this listing.
#define INF 0x3f3f3f3f
// Segment-tree index helpers: left child, right child, midpoint of a range.
// The arguments are not parenthesised, so pass only simple identifiers.
#define LC(x) (x<<1)
#define RC(x) ((x<<1)+1)
#define MID(x,y) ((x+y)>>1)
// fin, fout, CLR and FAST_IO are not referenced by the code visible in this listing.
#define fin(name) freopen(name,"r",stdin)
#define fout(name) freopen(name,"w",stdout)
#define CLR(arr,val) memset(arr,val,sizeof(arr))
#define FAST_IO ios::sync_with_stdio(false);cin.tie(0);
typedef pair<int, int> pii;
typedef long long LL;
const double PI = acos(-1.0);
// Capacity of the per-word weight array.
const int N = 2e4 + 7;
// Capacity of the per-trie-node arrays; presumably the 3e5 total-length limit
// of the problem plus slack -- confirm against the statement.
const int M = 3e5 + 7;
// One node of the trie / Aho-Corasick automaton. L is the node pool.
struct Trie
{
    int nxt[26]; // target node per lowercase letter, -1 while no edge exists
    int fail;    // index of the node the fail pointer refers to

    // Puts the node into the "no children, fail -> node 0" state.
    void init()
    {
        std::fill(nxt, nxt + 26, -1);
        fail = 0;
    }
} L[M];
// Segment-tree node over a closed index range. T is the node pool, addressed
// heap-style: the children of node k are LC(k) and RC(k).
struct seg
{
    int l;    // left end of the covered range
    int mid;  // split point, MID(l, r)
    int r;    // right end of the covered range
    int v;    // maximum value stored within the range
    int flag; // pending "raise to at least flag" tag; 0 means nothing pending
} T[M * 4];
// Directed edge stored in a head[]/nxt linked adjacency list. E is the edge pool.
struct edge
{
    int to;  // node the edge points to
    int nxt; // index in E of the next edge with the same source, -1 at the end

    // Leaves both fields unset.
    edge() {}

    edge(int target, int following)
    {
        to = target;
        nxt = following;
    }
} E[M];
// Number of trie nodes in use; node 0 is the root.
int sz;
// head[u]: index in E of the first edge leaving u (-1 if none); tot: edges in use.
int head[M], tot;
// in[u]/out[u]: first and last DFS number inside u's subtree; idx: DFS counter;
// w[i]: importance of word i.
int in[M], out[M], idx, w[N];
// Words of the current test case, in input order.
vector<string>str;

// Resets all per-test-case state: empties the adjacency lists, the edge and
// DFS counters and the stored words, and recreates the trie with a single
// root node at index 0.
void init()
{
    std::fill(head, head + M, -1);
    tot = 0;
    idx = 0;
    str.clear();

    sz = 0;
    L[sz].init();
    ++sz;
}
// Aho-Corasick automaton built on the global node pool L / counter sz.
namespace ac
{
// Inserts the first len characters of s into the trie, creating nodes on
// demand. Characters are mapped to child slots by s[i] - 'a'.
void ins(const string &s, int len)
{
    int cur = 0;
    for (int i = 0; i < len; ++i)
    {
        int &slot = L[cur].nxt[s[i] - 'a'];
        if (slot == -1)
        {
            L[sz].init();
            slot = sz++;
        }
        cur = slot;
    }
}

// Computes the fail pointers breadth-first and fills every missing
// transition with the transition of the fail node, so that afterwards
// nxt[] is defined for each node and letter.
void build()
{
    std::queue<int> pending;

    // Depth 1: children of the root fail to the root; absent letters loop to it.
    for (int c = 0; c < 26; ++c)
    {
        int &child = L[0].nxt[c];
        if (child == -1)
        {
            child = 0;
            continue;
        }
        L[child].fail = 0;
        pending.push(child);
    }

    while (!pending.empty())
    {
        const int u = pending.front();
        pending.pop();
        const int fallback = L[u].fail;
        for (int c = 0; c < 26; ++c)
        {
            int &child = L[u].nxt[c];
            if (child == -1)
            {
                child = L[fallback].nxt[c];
            }
            else
            {
                L[child].fail = L[fallback].nxt[c];
                pending.push(child);
            }
        }
    }
}
}
void build(int k, int l, int r)
{
T[k].l = l;
T[k].r = r;
T[k].mid = MID(l, r);
T[k].v = 0;
T[k].flag = 0;
if (l == r)
return ;
build(LC(k), l, T[k].mid);
build(RC(k), T[k].mid + 1, r);
}
// Recomputes the value of node k as the larger of its two children's values.
void pushup(int k)
{
    const int left = T[LC(k)].v;
    const int right = T[RC(k)].v;
    T[k].v = left < right ? right : left;
}
void pushdown(int k)
{
if (T[k].flag)
{
T[LC(k)].flag = max(T[LC(k)].flag, T[k].flag);
T[RC(k)].flag = max(T[RC(k)].flag, T[k].flag);
T[LC(k)].v = max(T[LC(k)].v, T[k].flag);
T[RC(k)].v = max(T[RC(k)].v, T[k].flag);
T[k].flag = 0;
}
}
void update(int k, int l, int r, int v)
{
if (l <= T[k].l && T[k].r <= r)
{
T[k].v = max(T[k].v, v);
T[k].flag = max(T[k].flag, v);
}
else
{
pushdown(k);
if (r <= T[k].mid)
update(LC(k), l, r, v);
else if (l > T[k].mid)
update(RC(k), l, r, v);
else
update(LC(k), l, T[k].mid, v), update(RC(k), T[k].mid + 1, r, v);
pushup(k);
}
}
// Point query: returns the value stored at position x, descending from node k
// and pushing pending tags down along the way.
int query(int k, int x)
{
    while (T[k].l != T[k].r)
    {
        pushdown(k);
        k = (x <= T[k].mid) ? LC(k) : RC(k);
    }
    return T[k].v;
}
// Adds the directed edge s -> t at the front of s's adjacency list.
inline void add(int s, int t)
{
    const int id = tot++;
    E[id].to = t;
    E[id].nxt = head[s];
    head[s] = id;
}
// Numbers the tree rooted at u in pre-order.
//   u - root of the traversal
//   f - node that must not be entered from u (pass -1 for "none")
// After the call, in[x] is the visit number of x and out[x] is the largest
// visit number inside x's subtree, so that subtree is exactly the range
// [in[x], out[x]]. idx is advanced by the number of visited nodes.
//
// The traversal keeps its own stack instead of recursing: the tree depth can
// be as large as the number of nodes (one long chain), which would exhaust
// the call stack of a recursive version. Visit order and numbering are the
// same as with plain recursion over the adjacency lists.
void dfs(int u, int f)
{
    struct Frame
    {
        int node;   // node being expanded
        int parent; // node it was entered from
        int edge;   // next edge index of `node` to look at, -1 when done
    };

    std::stack<Frame> pending;
    in[u] = ++idx;
    const Frame root = {u, f, head[u]};
    pending.push(root);

    while (!pending.empty())
    {
        Frame &top = pending.top();
        if (top.edge == -1)
        {
            // All children handled: the subtree ends at the current counter.
            out[top.node] = idx;
            pending.pop();
            continue;
        }

        const int v = E[top.edge].to;
        const int from = top.node;
        const int skip = top.parent;
        top.edge = E[top.edge].nxt; // advance before the stack changes
        if (v != skip)
        {
            in[v] = ++idx;
            const Frame child = {v, from, head[v]};
            pending.push(child);
        }
    }
}
int main(void)
{
int tcase, n, i, j, u, maxv;
scanf("%d", &tcase);
for (int q = 1; q <= tcase; ++q)
{
init();
scanf("%d", &n);
string Str;
for (i = 0; i < n; ++i)
{
cin >> Str >> w[i];
str.emplace_back(Str);
ac::ins(Str, Str.length());
}
ac::build();
for (i = 1; i < sz; ++i)
add(L[i].fail, i);
dfs(0, -1);
build(1, 1, idx);
int ans = 0;
for (i = 0; i < n; ++i)
{
if (w[i] <= 0)
continue;
maxv = 0;
u = 0;
int len = str[i].length();
for (j = 0; j < len; ++j)
{
u = L[u].nxt[str[i][j] - 'a'];
maxv = max(maxv, query(1, in[u]));
}
update(1, in[u], out[u], maxv + w[i]);
ans = max(ans, maxv + w[i]);
}
printf("Case #%d: %d\n", q, ans);
}
return 0;
}