HDOJ 4080

题目

问:给定长度为 N 的字符串,求其中至少出现 M 次的最长子串的长度,以及该子串最右一次出现的起始位置(下标从 0 开始);若不存在这样的子串则输出 none。

数据范围

$1\leq M \leq N \leq 40000$

做法

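二分答案长度,用 Hash 判断:若某个长度为 L 的子串至少出现 M 次,则它的长度为 L-1 的前缀也至少出现 M 次,因此答案具有单调性。对每个候选长度,用滚动 Hash 统计所有该长度子串的出现次数,并记录出现次数达到 M 的最右起始位置。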

代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#include <bits/stdc++.h>
using namespace std;

// 64-bit integer type used for hash values and their intermediate products.
using ll = long long;
// Pair of hash values; only referenced by the disabled double-hash variant.
using pll = pair<ll, ll>;

// Capacity of the fixed-size global arrays below.
const int MAX_N = 40005;
// Base of the polynomial rolling hash.
const ll P = 10000009;
// Modulus of the polynomial rolling hash.
const ll MOD = 1000000007;
// const ll MOD2 = 1000000009;  // second modulus for the disabled double hash

// M: required number of occurrences (read from input).
// N: length of the current string.
int M;
int N;

// Current input string.
char s[MAX_N];

// hsh[i]: rolling hash of the first i characters of s.
ll hsh[MAX_N];
// ll hsh2[MAX_N];

// pow_p[i]: P^i modulo MOD.
ll pow_p[MAX_N];
// ll pow_p2[MAX_N];

// Occurrence counter keyed by substring hash; cleared by every Check() call.
map<ll, int> cnt;
// map<pll, int> cnt;

// pos[len]: last start index at which Check(len) saw a count reach M.
int pos[MAX_N];
// Returns the rolling hash of the half-open range s[l, r), as a value in
// [0, MOD). Relies on the prefix table hsh and the power table pow_p having
// been filled in beforehand.
inline ll GetHash(int l, int r)
{
    // Scale the prefix hash at l by P^(r - l) so it lines up with hsh[r].
    const ll shifted_prefix = hsh[l] * pow_p[r - l] % MOD;
    const ll diff = (hsh[r] - shifted_prefix) % MOD;
    // '%' keeps the sign of the dividend, so fold a negative result back up.
    return diff < 0 ? diff + MOD : diff;
}
// inline ll GetHash2(int l, int r)
// {
// ll tmp = (hsh2[r] - hsh2[l] * pow_p2[r - l] % MOD2) % MOD2;
// if (tmp < 0) tmp += MOD2;
// return tmp;
// }
// Tests whether some window of length `len` in s is seen at least M times,
// where two windows count as equal when their GetHash values are equal.
// Side effects: resets cnt, and stores in pos[len] the largest start index at
// which a window's running count was >= M (pos[len] is untouched otherwise).
bool Check(int len)
{
    cnt.clear();
    int best = -1;  // highest running count observed so far
    for (int start = 0; start + len <= N; ++start) {
        int& slot = cnt[GetHash(start, start + len)];
        ++slot;
        const int seen = slot;
        if (seen > best) {
            best = seen;
        }
        // Starts are visited left to right, so the last write is the rightmost.
        if (seen >= M) {
            pos[len] = start;
        }
    }
    return best >= M;
}
// Binary search over candidate lengths in the half-open range [lb, ub).
// Each probe moves lb up to mid when Check(mid) succeeds and ub down to mid
// otherwise; returns lb once the two bounds are adjacent. lb itself is never
// passed to Check.
int Bs(int lb, int ub)
{
    while (ub - lb > 1) {
        const int mid = (lb + ub) / 2;
        // Pick whichever bound this probe tightens, then move it to mid.
        int& bound = Check(mid) ? lb : ub;
        bound = mid;
    }
    return lb;
}
// Entry point. Reads test cases until M == 0 or the input ends. Each case is
// an occurrence threshold M followed by a string. For each case it prints
// "<length> <position>" for the longest substring found at least M times
// (position = rightmost start index recorded by Check), or "none".
int main()
{
    // The "%40004s" width below must leave room in s for the terminating NUL.
    static_assert(MAX_N > 40004, "s must hold 40004 characters plus the terminator");

    // Precompute the powers of the hash base once for all test cases.
    pow_p[0] = 1;
    for (int i = 1; i < MAX_N; ++i) pow_p[i] = pow_p[i - 1] * P % MOD;
    // pow_p2[0] = 1;
    // for (int i = 1; i < MAX_N; ++i) pow_p2[i] = pow_p2[i - 1] * P % MOD2;

    // Stop on the 0 sentinel and also on EOF or malformed input. Without the
    // return-value check, M would keep its previous non-zero value on EOF and
    // the loop would never terminate.
    while (scanf("%d", &M) == 1 && M != 0) {
        // Bounded read: an over-long token can no longer overflow s.
        if (scanf("%40004s", s) != 1) break;
        N = static_cast<int>(strlen(s));

        // Build prefix hashes: hsh[i] covers the first i characters of s.
        hsh[0] = 0;
        // hsh2[0] = 0;
        for (int i = 1; i <= N; ++i) {
            hsh[i] = (hsh[i - 1] * P + s[i - 1]) % MOD;
            // hsh2[i] = (hsh2[i - 1] * P + s[i - 1]) % MOD2;
        }

        // Search lengths in [0, N + 1); a result of 0 means no length passed.
        const int max_len = Bs(0, N + 1);
        if (max_len != 0) {
            printf("%d %d\n", max_len, pos[max_len]);
        } else {
            puts("none");
        }
    }
    return 0;
}

总结

  1. 看清题意再做 这不是废话吗
  2. Hash的基数取$10^6$ 到$10^8$间的质数,比如23456789或者19961993,模数取大质数,比如$10^9+9$ 。小质数($10^5$级别的)冲突的概率很大。
  3. Double Hash更加保险,模数可以取$10^9+7$和$10^9+9$这两个孪生素数,冲突的概率极低,很稳。
  4. 追求更稳的话还可以Triple Hash, Ultra Hash, Rampage Hash