Yao Lirong's Blog

Tsinghua DSA 作业总结 (1)

2021/02/09

CST数据结构(2020秋)PA1a

1-1 A*B Problem

心得

  1. 每个数组存一位的话速度太慢过不了,必须压位
  2. 10e5 是 10 * 10^5 所以是 10^6 …
  3. 我们把一个大整数分成几块存在数组里的时候,如果这个数头上有0的话,0就会被忽略了(不压位的话没有这个问题,因为0也就是1位)比如4位4位存,100046000303025会变成[3025, 30, 460, 100],直接输出会变成100460303025,明显不对,所以我们输出的时候要记得补全0

代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
#include<iostream>
#include<cstdio>
#include<cstring>
using namespace std;
// N is the maximum number of digits of the input, M is the maximum number of digits of the product
const int N = 5007, M = 10023;
// we multiply each 10000 together and store them in a single entry, 10e3 has 4 0s .
const int ten = 10e3, d_ten = 4;
// a is input A in e4 base, b is input B in e4 base, c is their product in e4 base.
int a[N], b[N], c[M];
// a_len is the number of entires in a needed to store input A, so it's the digit needed to store A in e4 base; b_len is that for B
int a_len, b_len;
// digit is a helper array we will need during multiplication
int digit[] = {1,10,100,1000,10000};
// inputA and inputB is A and B read in from stream
char inputA[N], inputB[N];
// multiplies e4 base A and B together and store the result in M
void multiply();
// returns the length of product in e4 base
int get_clen();
// print padding zero in each entry of array C when outputting the result
void print_padding_zero(int,int);
int main()
{
int n = 0;
scanf("%d",&n);
for (int i=0; i<n; i++){
memset(a,0,sizeof(a));
memset(b,0,sizeof(b));
memset(c,0,sizeof(c));
memset(inputA,0,sizeof(inputA));
memset(inputB,0,sizeof(inputB));

scanf("%s",inputA); int ina_len = strlen(inputA); a_len = (ina_len-1)/d_ten + 1;
scanf("%s",inputB); int inb_len = strlen(inputB); b_len = (inb_len-1)/d_ten + 1;


// store the number in reverse order and e4 baes in array a,b
for (int i=ina_len-1; i>=0;){
int tostore = 0;
for (int j=0; j<d_ten && i-j>=0 ; j++){
int ASCII = inputA[i-j] - '0';
tostore += ASCII*digit[j];
}
a[a_len-1 - i/d_ten] = tostore;
i = i-d_ten;
}

for (int i=inb_len-1; i>=0;){
int tostore = 0;
for (int j=0; j<d_ten && i-j>=0 ; j++){
int ASCII = inputB[i-j] - '0';
tostore += ASCII*digit[j];
}
b[b_len-1 - i/d_ten] = tostore;
i = i-d_ten;
}

multiply();

int c_index = get_clen();
printf("%d", c[c_index]);
for (int i=c_index-1; i>=0; i--){
print_padding_zero(c[i], ten/10);
printf("%d", c[i]);
}
printf("\n");

}



return 0;
}

void multiply()
{
for(int i=0; i<a_len; i++){
for(int j=0; j<b_len; j++){
int product = a[i] * b[j];
c[i+j] += product;
c[i+j+1] += c[i+j] / ten; // carry over digit
c[i+j] %= ten; // only stores e4 base number
}
}
}

inline int get_clen()
{
int n = a_len + b_len + 3;
while (c[n]==0 && n>0) n--;
return n;
}

/**
* Each entry in array C should store an e4 base number, but sometimes it stores a number smaller than that.
* That's because it ignores the leading 0s (leading 0s in reversely stored C) when in this case 00XX.
* Example: 1004 0030 57 stored in C has form [57, 30, 1004], this function helps print out the first 00 in 0030
**/
inline void print_padding_zero(int n, int digit){
if (n<digit) printf("0");
if (digit == 10) return;
print_padding_zero(n,digit/10);
}

Reference

  1. 高精度乘法
  2. 高精度乘法的压位

1-3 Filename

心得

编辑距离,移动窗口节省空间

  1. 字符串的读入:一开始以为是 getline 的问题,读不进来字符串,实际上是因为仅用 scanf 读入3个整数后会留下一个换行符在 buffer 中。使用 cin.ignore(100,'\n') 删除换行符(忽略 100 个字符,或者忽略1个 '\n';忽略所有读入,直到总共忽略了 100 个字符,或者忽略了 1 个换行符)

  2. dp数组开到 $1001^2$ 会爆炸,改用滚动窗口,dp[0][j] 表示 $ x_1x_2…x_{i-1}$ 到 $y_1 y_2 … y_j$ (前一步)所需要的操作,dp[1][j] 表示 $ x_1x_2…x_{i-1}$ 到 $y_1 y_2 … y_j$ (这一步)所需要的操作。几个实现细节在注释中已标出

  3. TLE: 第一感觉想的是在每次计算 $i, s.t. x_1x_2…x_i$ 变成 $y$ 所需要的操作后(即计算完成 dp[i][0...m]后),扫一遍数组,如果所有值都大于 k 的话,就停止查找。但是这样不会对数据规模有任何可观的缩减,因为如果说我们有长为 $10^5$ 的两个字符串,并且他们可以在 k 操作内互相转换(极端情况两个相同的字符串),我们仍然需要进行 $O(mn) = 10^5 \times 10^5$ 次操作。

    根据习题课的解决方法,其实当两个数组间的长度差超过 k 时就绝对不可能从一个转换成另一个了。所以,对于每个 $x$ 的子串 $x_1x_2…x_i$,我们只需要看 $y_{i-k} y_{i-k+1} … y_{i+k}$ 就可以了。当然还要注意 $i-k, i+k$别越界,所以实际上是看 $y_{min(1,i-k)} … y_{max(m,i+k)}$ 这个子序列

  4. 这个改动会造成一些 WA,直觉一下子想到是有可能在最后退出循环时,我们因为 k 的限制压根就没扫到 dp[n][m](= dp[1][m])。实际上问题差不多,是因为 dp[i][j] = min(dp[i-1][j], dp[i][(j-1)]) + 1; 这句话中 dp[0][j] 我们一开始全初始化为0,对于 dp[i][i+k] 这个位置,它的一种方案 dp[i-1][i+k] 永远不会被上一步更新到,因为上一步只更新 dp[i-1][(i-1)-k] ~ dp[i-1][(i-1)+k]dp[i-1][i+k] 恒等于0,即 dp[i][i+k] 永远会采取 dp[i-1][i+k] 这一方案。将 dp[0][j] 初始化为 infinity 解决

代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
#include<iostream>
#include<cstdio>
using namespace std;

const int N = 501007, Inf = 501; // Inf is "effective infinite" : k<=500
char a[N], b[N]; // a, b store the string x, y
int dp[2][N]; // For each iteration i, dp[0] is equivalent to dp[i-1], dp[1] is equivalent to dp[i]

int main()
{
int n,m,k; //length of x, length of y, max number of operations
scanf("%d%d%d", &n, &m, &k);
cin.ignore(100,'\n');
cin.getline(a+1, n+1);
cin.getline(b+1, m+1);

// initialize dp[0][j] to be the operations needed to edit an empty string to y1y2...yj
// initialize the rest of the array to be infinite
for(int j=0;j<=m;j++) dp[0][j] = j, dp[1][j] = Inf;


for(int i=1; i<=n; i++) { // start from the 1st character

// dp[0][0] represents dp[i-1][0] in an ordinary dp array
// dp[i-1][0] represents the distance between x1x2...x_{i-1} to the empty string
dp[0][0] = i-1;


for(int j=max(1, i-k);j<=min(m,i+k);j++){ // only looks at y[i-k] to y[i+k]
if(a[i]==b[j])
dp[1][j] = dp[0][(j-1)];
else
dp[1][j] = min(dp[0][j], dp[1][(j-1)]) + 1;
}

// "previous" of next iteration i+1 is current value from this iteration i
for(int j=max(1, i-k);j<=min(m,i+k);j++)
dp[0][j] = dp[1][j];

}

printf("%d\n", dp[1][m]<=k ? dp[1][m] : -1);

return 0;
}

Reference

  1. C++ cin.ignore()的用法详解
  2. C++ cin>> cin.get() cin.getline()

1.4 Risk

心得

Queap, 二分搜索

  1. 每次询问的时候,假设我们要看前 m 天,现在的 Queap 中存了 qsize 天,如果 qsize>m 的话,我们存了一些没必要看的天,那么我们就需要把这些没必要的天给推出去,所以看出来我们需要推出 qsize-m 个没必要的天。然而我的实现在 dequeapenqueap 时会实时更新 qsize 所以实际上 Queap 只会弹出大概一半的元素,会造成很大的问题。所以我们必须先记录 qsize-m 然后再更新
  2. 最后的T次询问是对已经有的数据,询问有多少在相应的区间内。我们这里可以使用排序后二分查找区间分界点的位置,而不是对于每个元素都看是在哪个区间内。这样可以大大缩短时间

代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
#define _CRT_SECURE_NO_WARNINGS

#include<iostream>
#include<cstdio>
using namespace std;

// elements in queue/queap are the number of infections of previous days
// size of queue/queap is the number of days we need to keep track of

// Queue Node
struct qNode {
long long value;
qNode* next, * prev;
};
qNode *qHead = new qNode, * qTail = new qNode;

// Queap Node; Invariant: node n in a queap [h...n...t] is the max element in interval in [h...n]
struct pNode {
long long value, num;
pNode* next, * prev;
};
pNode *pHead = new pNode, * pTail = new pNode;

long long qsize = 0;

// enqueue value v into Q;
// update "max value available" in queap P
void enqueap(long long v) {
qNode* q = new qNode; pNode* p = new pNode;
qsize += 1;

q->value = v, q->next = qHead->next; q->prev = qHead;
qHead->next->prev = q; qHead->next = q;

pNode* i = pHead->next; int num = 0;
while (i != pTail) {
if (v >= i->value) { num += i->num; i = i->next; delete i->prev;}
else break;
} // i is the first interval max greater than inserted value

// replace everything between pHead and i with the newly inserted value
if (num > 0) {
p->value = v; p->num = num+1;
p->next = i; p->prev = pHead;
i->prev = p; pHead->next = p;
}
else { // num==0 says v < pHead->next, insert p in between Head and Head->next
p->value = v; p->num = 1;
p->next = i; p->prev = pHead;
i->prev = p; pHead->next = p;
}

return;
}

// dequeue one element from Q and from P
void dequeap() {
if (qsize <=0) return;

qsize -= 1;

qNode *qDel = qTail->prev;
qTail->prev = qTail->prev->prev;
qTail->prev->next = qTail;
delete qDel;

pNode *pDel = pTail->prev;
if (pDel->num > 1) pDel->num--; //there is still element after deletion
else if (pDel->num == 1){ //only one element left, there will be 0 elements after this deletion, so let's just delete this node altogether
pTail->prev = pDel->prev;
pDel->prev->next = pTail;
delete pDel;
}
else throw "0 element in pNode error"; // there is no element in the top node, which is not supposed to happen

return;
}

// returns the Max value in the Queap; returns 0 if Queap is empty
inline long long getMax() {
return qsize>0 ? pTail->prev->value : 0;
}

// print both queue and queap for debugging purposes
void printQueue() {
qNode* q = new qNode; q = qHead->next;
pNode* p = new pNode; p = pHead->next;

cout<<"printing out queue Q:"<<endl;
while (q != qTail) {
cout << q->value << " ";
q = q->next;
} cout << endl;


cout<<"printing out queap P:"<<endl;
while (p != pTail) {
long long v = p->value;
for (int i = 0; i < p->num; i++) {
cout << v << " ";
}
p = p->next;
} cout << endl;
}

// cmp function for qsort
inline int sort_cmp (const void * a, const void * b)
{
return ( *(long long*)a - *(long long*)b );
}

//returns the last element <=g in [l,r]
int bisearch(const long long a[], int l, int r, long long g) {
int mid = -1;
while(l<r) {
mid = (l+r)>>1;
a[mid] <= g ? l=mid+1 : r=mid;
}
return l-1;
}


long long observed[1000007]; int input[1000007];
int main()
{
// initializes Q and P
qHead->next = qTail, qTail->prev = qHead;
pHead->next = pTail, pTail->prev = pHead;


int n; scanf("%d", &n);
for (int i = 0; i < n; i++) {
scanf("%d", &input[i]);
}

// For each day, we first check which day is the earliest day we have to keep track of
// If there are some days we no longer have to keep track of, we dequeue them from the queap and queue
// Then record the maximum infection number maintained by queap and enqueue the infection number of today
for (int i = 0; i < n; i++) {
long long m; scanf("%lld", &m);
long long num = qsize-m;
for(long long j=0; j < num; j++){
dequeap();
}
observed[i] = getMax();

enqueap(input[i]);
}

qsort(observed, n, sizeof(long long), sort_cmp);

// There are T queries, always on already observed infection number
// We only care about number of days in a certain range, not the date or any other information
// Therefore, we can use binary search to get the number of days in this given range.
int T;
scanf("%d", &T);
for (int i=0; i<T; i++){
long long p,q;
scanf("%lld%lld", &p, &q);
int pnum = 0, qnum = 0;
pnum = bisearch(observed,0,n,p-1) + 1; // <p \equiv <=p-1; returns the index of the last element <p, so there are index+1 elements
qnum = bisearch(observed,0,n,q-1) - pnum + 1; // similarly, <q \equiv <=q-1; bisearch(observed,0,n,q-1) - bisearch(observed,0,n,p-1) gives the number of elements in range [p,q)
printf("%d %d\n", pnum, qnum);
}

return 0;

}

Reference

  1. 双向链表(结构体+指针)
  2. 定义结构体变量及初始化; 结构体定义变量的三种方法
  3. unsigned long long int scanf
CATALOG
  1. 1. 1-1 A*B Problem
    1. 1.1. 心得
    2. 1.2. 代码
    3. 1.3. Reference
  2. 2. 1-3 Filename
    1. 2.1. 心得
    2. 2.2. 代码
    3. 2.3. Reference
  3. 3. 1.4 Risk
    1. 3.1. 心得
    2. 3.2. 代码
    3. 3.3. Reference