Tsinghua DSA 作业总结 (1)

Tsinghua

 2021/02/09 

CST数据结构（2020秋）PA1a

1-1 A*B Problem

心得

每个数组存一位的话速度太慢过不了，必须压位
10e5 是 10 * 10^5 所以是 10^6 …
我们把一个大整数分成几块存在数组里的时候，如果这个数头上有0的话，0就会被忽略了（不压位的话没有这个问题，因为0也就是1位）比如4位4位存，100046000303025会变成[3025, 30, 460, 100]，直接输出会变成100460303025，明显不对，所以我们输出的时候要记得补全0

代码

#include<iostream>
#include<cstdio>
#include<cstring>
using namespace std;
// N is the maximum number of digits of the input, M is the maximum number of digits of the product
const int N = 5007, M = 10023;
// we multiply each 10000 together and store them in a single entry, 10e3 has 4 0s .
const int ten = 10e3, d_ten = 4;
// a is input A in e4 base, b is input B in e4 base, c is their product in e4 base.
int a[N], b[N], c[M];
// a_len is the number of entires in a needed to store input A, so it's the digit needed to store A in e4 base; b_len is that for B
int a_len, b_len;
// digit is a helper array we will need during multiplication
int digit[] = {1,10,100,1000,10000};
// inputA and inputB is A and B read in from stream
char inputA[N], inputB[N];
// multiplies e4 base A and B together and store the result in M
void multiply();
// returns the length of product in e4 base
int get_clen();
// print padding zero in each entry of array C when outputting the result
void print_padding_zero(int,int);
int main()
{
    int n = 0;
    scanf("%d",&n);
    for (int i=0; i<n; i++){
        memset(a,0,sizeof(a));
        memset(b,0,sizeof(b));
        memset(c,0,sizeof(c));
        memset(inputA,0,sizeof(inputA));
        memset(inputB,0,sizeof(inputB));

        scanf("%s",inputA); int ina_len = strlen(inputA); a_len = (ina_len-1)/d_ten + 1;
        scanf("%s",inputB); int inb_len = strlen(inputB); b_len = (inb_len-1)/d_ten + 1;


        // store the number in reverse order and e4 baes in array a,b
        for (int i=ina_len-1; i>=0;){
            int tostore = 0;
            for (int j=0; j<d_ten && i-j>=0 ; j++){
                int ASCII = inputA[i-j] - '0';
                tostore += ASCII*digit[j];
            }
            a[a_len-1 - i/d_ten] = tostore;
            i = i-d_ten;
        }

        for (int i=inb_len-1; i>=0;){
            int tostore = 0;
            for (int j=0; j<d_ten && i-j>=0 ; j++){
                int ASCII = inputB[i-j] - '0';
                tostore += ASCII*digit[j];
            }
            b[b_len-1 - i/d_ten] = tostore;
            i = i-d_ten;
        }

        multiply();

        int c_index = get_clen();
        printf("%d", c[c_index]);
        for (int i=c_index-1; i>=0; i--){
            print_padding_zero(c[i], ten/10);
            printf("%d", c[i]);
        }
        printf("\n");

    }



    return 0;
}

void multiply()
{
    for(int i=0; i<a_len; i++){
        for(int j=0; j<b_len; j++){
            int product = a[i] * b[j];
            c[i+j] += product;
            c[i+j+1] += c[i+j] / ten; // carry over digit
            c[i+j] %= ten; // only stores e4 base number
        }
    }
}

inline int get_clen()
{
    int n = a_len + b_len + 3;
    while (c[n]==0 && n>0) n--;
    return n;
}

/**
 * Each entry in array C should store an e4 base number, but sometimes it stores a number smaller than that.
 * That's because it ignores the leading 0s (leading 0s in reversely stored C) when  in this case 00XX.
 * Example: 1004 0030 57 stored in C has form [57, 30, 1004], this function helps print out the first 00 in 0030
 **/
inline void print_padding_zero(int n, int digit){
    if (n<digit) printf("0");
    if (digit == 10) return;
    print_padding_zero(n,digit/10);
}

Reference

1-3 Filename

心得

编辑距离，移动窗口节省空间

字符串的读入：一开始以为是 getline 的问题，读不进来字符串，实际上是因为仅用 scanf 读入3个整数后会留下一个换行符在 buffer 中。使用 cin.ignore(100,'\n') 删除换行符（忽略 100 个字符，或者忽略1个 '\n'；忽略所有读入，直到总共忽略了 100 个字符，或者忽略了 1 个换行符）
dp数组开到 $1001^2$ 会爆炸，改用滚动窗口，dp[0][j] 表示 $ x_1x_2…x_{i-1}$ 到 $y_1 y_2 … y_j$ （前一步）所需要的操作，dp[1][j] 表示 $ x_1x_2…x_{i-1}$ 到 $y_1 y_2 … y_j$ （这一步）所需要的操作。几个实现细节在注释中已标出
TLE: 第一感觉想的是在每次计算 $i, s.t. x_1x_2…x_i$ 变成 $y$ 所需要的操作后（即计算完成 dp[i][0...m]后），扫一遍数组，如果所有值都大于 k 的话，就停止查找。但是这样不会对数据规模有任何可观的缩减，因为如果说我们有长为 $10^5$ 的两个字符串，并且他们可以在 k 操作内互相转换（极端情况两个相同的字符串），我们仍然需要进行 $O(mn) = 10^5 \times 10^5$ 次操作。

根据习题课的解决方法，其实当两个数组间的长度差超过 k 时就绝对不可能从一个转换成另一个了。所以，对于每个 $x$ 的子串 $x_1x_2…x_i$，我们只需要看 $y_{i-k} y_{i-k+1} … y_{i+k}$ 就可以了。当然还要注意 $i-k, i+k$别越界，所以实际上是看 $y_{min(1,i-k)} … y_{max(m,i+k)}$ 这个子序列
这个改动会造成一些 WA，直觉一下子想到是有可能在最后退出循环时，我们因为 k 的限制压根就没扫到 dp[n][m](= dp[1][m])。实际上问题差不多，是因为 dp[i][j] = min(dp[i-1][j], dp[i][(j-1)]) + 1; 这句话中 dp[0][j] 我们一开始全初始化为0，对于 dp[i][i+k] 这个位置，它的一种方案 dp[i-1][i+k] 永远不会被上一步更新到，因为上一步只更新 dp[i-1][(i-1)-k] ~ dp[i-1][(i-1)+k] 即 dp[i-1][i+k] 恒等于0，即 dp[i][i+k] 永远会采取 dp[i-1][i+k] 这一方案。将 dp[0][j] 初始化为 infinity 解决

代码

#include<iostream>
#include<cstdio>
using namespace std;

const int N = 501007, Inf = 501; // Inf is "effective infinite" : k<=500
char a[N], b[N]; // a, b store the string x, y
int dp[2][N]; // For each iteration i, dp[0] is equivalent to dp[i-1], dp[1] is equivalent to dp[i]

int main()
{
    int n,m,k; //length of x, length of y, max number of operations
    scanf("%d%d%d", &n, &m, &k);
    cin.ignore(100,'\n');
    cin.getline(a+1, n+1);
    cin.getline(b+1, m+1);

    // initialize dp[0][j] to be the operations needed to edit an empty string to y1y2...yj
    // initialize the rest of the array to be infinite
    for(int j=0;j<=m;j++) dp[0][j] = j, dp[1][j] = Inf;


    for(int i=1; i<=n; i++) { // start from the 1st character

        // dp[0][0] represents dp[i-1][0] in an ordinary dp array
        // dp[i-1][0] represents the distance between x1x2...x_{i-1} to the empty string
        dp[0][0] = i-1;


        for(int j=max(1, i-k);j<=min(m,i+k);j++){ // only looks at y[i-k] to y[i+k]
            if(a[i]==b[j])
                dp[1][j] = dp[0][(j-1)];
            else
                dp[1][j] = min(dp[0][j], dp[1][(j-1)]) + 1;
        }

        // "previous" of next iteration i+1 is current value from this iteration i
        for(int j=max(1, i-k);j<=min(m,i+k);j++)
            dp[0][j] = dp[1][j];

    }

    printf("%d\n", dp[1][m]<=k ? dp[1][m] : -1);

    return 0;
}

Reference

1.4 Risk

心得

Queap, 二分搜索

每次询问的时候，假设我们要看前 m 天，现在的 Queap 中存了 qsize 天，如果 qsize>m 的话，我们存了一些没必要看的天，那么我们就需要把这些没必要的天给推出去，所以看出来我们需要推出 qsize-m 个没必要的天。然而我的实现在 dequeap 和 enqueap 时会实时更新 qsize 所以实际上 Queap 只会弹出大概一半的元素，会造成很大的问题。所以我们必须先记录 qsize-m 然后再更新
最后的T次询问是对已经有的数据，询问有多少在相应的区间内。我们这里可以使用排序后二分查找区间分界点的位置，而不是对于每个元素都看是在哪个区间内。这样可以大大缩短时间

代码

#define _CRT_SECURE_NO_WARNINGS

#include<iostream>
#include<cstdio>
using namespace std;

// elements in queue/queap are the number of infections of previous days
// size of queue/queap is the number of days we need to keep track of

// Queue Node
struct qNode {
    long long value;
    qNode* next, * prev;
};
qNode *qHead = new qNode, * qTail = new qNode;

// Queap Node; Invariant: node n in a queap [h...n...t] is the max element in interval in [h...n]
struct pNode {
    long long value, num;
    pNode* next, * prev;
};
pNode *pHead = new pNode, * pTail = new pNode;

long long qsize = 0;

// enqueue value v into Q;
// update "max value available" in queap P
void enqueap(long long v) {
    qNode* q = new qNode; pNode* p = new pNode;
    qsize += 1;

    q->value = v, q->next = qHead->next; q->prev = qHead;
    qHead->next->prev = q; qHead->next = q;

    pNode* i = pHead->next; int num = 0;
    while (i != pTail) {
        if (v >= i->value) { num += i->num; i = i->next; delete i->prev;}
        else break;
    } // i is the first interval max greater than inserted value

    // replace everything between pHead and i with the newly inserted value
    if (num > 0) {
        p->value = v; p->num = num+1;
        p->next = i; p->prev = pHead;
        i->prev = p; pHead->next = p;
    }
    else { // num==0 says v < pHead->next, insert p in between Head and Head->next
        p->value = v; p->num = 1;
        p->next = i; p->prev = pHead;
        i->prev = p; pHead->next = p;
    }

    return;
}

// dequeue one element from Q and from P
void dequeap() {
    if (qsize <=0) return;

    qsize -= 1;

    qNode *qDel = qTail->prev;
    qTail->prev = qTail->prev->prev;
    qTail->prev->next = qTail;
    delete qDel;

    pNode *pDel = pTail->prev;
    if (pDel->num > 1)  pDel->num--; //there is still element after deletion
    else if (pDel->num == 1){ //only one element left, there will be 0 elements after this deletion, so let's just delete this node altogether
        pTail->prev = pDel->prev;
        pDel->prev->next = pTail;
        delete pDel;
    }
    else throw "0 element in pNode error"; // there is no element in the top node, which is not supposed to happen

    return;
}

// returns the Max value in the Queap; returns 0 if Queap is empty
inline long long getMax() {
    return qsize>0 ? pTail->prev->value : 0;
}

// print both queue and queap for debugging purposes
void printQueue() {
    qNode* q = new qNode; q = qHead->next;
    pNode* p = new pNode; p = pHead->next;

    cout<<"printing out queue Q:"<<endl;
    while (q != qTail) {
        cout << q->value << "  ";
        q = q->next;
    } cout << endl;


    cout<<"printing out queap P:"<<endl;
    while (p != pTail) {
        long long v = p->value;
        for (int i = 0; i < p->num; i++) {
            cout << v << "  ";
        }
        p = p->next;
    } cout << endl;
}

// cmp function for qsort
inline int sort_cmp (const void * a, const void * b)
{
   return ( *(long long*)a - *(long long*)b );
}

//returns the last element <=g in [l,r]
int bisearch(const long long a[], int l, int r, long long g) {
    int mid = -1;
    while(l<r) {
        mid = (l+r)>>1;
        a[mid] <= g ? l=mid+1 : r=mid;
    }
    return l-1;
}


long long observed[1000007]; int input[1000007];
int main()
{
    // initializes Q and P
    qHead->next = qTail, qTail->prev = qHead;
    pHead->next = pTail, pTail->prev = pHead;


    int n; scanf("%d", &n);
    for (int i = 0; i < n; i++) {
        scanf("%d", &input[i]);
    }

    // For each day, we first check which day is the earliest day we have to keep track of
    // If there are some days we no longer have to keep track of, we dequeue them from the queap and queue
    // Then record the maximum infection number maintained by queap and enqueue the infection number of today
    for (int i = 0; i < n; i++) {
        long long m; scanf("%lld", &m);
        long long num = qsize-m;
        for(long long j=0; j < num; j++){
            dequeap();
        }
        observed[i] = getMax();

        enqueap(input[i]);
    }

    qsort(observed, n, sizeof(long long), sort_cmp);

    // There are T queries, always on already observed infection number
    // We only care about number of days in a certain range, not the date or any other information
    // Therefore, we can use binary search to get the number of days in this given range.
    int T;
    scanf("%d", &T);
    for (int i=0; i<T; i++){
        long long p,q;
        scanf("%lld%lld", &p, &q);
        int pnum = 0, qnum = 0;
        pnum = bisearch(observed,0,n,p-1) + 1; // <p \equiv <=p-1; returns the index of the last element <p, so there are index+1 elements
        qnum = bisearch(observed,0,n,q-1) - pnum + 1; // similarly, <q \equiv <=q-1; bisearch(observed,0,n,q-1) - bisearch(observed,0,n,p-1) gives the number of elements in range [p,q)
        printf("%d %d\n", pnum, qnum);
    }

    return 0;

}