Implement the simple approximate matching algorithm. You only need to add your c
ID: 3595993 • Letter: I
Question
Implement the simple approximate matching algorithm. You only need to add your code to rkmatch.c. Specifically, you need to complete the procedures simple_substr_match.
In rkmatch.c, the main procedure considers each chunk of X in turn and invokes simple_substr_match to find a match in file Y. The invocation passes in---as part of the procedure arguments---the pointer to the chunk, chunk's length (k), the pointer to Y's content and Y's length. If a match is found, the procedure returns 1, otherwise, it returns 0. (If a chunk of X appears many times in Y, the return value should still be 1.)
Given Code:
/* Match every k-character snippet of the query_doc document among a collection of documents doc1, doc2, .... ./rkmatch snippet_size query_doc doc1 [doc2...] */ #include <stdio.h> #include <stdlib.h> #include <sys/types.h> #include <sys/stat.h> #include <getopt.h> #include <fcntl.h> #include <unistd.h> #include <string.h> #include <assert.h> #include <time.h> #include <ctype.h> #include "bloom.h" enum algotype { EXACT=0, SIMPLE, RK, RKBATCH}; /* a large prime for RK hash (BIG_PRIME*256 does not overflow)*/ long long BIG_PRIME = 5003943032159437; /* constants used for printing debug information */ const int PRINT_RK_HASH = 5; const int PRINT_BLOOM_BITS = 160; /* calculate modulo addition, i.e. (a+b) % BIG_PRIME */ long long madd(long long a, long long b) { assert(a >= 0 && a < BIG_PRIME && b >= 0 && b < BIG_PRIME); return ((a+b)>BIG_PRIME?(a+b-BIG_PRIME):(a+b)); } /* calculate modulo substraction, i.e. (a-b) % BIG_PRIME*/ long long mdel(long long a, long long b) { assert(a >= 0 && a < BIG_PRIME && b >= 0 && b < BIG_PRIME); return ((a>b)?(a-b):(a+BIG_PRIME-b)); } /* calculate modulo multiplication, i.e. (a*b) % BIG_PRIME */ long long mmul(long long a, long long b) { assert(a >= 0 && a < BIG_PRIME && b >= 0 && b < BIG_PRIME); /* either operand must be no larger than 256, otherwise, there is danger of overflow*/ assert(a <= 256 || b <= 256); return ((a*b) % BIG_PRIME); } /* read the entire content of the file 'fname' into a character array allocated by this procedure. Upon return, *doc contains the address of the character array *doc_len contains the length of the array */ void read_file(const char *fname, unsigned char **doc, int *doc_len) { int fd = open(fname, O_RDONLY); if (fd < 0) { perror("read_file: open "); exit(1); } struct stat st; if (fstat(fd, &st) != 0) { perror("read_file: fstat "); exit(1); } *doc = (unsigned char *)malloc(st.st_size); if (!(*doc)) { fprintf(stderr, " failed to allocate %d bytes. No memory ", (int)st.st_size); exit(1); } int n = read(fd, *doc, st.st_size); if (n < 0) { perror("read_file: read "); exit(1); }else if (n != st.st_size) { fprintf(stderr,"read_file: short read! "); exit(1); } close(fd); *doc_len = n; } /* check if a query string ps (of length k) appears in ts (of length n) as a substring If so, return 1. Else return 0 You may want to use the library function strncmp */ int simple_substr_match(const unsigned char *ps, /* the query string */ int k,/* the length of the query string */ const unsigned char *ts,/* the document string (Y) */ int n/* the length of the document Y */) { /* Your code here */ return 0; }