Academic Integrity: tutoring, explanations, and feedback — we don’t complete graded work or submit on a student’s behalf.

CS 2123 Data Structures Recitation 4 Due Friday March 11 Difficulty (out of 5) 1

ID: 3676886 • Letter: C

Question

CS 2123 Data Structures Recitation 4 Due Friday March 11 Difficulty (out of 5) 1. (100 pts) Write a program to find the word frequencies in a txt file. Use a binary tree to store the words and maintain a count for each word. Each node of the binary tree has a dynamically allocated string and the frequency. If the word is not in the tree, insert it with a frequency of 1. If the word is in the tree, increase its frequency. You need to use string processing functions to compare strings. The output of the program is words and their frequencies listed in increasing order of fre- quency . Your frequency count should not be case sensitive. Apple and apple should be counted as same word. Convert all the words to lowercase before storing in the tree. Skip punctuation characters and the following characters: space, tab character, new- line character, carriage-return and form-feed character. All otber characters should be considered part of the word. Dynamically allocate the strings and the binary tree. Free all the allocated space before exit and use nalgrind for potential memory errors Use are and are, in your program. For the following file a.txt The red car hit the blue car When you eecute your program as foxo reeitation4 a.txt The output should be as follows blue hit red 2 the Test your program with different files. Submit your program electronically using the Wackboand system. Only one member of the group ould send it. List the name of the groxp members on the top of your submission.

Explanation / Answer

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct node {
char *str;
int freq;
struct node* left;
struct node* right;
};

typedef struct node node;

void insert(node** root, char *s) {
if (*root == NULL) {
*root = (node*) malloc(sizeof(node));
(*root)->str = (char*) malloc(sizeof(char) * (strlen(s) + 1));
strcpy((*root)->str, s);
(*root)->freq = 1;
(*root)->left = NULL;
(*root)->right = NULL;
} else {
int result = strcmp((*root)->str, s);
if (result == 0) {
(*root)->freq += 1;
} else if (result < 0) {
insert(&((*root)->right), s);
} else {
insert(&((*root)->left), s);
}
}
}

void deleteTree(node* root) {
if (root == NULL) return;
deleteTree(root->right);
deleteTree(root->left);
free(root->str);
free(root);
root = NULL;
}

int countNodes(node* root) {
if (root == NULL) return 0;
else return 1 + countNodes(root->left) + countNodes(root->right);
}

int compare(const void *a, const void *b) {
node* nodeA = (node*) a;
node* nodeB = (node*) b;
if (nodeA->freq != nodeB->freq)
return nodeA->freq - nodeB->freq;
else
return strcmp(nodeA->str, nodeB->str);
}

void fillNodes(node* root, node* nodes[], int *k) {
if (root == NULL) return;
fillNodes(root->left, nodes, k);
nodes[*k] = root;
*k = *k + 1;
fillNodes(root->right, nodes, k);
}

int main(int argc, char** argv) {
if (argc < 2) {
printf("File name required!");
return 1;
}
FILE* fp = fopen(argv[1], "r");
node* root = NULL;
int i;
char s[100];
while (fgets(s, 100, fp)) {
char* formatted = (char*) malloc(sizeof(char) * (strlen(s) + 1));
int j = 0;
int k = 0;
while (s[j] != '') {
if (s[j] >= 'a' && s[j] <= 'z') formatted[k++] = s[j];
else if (s[j] >= 'A' && s[j] <= 'Z') formatted[k++] = s[j] - 'A' + 'a';
++j;
}
formatted[k++] = '';
insert(&root, formatted);
free(formatted);
}
int total_nodes = countNodes(root);
node** nodes = (node**) malloc(sizeof(node*) * total_nodes);
int index = 0;
fillNodes(root, nodes, &index);
qsort(nodes, total_nodes, sizeof(nodes[0]), compare);
for (i = 0; i < total_nodes; ++i) {
printf("%d %s ", nodes[i]->freq, nodes[i]->str);
}
free(nodes);
deleteTree(root);
fclose(fp);
return 0;
}