Indexing Websites! This assignment aims to help you practice binary search tree
ID: 3841277 • Letter: I
Question
Indexing Websites! This assignment aims to help you practice binary search tree ADT and in particular AVL Tree. You will write a program that creates an index of a given list of the top popular websites and their l addresses. Requirements: In this assignment, you are given a text file that includes the list of the top popular websites and their IP addresses. When you typically request a website from your browser by typing in a URLI that URL is converted to an IP address2 that is used to retrieve that page from the given URL. In this assignment, you will process this external file with URLs and lP addresses, and then you will create an index of those pages. The attached file includes the list of IP addresses as follows: google.com. 2 16.58.219. 206 youtube.com 172. 217.4.206 facebook.com 31. 13.69.228 baidu .com 123. 125.11 4.1 44 yahoo.com 206.190. 36.45 amazon.com 54 239.1 17.6 Wikipedia.org 208 80.15 4.224 gg. com 61 135.157. 156 google. co. in 216.58.219 195 As you can see here, the first page is the main Google page and the last page is the Google page in Italy (because of the extension "co.in n the IP address) Your task here is to process this file, and generate an AVL tree that indexes the list of pages and their IP addresses. However, in this file, when you process the page URLs, you need to consider that google.com and google.co.in are both belong to Google. For this, you need to just retrieve the name of the page which is the one until the first dot in the URL. For example, once you insert the 9 entries given above you should have the following AVL treeExplanation / Answer
main.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "Assignment/indexingwebsites.h"
#define ARGSIZE 1
void argparser(int argc, char *argv[], char* fileName)
{
if( argc-1!=ARGSIZE )
{
printf("[!] No argument found. ");
printf("Usage : ./IndexingWebsites_Assignment UrlFileName ");
printf("Would you like to enter file name ? (Y/N) ");
scanf("%s", fileName); flush();
if( fileName[0]=='Y' || fileName[0]=='y' )
{
printf("Enter file name : ");
scanf("%s", fileName); flush();
printf(" ");
return;
}
printf("Bye! ");
exit(0);
}
strcpy(fileName, argv[1]);
}
int main(int argc, char *argv[])
{
char fileName[64];
argparser(argc, argv, fileName);
Page* page = read_ip_data( fileName );
printf("Indexing Websites! ");
printf("Your file %s has been loaded, the index has been created! ", fileName);
run(page);
printf(" ");
return 0;
}
________________________________________________________________________________
avltree.c
#include "avltree.h"
Page* create_page(char* pageName, char* ipAddress)
{
Page* page = (Page*) sMalloc( sizeof(Page) );
strcpy( page->pageName, pageName );
page->left = page->right = NULL;
page->ipList = create_ipList();
append_ip_to_page(page, ipAddress);
page->height = 0;
return page;
}
Page* insert_page(Page* page, char* pageName, char* ipAddress)
{
if (page == NULL) // adding new webpage
{
page = create_page(pageName, ipAddress);
}
else if (strcmp(page->pageName, pageName) > 0) // go left
{
page->left = insert_page(page->left, pageName, ipAddress);
if( height_left(page) - height_right(page) == 2 )
{
if( strcmp( page->right->pageName, pageName ) < 0 )
page = single_rotate_left(page);
else
page = double_rotate_left(page);
}
}
else if (strcmp(page->pageName, pageName) < 0) // go right
{
page->right = insert_page(page->right, pageName, ipAddress);
if( height_right(page) - height_left(page) == 2 )
{
if( strcmp( page->right->pageName, pageName ) > 0 )
page = single_rotate_right(page);
else
page = double_rotate_right(page);
}
}
else if ( !strcmp(page->pageName, pageName) ) // already exists
{
if( !is_ip_exist(page->ipList, ipAddress) )
append_ip_to_page(page, ipAddress);
}
return page;
}
void append_ip_to_page(Page* page, char* ipAddress)
{
append_ip_to_ipList(page->ipList, ipAddress);
}
void display_index(Page *page)
{
if (page != NULL)
{
display_index(page->left);
printf("%s, IP addresses: ", page->pageName);
display_ipList(page->ipList);
display_index(page->right);
}
}
int height_left(Page* page)
{
return height(page->left);
}
int height_right(Page* page)
{
return height(page->right);
}
int height(Page* page)
{
if( page==NULL)
return -1;
return page->height;
}
Page* single_rotate_left(Page* page)
{
Page* left_side = page->left;
page->left = left_side->right;
left_side->right = page;
left_side->height = max_height(left_side) +1;
page->height = max_height(page) +1;
return left_side;
}
Page* single_rotate_right(Page* page)
{
Page* right_side = page->right;
page->right = right_side->left;
right_side->left = page;
right_side->height = max_height( right_side ) +1;
page->height = max_height(page) +1;
return right_side;
}
Page* double_rotate_left(Page* page)
{
page->left = single_rotate_right( page->left );
page = single_rotate_left(page);
return page;
}
Page* double_rotate_right(Page* page)
{
page->right = single_rotate_left( page->right );
page = single_rotate_right(page);
return page;
}
int max_height(Page *page)
{
return max_between( height_left(page), height_right(page) ) +1;
}
__________________________________________________________________________________
avltree.h
#ifndef AVLTREE_H
#define AVLTREE_H
#include <stdlib.h>
#include <stdio.h>
#include "linkedlist.h"
#include "toolbox.h"
#define NAME_LEN 64
struct Page
{
char pageName[NAME_LEN];
IpList* ipList;
int height;
struct Page* left;
struct Page* right;
};
typedef struct Page Page;
Page* create_page(char* name, char* ipAddress);
void append_ip_to_page(Page* page, char* ipAddress);
void display_index(Page* page);
Page* insert_page(Page* page, char* pageName, char* ipAddress);
int height_left(Page* page);
int height_right(Page* page);
int height(Page* page);
int max_height(Page* page); // max height between page->left and page->right
Page* single_rotate_left(Page* page);
Page* double_rotate_left(Page* page);
Page* single_rotate_right(Page* page);
Page* double_rotate_right(Page* page);
#endif // AVLTREE_H
________________________________________________________________
indexingwebsites.c
#include "indexingwebsites.h"
void run(Page* page)
{
int selection=0;
char ipAddress[ IP_LEN];
char pageName[ NAME_LEN ];
while(selection!=4)
{
display_menu();
selection = get_selection();
if( selection==1 )
{
display_index(page);
}
else if( selection==2 )
{
printf("Please enter the page: ");
scanf("%s", pageName); flush();
IpList* ipList = search_url(page, pageName);
if( ipList )
{
printf("IP addresses for %s: ", pageName);
display_ipList(ipList);
}
else
printf("Page couldn't find. ");
}
else if( selection==3 )
{
printf("Please enter the URL: ");
scanf("%s", ipAddress); flush();
while( !is_valid_ipv4(ipAddress) )
{
printf("IP has to be in this format xxx.xxx.xxx.xxx ");
printf("Please enter the URL: ");
scanf("%s", ipAddress); flush();
}
Page* correspondingPage = search_ip(page, ipAddress);
if( correspondingPage )
printf("It belongs to %s ", correspondingPage->pageName);
else
printf("There is no page for %s ", ipAddress);
}
else if( selection==4 )
{
printf("Bye! ");
break;
}
printf(" ");
}
}
Page* read_ip_data(char* fileName)
{
Page* page = NULL;
char pageName[ NAME_LEN ];
char ipAddress[ IP_LEN ];
FILE* fo = sFopen(fileName, "r");
if(!fo) return NULL;
while( fscanf(fo, "%s > %s", pageName, ipAddress)!=-1 )
{
sscanf(pageName, "%[^.]", pageName); // means read till first dot
make_lowercase(pageName);
page = insert_page(page, pageName, ipAddress);
}
fclose(fo);
return page;
}
Page* search_ip (Page* page, char* ipAddress)
{
Page* i = NULL;
if (page != NULL)
{
i = search_ip(page->left, ipAddress);
if( is_ip_exist(page->ipList, ipAddress) )
return page;
if( i ) return i;
i = search_ip(page->right, ipAddress);
if( i ) return i;
}
return NULL;
}
IpList* search_url(Page* page, char* pageName)
{
IpList* i = NULL;
if (page != NULL)
{
i = search_url(page->left, pageName);
if( !strcmp( page->pageName, pageName ) )
return page->ipList;
if( i ) return i;
i = search_url(page->right, pageName);
if( i ) return i;
}
return NULL;
}
void display_menu()
{
printf("-------- MENU -------- "
"1. Display the full index "
"2. Search for a URL "
"3. Search for an IP address "
"4. Exit "
"---------------------------- "
);
}
int get_selection()
{
int selection=0;
printf("Option: ");
scanf("%d", &selection); flush();
while( !is_between(selection, 0,5) )
{
printf("Please enter valid option: ");
scanf("%d", &selection); flush();
}
printf(" ");
return selection;
}
___________________________________________________________________________
indexingwebsites.h
#ifndef INDEXINGWEBSITES_H
#define INDEXINGWEBSITES_H
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "avltree.h"
#include "linkedlist.h"
#include "toolbox.h"
#define TRUE 1
#define FALSE 0
void run(Page* page);
void display_menu(); // returns the user selection
int get_selection(); // get menu selection
Page* read_ip_data(char* fileName);
Page* search_ip (Page* page, char* ipAddress);
IpList* search_url (Page* page, char* pageName);
#endif // INDEXINGWEBSITES_H
_______________________________________________________________________________
linkedlist.c
#include "linkedlist.h"
IpList* create_ipList()
{
IpList* ipList = (IpList*) sMalloc( sizeof(IpList) );
ipList->size = 0;
// Head is dummy node
ipList->ipH = create_ip("");
ipList->ipT = ipList->ipH;
return ipList;
}
Ip* create_ip(char* ipAddress)
{
Ip* ip = (Ip*) sMalloc( sizeof(Ip) );
ip->next = NULL;
strcpy( ip->ipAddress, ipAddress );
return ip;
}
void append_ip_to_ipList(IpList* ipList, char* ipAddress)
{
Ip* newIp = create_ip(ipAddress);
newIp->next = NULL;
ipList->ipT->next = newIp;
ipList->ipT = ipList->ipT->next;
ipList->size++;
}
Ip* get_first_ip(IpList* ipList)
{
return ipList->ipH->next;
}
void display_ipList(IpList* ipList)
{
Ip* ip = get_first_ip(ipList);
while( ip )
{
printf("%s", ip->ipAddress);
if( ip->next!=NULL )
printf(", ");
ip = ip->next;
}
printf(" ");
}
int is_ip_exist(IpList* ipList, char* ipAddress)
{
Ip* ip = get_first_ip(ipList);
while( ip )
{
if( !strcmp(ip->ipAddress, ipAddress) )
return TRUE;
ip = ip->next;
}
return FALSE;
}
____________________________________________________________________________
linkedlist.h
#ifndef LINKEDLIST_H
#define LINKEDLIST_H
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "toolbox.h"
#define TRUE 1
#define FALSE 0
#define IP_LEN 16 // No support for IPv6 so 16 bytes
struct Ip
{
char ipAddress[ IP_LEN ];
struct Ip* next;
};
struct IpList
{
struct Ip* ipH; // Head
struct Ip* ipT; // Tail
int size;
};
typedef struct Ip Ip;
typedef struct IpList IpList;
IpList* create_ipList();
Ip* create_ip(char* ipAddress);
void append_ip_to_ipList(IpList* ipList, char* ipAddress);
Ip* get_first_ip(IpList* ipList);
void display_ipList(IpList* ipList);
int is_ip_exist(IpList* ipList, char* ipAddress);
#endif // LINKEDLIST_H
________________________________________________________________________________
toolbox.c
#include "toolbox.h"
FILE* sFopen(const char *fileName, const char *mode) // Secure open file
{
FILE* fo;
fo = fopen(fileName, mode);
if( !fo )
{
printf("[!] File couldn't found. Please check file name or file. ");
exit(EXIT_FAILURE);
}
return fo;
}
void* sMalloc(const size_t size) // Secure malloc
{
void *ptr = malloc(size);
if(!ptr)
{
printf("[!] No enough memory. ");
exit(EXIT_FAILURE);
}
return ptr;
}
void sFree(void *ptr) // Secure free
{
if(!ptr)
{
printf("[!] The pointer is NULL. ");
exit(EXIT_FAILURE);
}
free(ptr);
}
void flush() // flush stdin
{
while(getchar() != ' ');
}
int is_between(const int n, const int low, const int top)
{
return (n>low&&n<top);
}
void strip(char* s)
{
int i=0;
for(i=0; is_between(s[i], 32, 127) ; i++);
s[i] = '';
}
void make_lowercase(char* str)
{
int i=0;
for(i = 0; str[i]; i++)
if( is_between(str[i], 64,91) )
str[i] = (char) ( (int)str[i] + 32 );
}
int is_valid_ipv4(char* ipAddress)
{
int ipLen = strlen(ipAddress);
if( !is_between(ipLen, 6, 16) )
return FALSE;
// it would be better to take stripted ipAddress
// if it is, will not have char* tail
char tail[16];
tail[0] = 0;
unsigned int parts[4]; // ignore minus sign
int c = sscanf(ipAddress, "%3u.%3u.%3u.%3u%s", &parts[0], &parts[1], &parts[2], &parts[3], tail);
if ( c!=4 || tail[0] )
return FALSE;
int i=0;
for (i=0; i<4; i++)
if (parts[i] > 255)
return FALSE;
return TRUE;
}
int max_between(int first, int second)
{
if(first>second)
return first;
return second;
}
__________________________________________________________________
toolbox.h
#ifndef TOOLBOX_H
#define TOOLBOX_H
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#define TRUE 1
#define FALSE 0
FILE* sFopen(const char *fileName, const char *mode); // Secure open file
void flush(); // flush stdin
void sFree(void *ptr); // Secure free
void* sMalloc(const size_t size); // Secure malloc
int is_between(const int n, const int low, const int top); // low and top are not included
void strip(char* s);
void make_lowercase(char* str);
int is_valid_ipv4(char* ipAddress); // return true if ipv4 format is valid
int max_between(int first, int second);
#endif // TOOLBOX_H