Academic Integrity: tutoring, explanations, and feedback — we don’t complete graded work or submit on a student’s behalf.

Indexing Websites! This assignment aims to help you practice binary search tree

ID: 3841277 • Letter: I

Question

Indexing Websites! This assignment aims to help you practice binary search tree ADT and in particular AVL Tree. You will write a program that creates an index of a given list of the top popular websites and their l addresses. Requirements: In this assignment, you are given a text file that includes the list of the top popular websites and their IP addresses. When you typically request a website from your browser by typing in a URLI that URL is converted to an IP address2 that is used to retrieve that page from the given URL. In this assignment, you will process this external file with URLs and lP addresses, and then you will create an index of those pages. The attached file includes the list of IP addresses as follows: google.com. 2 16.58.219. 206 youtube.com 172. 217.4.206 facebook.com 31. 13.69.228 baidu .com 123. 125.11 4.1 44 yahoo.com 206.190. 36.45 amazon.com 54 239.1 17.6 Wikipedia.org 208 80.15 4.224 gg. com 61 135.157. 156 google. co. in 216.58.219 195 As you can see here, the first page is the main Google page and the last page is the Google page in Italy (because of the extension "co.in n the IP address) Your task here is to process this file, and generate an AVL tree that indexes the list of pages and their IP addresses. However, in this file, when you process the page URLs, you need to consider that google.com and google.co.in are both belong to Google. For this, you need to just retrieve the name of the page which is the one until the first dot in the URL. For example, once you insert the 9 entries given above you should have the following AVL tree

Explanation / Answer

main.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "Assignment/indexingwebsites.h"

#define ARGSIZE 1

void argparser(int argc, char *argv[], char* fileName)
{
    if( argc-1!=ARGSIZE )
    {
        printf("[!] No argument found. ");
        printf("Usage : ./IndexingWebsites_Assignment UrlFileName ");

        printf("Would you like to enter file name ? (Y/N) ");
        scanf("%s", fileName); flush();

        if( fileName[0]=='Y' || fileName[0]=='y' )
        {
            printf("Enter file name : ");
            scanf("%s", fileName); flush();
            printf(" ");
            return;
        }
        printf("Bye! ");
        exit(0);
    }

    strcpy(fileName, argv[1]);
}

int main(int argc, char *argv[])
{
    char fileName[64];
    argparser(argc, argv, fileName);

    Page* page = read_ip_data( fileName );
    printf("Indexing Websites! ");
    printf("Your file %s has been loaded, the index has been created! ", fileName);
    run(page);

    printf(" ");
    return 0;
}

________________________________________________________________________________

avltree.c

#include "avltree.h"

Page* create_page(char* pageName, char* ipAddress)
{
    Page* page = (Page*) sMalloc( sizeof(Page) );

    strcpy( page->pageName, pageName );
    page->left = page->right = NULL;
    page->ipList = create_ipList();

    append_ip_to_page(page, ipAddress);
    page->height = 0;

    return page;
}

Page* insert_page(Page* page, char* pageName, char* ipAddress)
{
    if (page == NULL) // adding new webpage
    {
        page = create_page(pageName, ipAddress);
    }
    else if (strcmp(page->pageName, pageName) > 0) // go left
    {
        page->left = insert_page(page->left, pageName, ipAddress);

        if( height_left(page) - height_right(page) == 2 )
        {
            if( strcmp( page->right->pageName, pageName ) < 0 )
                page = single_rotate_left(page);
            else
                page = double_rotate_left(page);
        }
    }
    else if (strcmp(page->pageName, pageName) < 0) // go right
    {
        page->right = insert_page(page->right, pageName, ipAddress);

        if( height_right(page) - height_left(page) == 2 )
        {
            if( strcmp( page->right->pageName, pageName ) > 0 )
                page = single_rotate_right(page);
            else
                page = double_rotate_right(page);
        }
    }
    else if ( !strcmp(page->pageName, pageName) ) // already exists
    {
        if( !is_ip_exist(page->ipList, ipAddress) )
            append_ip_to_page(page, ipAddress);
    }

    return page;
}

void append_ip_to_page(Page* page, char* ipAddress)
{
    append_ip_to_ipList(page->ipList, ipAddress);
}

void display_index(Page *page)
{
    if (page != NULL)
    {
        display_index(page->left);

        printf("%s, IP addresses: ", page->pageName);
        display_ipList(page->ipList);

        display_index(page->right);
    }
}

int height_left(Page* page)
{
    return height(page->left);
}

int height_right(Page* page)
{
    return height(page->right);
}

int height(Page* page)
{
    if( page==NULL)
        return -1;
    return page->height;
}

Page* single_rotate_left(Page* page)
{
    Page* left_side = page->left;
    page->left = left_side->right;
    left_side->right = page;

    left_side->height = max_height(left_side) +1;
    page->height = max_height(page) +1;


    return left_side;
}

Page* single_rotate_right(Page* page)
{
    Page* right_side = page->right;
    page->right = right_side->left;
    right_side->left = page;

    right_side->height = max_height( right_side ) +1;
    page->height = max_height(page) +1;

    return right_side;
}

Page* double_rotate_left(Page* page)
{
    page->left = single_rotate_right( page->left );
    page = single_rotate_left(page);

    return page;
}

Page* double_rotate_right(Page* page)
{
    page->right = single_rotate_left( page->right );
    page = single_rotate_right(page);

    return page;
}

int max_height(Page *page)
{
    return max_between( height_left(page), height_right(page) ) +1;
}

__________________________________________________________________________________
avltree.h

#ifndef AVLTREE_H
#define AVLTREE_H

#include <stdlib.h>
#include <stdio.h>

#include "linkedlist.h"
#include "toolbox.h"

#define NAME_LEN 64

struct Page
{
    char pageName[NAME_LEN];
    IpList* ipList;

    int height;

    struct Page* left;
    struct Page* right;
};

typedef struct Page Page;

Page* create_page(char* name, char* ipAddress);
void append_ip_to_page(Page* page, char* ipAddress);
void display_index(Page* page);

Page* insert_page(Page* page, char* pageName, char* ipAddress);

int height_left(Page* page);
int height_right(Page* page);
int height(Page* page);
int max_height(Page* page); // max height between page->left and page->right

Page* single_rotate_left(Page* page);
Page* double_rotate_left(Page* page);
Page* single_rotate_right(Page* page);
Page* double_rotate_right(Page* page);

#endif // AVLTREE_H

________________________________________________________________

indexingwebsites.c
#include "indexingwebsites.h"

void run(Page* page)
{
    int selection=0;
    char ipAddress[ IP_LEN];
    char pageName[ NAME_LEN ];

    while(selection!=4)
    {
        display_menu();
        selection = get_selection();

        if( selection==1 )
        {
            display_index(page);
        }
        else if( selection==2 )
        {
            printf("Please enter the page: ");
            scanf("%s", pageName); flush();
            IpList* ipList = search_url(page, pageName);
            if( ipList )
            {
                printf("IP addresses for %s: ", pageName);
                display_ipList(ipList);
            }
            else
                printf("Page couldn't find. ");
        }
        else if( selection==3 )
        {
            printf("Please enter the URL: ");
            scanf("%s", ipAddress); flush();
            while( !is_valid_ipv4(ipAddress) )
            {
                printf("IP has to be in this format xxx.xxx.xxx.xxx ");
                printf("Please enter the URL: ");
                scanf("%s", ipAddress); flush();
            }
            Page* correspondingPage = search_ip(page, ipAddress);
            if( correspondingPage )
                printf("It belongs to %s ", correspondingPage->pageName);
            else
                printf("There is no page for %s ", ipAddress);
        }
        else if( selection==4 )
        {
            printf("Bye! ");
            break;
        }
        printf(" ");
    }
}

Page* read_ip_data(char* fileName)
{
    Page* page = NULL;
    char pageName[ NAME_LEN ];
    char ipAddress[ IP_LEN ];

    FILE* fo = sFopen(fileName, "r");
    if(!fo) return NULL;

    while( fscanf(fo, "%s > %s", pageName, ipAddress)!=-1 )
    {
        sscanf(pageName, "%[^.]", pageName); // means read till first dot
        make_lowercase(pageName);
        page = insert_page(page, pageName, ipAddress);
    }

    fclose(fo);

    return page;
}

Page* search_ip (Page* page, char* ipAddress)
{
    Page* i = NULL;

    if (page != NULL)
    {
        i = search_ip(page->left, ipAddress);
        if( is_ip_exist(page->ipList, ipAddress) )
            return page;
        if( i ) return i;

        i = search_ip(page->right, ipAddress);
        if( i ) return i;
    }
    return NULL;
}

IpList* search_url(Page* page, char* pageName)
{
    IpList* i = NULL;

    if (page != NULL)
    {
        i = search_url(page->left, pageName);
        if( !strcmp( page->pageName, pageName ) )
            return page->ipList;
        if( i ) return i;

        i = search_url(page->right, pageName);
        if( i ) return i;
    }
    return NULL;
}

void display_menu()
{
    printf("--------    MENU    -------- "
           "1. Display the full index "
           "2. Search for a URL "
           "3. Search for an IP address "
           "4. Exit "
           "---------------------------- "
           );
}

int get_selection()
{
    int selection=0;

    printf("Option: ");

    scanf("%d", &selection); flush();
    while( !is_between(selection, 0,5) )
    {
        printf("Please enter valid option: ");
        scanf("%d", &selection); flush();
    }

    printf(" ");

    return selection;
}
___________________________________________________________________________

indexingwebsites.h
#ifndef INDEXINGWEBSITES_H
#define INDEXINGWEBSITES_H

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#include "avltree.h"
#include "linkedlist.h"
#include "toolbox.h"

#define TRUE 1
#define FALSE 0

void run(Page* page);
void display_menu(); // returns the user selection
int get_selection(); // get menu selection

Page* read_ip_data(char* fileName);

Page* search_ip (Page* page, char* ipAddress);
IpList* search_url (Page* page, char* pageName);

#endif // INDEXINGWEBSITES_H

_______________________________________________________________________________

linkedlist.c
#include "linkedlist.h"

IpList* create_ipList()
{
    IpList* ipList = (IpList*) sMalloc( sizeof(IpList) );
    ipList->size = 0;

    // Head is dummy node
    ipList->ipH = create_ip("");

    ipList->ipT = ipList->ipH;

    return ipList;
}

Ip* create_ip(char* ipAddress)
{
    Ip* ip = (Ip*) sMalloc( sizeof(Ip) );
    ip->next = NULL;

    strcpy( ip->ipAddress, ipAddress );

    return ip;
}

void append_ip_to_ipList(IpList* ipList, char* ipAddress)
{
    Ip* newIp = create_ip(ipAddress);
    newIp->next = NULL;

    ipList->ipT->next = newIp;
    ipList->ipT = ipList->ipT->next;
    ipList->size++;
}

Ip* get_first_ip(IpList* ipList)
{
    return ipList->ipH->next;
}

void display_ipList(IpList* ipList)
{
    Ip* ip = get_first_ip(ipList);

    while( ip )
    {
        printf("%s", ip->ipAddress);
        if( ip->next!=NULL )
            printf(", ");

        ip = ip->next;
    }
    printf(" ");
}

int is_ip_exist(IpList* ipList, char* ipAddress)
{
    Ip* ip = get_first_ip(ipList);

    while( ip )
    {
        if( !strcmp(ip->ipAddress, ipAddress) )
            return TRUE;
        ip = ip->next;
    }

    return FALSE;
}
____________________________________________________________________________

linkedlist.h

#ifndef LINKEDLIST_H
#define LINKEDLIST_H

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "toolbox.h"

#define TRUE 1
#define FALSE 0

#define IP_LEN 16 // No support for IPv6 so 16 bytes

struct Ip
{
    char ipAddress[ IP_LEN ];
    struct Ip* next;
};

struct IpList
{
    struct Ip* ipH; // Head
    struct Ip* ipT; // Tail
    int size;
};


typedef struct Ip Ip;
typedef struct IpList IpList;

IpList* create_ipList();
Ip* create_ip(char* ipAddress);
void append_ip_to_ipList(IpList* ipList, char* ipAddress);

Ip* get_first_ip(IpList* ipList);
void display_ipList(IpList* ipList);
int is_ip_exist(IpList* ipList, char* ipAddress);

#endif // LINKEDLIST_H

________________________________________________________________________________

toolbox.c

#include "toolbox.h"

FILE* sFopen(const char *fileName, const char *mode) // Secure open file
{
    FILE* fo;
    fo = fopen(fileName, mode);
    if( !fo )
    {
        printf("[!] File couldn't found. Please check file name or file. ");
        exit(EXIT_FAILURE);
    }
    return fo;
}

void* sMalloc(const size_t size) // Secure malloc
{
    void *ptr = malloc(size);
    if(!ptr)
    {
        printf("[!] No enough memory. ");
        exit(EXIT_FAILURE);
    }
    return ptr;
}

void sFree(void *ptr) // Secure free
{
    if(!ptr)
    {
        printf("[!] The pointer is NULL. ");
        exit(EXIT_FAILURE);
    }
    free(ptr);
}

void flush() // flush stdin
{
    while(getchar() != ' ');
}

int is_between(const int n, const int low, const int top)
{
    return (n>low&&n<top);
}
void strip(char* s)
{
    int i=0;

    for(i=0; is_between(s[i], 32, 127) ; i++);
    s[i] = '';
}
void make_lowercase(char* str)
{
    int i=0;
    for(i = 0; str[i]; i++)
        if( is_between(str[i], 64,91) )
            str[i] = (char) ( (int)str[i] + 32 );
}

int is_valid_ipv4(char* ipAddress)
{
    int ipLen = strlen(ipAddress);

    if( !is_between(ipLen, 6, 16) )
        return FALSE;

    // it would be better to take stripted ipAddress
    // if it is, will not have char* tail
    char tail[16];
    tail[0] = 0;

    unsigned int parts[4]; // ignore minus sign
    int c = sscanf(ipAddress, "%3u.%3u.%3u.%3u%s", &parts[0], &parts[1], &parts[2], &parts[3], tail);

    if ( c!=4 || tail[0] )
        return FALSE;

    int i=0;
    for (i=0; i<4; i++)
        if (parts[i] > 255)
            return FALSE;

    return TRUE;
}

int max_between(int first, int second)
{
    if(first>second)
        return first;
    return second;
}
__________________________________________________________________

toolbox.h

#ifndef TOOLBOX_H
#define TOOLBOX_H

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#define TRUE 1
#define FALSE 0

FILE* sFopen(const char *fileName, const char *mode); // Secure open file
void flush(); // flush stdin
void sFree(void *ptr); // Secure free
void* sMalloc(const size_t size); // Secure malloc

int is_between(const int n, const int low, const int top); // low and top are not included
void strip(char* s);
void make_lowercase(char* str);
int is_valid_ipv4(char* ipAddress); // return true if ipv4 format is valid
int max_between(int first, int second);

#endif // TOOLBOX_H