A useful data structure for doing lookups is a hash table. In this problem, you will be imple-
menting a hash table with chaining to store the frequency of words in a file. The hash table is
implemented as an array of linked lists. The hash function specifies the index of the linked list to
follow for a given word. The word can be found by following this linked list. Optionally, the word
can be appended to this linked list. You will require the code file ’hash.c’ and data file ’book.txt’
for this problem. You are required to do the following
• The function lookup() returns a pointer to the record having the required string. If not
found it returns NULL or optionally creates a new record at the correct location. Please
complete the rest of the code.
• Complete the function cleartable() to reclaim memory. Make sure each call to malloc()
is matched with a free()
Answers
Answer:
Answer: one possible implementation is shown below:
#include <s t d i o . h>
#include <s t d l i b . h>
#include <s t r i n g . h>
#define MAX BUCKETS 1000
#define MULTIPLIER 31
#define MAX LEN 100
struct wordrec
{
char∗ word ;
unsigned long count ;
struct wordrec ∗ next ;
} ;
struct wordrec ∗ wa ll o c ( const char∗ s t r )
{
struct wordrec ∗ p=(struct wordrec ∗) malloc ( s izeof ( struct wordrec ));
if (p!=NULL)
{
p−>count=0;
p−>word=s t rdup ( s t r ); /∗ c r e a t e s a d u p l i c a t e ∗/
p−>next=NULL;
}
return p ;
}
/∗ hash bucket ∗/
struct wordrec ∗ t a b l e [MAX LEN] ;
/∗
@function ha shs t r ing
@desc produces hash code f o r a s t r i n g
m u lti p l i e r s 31 ,35 have been found to work we ll
∗/
unsigned long ha shs t r ing ( const char∗ str )
{
unsigned long hash=0;
while (∗ s t r )
{
hash= hash∗MULTIPLIER+∗ s t r ;
str++;
}
return hash%MAX BUCKETS;
}
/∗
@function lookup
@desc r e turns a po int e r to the word or c r e a t e s
it i f r e qui r ed
∗/
struct wordrec ∗ lookup ( const char∗ s t r , int c r e a t e )
{
unsigned long hash=ha shs t r ing ( s t r );
struct wordrec ∗ wp=t a b l e [ hash ] ;
struct wordrec ∗ cur r=NULL;
for ( cur r=wp; cur r !=NULL ; cur r=cur r−>next )
i f ( strcmp ( cur r−>word , s t r )==0) /∗ found ∗/
{
return cur r ;
}
/∗ not found ∗/
i f ( c r e a t e )
{
cur r=(struct wordrec ∗) malloc ( s izeof ( struct wordrec ));
cur r−>word=s t rdup ( s t r );
cur r−>count=0;
/∗add to f r o n t ∗/
cur r−>next=t a b l e [ hash ] ;
t a b l e [ hash]= cur r ;
}
return cur r ;
}
/∗
@function c l e a r t a b l e ()
@desc r e c l a im s memory
∗/
void c l e a r t a b l e ()
{
struct wordrec ∗ wp=NULL, ∗ p=NULL;
int i =0;
for ( i =0; i<MAX BUCKETS; i++)
{
wp=t a b l e [ i ] ;
while (wp)
{
p=wp;
wp=wp−>next ;
f r e e (p−>word );
f r e e (p );
}
}
}
int main ( int argc , char∗ argv [ ] )
{
FILE∗ fp=fopen ( "book.txt" , "r" );
char word [ 1 0 2 4 ] ; /∗ big enough∗/
struct wordrec ∗ wp=NULL;
int i =0;
memset( tabl e , 0 , s izeof ( t a bl e ));
/∗ read from input ∗/
while ( 1 )
{
i f ( f s c a n f (fp , "%s" , word )!=1)
break ;
wp=lookup ( word , 1 ); /∗ c r e a t e i f doesn ’ t e x i s t ∗/
wp−>count++;
}
f c l o s e ( fp );
/∗
pr i nt a l l words have f requency >100
∗/
for ( i =0; i<MAX BUCKETS; i++)
{
for (wp=t a b l e [ i ] ; wp!=NULL;wp=w−>next )
{
i f (wp−>count >1000)
{
p r i n tf ( "%s-->%ld\n" ,wp−>word ,wp−>count );
}
}
}
c l e a r t a b l e ();
return 0 ;
}