Perfect Hashing and its implementation in C


Perfect hashing simply means hashing with no collision.But there is nothing like perfect hashing(no collision).These are just a work around to achieve perfect hashing.Using this we can able to search the large set of records in a constant time i.e O(1) time complexity.To achieve searching in strict O(1) time ,one needs to implement hash table and then search the given record in to hash table using hash function.
        Here in perfect hashing ,one need to implement two level hash table , first level hash table consist a set of pointers that points to the second level hash table,we must select hash function that first maps the key in to first level hash table,so it simply means there must be collision in first level hash table ,thats  why i am saying its just a work around to achieve prefect  hashing.We must wisely choose hash function:
                 h(x)=((a*x+b)mod p)mod m
x is the key to be map,
Here m is size of the hash table,we should choose m >2N(N=no of elements)
p is a prime number with value greater than m
and a & b are the value less than p
We must first map all the key to first level hash table using selected hash function,and then count the collision in the single entry of the first level hash table.Suppose n1 is the collision count in one of the entry in the hash table, then second level hash table size corresponding to that entry should be equal to n1^2,so that probability of collision become half.Likewise we calculate the collision count of each corresponding entry in first level hash table and dynamically allocate the second level hash table size.
For each entry in first level hash table,build a second level hash function and map the collision key in second level hash table using another hash function:
           h(x)=((a*x+b)mod p)mod n1^2
where p is prime number which is greater than n1^2.
a & b is less than p.
If second level function also produce collision while mapping key to second level hash table then change the second level hash function(change a & b OR p) dynamically until you get a collision free mapping.So we have collision free mapping and we can able to search the record in O(1) time complexity.
I have implemented the perfect hashing to store 10^5 record in hash table and search any key in O(1) time complexity.Please find the below program:-

_______________________________________________________________________________

#include<stdio.h>
#include <stdlib.h>
#include<time.h>
#include<string.h>
#define hashSize 400000
#define initRandom srand(time(&t));
#define Prime 99999999977
#define maxNoElement 100000
time_t t;
long long int noOfElement=0;
struct FirstLevelHashTable{
long long int a,b,p;
struct SecondLevelHashTable *ptr[hashSize];
long long int count[hashSize];
};
struct SecondLevelHashTable{
long long int a,b,p,length;
long long int number[0];
};
long long int hashFunction(long long int,long long int,long long int,long long int,long long int); //calculate hash function((ax+b)modp)modm and return corresponding index

void intializeFirstLevelHash(struct FirstLevelHashTable**,long long int[]);//Intialize  memory for first level hashing

void intializeSecondLevelHash(struct FirstLevelHashTable**); //Intialize memory dynamically  for second level hashing corresponding to each first level pointer

void mapKey(struct FirstLevelHashTable**,long long int[]); //Map key values to the hash tables using first & second level hash function and change the hash function(in second level hash) if collision

int rehashAll(struct FirstLevelHashTable**,long long int,long long int[]); //Logic to implement another hash function and rehash if collision in second level hashing

int searchKey(struct FirstLevelHashTable**,long long int); //Search key by the user input:::: return true or false based on key existence in hash

void displayHashValues(struct FirstLevelHashTable**,long long int[]);//display hash key of all the key in seperate file output.out

void main(){
long long int i=0,j=0;
clock_t start,end;
double time_taken;
long long int number[maxNoElement];
struct FirstLevelHashTable *head=NULL;
intializeFirstLevelHash(&head,number);
intializeSecondLevelHash(&head);
mapKey(&head,number);
printf("Hash Table Built Sucessfully!!!!\n");
displayHashValues(&head,number);
printf("Output(containing all hash values) has been generated in seperate file(output.out)\n");
long long int key;
int result;
while(1){
printf("Enter Key to search:::::::::Press -1 to exit\n");
scanf("%lli",&key);
if(key==-1)break;
if(key<0){
printf("NOT FOUND\n");
continue;
}
start=clock();
result=searchKey(&head,key);
end=clock();
time_taken=((double)(end-start))/CLOCKS_PER_SEC;
if(result==0){
printf("Key is present\tTime Taken:%lf sec.\n",time_taken);
}
else{
printf("NOT FOUND\tTime Taken:%lf sec.\n",time_taken);
}
}
}

long long int hashFunction(long long int a,long long int b,long long int p,long long int key,long long int mod){
return (((a*key)+b)%p)%mod;
}

void intializeFirstLevelHash(struct FirstLevelHashTable **head,long long int number[]){
long long int i;
*head=(struct FirstLevelHashTable*)malloc(sizeof(struct FirstLevelHashTable));
initRandom;
(*head)->p=Prime;
(*head)->a=rand()%Prime+1;
(*head)->b=rand()%Prime+1;
long long int firstIndex;
for(i=0;i<hashSize;i++){
(*head)->count[i]=0;
}
FILE *fp;
fp=fopen("input.in","r");
if(fp==NULL){
printf("Couldn't open the input file\n");
exit(-1);
}
while(fscanf(fp,"%lli[^\n]",&number[noOfElement])!=EOF){
firstIndex=hashFunction((*head)->a,(*head)->b,(*head)->p,number[noOfElement],hashSize);
(*head)->count[firstIndex]++;
noOfElement++;
if(noOfElement==maxNoElement){break;}
}
fclose(fp);
}

void intializeSecondLevelHash(struct FirstLevelHashTable **head){
long long int i,j;
for(i=0;i<hashSize;i++){
long long int count=(*head)->count[i];
if(count>=1){
(*head)->ptr[i]=(struct SecondLevelHashTable*)malloc(sizeof(struct SecondLevelHashTable)+sizeof(long long int)*(count*count));
(*head)->ptr[i]->length=(count*count);
long long int a=rand()%Prime+1;
long long int b=rand()%Prime+1;
(*head)->ptr[i]->a=a;
(*head)->ptr[i]->b=b;
(*head)->ptr[i]->p=Prime;
for(j=0;j<(count*count);j++){
(*head)->ptr[i]->number[j]=0;
}
}else{
(*head)->ptr[i]=NULL;
}
}
}

void mapKey(struct FirstLevelHashTable **head,long long int number[]){
long long int firstIndex,i,j,index,value;
for(i=0;i<noOfElement;i++){
firstIndex=hashFunction((*head)->a,(*head)->b,(*head)->p,number[i],hashSize);
if((*head)->count[firstIndex]>=1){
index=hashFunction((*head)->ptr[firstIndex]->a,(*head)->ptr[firstIndex]->b,(*head)->ptr[firstIndex]->p,number[i],(*head)->ptr[firstIndex]->length);
value=(*head)->ptr[firstIndex]->number[index];
if(value!=number[i]){
if(value>(long long int)0){
int flag=1;
long long int arrlength=(*head)->count[firstIndex];
long long int arr[arrlength],length=((*head))->ptr[firstIndex]->length;
int count=0;
for(j=0;j<length;j++){
if((*head)->ptr[firstIndex]->number[j]>(long long int)0){
arr[count]=((*head))->ptr[firstIndex]->number[j];
count++;
}
((*head))->ptr[firstIndex]->number[j]=0;
}
arr[count++]=number[i];
for(j=count;j<arrlength;j++){
arr[j]=0;
}
while(flag==1){
flag=rehashAll(&(*head),firstIndex,arr);
}
}else{
(*head)->ptr[firstIndex]->number[index]=number[i];
}
}
}
}

}
int rehashAll(struct FirstLevelHashTable **head,long long int firstIndex,long long int arr[]){
long long int i=0;
long long int index,value;
long long int a=rand()%Prime+1;
long long int b=rand()%Prime+1;
(*head)->ptr[firstIndex]->a=a;
(*head)->ptr[firstIndex]->b=b;
(*head)->ptr[firstIndex]->p=Prime;
long long int arrlength=(*head)->count[firstIndex];
long long int length=(*head)->ptr[firstIndex]->length;
for(i=0;i<length;i++){
(*head)->ptr[firstIndex]->number[i]=0;
}
for(i=0;i<arrlength;i++){
index=hashFunction((*head)->ptr[firstIndex]->a,(*head)->ptr[firstIndex]->b,(*head)->ptr[firstIndex]->p,arr[i],length);
value=(*head)->ptr[firstIndex]->number[index];
if(value>(long long int)0){
return 1;
}else{
(*head)->ptr[firstIndex]->number[index]=arr[i];
}
}
return 0;
}

int searchKey(struct FirstLevelHashTable **head,long long int key){
long long int a,b,p,firstIndex,secondIndex;
a=(*head)->a;
b=(*head)->b;
p=(*head)->p;
firstIndex=hashFunction(a,b,p,key,hashSize);
if((*head)->ptr[firstIndex]==NULL){
return 1;
}
a=(*head)->ptr[firstIndex]->a;
b=(*head)->ptr[firstIndex]->b;
p=(*head)->ptr[firstIndex]->p;
secondIndex=hashFunction(a,b,p,key,(*head)->ptr[firstIndex]->length);
if(key==((*head)->ptr[firstIndex]->number[secondIndex])){
return 0;
}else{
return 1;
}
}
void displayHashValues(struct FirstLevelHashTable **head,long long int number[]){
long long int i,firstIndex,secondIndex;
FILE *fout;
fout=fopen("output.out","w");
fprintf(fout,"%s\t\t\t%s\t\t%s\n","Key","Outer Index","Inner Index");
for(i=0;i<noOfElement;i++){
firstIndex=hashFunction((*head)->a,(*head)->b,(*head)->p,number[i],hashSize);
if((*head)->ptr[firstIndex]==NULL){
fprintf(fout,"%lli\t\t%s\n",number[i],"Not Found");
continue;
}
secondIndex=hashFunction((*head)->ptr[firstIndex]->a,(*head)->ptr[firstIndex]->b,(*head)->ptr[firstIndex]->p,number[i],(*head)->ptr[firstIndex]->length);
fprintf(fout,"%lli\t\t%lli\t\t\t%lli\n",number[i],firstIndex,secondIndex);
}
fclose(fout);
}

________________________________________________________________________________



Comments

Popular posts from this blog

Getting started with Deep Learning Caffe Framework - Fastest way(Installation +Web Demo)

Sudoku using dancing link - Java implementation

OpenCV 2.4.9 JAVA/CPP/Python Installation in Ubuntu 14.04