Array-based Hashtables
For simplicity, we will assume that we only insert numeric keys into the hashtable.
hash(x) = x % B, where B is the number of buckets.
(slide 15)
Implementation
class Hashtable {
  int buckets[B];
  bool occupied[B];
}
Insert(35): h = hash(35) = 35 % 9 = 8
Table (B = 9): [0] – | [1] – | [2] – | [3] – | [4] – | [5] – | [6] – | [7] – | [8] –
(slide 16)
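Insert
class Hashtable {
  int buckets[B];
  bool occupied[B];
}
Insert(35): h = hash(35) = 35 % 9 = 8
Insert(13): h = hash(13) = 13 % 9 = 4
Table (B = 9): [0] – | [1] – | [2] – | [3] – | [4] – | [5] – | [6] – | [7] – | [8] 35
(slide 17)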
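Insert
class Hashtable {
  int buckets[B];
  bool occupied[B];
}
Insert(35): h = hash(35) = 35 % 9 = 8
Insert(13): h = hash(13) = 13 % 9 = 4
Table (B = 9): [0] – | [1] – | [2] – | [3] – | [4] 13 | [5] – | [6] – | [7] – | [8] 35
(slide 18)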
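Insert
class Hashtable {
  int buckets[B];
  bool occupied[B];

  insert(x) {
    h = hash(x);
    buckets[h] = x;
    occupied[h] = true;
  }
}
Table (B = 9): [0] – | [1] – | [2] – | [3] – | [4] 13 | [5] – | [6] – | [7] – | [8] 35
(slide 19)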
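Insert
class Hashtable {
  int buckets[B];
  bool occupied[B];
}
Insert(31): h = hash(31) = 31 % 9 = 4  -> Collision (bucket 4 already holds 13)
Table (B = 9): [0] – | [1] – | [2] – | [3] – | [4] 13 | [5] – | [6] – | [7] – | [8] 35
(slide 20)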
Collision Resolution
If h(x) is occupied, we try other buckets.
We try $h_0(x), h_1(x), h_2(x), \dots$ until an empty bucket is found, where
$h_i(x) = (\mathrm{hash}(x) + f(i)) \bmod B$
and $f$ is called the collision resolution function.
(slide 21)
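Linear Probing: $f(i) = i$
Insert(31): h = hash(31) = 31 % 9 = 4
  h_0 = 4 + 0 = 4 (Collision)
  h_1 = 4 + 1 = 5
Table (B = 9): [0] – | [1] – | [2] – | [3] – | [4] 13 | [5] – | [6] – | [7] – | [8] 35
(slide 22)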
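Linear Probing: $f(i) = i$
Insert(31): h = hash(31) = 31 % 9 = 4
  h_0 = 4 + 0 = 4 (Collision)
  h_1 = 4 + 1 = 5
Table (B = 9): [0] – | [1] – | [2] – | [3] – | [4] 13 | [5] 31 | [6] – | [7] – | [8] 35
(slide 23)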
Linear Probing: $f(i) = i$
Insert(20): h = hash(20) = 20 % 9 = 2
  h_0 = 2 + 0 = 2
Table (B = 9): [0] – | [1] – | [2] – | [3] – | [4] 13 | [5] 31 | [6] – | [7] – | [8] 35
(slide 24)
Linear Probing: $f(i) = i$
Insert(20): h = hash(20) = 20 % 9 = 2
  h_0 = 2 + 0 = 2
Table (B = 9): [0] – | [1] – | [2] 20 | [3] – | [4] 13 | [5] 31 | [6] – | [7] – | [8] 35
(slide 25)
Linear Probing: $f(i) = i$
Insert(26): h = hash(26) = 26 % 9 = 8
  h_0 = 8 + 0 = 8 (Collision)
  h_1 = (8 + 1) % 9 = 0
Table (B = 9): [0] – | [1] – | [2] 20 | [3] – | [4] 13 | [5] 31 | [6] – | [7] – | [8] 35
(slide 26)
Linear Probing: $f(i) = i$
Insert(26): h = hash(26) = 26 % 9 = 8
  h_0 = 8 + 0 = 8 (Collision)
  h_1 = (8 + 1) % 9 = 0
Table (B = 9): [0] 26 | [1] – | [2] 20 | [3] – | [4] 13 | [5] 31 | [6] – | [7] – | [8] 35
(slide 27)
Linear Probing: $f(i) = i$
Insert(40): h = hash(40) = 40 % 9 = 4
  h_0 = 4 + 0 = 4 (Collision)
  h_1 = 4 + 1 = 5 (Collision)
  h_2 = 4 + 2 = 6
Table (B = 9): [0] 26 | [1] – | [2] 20 | [3] – | [4] 13 | [5] 31 | [6] – | [7] – | [8] 35
(slide 28)
Linear Probing: $f(i) = i$
Insert(40): h = hash(40) = 40 % 9 = 4
  h_0 = 4 + 0 = 4 (Collision)
  h_1 = 4 + 1 = 5 (Collision)
  h_2 = 4 + 2 = 6
Table (B = 9): [0] 26 | [1] – | [2] 20 | [3] – | [4] 13 | [5] 31 | [6] 40 | [7] – | [8] 35
(slide 29)
Find
Find(40): h = hash(40) = 40 % 9 = 4
  h_0 = 4 + 0 = 4
Table (B = 9): [0] 26 | [1] – | [2] 20 | [3] – | [4] 13 <- probe? | [5] 31 | [6] 40 | [7] – | [8] 35
(slide 30)
Find
Find(40): h = hash(40) = 40 % 9 = 4
  h_0 = 4 + 0 = 4 (No match)
  h_1 = 4 + 1 = 5 (No match)
Table (B = 9): [0] 26 | [1] – | [2] 20 | [3] – | [4] 13 | [5] 31 <- probe? | [6] 40 | [7] – | [8] 35
(slide 31)
Find
Find(40): h = hash(40) = 40 % 9 = 4
  h_0 = 4 + 0 = 4 (No match)
  h_1 = 4 + 1 = 5 (No match)
  h_2 = 4 + 2 = 6 (Match)
Table (B = 9): [0] 26 | [1] – | [2] 20 | [3] – | [4] 13 | [5] 31 | [6] 40 | [7] – | [8] 35
(slide 32)
Find
Find(22): h = hash(22) = 22 % 9 = 4
  h_0 = 4 + 0 = 4
Table (B = 9): [0] 26 | [1] – | [2] 20 | [3] – | [4] 13 <- probe? | [5] 31 | [6] 40 | [7] – | [8] 35
(slide 33)
Find
Find(22): h = hash(22) = 22 % 9 = 4
  h_0 = 4 + 0 = 4 (No match)
  h_1 = 4 + 1 = 5 (No match)
Table (B = 9): [0] 26 | [1] – | [2] 20 | [3] – | [4] 13 | [5] 31 <- probe? | [6] 40 | [7] – | [8] 35
(slide 34)
Find
Find(22): h = hash(22) = 22 % 9 = 4
  h_0 = 4 + 0 = 4 (No match)
  h_1 = 4 + 1 = 5 (No match)
  h_2 = 4 + 2 = 6 (No match)
Table (B = 9): [0] 26 | [1] – | [2] 20 | [3] – | [4] 13 | [5] 31 | [6] 40 <- probe? | [7] – | [8] 35
(slide 35)
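Find
Find(22): h = hash(22) = 22 % 9 = 4
  h_0 = 4 + 0 = 4 (No match)
  h_1 = 4 + 1 = 5 (No match)
  h_2 = 4 + 2 = 6 (No match)
  h_3 = 4 + 3 = 7 (Empty)
Table (B = 9): [0] 26 | [1] – | [2] 20 | [3] – | [4] 13 | [5] 31 | [6] 40 | [7] – <- probe? | [8] 35
(slide 36)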
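Find
Find(22): h = hash(22) = 22 % 9 = 4
  h_0 = 4 + 0 = 4 (No match)
  h_1 = 4 + 1 = 5 (No match)
  h_2 = 4 + 2 = 6 (No match)
  h_3 = 4 + 3 = 7 (Empty)
  Return false (not found)
Table (B = 9): [0] 26 | [1] – | [2] 20 | [3] – | [4] 13 | [5] 31 | [6] 40 | [7] – <- probe? | [8] 35
(slide 37)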
Insert & Find
void insert(x) {
  h = hash(x);
  while (occupied[h]) {
    h++;
  }
  buckets[h] = x;
  occupied[h] = true;
}

bool find(x) {
  h = hash(x);
  while (occupied[h]) {
    if (buckets[h] == x)
      return true;
    h++;
  }
  return false;
}

Do you spot an error?
(slide 38)
Insert & Find
void insert(x) {
  h = hash(x);
  while (occupied[h]) {
    h = (h+1) % B;
  }
  buckets[h] = x;
  occupied[h] = true;
}

bool find(x) {
  h = hash(x);
  while (occupied[h]) {
    if (buckets[h] == x)
      return true;
    h = (h+1) % B;
  }
  return false;
}
(slide 39)
Deletion
bool delete(x) {
  h = hash(x);
  while (occupied[h]) {
    if (buckets[h] == x) {
      occupied[h] = false;
      return true;
    }
    h = (h+1) % B;
  }
  return false;
}
(slide 40)
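Delete
Delete(13): h = hash(13) = 13 % 9 = 4
  h_0 = 4 + 0 = 4 (Match)
Is this correct??
Table (B = 9): [0] 26 | [1] – | [2] 20 | [3] – | [4] 13 (matched, marked unoccupied) | [5] 31 | [6] 40 | [7] – | [8] 35
(slide 41)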
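Delete
Find(40): h = hash(40) = 40 % 9 = 4
  h_0 = 4 + 0 = 4 (No match: bucket 4 is no longer occupied, so the probe stops here)
  return false!!!
Table (B = 9): [0] 26 | [1] – | [2] 20 | [3] – | [4] – <- probe? | [5] 31 | [6] 40 | [7] – | [8] 35
(slide 42)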
Revisit the Design
class Hashtable {
  int buckets[B];
  enum {E, O, D} status[B];
}
The status of each bucket is either Empty (E), Occupied (O), or Deleted (D).
Initially all buckets are empty.
(slide 43)
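Delete (with status)
Delete(13): h = hash(13) = 13 % 9 = 4
  h_0 = 4 + 0 = 4 (Match)
Table (index: status, key): [0] O 26 | [1] E | [2] O 20 | [3] E | [4] O 13 <- probe? | [5] O 31 | [6] O 40 | [7] E | [8] O 35
(slide 44)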
Delete (with status)
Delete(13): h = hash(13) = 13 % 9 = 4
  h_0 = 4 + 0 = 4 (Match)
Find(40): h = hash(40) = 40 % 9 = 4
  h_0 = 4 + 0 = 4 (No match)
  h_1 = 4 + 1 = 5 (No match)
  h_2 = 4 + 2 = 6 (Match)
  return true
Table (index: status, key): [0] O 26 | [1] E | [2] O 20 | [3] E | [4] D | [5] O 31 | [6] O 40 | [7] E | [8] O 35
(slide 45)
Insert, Find & Delete
void insert(x) {
  h = hash(x);
  while (status[h] == O) {
    h = (h+1) % B;
  }
  buckets[h] = x;
  status[h] = O;
}

bool delete(x) {
  h = hash(x);
  while (status[h] != E) {
    if (buckets[h] == x) {
      status[h] = D;
      return true;
    }
    h = (h+1) % B;
  }
  return false;
}

bool find(x) {
  h = hash(x);
  while (status[h] != E) {
    if (status[h] == O && buckets[h] == x)
      return true;
    h = (h+1) % B;
  }
  return false;
}
(slide 46)
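Primary Clustering
A cluster (a block) of buckets that are all occupied.
Any key that hits the cluster will have to linearly probe until the end of the cluster.
Table (index: status, key): [0] O 26 | [1] E | [2] O 20 | [3] E | [4] O 13 | [5] O 31 | [6] O 40 | [7] E | [8] O 35
Primary clustering: buckets 4–6 form a contiguous occupied block.
(slide 47)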
Quadratic Probing: $f(i) = i^2$
Insert(15)
Table (index: status, key): [0] E | [1] E | [2] E | [3] E | [4] E | [5] E | [6] E | [7] E | [8] E
(slide 48)
Quadratic Probing: $f(i) = i^2$
Insert(15)
Insert(23)
Table (index: status, key): [0] E | [1] E | [2] E | [3] E | [4] E | [5] E | [6] O 15 | [7] E | [8] E
(slide 49)
Quadratic Probing: $f(i) = i^2$
Insert(15)
Insert(23)
Insert(14)
Table (index: status, key): [0] E | [1] E | [2] E | [3] E | [4] E | [5] O 23 | [6] O 15 | [7] E | [8] E
(slide 50)
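Quadratic Probing: $f(i) = i^2$
Insert(15)
Insert(23)
Insert(14): hash(14) = 14 % 9 = 5
  h_0 = 5 + 0 = 5 (Occupied)
  h_1 = 5 + 1 = 6 (Occupied)
  h_2 = (5 + 4) % 9 = 0 (Empty)
Table (index: status, key): [0] E | [1] E | [2] E | [3] E | [4] E | [5] O 23 | [6] O 15 | [7] E | [8] E
(slide 51)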
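Quadratic Probing: $f(i) = i^2$
Insert(15)
Insert(23)
Insert(14): hash(14) = 14 % 9 = 5
  h_0 = 5 + 0 = 5 (Occupied)
  h_1 = 5 + 1 = 6 (Occupied)
  h_2 = (5 + 4) % 9 = 0 (Empty)
Insert(33)
Table (index: status, key): [0] O 14 | [1] E | [2] E | [3] E | [4] E | [5] O 23 | [6] O 15 | [7] E | [8] E
(slide 52)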
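Quadratic Probing: $f(i) = i^2$
Insert(15)
Insert(23)
Insert(14): hash(14) = 14 % 9 = 5
  h_0 = 5 + 0 = 5 (Occupied)
  h_1 = 5 + 1 = 6 (Occupied)
  h_2 = (5 + 4) % 9 = 0 (Empty)
Insert(33): hash(33) = 33 % 9 = 6
  h_0 = 6 + 0 = 6 (Occupied)
  h_1 = 6 + 1 = 7 (Empty)
Table (index: status, key): [0] O 14 | [1] E | [2] E | [3] E | [4] E | [5] O 23 | [6] O 15 | [7] E | [8] E
(slide 53)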
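Quadratic Probing: $f(i) = i^2$
Insert(41): hash(41) = 41 % 9 = 5
  h_0 = 5 + 0 = 5 (Occupied)
  h_1 = 5 + 1 = 6 (Occupied)
  h_2 = (5 + 4) % 9 = 0 (Occupied)
  h_3 = (5 + 9) % 9 = 5 (Occupied)
  h_4 = (5 + 16) % 9 = 3 (Empty)
Table (index: status, key): [0] O 14 | [1] E | [2] E | [3] O 41 | [4] E | [5] O 23 | [6] O 15 | [7] O 33 | [8] E
(slide 54)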
Secondary Clustering
For two distinct keys $x_1$ and $x_2$, if $h(x_1) = h(x_2)$, then $h_i(x_1) = h_i(x_2)$ for all $i$.
In other words, if two keys start at the same position, they will probe the same sequence of buckets.
Table (index: status, key): [0] O 14 | [1] E | [2] E | [3] O 41 | [4] E | [5] O 23 | [6] O 15 | [7] O 33 | [8] E
(slide 55)
Double Hashing: $f(i) = i \cdot \mathrm{hash}_2(x)$
where $\mathrm{hash}_2$ is an alternative hash function.
Double hashing can eliminate both primary and secondary clustering.
(slide 56)