算法-查找

符号表|二叉查找树|平衡查找树|散列表|查找的应用

符号表

顺序查找

基于无序链表

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
public class SequentialSearchST<Key, Value>
{
private int n; // number of key-value pairs
private Node first; // the linked list of key-value pairs

private class Node
{
private Key key;
private Value val;
private Node next;

public Node(Key key, Value val, Node next)
{
this.key = key;
this.val = val;
this.next = next;
}
}

public SequentialSearchST() {}

public int size()
{
return n;
}

public boolean isEmpty()
{
return size() == 0;
}

public boolean contains(Key key)
{
if (key == null) throw new IllegalArgumentException("argument to contains() is null");
return get(key) != null;
}

public Value get(Key key)
{
if (key == null) throw new IllegalArgumentException("argument to get() is null");
for (Node x = first; x != null; x = x.next)
{
if (key.equals(x.key))
return x.val;
}
return null;
}

public void put(Key key, Value val)
{
if (key == null) throw new IllegalArgumentException("first argument to put() is null");
if (val == null)
{
delete(key);
return;
}

for (Node x = first; x != null; x = x.next)
{
if (key.equals(x.key))
{
x.val = val;
return;
}
}
first = new Node(key, val, first);
n++;
}

public void delete(Key key)
{
if (key == null) throw new IllegalArgumentException("argument to delete() is null");
first = delete(first, key);
}

private Node delete(Node x, Key key)
{
if (x == null) return null;
if (key.equals(x.key))
{
n--;
return x.next;
}
x.next = delete(x.next, key);
return x;
}

public Iterable<Key> keys()
{
Queue<Key> queue = new Queue<Key>();
for (Node x = first; x != null; x = x.next)
queue.enqueue(x.key);
return queue;
}

//Unit tests
public static void main(String[] args) {
SequentialSearchST<String, Integer> st = new SequentialSearchST<String, Integer>();
for (int i = 0; !StdIn.isEmpty(); i++) {
String key = StdIn.readString();
st.put(key, i);
}
for (String s : st.keys())
StdOut.println(s + " " + st.get(s));
}
}

二分查找

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
import java.util.NoSuchElementException;

public class BinarySearchST<Key extends Comparable<Key>, Value>
{
private static final int INIT_CAPACITY = 2;
private Key[] keys;
private Value[] vals;
private int n = 0;

public BinarySearchST()
{
this(INIT_CAPACITY);
}

public BinarySearchST(int capacity)
{
keys = (Key[]) new Comparable[capacity];
vals = (Value[]) new Object[capacity];
}

private void resize(int capacity)
{
assert capacity >= n;
Key[] tempk = (Key[]) new Comparable[capacity];
Value[] tempv = (Value[]) new Object[capacity];
for (int i = 0; i < n; i++)
{
tempk[i] = keys[i];
tempv[i] = vals[i];
}
vals = tempv;
keys = tempk;
}

public int size()
{
return n;
}

public boolean isEmpty()
{
return size() == 0;
}

public boolean contains(Key key)
{
if (key == null) throw new IllegalArgumentException("argument to contains() is null");
return get(key) != null;
}

public Value get(Key key)
{
if (key == null) throw new IllegalArgumentException("argument to get() is null");
if (isEmpty()) return null;
int i = rank(key);
if (i < n && keys[i].compareTo(key) == 0) return vals[i];
return null;
}

public int rank(Key key)
{
if (key == null) throw new IllegalArgumentException("argument to rank() is null");

int lo = 0, hi = n - 1;
while (lo <= hi)
{
int mid = lo + (hi - lo) / 2;
int cmp = key.compareTo(keys[mid]);
if (cmp < 0) hi = mid - 1;
else if (cmp > 0) lo = mid + 1;
else return mid;
}
return lo;
}

public void put(Key key, Value val)
{
if (key == null) throw new IllegalArgumentException("first argument to put() is null");

if (val == null)
{
delete(key);
return;
}

int i = rank(key);

if (i < n && keys[i].compareTo(key) == 0)
{
vals[i] = val;
return;
}

if (n == keys.length) resize(2 * keys.length);

for (int j = n; j > i; j--)
{
keys[j] = keys[j - 1];
vals[j] = vals[j - 1];
}
keys[i] = key;
vals[i] = val;
n++;

assert check();
}

public void delete(Key key)
{
if (key == null) throw new IllegalArgumentException("argument to delete() is null");
if (isEmpty()) return;

int i = rank(key);

if (i == n || keys[i].compareTo(key) != 0)
{
return;
}

for (int j = i; j < n - 1; j++)
{
keys[j] = keys[j + 1];
vals[j] = vals[j + 1];
}

n--;
keys[n] = null;
vals[n] = null;

if (n > 0 && n == keys.length / 4)
resize(keys.length / 2);

assert check();
}

public void deleteMin()
{
if (isEmpty()) throw new NoSuchElementException("Symbol table underflow error");
delete(min());
}

public void deleteMax()
{
if (isEmpty()) throw new NoSuchElementException("Symbol table underflow error");
delete(max());
}

public Key min()
{
if (isEmpty()) throw new NoSuchElementException("called min() with empty symbol table");
return keys[0];
}

public Key max()
{
if (isEmpty()) throw new NoSuchElementException("called max() with empty symbol table");
return keys[n - 1];
}

public Key select(int k)
{
if (k < 0 || k >= size()) {
throw new IllegalArgumentException("called select() with invalid argument: " + k);
}
return keys[k];
}

public Key floor(Key key)
{
if (key == null) throw new IllegalArgumentException("argument to floor() is null");
int i = rank(key);
if (i < n && key.compareTo(keys[i]) == 0) return keys[i];
if (i == 0) return null;
else return keys[i - 1];
}

public Key ceiling(Key key)
{
if (key == null) throw new IllegalArgumentException("argument to ceiling() is null");
int i = rank(key);
if (i == n) return null;
else return keys[i];
}

public int size(Key lo, Key hi)
{
if (lo == null) throw new IllegalArgumentException("first argument to size() is null");
if (hi == null) throw new IllegalArgumentException("second argument to size() is null");

if (lo.compareTo(hi) > 0) return 0;
if (contains(hi)) return rank(hi) - rank(lo) + 1;
else return rank(hi) - rank(lo);
}

public Iterable<Key> keys()
{
return keys(min(), max());
}

public Iterable<Key> keys(Key lo, Key hi)
{
if (lo == null) throw new IllegalArgumentException("first argument to keys() is null");
if (hi == null) throw new IllegalArgumentException("second argument to keys() is null");

Queue<Key> queue = new Queue<Key>();
if (lo.compareTo(hi) > 0) return queue;
for (int i = rank(lo); i < rank(hi); i++)
queue.enqueue(keys[i]);
if (contains(hi)) queue.enqueue(keys[rank(hi)]);
return queue;
}

private boolean check()
{
return isSorted() && rankCheck();
}

private boolean isSorted()
{
for (int i = 1; i < size(); i++)
if (keys[i].compareTo(keys[i - 1]) < 0) return false;
return true;
}

private boolean rankCheck()
{
for (int i = 0; i < size(); i++)
if (i != rank(select(i))) return false;
for (int i = 0; i < size(); i++)
if (keys[i].compareTo(select(rank(keys[i]))) != 0) return false;
return true;
}

//Unit test
public static void main(String[] args)
{
BinarySearchST<String, Integer> st = new BinarySearchST<String, Integer>();
for (int i = 0; !StdIn.isEmpty(); i++)
{
String key = StdIn.readString();
st.put(key, i);
}
for (String s : st.keys())
StdOut.println(s + " " + st.get(s));
}
}

在$N$个键的有序数组中进行二分查找最多需要$(lgN+1)$次比较(无论是否成功)

问:在$BinarySearchST$中的类型转换之前,为什么不将$keys[]$和$vals[ ]$一样声明为$Object[ ]$而是$Comparable[ ]$?

答:问得好。如果你这么做,你会得到一个$ClassCastException$,因为键只能是$Comparable[ ]$的(以保证$keys[]$中的元素都有$compareTo()$方法)。因此将$keys[]$声明为$Comparable[ ]$是必需的

二叉查找树

定义。一棵二叉查找树(BST)树是一棵二叉树,其中每个结点都含有一个$Comparable$的键(以及相关联的值)且每个结点的键都大于其左子树中任意结点的键而小于右子树的任意结点的键

递归实现

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
import edu.princeton.cs.algs4.StdIn;
import edu.princeton.cs.algs4.StdOut;

import java.util.NoSuchElementException;

public class BST<Key extends Comparable<Key>, Value>
{
private Node root;

private class Node
{
private Key key;
private Value val;
private Node left, right;
private int size;

public Node(Key key, Value val, int size)
{
this.key = key;
this.val = val;
this.size = size;
}
}

public BST() {}

public boolean isEmpty()
{
return size() == 0;
}

public int size()
{
return size(root);
}

private int size(Node x)
{
if (x == null) return 0;
else return x.size;
}

public boolean contains(Key key)
{
if (key == null) throw new IllegalArgumentException("argument to contains() is null");
return get(key) != null;
}

public Value get(Key key)
{
return get(root, key);
}

private Value get(Node x, Key key)
{
if (key == null) throw new IllegalArgumentException("calls get() with a null key");
if (x == null) return null;
int cmp = key.compareTo(x.key);
if (cmp < 0) return get(x.left, key);
else if (cmp > 0) return get(x.right, key);
else return x.val;
}

public void put(Key key, Value val)
{
if (key == null) throw new IllegalArgumentException("calls put() with a null key");
if (val == null)
{
delete(key);
return;
}
root = put(root, key, val);
assert check();
}

private Node put(Node x, Key key, Value val)
{
if (x == null) return new Node(key, val, 1);
int cmp = key.compareTo(x.key);
if (cmp < 0) x.left = put(x.left, key, val);
else if (cmp > 0) x.right = put(x.right, key, val);
else x.val = val;
x.size = 1 + size(x.left) + size(x.right);
return x;
}

public void deleteMin()
{
if (isEmpty()) throw new NoSuchElementException("Symbol table underflow");
root = deleteMin(root);
assert check();
}

private Node deleteMin(Node x)
{
if (x.left == null) return x.right;
x.left = deleteMin(x.left);
x.size = size(x.left) + size(x.right) + 1;
return x;
}

public void deleteMax()
{
if (isEmpty()) throw new NoSuchElementException("Symbol table underflow");
root = deleteMax(root);
assert check();
}

private Node deleteMax(Node x)
{
if (x.right == null) return x.left;
x.right = deleteMax(x.right);
x.size = size(x.left) + size(x.right) + 1;
return x;
}

public void delete(Key key)
{
if (key == null) throw new IllegalArgumentException("calls delete() with a null key");
root = delete(root, key);
assert check();
}

private Node delete(Node x, Key key)
{
if (x == null) return null;

int cmp = key.compareTo(x.key);
if (cmp < 0) x.left = delete(x.left, key);
else if (cmp > 0) x.right = delete(x.right, key);
else
{
if (x.right == null) return x.left;
if (x.left == null) return x.right;
Node t = x;
x = min(t.right);
x.right = deleteMin(t.right);
x.left = t.left;
}
x.size = size(x.left) + size(x.right) + 1;
return x;
}

public Key min()
{
if (isEmpty()) throw new NoSuchElementException("calls min() with empty symbol table");
return min(root).key;
}

private Node min(Node x)
{
if (x.left == null) return x;
else return min(x.left);
}

public Key max()
{
if (isEmpty()) throw new NoSuchElementException("calls max() with empty symbol table");
return max(root).key;
}

private Node max(Node x)
{
if (x.right == null) return x;
else return max(x.right);
}

public Key floor(Key key)
{
if (key == null) throw new IllegalArgumentException("argument to floor() is null");
if (isEmpty()) throw new NoSuchElementException("calls floor() with empty symbol table");
Node x = floor(root, key);
if (x == null) throw new NoSuchElementException("argument to floor() is too small");
else return x.key;
}

private Node floor(Node x, Key key)
{
if (x == null) return null;
int cmp = key.compareTo(x.key);
if (cmp == 0) return x;
if (cmp < 0) return floor(x.left, key);
Node t = floor(x.right, key);
if (t != null) return t;
else return x;
}

public Key floor2(Key key)
{
Key x = floor2(root, key, null);
if (x == null) throw new NoSuchElementException("argument to floor() is too small");
else return x;

}

private Key floor2(Node x, Key key, Key best)
{
if (x == null) return best;
int cmp = key.compareTo(x.key);
if (cmp < 0) return floor2(x.left, key, best);
else if (cmp > 0) return floor2(x.right, key, x.key);
else return x.key;
}

public Key ceiling(Key key)
{
if (key == null) throw new IllegalArgumentException("argument to ceiling() is null");
if (isEmpty()) throw new NoSuchElementException("calls ceiling() with empty symbol table");
Node x = ceiling(root, key);
if (x == null) throw new NoSuchElementException("argument to floor() is too large");
else return x.key;
}

private Node ceiling(Node x, Key key)
{
if (x == null) return null;
int cmp = key.compareTo(x.key);
if (cmp == 0) return x;
if (cmp < 0)
{
Node t = ceiling(x.left, key);
if (t != null) return t;
else return x;
}
return ceiling(x.right, key);
}

public Key select(int rank)
{
if (rank < 0 || rank >= size()) {
throw new IllegalArgumentException("argument to select() is invalid: " + rank);
}
return select(root, rank);
}

private Key select(Node x, int rank)
{
if (x == null) return null;
int leftSize = size(x.left);
if (leftSize > rank) return select(x.left, rank);
else if (leftSize < rank) return select(x.right, rank - leftSize - 1);
else return x.key;
}

public int rank(Key key)
{
if (key == null) throw new IllegalArgumentException("argument to rank() is null");
return rank(key, root);
}

private int rank(Key key, Node x)
{
if (x == null) return 0;
int cmp = key.compareTo(x.key);
if (cmp < 0) return rank(key, x.left);
else if (cmp > 0) return 1 + size(x.left) + rank(key, x.right);
else return size(x.left);
}

public Iterable<Key> keys()
{
if (isEmpty()) return new Queue<Key>();
return keys(min(), max());
}

public Iterable<Key> keys(Key lo, Key hi)
{
if (lo == null) throw new IllegalArgumentException("first argument to keys() is null");
if (hi == null) throw new IllegalArgumentException("second argument to keys() is null");

Queue<Key> queue = new Queue<Key>();
keys(root, queue, lo, hi);
return queue;
}

private void keys(Node x, Queue<Key> queue, Key lo, Key hi)
{
if (x == null) return;
int cmplo = lo.compareTo(x.key);
int cmphi = hi.compareTo(x.key);
if (cmplo < 0) keys(x.left, queue, lo, hi);
if (cmplo <= 0 && cmphi >= 0) queue.enqueue(x.key);
if (cmphi > 0) keys(x.right, queue, lo, hi);
}

public int size(Key lo, Key hi)
{
if (lo == null) throw new IllegalArgumentException("first argument to size() is null");
if (hi == null) throw new IllegalArgumentException("second argument to size() is null");

if (lo.compareTo(hi) > 0) return 0;
if (contains(hi)) return rank(hi) - rank(lo) + 1;
else return rank(hi) - rank(lo);
}

public int height()
{
return height(root);
}

private int height(Node x)
{
if (x == null) return -1;
return 1 + Math.max(height(x.left), height(x.right));
}

public Iterable<Key> levelOrder()
{
Queue<Key> keys = new Queue<Key>();
Queue<Node> queue = new Queue<Node>();
queue.enqueue(root);
while (!queue.isEmpty())
{
Node x = queue.dequeue();
if (x == null) continue;
keys.enqueue(x.key);
queue.enqueue(x.left);
queue.enqueue(x.right);
}
return keys;
}

//Check integrity of BST data structure.
private boolean check()
{
if (!isBST()) StdOut.println("Not in symmetric order");
if (!isSizeConsistent()) StdOut.println("Subtree counts not consistent");
if (!isRankConsistent()) StdOut.println("Ranks not consistent");
return isBST() && isSizeConsistent() && isRankConsistent();
}

// does this binary tree satisfy symmetric order?
// Note: this test also ensures that data structure is a binary tree since order is strict
private boolean isBST()
{
return isBST(root, null, null);
}

private boolean isBST(Node x, Key min, Key max)
{
if (x == null) return true;
if (min != null && x.key.compareTo(min) <= 0) return false;
if (max != null && x.key.compareTo(max) >= 0) return false;
return isBST(x.left, min, x.key) && isBST(x.right, x.key, max);
}

// are the size fields correct?
private boolean isSizeConsistent()
{
return isSizeConsistent(root);
}
private boolean isSizeConsistent(Node x)
{
if (x == null) return true;
if (x.size != size(x.left) + size(x.right) + 1) return false;
return isSizeConsistent(x.left) && isSizeConsistent(x.right);
}

// check that ranks are consistent
private boolean isRankConsistent()
{
for (int i = 0; i < size(); i++)
if (i != rank(select(i))) return false;
for (Key key : keys())
if (key.compareTo(select(rank(key))) != 0) return false;
return true;
}


/**
* Unit tests the {@code BST} data type.
*
* @param args the command-line arguments
*/
public static void main(String[] args)
{
BST<String, Integer> st = new BST<String, Integer>();
for (int i = 0; !StdIn.isEmpty(); i++)
{
String key = StdIn.readString();
st.put(key, i);
}

for (String s : st.levelOrder())
StdOut.println(s + " " + st.get(s));

StdOut.println();

for (String s : st.keys())
StdOut.println(s + " " + st.get(s));
}
}

在由$N$个随机键构成的二叉查找树中,查找命中平均所需要的比较次数为~$2lnN$(约$1.39lnN$)

在由$N$个随机键构成的二叉查找树中插入操作和查找未命中平均所需的比较次数为~$2lnN$(约$1.39lnN$)

平衡查找树

2-3查找树

定义。一棵2-3查找树或为一棵空树,或由以下结点组成:

2-结点,含有一个键(及其对应的值)和两条链接,左链接指向的2-3树中的键都小于该结点,右链接指向的2-3树中的键都大于该结点。

3-结点,含有两个键(及其对应的值)和三条链接,左链接指向的2-3树中的键都小于该结点,中链接指向的2-3树中的键都位于该结点的两个键之间,右链接指向的2-3树中的结点都大于该结点。

在一棵大小为$N$的2-3树中,查找和插入操作访问的结点必然不超过$lgN$个。

2-3树在最坏情况下仍有较好的性能。每个操作中处理每个结点的时间不会超过一个很小的常数,且这两个操作都只会访问一条路径上的结点,所以任何查找或者插入的成本肯定不会超过对数级别。

含有10亿个结点的一棵2-3树的高度仅在19到30之间。我们最多只需要访问30个结点就能够在10亿个键中进行任何查找和插入操作,这是相当惊人的。

但是,我们和真正的实现还有一段距离。尽管我们可以用不同的数据类型表示2-结点和3-结点并写出变换所需的代码,但用这种直白的表示方法实现大多数的操作并不方便,因为需要处理的的情况实在太多。我们需要维护两种不同类型的结点,将被查找的键和结点中的每个键进行比较,将链接和其他信息从一个结点复制到另一个结点,将结点从一种数据类型转换到另一种数据类型,等等。实现这些不仅需要大量的代码,而且它们所产生的额外开销可能会使算法比标准的二叉查找树更慢。平衡一棵树的初衷是为了消除最坏情况,但我们希望这种保障所需的代码能够越少越好。幸运的是你将看到,我们只需要一点点代价就能用一种统一的方式完成所有交换。

红黑二叉查找树

红黑树的一种定义是含有红黑链接并满足下列条件的二叉查找树

红链接均为左链接

没有任何一个结点同时和两条红链接相连

该树是完美黑色平衡的,即任意空链接到根结点的路径上黑链接数量相同

红黑树都既是二叉查找树,也是2-3树

根结点总是黑色

颜色转换会使根结点变为红色。这也可能出现在很大的红黑树中。严格地说,红色的根结点说明根结点是一个3-结点的一部分,但实际情况并不是这样。因此我们在每次插入后都会将根结点设为黑色。注意,每当根结点由红变黑时树的黑链接高度就会加1

命题。一棵大小为$N$的红黑树的高度不会超过$2lgN$

命题。一棵大小为$N$的红黑树中,根结点到任意结点的平均路径长度为~$1.00lgN$

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
import java.util.NoSuchElementException;

public class RedBlackBST<Key extends Comparable<Key>, Value>
{
private static final boolean RED = true;
private static final boolean BLACK = false;

private Node root; // root of the BST

// BST helper node data type
private class Node
{
private Key key; // key
private Value val; // associated data
private Node left, right; // links to left and right subtrees
private boolean color; // color of parent link
private int size; // subtree count

public Node(Key key, Value val, boolean color, int size)
{
this.key = key;
this.val = val;
this.color = color;
this.size = size;
}
}

public RedBlackBST() {}

private boolean isRed(Node x)
{
if (x == null) return false;
return x.color == RED;
}

// number of node in subtree rooted at x; 0 if x is null
private int size(Node x)
{
if (x == null) return 0;
return x.size;
}

public int size()
{
return size(root);
}

public boolean isEmpty()
{
return root == null;
}


/***************************************************************************
* Standard BST search.
***************************************************************************/

//Returns the value associated with the given key.
public Value get(Key key)
{
if (key == null) throw new IllegalArgumentException("argument to get() is null");
return get(root, key);
}

// value associated with the given key in subtree rooted at x; null if no such key
private Value get(Node x, Key key)
{
while (x != null) {
int cmp = key.compareTo(x.key);
if (cmp < 0) x = x.left;
else if (cmp > 0) x = x.right;
else return x.val;
}
return null;
}

public boolean contains(Key key)
{
return get(key) != null;
}

/***************************************************************************
* Red-black tree insertion.
***************************************************************************/

public void put(Key key, Value val)
{
if (key == null) throw new IllegalArgumentException("first argument to put() is null");
if (val == null)
{
delete(key);
return;
}

root = put(root, key, val);
root.color = BLACK;
// assert check();
}

// insert the key-value pair in the subtree rooted at h
private Node put(Node h, Key key, Value val)
{
if (h == null) return new Node(key, val, RED, 1);

int cmp = key.compareTo(h.key);
if (cmp < 0) h.left = put(h.left, key, val);
else if (cmp > 0) h.right = put(h.right, key, val);
else h.val = val;

// fix-up any right-leaning links
if (isRed(h.right) && !isRed(h.left)) h = rotateLeft(h);
if (isRed(h.left) && isRed(h.left.left)) h = rotateRight(h);
if (isRed(h.left) && isRed(h.right)) flipColors(h);
h.size = size(h.left) + size(h.right) + 1;

return h;
}

/***************************************************************************
* Red-black tree deletion.
***************************************************************************/

//Removes the smallest key and associated value from the symbol table.
public void deleteMin()
{
if (isEmpty()) throw new NoSuchElementException("BST underflow");

// if both children of root are black, set root to red
if (!isRed(root.left) && !isRed(root.right))
root.color = RED;

root = deleteMin(root);
if (!isEmpty()) root.color = BLACK;
// assert check();
}

// delete the key-value pair with the minimum key rooted at h
private Node deleteMin(Node h)
{
//如果根结点无左孩子,则它必无右孩子,否则应该旋转使根结点成为其右孩子的左孩子
if (h.left == null)
return null;

if (!isRed(h.left) && !isRed(h.left.left))
h = moveRedLeft(h);

h.left = deleteMin(h.left);
return balance(h);
}

//Removes the largest key and associated value from the symbol table.
public void deleteMax()
{
if (isEmpty()) throw new NoSuchElementException("BST underflow");

// if both children of root are black, set root to red
if (!isRed(root.left) && !isRed(root.right))
root.color = RED;

root = deleteMax(root);
if (!isEmpty()) root.color = BLACK;
// assert check();
}

// delete the key-value pair with the maximum key rooted at h
private Node deleteMax(Node h)
{
if (isRed(h.left))
h = rotateRight(h);

if (h.right == null)
return null;

if (!isRed(h.right) && !isRed(h.right.left))
h = moveRedRight(h);

h.right = deleteMax(h.right);

return balance(h);
}

//Removes the specified key and its associated value from this symbol table
public void delete(Key key)
{
if (key == null) throw new IllegalArgumentException("argument to delete() is null");
if (!contains(key)) return;

// if both children of root are black, set root to red
if (!isRed(root.left) && !isRed(root.right))
root.color = RED;

root = delete(root, key);
if (!isEmpty()) root.color = BLACK;
// assert check();
}

// delete the key-value pair with the given key rooted at h
private Node delete(Node h, Key key)
{
// assert get(h, key) != null;

if (key.compareTo(h.key) < 0)
{
if (!isRed(h.left) && !isRed(h.left.left))
h = moveRedLeft(h);
h.left = delete(h.left, key);
}
else
{
if (isRed(h.left))
h = rotateRight(h);
//删除的结点在最底层且为2-结点
if (key.compareTo(h.key) == 0 && (h.right == null))
return null;
if (!isRed(h.right) && !isRed(h.right.left))
h = moveRedRight(h);
if (key.compareTo(h.key) == 0)
{
Node x = min(h.right);
h.key = x.key;
h.val = x.val;
// h.val = get(h.right, min(h.right).key);
// h.key = min(h.right).key;
h.right = deleteMin(h.right);
}
else h.right = delete(h.right, key);
}
return balance(h);
}

/***************************************************************************
* Red-black tree helper functions.
***************************************************************************/

// make a left-leaning link lean to the right
private Node rotateRight(Node h)
{
// assert (h != null) && isRed(h.left);
Node x = h.left;
h.left = x.right;
x.right = h;
x.color = x.right.color;
x.right.color = RED;
x.size = h.size;
h.size = size(h.left) + size(h.right) + 1;
return x;
}

// make a right-leaning link lean to the left
private Node rotateLeft(Node h)
{
// assert (h != null) && isRed(h.right);
Node x = h.right;
h.right = x.left;
x.left = h;
x.color = x.left.color;
x.left.color = RED;
x.size = h.size;
h.size = size(h.left) + size(h.right) + 1;
return x;
}

// flip the colors of a node and its two children
private void flipColors(Node h)
{
// h must have opposite color of its two children
// assert (h != null) && (h.left != null) && (h.right != null);
// assert (!isRed(h) && isRed(h.left) && isRed(h.right))
// || (isRed(h) && !isRed(h.left) && !isRed(h.right));
h.color = !h.color;
h.left.color = !h.left.color;
h.right.color = !h.right.color;
}

// Assuming that h is red and both h.left and h.left.left
// are black, make h.left or one of its children red.
private Node moveRedLeft(Node h)
{
// assert (h != null);
// assert isRed(h) && !isRed(h.left) && !isRed(h.left.left);

flipColors(h);// h.left.color = h.right.color = RED,h从2-结点变为4-结点
if (isRed(h.right.left))//5-结点
{
h.right = rotateRight(h.right);
h = rotateLeft(h);
flipColors(h);
}
return h;
}

// Assuming that h is red and both h.right and h.right.left
// are black, make h.right or one of its children red.
private Node moveRedRight(Node h)
{
// assert (h != null);
// assert isRed(h) && !isRed(h.right) && !isRed(h.right.left);
flipColors(h);
if (isRed(h.left.left))
{
h = rotateRight(h);
flipColors(h);
}
return h;
}

// restore red-black tree invariant
private Node balance(Node h)
{
// assert (h != null);

if (isRed(h.right)) h = rotateLeft(h);
if (isRed(h.left) && isRed(h.left.left)) h = rotateRight(h);
if (isRed(h.left) && isRed(h.right)) flipColors(h);

h.size = size(h.left) + size(h.right) + 1;
return h;
}


/***************************************************************************
* Utility functions.
***************************************************************************/

public int height()
{
return height(root);
}
private int height(Node x)
{
if (x == null) return -1;
return 1 + Math.max(height(x.left), height(x.right));
}

/***************************************************************************
* Ordered symbol table methods.
***************************************************************************/

public Key min()
{
if (isEmpty()) throw new NoSuchElementException("calls min() with empty symbol table");
return min(root).key;
}

// the smallest key in subtree rooted at x; null if no such key
private Node min(Node x)
{
// assert x != null;
if (x.left == null) return x;
else return min(x.left);
}

public Key max()
{
if (isEmpty()) throw new NoSuchElementException("calls max() with empty symbol table");
return max(root).key;
}

// the largest key in the subtree rooted at x; null if no such key
private Node max(Node x)
{
// assert x != null;
if (x.right == null) return x;
else return max(x.right);
}


//Returns the largest key in the symbol table less than or equal to {@code key}.
public Key floor(Key key) {
if (key == null) throw new IllegalArgumentException("argument to floor() is null");
if (isEmpty()) throw new NoSuchElementException("calls floor() with empty symbol table");
Node x = floor(root, key);
if (x == null) throw new NoSuchElementException("argument to floor() is too small");
else return x.key;
}

// the largest key in the subtree rooted at x less than or equal to the given key
private Node floor(Node x, Key key)
{
if (x == null) return null;
int cmp = key.compareTo(x.key);
if (cmp == 0) return x;
if (cmp < 0) return floor(x.left, key);
Node t = floor(x.right, key);
if (t != null) return t;
else return x;
}

//Returns the smallest key in the symbol table greater than or equal to {@code key}.
public Key ceiling(Key key) {
if (key == null) throw new IllegalArgumentException("argument to ceiling() is null");
if (isEmpty()) throw new NoSuchElementException("calls ceiling() with empty symbol table");
Node x = ceiling(root, key);
if (x == null) throw new NoSuchElementException("argument to ceiling() is too small");
else return x.key;
}

// the smallest key in the subtree rooted at x greater than or equal to the given key
private Node ceiling(Node x, Key key)
{
if (x == null) return null;
int cmp = key.compareTo(x.key);
if (cmp == 0) return x;
if (cmp > 0) return ceiling(x.right, key);
Node t = ceiling(x.left, key);
if (t != null) return t;
else return x;
}

//Return the key in the symbol table of a given {@code rank}.
public Key select(int rank)
{
if (rank < 0 || rank >= size())
{
throw new IllegalArgumentException("argument to select() is invalid: " + rank);
}
return select(root, rank);
}

// Return key in BST rooted at x of given rank.
// Precondition: rank is in legal range.
private Key select(Node x, int rank)
{
if (x == null) return null;
int leftSize = size(x.left);
if (leftSize > rank) return select(x.left, rank);
else if (leftSize < rank) return select(x.right, rank - leftSize - 1);
else return x.key;
}

//Return the number of keys in the symbol table strictly less than {@code key}.
public int rank(Key key)
{
if (key == null) throw new IllegalArgumentException("argument to rank() is null");
return rank(key, root);
}

// number of keys less than key in the subtree rooted at x
private int rank(Key key, Node x)
{
if (x == null) return 0;
int cmp = key.compareTo(x.key);
if (cmp < 0) return rank(key, x.left);
else if (cmp > 0) return 1 + size(x.left) + rank(key, x.right);
else return size(x.left);
}

/***************************************************************************
* Range count and range search.
***************************************************************************/

public Iterable<Key> keys()
{
if (isEmpty()) return new Queue<Key>();
return keys(min(), max());
}

public Iterable<Key> keys(Key lo, Key hi)
{
if (lo == null) throw new IllegalArgumentException("first argument to keys() is null");
if (hi == null) throw new IllegalArgumentException("second argument to keys() is null");

Queue<Key> queue = new Queue<Key>();
// if (isEmpty() || lo.compareTo(hi) > 0) return queue;
keys(root, queue, lo, hi);
return queue;
}

// add the keys between lo and hi in the subtree rooted at x
// to the queue
private void keys(Node x, Queue<Key> queue, Key lo, Key hi)
{
if (x == null) return;
int cmplo = lo.compareTo(x.key);
int cmphi = hi.compareTo(x.key);
if (cmplo < 0) keys(x.left, queue, lo, hi);
if (cmplo <= 0 && cmphi >= 0) queue.enqueue(x.key);
if (cmphi > 0) keys(x.right, queue, lo, hi);
}

//Returns the number of keys in the symbol table in the given range.
public int size(Key lo, Key hi)
{
if (lo == null) throw new IllegalArgumentException("first argument to size() is null");
if (hi == null) throw new IllegalArgumentException("second argument to size() is null");

if (lo.compareTo(hi) > 0) return 0;
if (contains(hi)) return rank(hi) - rank(lo) + 1;
else return rank(hi) - rank(lo);
}


/***************************************************************************
* Check integrity of red-black tree data structure.
***************************************************************************/
private boolean check()
{
if (!isBST()) StdOut.println("Not in symmetric order");
if (!isSizeConsistent()) StdOut.println("Subtree counts not consistent");
if (!isRankConsistent()) StdOut.println("Ranks not consistent");
if (!is23()) StdOut.println("Not a 2-3 tree");
if (!isBalanced()) StdOut.println("Not balanced");
return isBST() && isSizeConsistent() && isRankConsistent() && is23() && isBalanced();
}

// does this binary tree satisfy symmetric order?
// Note: this test also ensures that data structure is a binary tree since order is strict
private boolean isBST() { return isBST(root, null, null); }

// is the tree rooted at x a BST with all keys strictly between min and max
// (if min or max is null, treat as empty constraint)
// Credit: Bob Dondero's elegant solution
private boolean isBST(Node x, Key min, Key max)
{
if (x == null) return true;
if (min != null && x.key.compareTo(min) <= 0) return false;
if (max != null && x.key.compareTo(max) >= 0) return false;
return isBST(x.left, min, x.key) && isBST(x.right, x.key, max);
}

// are the size fields correct?
private boolean isSizeConsistent() { return isSizeConsistent(root); }
private boolean isSizeConsistent(Node x)
{
if (x == null) return true;
if (x.size != size(x.left) + size(x.right) + 1) return false;
return isSizeConsistent(x.left) && isSizeConsistent(x.right);
}

// check that ranks are consistent
private boolean isRankConsistent()
{
for (int i = 0; i < size(); i++)
if (i != rank(select(i))) return false;
for (Key key : keys())
if (key.compareTo(select(rank(key))) != 0) return false;
return true;
}

// Does the tree have no red right links, and at most one (left)
// red links in a row on any path?
private boolean is23() { return is23(root); }
private boolean is23(Node x)
{
if (x == null) return true;
if (isRed(x.right)) return false;
if (x != root && isRed(x) && isRed(x.left))
return false;
return is23(x.left) && is23(x.right);
}

// do all paths from root to leaf have same number of black edges?
private boolean isBalanced()
{
int black = 0; // number of black links on path from root to min
Node x = root;
while (x != null)
{
if (!isRed(x)) black++;
x = x.left;
}
return isBalanced(root, black);
}

// does every path from the root to a leaf have the given number of black links?
private boolean isBalanced(Node x, int black)
{
if (x == null) return black == 0;
if (!isRed(x)) black--;
return isBalanced(x.left, black) && isBalanced(x.right, black);
}


/**
* Unit tests the {@code RedBlackBST} data type.
*
* @param args the command-line arguments
*/
public static void main(String[] args)
{
RedBlackBST<String, Integer> st = new RedBlackBST<String, Integer>();
for (int i = 0; !StdIn.isEmpty(); i++)
{
String key = StdIn.readString();
st.put(key, i);
}
StdOut.println();
for (String s : st.keys())
StdOut.println(s + " " + st.get(s));
StdOut.println();
}
}

测试

1
2
3
4
5
6
7
8
9
10
11
12
13
% more tinyST.txt
S E A R C H E X A M P L E
% java RedBlackBST < tinyST.txt
A 8
C 4
E 12
H 5
L 11
M 9
P 10
R 3
S 0
X 7

散列表

软缓存:如果散列值的计算很耗时,那么我们或许可以将每个键的散列值缓存起来,即在每个键中使用一个$hash$变量来保存它的$hashcode()$的返回值

总的来说,要为一个数据结构实现一个优秀的散列方法需要满足三个条件:

一致性——等价的键必然产生相等的散列值

高效性——计算方便

均匀性——均匀地散列所有的键

基于拉链法的散列表

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
public class SeparateChainingHashST<Key, Value>
{
private static final int INIT_CAPACITY = 4;

private int n; // number of key-value pairs
private int m; // hash table size
private SequentialSearchST<Key, Value>[] st; // array of linked-list symbol tables

public SeparateChainingHashST()
{
this(INIT_CAPACITY);
}

public SeparateChainingHashST(int m)
{
this.m = m;
st = (SequentialSearchST<Key, Value>[]) new SequentialSearchST[m];
for (int i = 0; i < m; i++)
st[i] = new SequentialSearchST<Key, Value>();
}

private void resize(int chains)
{
SeparateChainingHashST<Key, Value> temp = new SeparateChainingHashST<Key, Value>(chains);
for (int i = 0; i < m; i++)
{
for (Key key : st[i].keys())
{
temp.put(key, st[i].get(key));
}
}
this.m = temp.m;
this.n = temp.n;
this.st = temp.st;
}

// hash function for keys - returns value between 0 and m-1
private int hashTextbook(Key key)
{
return (key.hashCode() & 0x7fffffff) % m;
}

// hash function for keys - returns value between 0 and m-1 (assumes m is a power of 2)
// (from Java 7 implementation, protects against poor quality hashCode() implementations)
private int hash(Key key)
{
int h = key.hashCode();
h ^= (h >>> 20) ^ (h >>> 12) ^ (h >>> 7) ^ (h >>> 4);
return h & (m-1);
}

public int size()
{
return n;
}

public boolean isEmpty()
{
return size() == 0;
}

public boolean contains(Key key)
{
if (key == null) throw new IllegalArgumentException("argument to contains() is null");
return get(key) != null;
}

public Value get(Key key)
{
if (key == null) throw new IllegalArgumentException("argument to get() is null");
int i = hash(key);
return st[i].get(key);
}

public void put(Key key, Value val)
{
if (key == null) throw new IllegalArgumentException("first argument to put() is null");
if (val == null)
{
delete(key);
return;
}

// double table size if average length of list >= 10
if (n >= 10 * m) resize(2 * m);

int i = hash(key);
if (!st[i].contains(key)) n++;
st[i].put(key, val);
}

public void delete(Key key)
{
if (key == null) throw new IllegalArgumentException("argument to delete() is null");

int i = hash(key);
if (st[i].contains(key)) n--;
st[i].delete(key);

// halve table size if average length of list <= 2
if (m > INIT_CAPACITY && n <= 2 * m) resize(m / 2);
}

// return keys in symbol table as an Iterable
public Iterable<Key> keys()
{
Queue<Key> queue = new Queue<Key>();
for (int i = 0; i < m; i++)
{
for (Key key : st[i].keys())
queue.enqueue(key);
}
return queue;
}

public static void main(String[] args)
{
SeparateChainingHashST<String, Integer> st = new SeparateChainingHashST<String, Integer>();
for (int i = 0; !StdIn.isEmpty(); i++)
{
String key = StdIn.readString();
st.put(key, i);
}

// print keys
for (String s : st.keys())
StdOut.println(s + " " + st.get(s));

}
}

性质。在一张含有M条链表和N个键的散列表中,未命中查找和插入操作所需的比较次数为~$N/M$。

基于线性探测法的散列表

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
import edu.princeton.cs.algs4.StdIn;
import edu.princeton.cs.algs4.StdOut;

public class LinearProbingHashST<Key, Value>
{
// must be a power of 2
private static final int INIT_CAPACITY = 4;

private int n; // number of key-value pairs in the symbol table
private int m; // size of linear probing table
private Key[] keys; // the keys
private Value[] vals; // the values

public LinearProbingHashST()
{
this(INIT_CAPACITY);
}

public LinearProbingHashST(int capacity)
{
m = capacity;
n = 0;
keys = (Key[]) new Object[m];
vals = (Value[]) new Object[m];
}

public int size()
{
return n;
}

public boolean isEmpty()
{
return size() == 0;
}

public boolean contains(Key key)
{
if (key == null) throw new IllegalArgumentException("argument to contains() is null");
return get(key) != null;
}

// hash function for keys - returns value between 0 and m-1
private int hashTextbook(Key key)
{
return (key.hashCode() & 0x7fffffff) % m;
}

// hash function for keys - returns value between 0 and m-1 (assumes m is a power of 2)
// (from Java 7 implementation, protects against poor quality hashCode() implementations)
private int hash(Key key)
{
int h = key.hashCode();
h ^= (h >>> 20) ^ (h >>> 12) ^ (h >>> 7) ^ (h >>> 4);
return h & (m-1);
}

// resizes the hash table to the given capacity by re-hashing all of the keys
private void resize(int capacity)
{
LinearProbingHashST<Key, Value> temp = new LinearProbingHashST<Key, Value>(capacity);
for (int i = 0; i < m; i++)
{
if (keys[i] != null)
{
temp.put(keys[i], vals[i]);
}
}
keys = temp.keys;
vals = temp.vals;
m = temp.m;
}

public void put(Key key, Value val)
{
if (key == null) throw new IllegalArgumentException("first argument to put() is null");

if (val == null)
{
delete(key);
return;
}

// double table size if 50% full
if (n >= m/2) resize(2*m);

int i;
for (i = hash(key); keys[i] != null; i = (i + 1) % m)
{
if (keys[i].equals(key))
{
vals[i] = val;
return;
}
}
keys[i] = key;
vals[i] = val;
n++;
}

public Value get(Key key)
{
if (key == null) throw new IllegalArgumentException("argument to get() is null");
for (int i = hash(key); keys[i] != null; i = (i + 1) % m)
if (keys[i].equals(key))
return vals[i];
return null;
}

public void delete(Key key)
{
if (key == null) throw new IllegalArgumentException("argument to delete() is null");
if (!contains(key)) return;

// find position i of key
int i = hash(key);
while (!key.equals(keys[i]))
{
i = (i + 1) % m;
}

// delete key and associated value
keys[i] = null;
vals[i] = null;

// rehash all keys in same cluster
i = (i + 1) % m;
while (keys[i] != null)
{
// delete keys[i] an vals[i] and reinsert
Key keyToRehash = keys[i];
Value valToRehash = vals[i];
keys[i] = null;
vals[i] = null;
n--;
put(keyToRehash, valToRehash);
i = (i + 1) % m;
}

n--;

// halves size of array if it's 12.5% full or less
if (n > 0 && n <= m/8) resize(m/2);

assert check();
}

/**
* Returns all keys in this symbol table as an {@code Iterable}.
* To iterate over all of the keys in the symbol table named {@code st},
* use the foreach notation: {@code for (Key key : st.keys())}.
*
* @return all keys in this symbol table
*/
public Iterable<Key> keys()
{
Queue<Key> queue = new Queue<Key>();
for (int i = 0; i < m; i++)
if (keys[i] != null) queue.enqueue(keys[i]);
return queue;
}

// integrity check - don't check after each put() because
// integrity not maintained during a delete()
private boolean check()
{

// check that hash table is at most 50% full
if (m < 2 * n) {
System.err.println("Hash table size m = " + m + "; array size n = " + n);
return false;
}

// check that each key in table can be found by get()
for (int i = 0; i < m; i++)
{
if (keys[i] == null) continue;
else if (get(keys[i]) != vals[i])
{
System.err.println("get[" + keys[i] + "] = " + get(keys[i]) + "; vals[i] = " + vals[i]);
return false;
}
}
return true;
}


/**
* Unit tests the {@code LinearProbingHashST} data type.
*
* @param args the command-line arguments
*/
public static void main(String[] args)
{
LinearProbingHashST<String, Integer> st = new LinearProbingHashST<String, Integer>();
for (int i = 0; !StdIn.isEmpty(); i++)
{
String key = StdIn.readString();
st.put(key, i);
}

// print keys
for (String s : st.keys())
StdOut.println(s + " " + st.get(s));
}
}

线性探测的平均成本取绝于元素在插入数组聚集成的一组连续的条目,也叫做键簇

如何从基于线性探测的散列表中删除一个键?仔细想一想,你会发现直接将该键所在的位置设为null是不行的,因为这会使得在此位置之后的元素无法被查找。我们需要将簇中被删除键的右侧的所有键重新插入散列表。

假设J(均匀散列假设)。我们使用的散列函数能够均匀并独立地将所有的键散布于$0$到$M-1$之间。

命题M。在一张大小为$M$并含有$N=\alpha M$个键的基于线性探测的散列表中,基于假设J,命中和未命中的查找所需的探测次数分别近似为:

$$ \frac{1}{2}( 1+ \frac{1}{1 - \alpha} ) 和\frac{1}{2}( 1+ \frac{1}{(1 - \alpha)^2} ) $$

特别是当$\alpha$约为$1/2$时,查找命中所需要的探测次数约为$3/2$,未命中所需要的约为$5/2$。当$\alpha$趋近于$1$时,这些估计值的精确度会下降,但不需要担心这些情况,因为我们会保证散列表的使用率小于$1/2$

命题M告诉我们(在假设J的前提下)当散列表快满的时候查找所需的探测次数是巨大的($\alpha$越趋近于1,由公式可知探测的次数也越来越大),但当使用率$\alpha$小于$1/2$时探测的预计次数只在$1.5$到$2.5$之间。为此考虑动态调整散列表数组的大小。

应用

字典的查找

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
public class LookupCSV 
{

// Do not instantiate.
private LookupCSV() { }

public static void main(String[] args)
{
int keyField = Integer.parseInt(args[1]);
int valField = Integer.parseInt(args[2]);

// symbol table
ST<String, String> st = new ST<String, String>();

// read in the data from csv file
In in = new In(args[0]);
while (in.hasNextLine())
{
String line = in.readLine();
String[] tokens = line.split(",");
String key = tokens[keyField];
String val = tokens[valField];
st.put(key, val);
}

while (!StdIn.isEmpty())
{
String s = StdIn.readString();
if (st.contains(s)) StdOut.println(st.get(s));
else StdOut.println("Not found");
}
}
}

测试

1
2
3
4
5
6
7
8
9
%more ip.csv
...
www.ebay.com,66,135,192,87
www.princeton.edu,128.112.128.15
www.cs.princeton.edu,128.112.128.35
...
%java LookupCSV ip.csv 1 0
128.112.136.35
www.cs.princeton.edu

索引(以及反向索引) 的查找

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
public class LookupIndex { 

// Do not instantiate.
private LookupIndex() { }

public static void main(String[] args)
{
String filename = args[0];
String separator = args[1];
In in = new In(filename);

ST<String, Queue<String>> st = new ST<String, Queue<String>>();
ST<String, Queue<String>> ts = new ST<String, Queue<String>>();

while (in.hasNextLine())
{
String line = in.readLine();
String[] fields = line.split(separator);
String key = fields[0];
for (int i = 1; i < fields.length; i++)
{
String val = fields[i];
if (!st.contains(key)) st.put(key, new Queue<String>());
if (!ts.contains(val)) ts.put(val, new Queue<String>());
st.get(key).enqueue(val);
ts.get(val).enqueue(key);
}
}

StdOut.println("Done indexing");

// read queries from standard input, one per line
while (!StdIn.isEmpty())
{
String query = StdIn.readLine();
if (st.contains(query))
for (String vals : st.get(query))
StdOut.println(" " + vals);
if (ts.contains(query))
for (String keys : ts.get(query))
StdOut.println(" " + keys);
}

}
}

测试

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
% java LookupIndex movies.txt "/"
Bacon, Kevin
Animal House (1978)
Apollo 13 (1995)
Beauty Shop (2005)
Diner (1982)
Few Good Men, A (1992)
Flatliners (1990)
Footloose (1984)
Friday the 13th (1980)
...
Tin Men (1987)
DeBoy, David
Blumenfeld, Alan
...

文件索引

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import java.io.File;

public class FileIndex
{

// Do not instantiate.
private FileIndex() { }

public static void main(String[] args)
{

// key = word, value = set of files containing that word
ST<String, SET<File>> st = new ST<String, SET<File>>();

// create inverted index of all files
StdOut.println("Indexing files");
for (String filename : args)
{
StdOut.println(" " + filename);
File file = new File(filename);
In in = new In(file);
while (!in.isEmpty())
{
String word = in.readString();
if (!st.contains(word)) st.put(word, new SET<File>());
SET<File> set = st.get(word);
set.add(file);
}
}


// read queries from standard input, one per line
while (!StdIn.isEmpty())
{
String query = StdIn.readString();
if (st.contains(query))
{
SET<File> set = st.get(query);
for (File file : set)
{
StdOut.println(" " + file.getName());
}
}
}

}

}

测试

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
%more ex1.txt
it was the best of times

%more ex2.txt
it was the worst of times

%more ex3.txt
it was the age of wisdom

%more ex4.txt
it was the age of foolishness

%java FileIndex ex*.txt
age
ex3.txt
ex4.txt
best
ex1.txt
was
ex1.txt
ex2.txt
ex3.txt
ex4.txt

稀疏矩阵

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
public class SparseVector
{
private int d; // dimension
private ST<Integer, Double> st; // the vector, represented by index-value pairs

//Initializes a d-dimensional zero vector.
public SparseVector(int d)
{
this.d = d;
this.st = new ST<Integer, Double>();
}

//Sets the ith coordinate of this vector to the specified value.
public void put(int i, double value)
{
if (i < 0 || i >= d) throw new IllegalArgumentException("Illegal index");
if (value == 0.0) st.delete(i);
else st.put(i, value);
}

//Returns the ith coordinate of this vector.
public double get(int i)
{
if (i < 0 || i >= d) throw new IllegalArgumentException("Illegal index");
if (st.contains(i)) return st.get(i);
else return 0.0;
}

//Returns the number of nonzero entries in this vector.
public int nnz()
{
return st.size();
}

//Returns the dimension of this vector.
@Deprecated
public int size()
{
return d;
}

//Returns the dimension of this vector.
public int dimension()
{
return d;
}

//Returns the inner product of this vector with the specified vector.
public double dot(SparseVector that)
{
if (this.d != that.d) throw new IllegalArgumentException("Vector lengths disagree");
double sum = 0.0;

// iterate over the vector with the fewest nonzeros
if (this.st.size() <= that.st.size())
{
for (int i : this.st.keys())
if (that.st.contains(i)) sum += this.get(i) * that.get(i);
}
else
{
for (int i : that.st.keys())
if (this.st.contains(i)) sum += this.get(i) * that.get(i);
}
return sum;
}


//Returns the inner product of this vector with the specified array.
public double dot(double[] that)
{
double sum = 0.0;
for (int i : st.keys())
sum += that[i] * this.get(i);
return sum;
}

//Returns the magnitude of this vector.
public double magnitude()
{
return Math.sqrt(this.dot(this));
}

//Returns the Euclidean norm of this vector.
@Deprecated
public double norm()
{
return Math.sqrt(this.dot(this));
}

//Returns the scalar-vector product of this vector with the specified scalar.
public SparseVector scale(double alpha)
{
SparseVector c = new SparseVector(d);
for (int i : this.st.keys()) c.put(i, alpha * this.get(i));
return c;
}

//Returns the sum of this vector and the specified vector.
public SparseVector plus(SparseVector that)
{
if (this.d != that.d) throw new IllegalArgumentException("Vector lengths disagree");
SparseVector c = new SparseVector(d);
for (int i : this.st.keys()) c.put(i, this.get(i)); // c = this
for (int i : that.st.keys()) c.put(i, that.get(i) + c.get(i)); // c = c + that
return c;
}

//Returns a string representation of this vector.
public String toString()
{
StringBuilder s = new StringBuilder();
for (int i : st.keys()) {
s.append("(" + i + ", " + st.get(i) + ") ");
}
return s.toString();
}


/**
* Unit tests the {@code SparseVector} data type.
*
* @param args the command-line arguments
*/
public static void main(String[] args)
{
SparseVector a = new SparseVector(10);
SparseVector b = new SparseVector(10);
a.put(3, 0.50);
a.put(9, 0.75);
a.put(6, 0.11);
a.put(6, 0.00);
b.put(3, 0.60);
b.put(4, 0.90);
StdOut.println("a = " + a);
StdOut.println("b = " + b);
StdOut.println("a dot b = " + a.dot(b));
StdOut.println("a + b = " + a.plus(b));
}

}