1 module mutils.container.hash_map;
2 
3 import std.traits;
4 
5 import mutils.benchmark;
6 import mutils.container.vector;
7 import mutils.traits;
8 
9 private enum HASH_EMPTY = 0;
10 private enum HASH_DELETED = 0x1;
11 private enum HASH_FILLED_MARK = ulong(1) << 8 * ulong.sizeof - 1;
12 
13 ulong defaultHashFunc(T)(auto ref T t) {
14 	static if (isIntegral!(T)) {
15 		return hashInt(t);
16 	} else {
17 		return hashInt(t.hashOf); // hashOf is not giving proper distribution between H1 and H2 hash parts
18 	}
19 }
20 
21 // Can turn bad hash function to good one
22 ulong hashInt(ulong x) nothrow @nogc @safe {
23 	x = (x ^ (x >> 30)) * 0xbf58476d1ce4e5b9;
24 	x = (x ^ (x >> 27)) * 0x94d049bb133111eb;
25 	x = x ^ (x >> 31);
26 	return x;
27 }
28 
29 struct HashMap(KeyPar, ValuePar, alias hashFunc = defaultHashFunc) {
30 	alias Key = KeyPar;
31 	alias Value = ValuePar;
32 
33 	enum rehashFactor = 0.75;
34 	enum size_t getIndexEmptyValue = size_t.max;
35 
36 	static struct KeyVal {
37 		Key key;
38 		Value value;
39 	}
40 
41 	static struct Bucket {
42 		ulong hash;
43 		KeyVal keyValue;
44 	}
45 
46 	Vector!Bucket elements; // Length should be always power of 2
47 	size_t length; // Used to compute loadFactor
48 	size_t markerdDeleted;
49 
50 	void clear() {
51 		elements.clear();
52 		length = 0;
53 		markerdDeleted = 0;
54 	}
55 
56 	void reset() {
57 		elements.reset();
58 		length = 0;
59 		markerdDeleted = 0;
60 	}
61 
62 	bool isIn(ref Key el) {
63 		return getIndex(el) != getIndexEmptyValue;
64 	}
65 
66 	bool isIn(Key el) {
67 		return getIndex(el) != getIndexEmptyValue;
68 	}
69 
70 	Value* getPtr()(auto ref Key k) {
71 		size_t index = getIndex(k);
72 		if (index == getIndexEmptyValue) {
73 			return null;
74 		} else {
75 			return &elements[index].keyValue.value;
76 		}
77 	}
78 
79 	ref Value get()(auto ref Key k) {
80 		size_t index = getIndex(k);
81 		assert(index != getIndexEmptyValue);
82 		return elements[index].keyValue.value;
83 	}
84 
85 	deprecated("Use get with second parameter.") auto ref Value getDefault()(
86 			auto ref Key k, auto ref Value defaultValue) {
87 		return get(k, defaultValue);
88 	}
89 
90 	auto ref Value get()(auto ref Key k, auto ref Value defaultValue) {
91 		size_t index = getIndex(k);
92 		if (index == getIndexEmptyValue) {
93 			return defaultValue;
94 		} else {
95 			return elements[index].keyValue.value;
96 		}
97 	}
98 
99 	ref Value getInsertDefault()(auto ref Key k, auto ref Value defaultValue) {
100 		size_t index = getIndex(k);
101 		if (index == getIndexEmptyValue) {
102 			add(k, defaultValue);
103 		}
104 		index = getIndex(k);
105 		assert(index != getIndexEmptyValue);
106 		return elements[index].keyValue.value;
107 
108 	}
109 
110 	bool tryRemove(Key el) {
111 		size_t index = getIndex(el);
112 		if (index == getIndexEmptyValue) {
113 			return false;
114 		}
115 		length--;
116 		elements[index].hash = HASH_DELETED;
117 		markerdDeleted++;
118 		return true;
119 	}
120 
121 	void remove(Key el) {
122 		bool ok = tryRemove(el);
123 		assert(ok);
124 	}
125 
126 	ref Value opIndex()(auto ref Key key) {
127 		return get(key);
128 	}
129 
130 	void opIndexAssign()(auto ref Value value, auto ref Key key) {
131 		add(key, value);
132 	}
133 
134 	void add()(auto ref Key key, auto ref Value value) {
135 		size_t index = getIndex(key);
136 		if (index != getIndexEmptyValue) {
137 			elements[index].keyValue.value = value;
138 			return;
139 		}
140 
141 		if (getLoadFactor(length + 1) > rehashFactor
142 				|| getLoadFactor(length + markerdDeleted) > rehashFactor) {
143 			rehash();
144 		}
145 		length++;
146 
147 		immutable ulong hash = hashFunc(key) | HASH_FILLED_MARK;
148 		immutable size_t rotateMask = elements.length - 1;
149 		index = hash & rotateMask; // Starting point
150 
151 		while (true) {
152 			Bucket* gr = &elements[index];
153 			if ((gr.hash & HASH_FILLED_MARK) == 0) {
154 				if (gr.hash == HASH_DELETED) {
155 					markerdDeleted--;
156 				}
157 				gr.hash = hash;
158 				gr.keyValue.key = key;
159 				gr.keyValue.value = value;
160 				return;
161 			}
162 
163 			index++;
164 			index = index & rotateMask;
165 		}
166 	}
167 
168 	// For debug
169 	//int numA;
170 	//int numB;
171 
172 	size_t getIndex(Key el) {
173 		return getIndex(el);
174 	}
175 
176 	size_t getIndex(ref Key el) {
177 		mixin(doNotInline);
178 
179 		immutable size_t groupsLength = elements.length;
180 		if (groupsLength == 0) {
181 			return getIndexEmptyValue;
182 		}
183 
184 		immutable ulong hash = hashFunc(el) | HASH_FILLED_MARK;
185 		immutable size_t rotateMask = groupsLength - 1;
186 		size_t index = hash & rotateMask; // Starting point
187 
188 		//numA++;
189 		while (true) {
190 			//numB++;
191 			Bucket* gr = &elements[index];
192 			if (gr.hash == hash && gr.keyValue.key == el) {
193 				return index;
194 			}
195 			if (gr.hash == HASH_EMPTY) {
196 				return getIndexEmptyValue;
197 			}
198 
199 			index++;
200 			index = index & rotateMask;
201 		}
202 
203 	}
204 
205 	float getLoadFactor(size_t forElementsNum) {
206 		if (elements.length == 0) {
207 			return 1;
208 		}
209 		return cast(float) forElementsNum / (elements.length);
210 	}
211 
212 	void rehash() {
213 		mixin(doNotInline);
214 		// Get all elements
215 		Vector!KeyVal allElements;
216 		allElements.reserve(elements.length);
217 
218 		foreach (ref Bucket el; elements) {
219 			if ((el.hash & HASH_FILLED_MARK) == 0) {
220 				el.hash = HASH_EMPTY;
221 				continue;
222 			}
223 			el.hash = HASH_EMPTY;
224 			allElements ~= el.keyValue;
225 
226 		}
227 
228 		if (getLoadFactor(length + 1) > rehashFactor) { // Reallocate
229 			elements.length = (elements.length ? elements.length : 4) << 1; // Power of two, initially 8 elements
230 		}
231 
232 		// Insert elements
233 		foreach (i, ref el; allElements) {
234 			add(el.key, el.value);
235 		}
236 		length = allElements.length;
237 		markerdDeleted = 0;
238 	}
239 
240 	// foreach support
241 	int opApply(DG)(scope DG dg) {
242 		int result;
243 		foreach (ref Bucket gr; elements) {
244 			if ((gr.hash & HASH_FILLED_MARK) == 0) {
245 				continue;
246 			}
247 			static if (isForeachDelegateWithTypes!(DG, Key)) {
248 				result = dg(gr.keyValue.key);
249 			} else static if (isForeachDelegateWithTypes!(DG, Value)) {
250 				result = dg(gr.keyValue.value);
251 			} else static if (isForeachDelegateWithTypes!(DG, Key, Value)) {
252 				result = dg(gr.keyValue.key, gr.keyValue.value);
253 			} else {
254 				static assert(0);
255 			}
256 			if (result)
257 				break;
258 
259 		}
260 
261 		return result;
262 	}
263 
264 	int byKey(scope int delegate(Key k) dg) {
265 		int result;
266 		foreach (ref Key k; this) {
267 			result = dg(k);
268 			if (result)
269 				break;
270 		}
271 		return result;
272 	}
273 
274 	int byValue(scope int delegate(ref Value k) dg) {
275 		int result;
276 		foreach (ref Value v; this) {
277 			result = dg(v);
278 			if (result)
279 				break;
280 		}
281 		return result;
282 	}
283 
284 	int byKeyValue(scope int delegate(ref Key k, ref Value v) dg) {
285 		int result;
286 		foreach (ref Key k, ref Value v; this) {
287 			result = dg(k, v);
288 			if (result)
289 				break;
290 		}
291 		return result;
292 	}
293 
294 	import std.format : FormatSpec, formatValue;
295 
296 	/**
297 	 * Preety print
298 	 */
299 	void toString(scope void delegate(const(char)[]) sink, FormatSpec!char fmt) {
300 		formatValue(sink, '[', fmt);
301 		foreach (ref k, ref v; &byKeyValue) {
302 			formatValue(sink, k, fmt);
303 			formatValue(sink, ':', fmt);
304 			formatValue(sink, v, fmt);
305 			formatValue(sink, ", ", fmt);
306 		}
307 		formatValue(sink, ']', fmt);
308 	}
309 
310 	// Make distripution plot
311 	void saveGroupDistributionPlot(string path) {
312 		enum distributionsNum = 1024 * 8;
313 		BenchmarkData!(1, distributionsNum) distr; // For now use benchamrk as a plotter
314 
315 		foreach (ref Key el; this) {
316 			immutable size_t rotateMask = elements.length - 1;
317 			size_t group = cast(size_t)(hashFunc(el) & rotateMask);
318 			if (group >= distributionsNum) {
319 				continue;
320 			}
321 			distr.times[0][group]++;
322 
323 		}
324 		distr.plotUsingGnuplot(path, ["group distribution"]);
325 	}
326 
327 }
328 
329 static void dumpHashMapToJson(T)(ref T map, string path = "HashMapDump.json") {
330 	Vector!char data;
331 	import std.file;
332 	import mutils.serializer.json;
333 
334 	JSONSerializer.instance.serialize!(Load.no)(map, data);
335 	std.file.write(path, data[]);
336 }
337 
338 static void printHashMap(T)(ref T map) {
339 	import std.stdio;
340 
341 	writeln(T.stringof, " dump:\n");
342 	foreach (k, v; &map.byKeyValue) {
343 		writefln("%20s : %20s", k, v);
344 	}
345 }
346 
347 unittest {
348 	HashMap!(int, int) map;
349 
350 	assert(map.isIn(123) == false);
351 	assert(map.markerdDeleted == 0);
352 	map.add(123, 1);
353 	map.add(123, 1);
354 	assert(map.isIn(123) == true);
355 	assert(map.isIn(122) == false);
356 	assert(map.length == 1);
357 	map.remove(123);
358 	assert(map.markerdDeleted == 1);
359 	assert(map.isIn(123) == false);
360 	assert(map.length == 0);
361 	assert(map.tryRemove(500) == false);
362 	map.add(123, 1);
363 	assert(map.markerdDeleted == 0);
364 	assert(map.tryRemove(123) == true);
365 
366 	foreach (i; 1 .. 130) {
367 		map.add(i, 1);
368 	}
369 
370 	foreach (i; 1 .. 130) {
371 		assert(map.isIn(i));
372 	}
373 
374 	foreach (i; 130 .. 500) {
375 		assert(!map.isIn(i));
376 	}
377 
378 	foreach (int el; map) {
379 		assert(map.isIn(el));
380 	}
381 }
382 
383 unittest {
384 	HashMap!(int, int) map;
385 	map.add(1, 10);
386 	assert(map.get(1) == 10);
387 	assert(map.get(2, 20) == 20);
388 	assert(!map.isIn(2));
389 	assert(map.getInsertDefault(2, 20) == 20);
390 	assert(map.get(2) == 20);
391 	map[5] = 50;
392 	assert(map[5] == 50);
393 	foreach (k; &map.byKey) {
394 	}
395 	foreach (k, v; &map.byKeyValue) {
396 	}
397 	foreach (v; &map.byValue) {
398 	}
399 }
400 
401 unittest {
402 	HashMap!(Vector!char, int) map;
403 	Vector!char vecA;
404 
405 	vecA ~= "AAA";
406 	map.add(vecA, 10);
407 	assert(map[vecA] == 10);
408 	map.add(vecA, 20);
409 	assert(map[vecA] == 20);
410 	//assert(vecA=="AAA");
411 	//assert(map["AAA"]==10);// TODO hashMap Vector!char and string
412 }
413 
414 void benchmarkHashMapInt() {
415 	HashMap!(int, int) map;
416 	byte[int] mapStandard;
417 	uint elementsNumToAdd = cast(uint)(65536 * 0.74);
418 	// Add elements
419 	foreach (int i; 0 .. elementsNumToAdd) {
420 		map.add(i, 1);
421 		mapStandard[i] = 1;
422 	}
423 	// Check if isIn is working
424 	foreach (int i; 0 .. elementsNumToAdd) {
425 		assert(map.isIn(i));
426 		assert((i in mapStandard) !is null);
427 	}
428 	// Check if isIn is returning false properly
429 	foreach (int i; elementsNumToAdd .. elementsNumToAdd + 10_000) {
430 		assert(!map.isIn(i));
431 		assert((i in mapStandard) is null);
432 	}
433 	//map.numA=map.numB=map.numC=0;
434 	enum itNum = 100;
435 	BenchmarkData!(2, itNum) bench;
436 	doNotOptimize(map); // Make some confusion for compiler
437 	doNotOptimize(mapStandard);
438 	ushort myResults;
439 	myResults = 0;
440 	//benchmark standard library implementation
441 	foreach (b; 0 .. itNum) {
442 		bench.start!(1)(b);
443 		foreach (i; 0 .. elementsNumToAdd) {
444 			auto ret = myResults in mapStandard;
445 			myResults += 1 + cast(bool) ret; //cast(typeof(myResults))(cast(bool)ret);
446 			doNotOptimize(ret);
447 		}
448 		bench.end!(1)(b);
449 	}
450 
451 	auto stResult = myResults;
452 	//benchmark this implementation
453 	myResults = 0;
454 	foreach (b; 0 .. itNum) {
455 		bench.start!(0)(b);
456 		foreach (i; 0 .. elementsNumToAdd) {
457 			auto ret = map.isIn(myResults);
458 			myResults += 1 + ret; //cast(typeof(myResults))(ret);
459 			doNotOptimize(ret);
460 		}
461 		bench.end!(0)(b);
462 	}
463 	assert(myResults == stResult); // Same behavior as standard map
464 	//writeln(map.getLoadFactor(map.length));
465 	//writeln(map.numA);
466 	//writeln(map.numB);
467 
468 	doNotOptimize(myResults);
469 	bench.plotUsingGnuplot("testA.png", ["my", "standard"]);
470 	map.saveGroupDistributionPlot("distrA.png");
471 }
472 
473 void benchmarkHashMapPerformancePerElement() {
474 	ushort trueResults;
475 	doNotOptimize(trueResults);
476 	enum itNum = 100;
477 	BenchmarkData!(2, itNum) bench;
478 	HashMap!(int, int) map;
479 	byte[int] mapStandard;
480 	//writeln(.getLoadFactor(map.length));
481 	//map.numA=map.numB=map.numC=0;
482 	size_t lastAdded;
483 	size_t numToAdd = 16 * 8;
484 
485 	foreach (b; 0 .. itNum) {
486 		foreach (i; lastAdded .. lastAdded + numToAdd) {
487 			mapStandard[cast(uint) i] = true;
488 		}
489 		lastAdded += numToAdd;
490 		bench.start!(1)(b);
491 		foreach (i; 0 .. 1000_00) {
492 			auto ret = trueResults in mapStandard;
493 			trueResults += 1; //cast(typeof(trueResults))(cast(bool)ret);
494 			doNotOptimize(ret);
495 		}
496 		bench.end!(1)(b);
497 	}
498 	lastAdded = 0;
499 	trueResults = 0;
500 	foreach (b; 0 .. itNum) {
501 		foreach (i; lastAdded .. lastAdded + numToAdd) {
502 			map.add(cast(int) i, 1);
503 		}
504 		lastAdded += numToAdd;
505 		bench.start!(0)(b);
506 		foreach (i; 0 .. 1000_00) {
507 			auto ret = map.isIn(trueResults);
508 			trueResults += 1; //cast(typeof(trueResults))(ret);
509 			doNotOptimize(ret);
510 		}
511 		bench.end!(0)(b);
512 	}
513 	//writeln(map.numA);
514 	//writeln(map.numB);
515 	doNotOptimize(trueResults);
516 	bench.plotUsingGnuplot("test.png", ["my", "standard"]);
517 	//map.saveGroupDistributionPlot("distr.png");
518 
519 }