1 module mutils.container.hash_map; 2 3 import std.traits; 4 5 import mutils.benchmark; 6 import mutils.container.vector; 7 import mutils.traits; 8 9 private enum HASH_EMPTY = 0; 10 private enum HASH_DELETED = 0x1; 11 private enum HASH_FILLED_MARK = ulong(1) << 8 * ulong.sizeof - 1; 12 13 ulong defaultHashFunc(T)(auto ref T t) { 14 static if (isIntegral!(T)) { 15 return hashInt(t); 16 } else { 17 return hashInt(t.hashOf); // hashOf is not giving proper distribution between H1 and H2 hash parts 18 } 19 } 20 21 // Can turn bad hash function to good one 22 ulong hashInt(ulong x) nothrow @nogc @safe { 23 x = (x ^ (x >> 30)) * 0xbf58476d1ce4e5b9; 24 x = (x ^ (x >> 27)) * 0x94d049bb133111eb; 25 x = x ^ (x >> 31); 26 return x; 27 } 28 29 struct HashMap(KeyPar, ValuePar, alias hashFunc = defaultHashFunc) { 30 alias Key = KeyPar; 31 alias Value = ValuePar; 32 33 enum rehashFactor = 0.75; 34 enum size_t getIndexEmptyValue = size_t.max; 35 36 static struct KeyVal { 37 Key key; 38 Value value; 39 } 40 41 static struct Bucket { 42 ulong hash; 43 KeyVal keyValue; 44 } 45 46 Vector!Bucket elements; // Length should be always power of 2 47 size_t length; // Used to compute loadFactor 48 size_t markerdDeleted; 49 50 void clear() { 51 elements.clear(); 52 length = 0; 53 markerdDeleted = 0; 54 } 55 56 void reset() { 57 elements.reset(); 58 length = 0; 59 markerdDeleted = 0; 60 } 61 62 bool isIn(ref Key el) { 63 return getIndex(el) != getIndexEmptyValue; 64 } 65 66 bool isIn(Key el) { 67 return getIndex(el) != getIndexEmptyValue; 68 } 69 70 Value* getPtr()(auto ref Key k) { 71 size_t index = getIndex(k); 72 if (index == getIndexEmptyValue) { 73 return null; 74 } else { 75 return &elements[index].keyValue.value; 76 } 77 } 78 79 ref Value get()(auto ref Key k) { 80 size_t index = getIndex(k); 81 assert(index != getIndexEmptyValue); 82 return elements[index].keyValue.value; 83 } 84 85 deprecated("Use get with second parameter.") auto ref Value getDefault()( 86 auto ref Key k, auto ref Value defaultValue) { 87 return get(k, defaultValue); 88 } 89 90 auto ref Value get()(auto ref Key k, auto ref Value defaultValue) { 91 size_t index = getIndex(k); 92 if (index == getIndexEmptyValue) { 93 return defaultValue; 94 } else { 95 return elements[index].keyValue.value; 96 } 97 } 98 99 ref Value getInsertDefault()(auto ref Key k, auto ref Value defaultValue) { 100 size_t index = getIndex(k); 101 if (index == getIndexEmptyValue) { 102 add(k, defaultValue); 103 } 104 index = getIndex(k); 105 assert(index != getIndexEmptyValue); 106 return elements[index].keyValue.value; 107 108 } 109 110 bool tryRemove(Key el) { 111 size_t index = getIndex(el); 112 if (index == getIndexEmptyValue) { 113 return false; 114 } 115 length--; 116 elements[index].hash = HASH_DELETED; 117 markerdDeleted++; 118 return true; 119 } 120 121 void remove(Key el) { 122 bool ok = tryRemove(el); 123 assert(ok); 124 } 125 126 ref Value opIndex()(auto ref Key key) { 127 return get(key); 128 } 129 130 void opIndexAssign()(auto ref Value value, auto ref Key key) { 131 add(key, value); 132 } 133 134 void add()(auto ref Key key, auto ref Value value) { 135 size_t index = getIndex(key); 136 if (index != getIndexEmptyValue) { 137 elements[index].keyValue.value = value; 138 return; 139 } 140 141 if (getLoadFactor(length + 1) > rehashFactor 142 || getLoadFactor(length + markerdDeleted) > rehashFactor) { 143 rehash(); 144 } 145 length++; 146 147 immutable ulong hash = hashFunc(key) | HASH_FILLED_MARK; 148 immutable size_t rotateMask = elements.length - 1; 149 index = hash & rotateMask; // Starting point 150 151 while (true) { 152 Bucket* gr = &elements[index]; 153 if ((gr.hash & HASH_FILLED_MARK) == 0) { 154 if (gr.hash == HASH_DELETED) { 155 markerdDeleted--; 156 } 157 gr.hash = hash; 158 gr.keyValue.key = key; 159 gr.keyValue.value = value; 160 return; 161 } 162 163 index++; 164 index = index & rotateMask; 165 } 166 } 167 168 // For debug 169 //int numA; 170 //int numB; 171 172 size_t getIndex(Key el) { 173 return getIndex(el); 174 } 175 176 size_t getIndex(ref Key el) { 177 mixin(doNotInline); 178 179 immutable size_t groupsLength = elements.length; 180 if (groupsLength == 0) { 181 return getIndexEmptyValue; 182 } 183 184 immutable ulong hash = hashFunc(el) | HASH_FILLED_MARK; 185 immutable size_t rotateMask = groupsLength - 1; 186 size_t index = hash & rotateMask; // Starting point 187 188 //numA++; 189 while (true) { 190 //numB++; 191 Bucket* gr = &elements[index]; 192 if (gr.hash == hash && gr.keyValue.key == el) { 193 return index; 194 } 195 if (gr.hash == HASH_EMPTY) { 196 return getIndexEmptyValue; 197 } 198 199 index++; 200 index = index & rotateMask; 201 } 202 203 } 204 205 float getLoadFactor(size_t forElementsNum) { 206 if (elements.length == 0) { 207 return 1; 208 } 209 return cast(float) forElementsNum / (elements.length); 210 } 211 212 void rehash() { 213 mixin(doNotInline); 214 // Get all elements 215 Vector!KeyVal allElements; 216 allElements.reserve(elements.length); 217 218 foreach (ref Bucket el; elements) { 219 if ((el.hash & HASH_FILLED_MARK) == 0) { 220 el.hash = HASH_EMPTY; 221 continue; 222 } 223 el.hash = HASH_EMPTY; 224 allElements ~= el.keyValue; 225 226 } 227 228 if (getLoadFactor(length + 1) > rehashFactor) { // Reallocate 229 elements.length = (elements.length ? elements.length : 4) << 1; // Power of two, initially 8 elements 230 } 231 232 // Insert elements 233 foreach (i, ref el; allElements) { 234 add(el.key, el.value); 235 } 236 length = allElements.length; 237 markerdDeleted = 0; 238 } 239 240 // foreach support 241 int opApply(DG)(scope DG dg) { 242 int result; 243 foreach (ref Bucket gr; elements) { 244 if ((gr.hash & HASH_FILLED_MARK) == 0) { 245 continue; 246 } 247 static if (isForeachDelegateWithTypes!(DG, Key)) { 248 result = dg(gr.keyValue.key); 249 } else static if (isForeachDelegateWithTypes!(DG, Value)) { 250 result = dg(gr.keyValue.value); 251 } else static if (isForeachDelegateWithTypes!(DG, Key, Value)) { 252 result = dg(gr.keyValue.key, gr.keyValue.value); 253 } else { 254 static assert(0); 255 } 256 if (result) 257 break; 258 259 } 260 261 return result; 262 } 263 264 int byKey(scope int delegate(Key k) dg) { 265 int result; 266 foreach (ref Key k; this) { 267 result = dg(k); 268 if (result) 269 break; 270 } 271 return result; 272 } 273 274 int byValue(scope int delegate(ref Value k) dg) { 275 int result; 276 foreach (ref Value v; this) { 277 result = dg(v); 278 if (result) 279 break; 280 } 281 return result; 282 } 283 284 int byKeyValue(scope int delegate(ref Key k, ref Value v) dg) { 285 int result; 286 foreach (ref Key k, ref Value v; this) { 287 result = dg(k, v); 288 if (result) 289 break; 290 } 291 return result; 292 } 293 294 import std.format : FormatSpec, formatValue; 295 296 /** 297 * Preety print 298 */ 299 void toString(scope void delegate(const(char)[]) sink, FormatSpec!char fmt) { 300 formatValue(sink, '[', fmt); 301 foreach (ref k, ref v; &byKeyValue) { 302 formatValue(sink, k, fmt); 303 formatValue(sink, ':', fmt); 304 formatValue(sink, v, fmt); 305 formatValue(sink, ", ", fmt); 306 } 307 formatValue(sink, ']', fmt); 308 } 309 310 // Make distripution plot 311 void saveGroupDistributionPlot(string path) { 312 enum distributionsNum = 1024 * 8; 313 BenchmarkData!(1, distributionsNum) distr; // For now use benchamrk as a plotter 314 315 foreach (ref Key el; this) { 316 immutable size_t rotateMask = elements.length - 1; 317 size_t group = cast(size_t)(hashFunc(el) & rotateMask); 318 if (group >= distributionsNum) { 319 continue; 320 } 321 distr.times[0][group]++; 322 323 } 324 distr.plotUsingGnuplot(path, ["group distribution"]); 325 } 326 327 } 328 329 static void dumpHashMapToJson(T)(ref T map, string path = "HashMapDump.json") { 330 Vector!char data; 331 import std.file; 332 import mutils.serializer.json; 333 334 JSONSerializer.instance.serialize!(Load.no)(map, data); 335 std.file.write(path, data[]); 336 } 337 338 static void printHashMap(T)(ref T map) { 339 import std.stdio; 340 341 writeln(T.stringof, " dump:\n"); 342 foreach (k, v; &map.byKeyValue) { 343 writefln("%20s : %20s", k, v); 344 } 345 } 346 347 unittest { 348 HashMap!(int, int) map; 349 350 assert(map.isIn(123) == false); 351 assert(map.markerdDeleted == 0); 352 map.add(123, 1); 353 map.add(123, 1); 354 assert(map.isIn(123) == true); 355 assert(map.isIn(122) == false); 356 assert(map.length == 1); 357 map.remove(123); 358 assert(map.markerdDeleted == 1); 359 assert(map.isIn(123) == false); 360 assert(map.length == 0); 361 assert(map.tryRemove(500) == false); 362 map.add(123, 1); 363 assert(map.markerdDeleted == 0); 364 assert(map.tryRemove(123) == true); 365 366 foreach (i; 1 .. 130) { 367 map.add(i, 1); 368 } 369 370 foreach (i; 1 .. 130) { 371 assert(map.isIn(i)); 372 } 373 374 foreach (i; 130 .. 500) { 375 assert(!map.isIn(i)); 376 } 377 378 foreach (int el; map) { 379 assert(map.isIn(el)); 380 } 381 } 382 383 unittest { 384 HashMap!(int, int) map; 385 map.add(1, 10); 386 assert(map.get(1) == 10); 387 assert(map.get(2, 20) == 20); 388 assert(!map.isIn(2)); 389 assert(map.getInsertDefault(2, 20) == 20); 390 assert(map.get(2) == 20); 391 map[5] = 50; 392 assert(map[5] == 50); 393 foreach (k; &map.byKey) { 394 } 395 foreach (k, v; &map.byKeyValue) { 396 } 397 foreach (v; &map.byValue) { 398 } 399 } 400 401 unittest { 402 HashMap!(Vector!char, int) map; 403 Vector!char vecA; 404 405 vecA ~= "AAA"; 406 map.add(vecA, 10); 407 assert(map[vecA] == 10); 408 map.add(vecA, 20); 409 assert(map[vecA] == 20); 410 //assert(vecA=="AAA"); 411 //assert(map["AAA"]==10);// TODO hashMap Vector!char and string 412 } 413 414 void benchmarkHashMapInt() { 415 HashMap!(int, int) map; 416 byte[int] mapStandard; 417 uint elementsNumToAdd = cast(uint)(65536 * 0.74); 418 // Add elements 419 foreach (int i; 0 .. elementsNumToAdd) { 420 map.add(i, 1); 421 mapStandard[i] = 1; 422 } 423 // Check if isIn is working 424 foreach (int i; 0 .. elementsNumToAdd) { 425 assert(map.isIn(i)); 426 assert((i in mapStandard) !is null); 427 } 428 // Check if isIn is returning false properly 429 foreach (int i; elementsNumToAdd .. elementsNumToAdd + 10_000) { 430 assert(!map.isIn(i)); 431 assert((i in mapStandard) is null); 432 } 433 //map.numA=map.numB=map.numC=0; 434 enum itNum = 100; 435 BenchmarkData!(2, itNum) bench; 436 doNotOptimize(map); // Make some confusion for compiler 437 doNotOptimize(mapStandard); 438 ushort myResults; 439 myResults = 0; 440 //benchmark standard library implementation 441 foreach (b; 0 .. itNum) { 442 bench.start!(1)(b); 443 foreach (i; 0 .. elementsNumToAdd) { 444 auto ret = myResults in mapStandard; 445 myResults += 1 + cast(bool) ret; //cast(typeof(myResults))(cast(bool)ret); 446 doNotOptimize(ret); 447 } 448 bench.end!(1)(b); 449 } 450 451 auto stResult = myResults; 452 //benchmark this implementation 453 myResults = 0; 454 foreach (b; 0 .. itNum) { 455 bench.start!(0)(b); 456 foreach (i; 0 .. elementsNumToAdd) { 457 auto ret = map.isIn(myResults); 458 myResults += 1 + ret; //cast(typeof(myResults))(ret); 459 doNotOptimize(ret); 460 } 461 bench.end!(0)(b); 462 } 463 assert(myResults == stResult); // Same behavior as standard map 464 //writeln(map.getLoadFactor(map.length)); 465 //writeln(map.numA); 466 //writeln(map.numB); 467 468 doNotOptimize(myResults); 469 bench.plotUsingGnuplot("testA.png", ["my", "standard"]); 470 map.saveGroupDistributionPlot("distrA.png"); 471 } 472 473 void benchmarkHashMapPerformancePerElement() { 474 ushort trueResults; 475 doNotOptimize(trueResults); 476 enum itNum = 100; 477 BenchmarkData!(2, itNum) bench; 478 HashMap!(int, int) map; 479 byte[int] mapStandard; 480 //writeln(.getLoadFactor(map.length)); 481 //map.numA=map.numB=map.numC=0; 482 size_t lastAdded; 483 size_t numToAdd = 16 * 8; 484 485 foreach (b; 0 .. itNum) { 486 foreach (i; lastAdded .. lastAdded + numToAdd) { 487 mapStandard[cast(uint) i] = true; 488 } 489 lastAdded += numToAdd; 490 bench.start!(1)(b); 491 foreach (i; 0 .. 1000_00) { 492 auto ret = trueResults in mapStandard; 493 trueResults += 1; //cast(typeof(trueResults))(cast(bool)ret); 494 doNotOptimize(ret); 495 } 496 bench.end!(1)(b); 497 } 498 lastAdded = 0; 499 trueResults = 0; 500 foreach (b; 0 .. itNum) { 501 foreach (i; lastAdded .. lastAdded + numToAdd) { 502 map.add(cast(int) i, 1); 503 } 504 lastAdded += numToAdd; 505 bench.start!(0)(b); 506 foreach (i; 0 .. 1000_00) { 507 auto ret = map.isIn(trueResults); 508 trueResults += 1; //cast(typeof(trueResults))(ret); 509 doNotOptimize(ret); 510 } 511 bench.end!(0)(b); 512 } 513 //writeln(map.numA); 514 //writeln(map.numB); 515 doNotOptimize(trueResults); 516 bench.plotUsingGnuplot("test.png", ["my", "standard"]); 517 //map.saveGroupDistributionPlot("distr.png"); 518 519 }