Skip to content

Commit a71917a

Browse files
authored
Merge pull request #5941 from odin-lang/bill/typeid-sip-hash
Use SIP hash as name canonicalization hash
2 parents 1fb95da + 21116a7 commit a71917a

File tree

1 file changed

+146
-9
lines changed

1 file changed

+146
-9
lines changed

src/name_canonicalization.cpp

Lines changed: 146 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -242,9 +242,146 @@ gb_internal gb_inline void type_set_clear(TypeSet *s) {
242242
typedef TYPE_WRITER_PROC(TypeWriterProc);
243243

244244

245+
// Size in bytes of one hash input block; matches the 8-byte word that
// sip_block copies into a u64 and the capacity of SipHashContext::buf.
enum { SIP_BLOCK_SIZE = 8 };
246+
247+
struct SipHashContext {
248+
u64 v0, v1, v2, v3; // State values
249+
u64 k0, k1; // Split key
250+
isize c_rounds; // Number of message rounds
251+
isize d_rounds; // Number of finalization rounds
252+
u8 buf[SIP_BLOCK_SIZE]; // Provided data
253+
isize last_block; // offset from last block
254+
isize total_length;
255+
bool is_initialized;
256+
};
257+
258+
struct TypeidHashContext {
259+
SipHashContext sip;
260+
};
261+
262+
263+
// Prepares `hash_ctx` for a fresh hash computation: selects 2 compression
// rounds per block and 4 finalization rounds, installs the fixed key, derives
// the four state words from that key, resets the buffered-byte bookkeeping and
// marks the context as initialized. The contents of `buf` are left untouched.
void typeid_hash_context_init(TypeidHashContext *hash_ctx) {
	// Fixed key halves: arbitrary numbers acting as the seed.
	u64 const key_lo = 0xa6592ea25e04ac3cull;
	u64 const key_hi = 0xba3cba04ed28a9aeull;

	SipHashContext *s = &hash_ctx->sip;

	s->c_rounds = 2;
	s->d_rounds = 4;

	s->k0 = key_lo;
	s->k1 = key_hi;

	// Each state word is a constant xor'd with one half of the key.
	// Read as ASCII, the constants spell "somepseu", "dorandom",
	// "lygenera" and "tedbytes".
	s->v0 = key_lo ^ 0x736f6d6570736575ull;
	s->v1 = key_hi ^ 0x646f72616e646f6dull;
	s->v2 = key_lo ^ 0x6c7967656e657261ull;
	s->v3 = key_hi ^ 0x7465646279746573ull;

	// Nothing buffered and nothing hashed yet.
	s->last_block   = 0;
	s->total_length = 0;

	s->is_initialized = true;
}
283+
284+
// Rotates the 64-bit value `x` left by `k` bit positions and returns the
// result. Only the low six bits of `k` are used, so the rotation amount is
// effectively `k` modulo 64.
u64 rotate_left64(u64 x, u64 k) {
	static u64 const n = 64;
	u64 s = k & (n-1);
	// The right shift brings back the bits that the left shift pushed out, so
	// it must move by the complementary amount (n-s), not by a constant.
	// That amount is masked too: for s == 0 it would otherwise be 64, and
	// shifting a 64-bit value by 64 is undefined behavior.
	return (x<<s) | (x>>((n-s) & (n-1)));
}
289+
290+
// Applies one mixing round to the four state words (v0..v3) of `sip`.
// Only the state words are read and written; the other fields are untouched.
void sip_compress(SipHashContext *sip) {
	u64 a = sip->v0;
	u64 b = sip->v1;
	u64 c = sip->v2;
	u64 d = sip->v3;

	// first half-round
	a += b;
	b  = rotate_left64(b, 13);
	b ^= a;
	a  = rotate_left64(a, 32);

	c += d;
	d  = rotate_left64(d, 16);
	d ^= c;

	// second half-round
	a += d;
	d  = rotate_left64(d, 21);
	d ^= a;

	c += b;
	b  = rotate_left64(b, 17);
	b ^= c;
	c  = rotate_left64(c, 32);

	sip->v0 = a;
	sip->v1 = b;
	sip->v2 = c;
	sip->v3 = d;
}
306+
307+
// Absorbs every complete SIP_BLOCK_SIZE-byte block contained in the first
// `len` bytes at `ptr` into the state of `sip`. Trailing bytes that do not
// form a complete block are ignored; buffering them is the caller's job.
void sip_block(SipHashContext *sip, void const *ptr, isize len) {
	u8 const *cursor = cast(u8 const *)ptr;

	for (isize remaining = len;
	     remaining >= SIP_BLOCK_SIZE;
	     remaining -= SIP_BLOCK_SIZE, cursor += SIP_BLOCK_SIZE) {
		// Copy byte-wise into the word so unaligned input is fine; the
		// bytes are interpreted in the host's byte order.
		u64 word = 0;
		gb_memcopy(&word, cursor, sizeof(word));

		// The word is xor'd into v3 before the rounds and into v0 after them.
		sip->v3 ^= word;
		for (isize round = 0; round < sip->c_rounds; round += 1) {
			sip_compress(sip);
		}
		sip->v0 ^= word;
	}
}
325+
326+
// Feeds `len` bytes starting at `ptr` into the running hash held by `ctx`.
// May be called repeatedly; bytes that do not fill a whole block are kept in
// the context's buffer until a later call (or finalization) consumes them.
// Asserts that the context has been initialized.
void typeid_hash_context_update(TypeidHashContext *ctx, void const *ptr, isize len) {
	GB_ASSERT(ctx->sip.is_initialized);

	SipHashContext *s   = &ctx->sip;
	u8 const       *src = cast(u8 const *)ptr;

	s->total_length += len;

	// Phase 1: top up a partially filled buffer left behind by an earlier call.
	if (s->last_block > 0) {
		isize room = SIP_BLOCK_SIZE - s->last_block;
		isize take = gb_min(room, len);

		gb_memcopy(s->buf + s->last_block, src, take);
		s->last_block += take;
		src += take;
		len -= take;

		// Once the buffer holds a full block, hash it and start over.
		if (s->last_block == SIP_BLOCK_SIZE) {
			sip_block(s, s->buf, SIP_BLOCK_SIZE);
			s->last_block = 0;
		}
	}

	// Phase 2: hash all whole blocks straight from the caller's memory.
	if (len >= SIP_BLOCK_SIZE) {
		isize tail_len  = len & (SIP_BLOCK_SIZE-1);
		isize whole_len = len - tail_len; // largest multiple of the block size <= len

		sip_block(s, src, whole_len);
		src += whole_len;
		len -= whole_len;
	}

	// Phase 3: stash whatever is left (fewer than SIP_BLOCK_SIZE bytes).
	if (len > 0) {
		isize keep = gb_min(SIP_BLOCK_SIZE, len);
		gb_memcopy(s->buf, src, keep);
		s->last_block = keep;
	}
}
356+
357+
// Finishes the hash held by `ctx` and returns the 64-bit result.
// The result is never 0: a zero digest is reported as 1.
// The SipHash state is zeroed before returning (which also clears
// `is_initialized`), so the context must be initialized again before reuse.
u64 typeid_hash_context_fini(TypeidHashContext *ctx) {
	GB_ASSERT(ctx->sip.is_initialized);

	SipHashContext *s = &ctx->sip;

	// Build the final block: buffered leftover bytes, zero padding, and the
	// low byte of the total input length in the last position.
	u8 last[SIP_BLOCK_SIZE] = {};
	isize leftover = gb_min(s->last_block, SIP_BLOCK_SIZE);
	gb_memcopy(last, s->buf, leftover);
	last[7] = u8(s->total_length & 0xff);
	sip_block(s, last, SIP_BLOCK_SIZE);

	// Finalization: flip the low byte of v2, then run the finalization rounds.
	s->v2 ^= 0xff;
	for (isize round = 0; round < s->d_rounds; round += 1) {
		sip_compress(s);
	}

	u64 digest = s->v0 ^ s->v1 ^ s->v2 ^ s->v3;

	*s = {};

	if (digest == 0) {
		return 1;
	}
	return digest;
}
378+
379+
380+
245381
struct TypeWriter {
246-
TypeWriterProc *proc;
247-
void *user_data;
382+
TypeWriterProc * proc;
383+
void * user_data;
384+
TypeidHashContext hash_ctx;
248385
};
249386

250387
bool type_writer_append(TypeWriter *w, void const *ptr, isize len) {
@@ -289,13 +426,14 @@ void type_writer_destroy_string(TypeWriter *w) {
289426

290427

291428
// Writer callback that hashes its input instead of storing it: the bytes are
// forwarded to the TypeidHashContext stored in `w->user_data`.
// Always returns true.
TYPE_WRITER_PROC(type_writer_hasher_writer_proc) {
	typeid_hash_context_update(cast(TypeidHashContext *)w->user_data, ptr, len);
	return true;
}
296433

297-
void type_writer_make_hasher(TypeWriter *w, u64 *hash) {
298-
w->user_data = hash;
434+
void type_writer_make_hasher(TypeWriter *w, TypeidHashContext *ctx) {
435+
typeid_hash_context_init(ctx);
436+
w->user_data = ctx;
299437
w->proc = type_writer_hasher_writer_proc;
300438
}
301439

@@ -378,11 +516,10 @@ gb_internal u64 type_hash_canonical_type(Type *type) {
378516
return prev_hash;
379517
}
380518

381-
u64 hash = fnv64a(nullptr, 0);
382519
TypeWriter w = {};
383-
type_writer_make_hasher(&w, &hash);
520+
type_writer_make_hasher(&w, &w.hash_ctx);
384521
write_type_to_canonical_string(&w, type);
385-
hash = hash ? hash : 1;
522+
u64 hash = typeid_hash_context_fini(&w.hash_ctx);
386523

387524
type->canonical_hash.store(hash, std::memory_order_relaxed);
388525

0 commit comments

Comments
 (0)