Lindenii Project Forge
Basic Swiss tables (SipHash only for now)
map_swiss_siphash: key-value map implemented with Swiss tables and SipHash
// SPDX-License-Identifier: Apache-2.0 AND MPL-2.0
// SPDX-FileCopyrightText: 2024 The Cockroach Authors
// SPDX-FileCopyrightText: 2025 Runxi Yu

use bytes;

// Deletes an item from a [[map]]. Returns the removed value, or void if the
// key is not present (or the map has no groups at all).
//
// The vacated slot is turned into a tombstone rather than an empty slot. Once
// tombstones make up at least a third of all slots, the table is rehashed in
// place to get rid of them.
export fn del(m: *map, key: []u8) (*opaque | void) = {
	if (len(m.groups) == 0) {
		return;
	};

	const hash = hash64(m, key);
	const tag = h2(hash);
	const gmask = m.group_mask;
	let pos: size = (h1(hash): size) & gmask;
	let step: size = 0;

	for (true) {
		let grp = &m.groups[pos];
		for (let slot = 0z; slot < GROUP_SIZE; slot += 1) {
			const ctrl = grp.ctrl[slot];

			// An empty slot ends the probe sequence: the key is absent.
			if (ctrl == CTRL_EMPTY) {
				return;
			};
			// Skip tombstones and entries whose tag does not match.
			if (!is_full_ctrl(ctrl) || ctrl != tag) {
				continue;
			};
			// Tag matched; confirm with a full key comparison.
			if (!bytes::equal(grp.keys[slot], key)) {
				continue;
			};

			const removed = grp.vals[slot];
			grp.ctrl[slot] = CTRL_DELETED;
			grp.keys[slot] = [];
			grp.vals[slot] = null;
			m.used -= 1;
			m.tombs += 1;

			// Elide the tombstones once they reach 1/3 of the capacity.
			if (m.tombs * 3 >= capacity_slots(m)) {
				rehash_in_place(m);
			};

			match (removed) {
			case null =>
				abort("map: null internal state escaped");
			case let p: *opaque =>
				return p;
			};
		};

		const following = probe_next(pos, step, gmask);
		pos = following.0;
		step = following.1;
	};
};
// SPDX-License-Identifier: Apache-2.0 AND MPL-2.0
// SPDX-FileCopyrightText: 2024 The Cockroach Authors
// SPDX-FileCopyrightText: 2025 Runxi Yu

// Frees resources associated with a [[map]]: the group storage (when there is
// any) and then the map object itself. Keys and values stored in the map are
// not freed here.
export fn finish(m: *map) void = {
	// Runs last, after the group storage below has been released.
	defer free(m);

	if (len(m.groups) == 0) {
		return;
	};
	free(m.groups);
};
// SPDX-License-Identifier: Apache-2.0 AND MPL-2.0
// SPDX-FileCopyrightText: 2024 The Cockroach Authors
// SPDX-FileCopyrightText: 2025 Runxi Yu

use bytes;

// Gets an item from a [[map]] by key, returning void if not found (or if the
// map has no groups at all).
export fn get(m: *map, key: []u8) (*opaque | void) = {
	if (len(m.groups) == 0) {
		return;
	};

	const hash = hash64(m, key);
	const tag = h2(hash);
	const gmask = m.group_mask;
	let pos: size = (h1(hash): size) & gmask;
	let step: size = 0;

	for (true) {
		let grp = &m.groups[pos];
		for (let slot = 0z; slot < GROUP_SIZE; slot += 1) {
			const ctrl = grp.ctrl[slot];

			// An empty slot ends the probe sequence: the key is absent.
			if (ctrl == CTRL_EMPTY) {
				return;
			};
			// Skip tombstones and entries whose tag does not match.
			if (!is_full_ctrl(ctrl) || ctrl != tag) {
				continue;
			};
			// Tag matched; confirm with a full key comparison.
			if (!bytes::equal(grp.keys[slot], key)) {
				continue;
			};

			match (grp.vals[slot]) {
			case null =>
				abort("map: null internal state escaped");
			case let p: *opaque =>
				return p;
			};
		};

		const following = probe_next(pos, step, gmask);
		pos = following.0;
		step = following.1;
	};
};
// SPDX-License-Identifier: Apache-2.0 AND MPL-2.0
// SPDX-FileCopyrightText: 2024 The Cockroach Authors
// SPDX-FileCopyrightText: 2025 Runxi Yu

use bytes;
use hash;
use hash::siphash;

// Number of slots in each [[group]].
export def GROUP_SIZE: size = 8z;

// Control byte of a slot that holds nothing. Lookups stop probing when they
// reach one.
export def CTRL_EMPTY: u8 = 0x80;

// Control byte of a tombstone: a slot whose entry was deleted. Lookups keep
// probing past it.
export def CTRL_DELETED: u8 = 0xFE;

// A group of GROUP_SIZE slots. For a full slot, ctrl[i] holds the 7-bit tag
// derived from the key's hash, keys[i] the key and vals[i] the value.
export type group = struct {
	ctrl: [GROUP_SIZE]u8,
	keys: [GROUP_SIZE][]u8,
	vals: [GROUP_SIZE]nullable *opaque,
};

// Resets every slot of a group to the empty state.
fn group_set_empty(g: *group) void = {
	for (let i = 0z; i < GROUP_SIZE; i += 1) {
		g.ctrl[i] = CTRL_EMPTY;
		g.keys[i] = [];
		g.vals[i] = null;
	};
};

// Reports whether a control byte denotes a full slot, i.e. its top bit is
// clear (both CTRL_EMPTY and CTRL_DELETED have the top bit set).
fn is_full_ctrl(c: u8) bool = (c & 0x80) == 0 && c != CTRL_DELETED;

// Hashes a key with SipHash-2-4 under the map's key.
fn hash64(m: *map, key: []u8) u64 = {
	let h = siphash::siphash(2, 4, &m.siphash_key);
	defer hash::close(&h);
	hash::write(&h, key);
	return siphash::sum(&h);
};

// Upper bits of the hash; selects the starting group of a probe sequence.
fn h1(h: u64) u64 = h >> 7u64;

// Low seven bits of the hash; stored in the control byte of a full slot.
fn h2(h: u64) u8 = (h & 0x7Fu64): u8;

// Computes the next position of a probe sequence. Returns the new group offset
// and the new probe index. The step grows by one on every call (1, 2, 3, ...).
fn probe_next(off: size, idx: size, mask: size) (size, size) = {
	let nidx = idx + 1;
	let noff = (off + nidx) & mask;
	return (noff, nidx);
};

// Total number of slots addressed by the current group mask.
fn capacity_slots(m: *map) size = (m.group_mask + 1) * GROUP_SIZE;

// Load limit: full slots plus tombstones must stay below 7/8 of the slots.
fn max_used_with_tombs(m: *map) size = {
	return (capacity_slots(m) * 7z) / 8z;
};

// Makes sure one more entry can be inserted, growing the table if necessary.
//
// A map without any groups gets its first group here, so that callers may
// index m.groups afterwards. Otherwise the group count is doubled once full
// slots plus tombstones reach the load limit.
fn ensure_capacity_for_insert(m: *map) (void | nomem) = {
	if (len(m.groups) == 0) {
		// group_mask says nothing about a map without storage; always
		// allocate.
		return resize(m, 1);
	};
	if (m.used + m.tombs < max_used_with_tombs(m)) {
		return;
	};
	return resize(m, (m.group_mask + 1) * 2);
};

// Drops all tombstones by re-inserting every live entry into a fresh group
// array of the same size.
//
// This is only an optimisation. If the fresh array cannot be allocated the
// table is left exactly as it is, tombstones included, instead of aborting.
fn rehash_in_place(m: *map) void = {
	if (len(m.groups) == 0) return;

	let fresh: []group = match (alloc([group{...}...], (m.group_mask + 1))) {
	case let a: []group =>
		yield a;
	case nomem =>
		return;
	};
	for (let i = 0z; i < len(fresh); i += 1) {
		group_set_empty(&fresh[i]);
	};

	let old = m.groups;
	m.groups = fresh;
	m.used = 0;
	m.tombs = 0;

	for (let gi = 0z; gi < len(old); gi += 1) {
		let g = &old[gi];
		for (let si = 0z; si < GROUP_SIZE; si += 1) {
			if (!is_full_ctrl(g.ctrl[si])) continue;
			unchecked_put(m, g.keys[si], g.vals[si]);
		};
	};
	free(old);
};

// Replaces the group array with one of new_groups_len groups (at least one)
// and re-inserts every live entry. Tombstones are dropped along the way.
//
// new_groups_len becomes group_mask + 1, and probing masks offsets with
// group_mask, so callers pass a power of two. On nomem the map is unchanged.
fn resize(m: *map, new_groups_len: size) (void | nomem) = {
	let want: size = if (new_groups_len == 0) 1z else new_groups_len;

	let gs: []group = match (alloc([group{...}...], want)) {
	case let a: []group =>
		yield a;
	case nomem =>
		return nomem;
	};
	for (let i = 0z; i < len(gs); i += 1) {
		group_set_empty(&gs[i]);
	};

	let old = m.groups;
	m.groups = gs;
	m.group_mask = want - 1;
	m.used = 0;
	m.tombs = 0;

	// Walk the old storage by its real length, not by the old mask: a map
	// without groups has nothing to migrate, and old[0] would be out of
	// bounds.
	for (let gi = 0z; gi < len(old); gi += 1) {
		let g = &old[gi];
		for (let si = 0z; si < GROUP_SIZE; si += 1) {
			if (!is_full_ctrl(g.ctrl[si])) continue;
			unchecked_put(m, g.keys[si], g.vals[si]);
		};
	};

	if (len(old) != 0) {
		free(old);
	};
};

// Inserts a key/value pair without looking for an existing entry with the same
// key and without checking the load limit.
//
// The entry goes into the group holding the first empty slot of the probe
// sequence; a tombstone seen earlier in that same group is reused in
// preference. The table must contain at least one empty slot, otherwise this
// loops forever.
fn unchecked_put(m: *map, key: []u8, val: nullable *opaque) void = {
	let hv = hash64(m, key);
	let t = h2(hv);
	let mask = m.group_mask;
	let off: size = (h1(hv): size) & mask;
	let idx: size = 0;

	for (true) {
		let g = &m.groups[off];
		let first_dead: (size | void) = void;

		for (let i = 0z; i < GROUP_SIZE; i += 1) {
			let c = g.ctrl[i];
			if (is_full_ctrl(c)) {
				continue;
			} else if (c == CTRL_DELETED) {
				if (first_dead is void) first_dead = i;
			} else {
				let slot = match (first_dead) {
				case void =>
					yield i;
				case let di: size =>
					yield di;
				};
				g.keys[slot] = key;
				g.vals[slot] = val;
				g.ctrl[slot] = t;
				m.used += 1;
				// A reused tombstone is no longer a tombstone.
				if (slot != i) {
					m.tombs -= 1;
				};
				return;
			};
		};

		let next = probe_next(off, idx, mask);
		off = next.0;
		idx = next.1;
	};
};
// SPDX-License-Identifier: Apache-2.0 AND MPL-2.0
// SPDX-FileCopyrightText: 2024 The Cockroach Authors
// SPDX-FileCopyrightText: 2025 Runxi Yu

use ds::map;

// Swiss table based map from []u8 to *opaque.
//
// You are advised to create these with [[new]].
export type map = struct {
	// Generic map interface; [[new]] points it at this module's vtable.
	vt: map::map,
	// Number of groups minus one; probing masks group offsets with it.
	group_mask: size,
	// Number of full slots.
	used: size,
	// Number of tombstones left behind by deletions.
	tombs: size,
	// Key handed to SipHash when hashing map keys.
	siphash_key: [16]u8,
	// Slot storage.
	groups: []group,
};

// Dispatch table plugging this implementation into ds::map.
const _vt: map::vtable = map::vtable {
	getter = &vt_get,
	setter = &vt_set,
	deleter = &vt_del,
	finisher = &vt_finish,
};

// The thunks below cast the generic map pointer to this module's [[map]] and
// forward to the concrete function.
// NOTE(review): the cast presumably relies on vt being the first field of
// [[map]] -- confirm before reordering fields.

// Forwards to [[get]].
fn vt_get(m: *map::map, key: []u8) (*opaque | void) = {
	return get(m: *map, key);
};

// Forwards to [[set]].
fn vt_set(m: *map::map, key: []u8, v: *opaque) (void | nomem) = {
	return set(m: *map, key, v);
};

// Forwards to [[del]].
fn vt_del(m: *map::map, key: []u8) (*opaque | void) = {
	return del(m: *map, key);
};

// Forwards to [[finish]].
fn vt_finish(m: *map::map) void = {
	finish(m: *map);
};
// SPDX-License-Identifier: Apache-2.0 AND MPL-2.0
// SPDX-FileCopyrightText: 2024 The Cockroach Authors
// SPDX-FileCopyrightText: 2025 Runxi Yu

use errors;
use ds::map;

// Creates a new [[map]] with an initial number of groups and SipHash key.
//
// n_groups must be greater than zero, otherwise [[errors::invalid]] is
// returned. It is rounded up to the next power of two. Returns nomem if the
// group storage or the map itself cannot be allocated; nothing is leaked in
// that case. The result is released with [[finish]].
export fn new(n_groups: size, siphash_key: [16]u8) (*map | errors::invalid | nomem) = {
	if (n_groups == 0) {
		return errors::invalid;
	};

	// Round up to a power of two, since group offsets are masked with
	// groups_count - 1.
	const half_max: size = (~0z) / 2;
	let groups_count: size = 1;
	for (groups_count < n_groups) {
		if (groups_count > half_max) {
			// Doubling again would overflow size. A table this large
			// cannot be allocated anyway, so report nomem instead of
			// looping forever.
			return nomem;
		};
		groups_count *= 2;
	};

	let gs: []group = match (alloc([group{...}...]: []group, groups_count)) {
	case let a: []group =>
		yield a;
	case nomem =>
		return nomem;
	};
	for (let i = 0z; i < len(gs); i += 1) {
		group_set_empty(&gs[i]);
	};

	match (alloc(map {
		vt = &_vt,
		group_mask = groups_count - 1,
		used = 0,
		tombs = 0,
		siphash_key = siphash_key,
		groups = gs,
	})) {
	case let m: *map =>
		return m;
	case nomem =>
		// Do not leak the group storage allocated above.
		free(gs);
		return nomem;
	};
};
// SPDX-License-Identifier: Apache-2.0 AND MPL-2.0
// SPDX-FileCopyrightText: 2024 The Cockroach Authors
// SPDX-FileCopyrightText: 2025 Runxi Yu

use bytes;

// Sets an item in a [[map]], replacing any existing item with the same key.
//
// The key slice is stored as-is, not copied. Returns nomem if the table has to
// grow and the allocation fails.
export fn set(m: *map, key: []u8, value: *opaque) (void | nomem) = {
	// First pass: when the key is already present, overwrite its value and
	// leave. The pass ends at the first empty slot on the probe sequence.
	if (len(m.groups) != 0) {
		const hash0 = hash64(m, key);
		const tag0 = h2(hash0);
		const gmask0 = m.group_mask;
		let pos0: size = (h1(hash0): size) & gmask0;
		let step0: size = 0;
		let absent = false;

		for (!absent) {
			let grp = &m.groups[pos0];
			for (let slot = 0z; slot < GROUP_SIZE; slot += 1) {
				const ctrl = grp.ctrl[slot];
				if (ctrl == CTRL_EMPTY) {
					absent = true;
					break;
				};
				if (is_full_ctrl(ctrl) && ctrl == tag0
						&& bytes::equal(grp.keys[slot], key)) {
					grp.vals[slot] = value;
					return;
				};
			};
			if (!absent) {
				const following = probe_next(pos0, step0, gmask0);
				pos0 = following.0;
				step0 = following.1;
			};
		};
	};

	// The key is new. Make room first; this may replace the group array, so
	// the probe start is recomputed below.
	ensure_capacity_for_insert(m)?;

	const hash = hash64(m, key);
	const tag = h2(hash);
	const gmask = m.group_mask;
	let pos: size = (h1(hash): size) & gmask;
	let step: size = 0;

	// Second pass: place the entry in the group holding the first empty slot,
	// reusing a tombstone seen earlier in that same group when there is one.
	for (true) {
		let grp = &m.groups[pos];
		let reusable: (size | void) = void;

		for (let slot = 0z; slot < GROUP_SIZE; slot += 1) {
			const ctrl = grp.ctrl[slot];

			if (is_full_ctrl(ctrl)) {
				if (ctrl == tag && bytes::equal(grp.keys[slot], key)) {
					grp.vals[slot] = value;
					return;
				};
				continue;
			};
			if (ctrl == CTRL_DELETED) {
				if (reusable is void) {
					reusable = slot;
				};
				continue;
			};

			// Empty slot: insert here or into the remembered tombstone.
			const target = match (reusable) {
			case void =>
				yield slot;
			case let dead: size =>
				yield dead;
			};
			grp.keys[target] = key;
			grp.vals[target] = value;
			grp.ctrl[target] = tag;
			m.used += 1;
			if (target != slot) {
				m.tombs -= 1;
			};
			return;
		};

		const following = probe_next(pos, step, gmask);
		pos = following.0;
		step = following.1;
	};
};
// SPDX-License-Identifier: Apache-2.0 AND MPL-2.0
// SPDX-FileCopyrightText: 2024 The Cockroach Authors
// SPDX-FileCopyrightText: 2025 Runxi Yu

use crypto::random;
use errors;
use strings;
use ds::map;

// Exercises set, replace, get of a missing key, delete and repeated delete
// through the generic ds::map entry points, on a one-group table with a random
// SipHash key.
@test fn roundtrip() void = {
	let key: [16]u8 = [0...];
	random::buffer(&key);

	let created = new(1, key);
	if (created is errors::invalid) {
		abort("unexpected errors::invalid");
	};
	if (created is nomem) {
		abort("unexpected nomem");
	};
	let m: *map = created as *map;
	defer finish(m);

	// Values are the addresses of three distinct locals.
	let v1 = 1;
	let v2 = 2;
	let v3 = 3;
	let p1: *opaque = (&v1: *opaque);
	let p2: *opaque = (&v2: *opaque);
	let p3: *opaque = (&v3: *opaque);

	let k1 = strings::toutf8("alpha");
	let k2 = strings::toutf8("beta");
	let k3 = strings::toutf8("gamma");

	// Insert, then read back.
	if (map::set(m, k1, p1) is nomem) {
		abort("unexpected nomem in set(k1,p1)");
	};
	let first = map::get(m, k1);
	if (first is void) {
		abort("get(k1) unexpectedly void");
	};
	assert((first as *opaque) == p1, "get(k1) must return p1");

	// Replace the value under the same key.
	if (map::set(m, k1, p2) is nomem) {
		abort("unexpected nomem in replace");
	};
	let second = map::get(m, k1);
	if (second is void) {
		abort("get(k1) void after replace");
	};
	assert((second as *opaque) == p2, "replace must overwrite prior value");

	// A second key; a third one that was never inserted stays missing.
	if (map::set(m, k2, p3) is nomem) {
		abort("unexpected nomem in set(k2,p3)");
	};
	if (map::get(m, k3) is *opaque) {
		abort("get(k3) must be void for missing key");
	};

	// Delete returns the stored value once, then void.
	let removed = map::del(m, k2);
	if (removed is void) {
		abort("del(k2) unexpectedly void");
	};
	assert((removed as *opaque) == p3, "del(k2) must return stored value");

	if (map::del(m, k2) is *opaque) {
		abort("del(k2) must be void after prior delete");
	};
};