From 00deab2ef8de8b3e47825f47d8404d6a98143710 Mon Sep 17 00:00:00 2001 From: Runxi Yu Date: Tue, 16 Sep 2025 22:05:39 +0800 Subject: [PATCH] Basic swiss tables (SipHash only for now) --- ds/map/map_swiss_siphash/README | 1 + ds/map/map_swiss_siphash/del.ha | 47 +++++++++++++++++++++++++++++++++++++++++++++++ ds/map/map_swiss_siphash/finish.ha | 11 +++++++++++ ds/map/map_swiss_siphash/get.ha | 37 +++++++++++++++++++++++++++++++++++++ ds/map/map_swiss_siphash/internal.ha | 150 +++++++++++++++++++++++++++++++++++++++++++++++++++++ ds/map/map_swiss_siphash/map.ha | 29 +++++++++++++++++++++++++++++ ds/map/map_swiss_siphash/new.ha | 39 +++++++++++++++++++++++++++++++++++++++ ds/map/map_swiss_siphash/set.ha | 96 +++++++++++++++++++++++++++++++++++++++++++++++++++++ ds/map/map_swiss_siphash/test.ha | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++++ diff --git a/ds/map/map_swiss_siphash/README b/ds/map/map_swiss_siphash/README new file mode 100644 index 0000000000000000000000000000000000000000..2078461fc8c5affae685c5f13929c150b8e33e55 --- /dev/null +++ b/ds/map/map_swiss_siphash/README @@ -0,0 +1 @@ +map_swiss_siphash: key-value map implemented with Swiss tables and SipHash diff --git a/ds/map/map_swiss_siphash/del.ha b/ds/map/map_swiss_siphash/del.ha new file mode 100644 index 0000000000000000000000000000000000000000..6be3062c7393342d00db2451f8853027872ba0cf --- /dev/null +++ b/ds/map/map_swiss_siphash/del.ha @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: Apache-2.0 AND MPL-2.0 +// SPDX-FileCopyrightText: 2024 The Cockroach Authors +// SPDX-FileCopyrightText: 2025 Runxi Yu + +use bytes; + +// Deletes an item from a [[map]]. Returns the removed value or void. 
+export fn del(m: *map, key: []u8) (*opaque | void) = {
+	// A map without any groups cannot contain the key.
+	if (len(m.groups) == 0) return;
+	let hv = hash64(m, key);
+	let t = h2(hv);
+	let mask = m.group_mask;
+	let off: size = (h1(hv): size) & mask;
+	let idx: size = 0;
+
+	for (true) {
+		let g = &m.groups[off];
+		for (let i = 0z; i < GROUP_SIZE; i += 1) {
+			let c = g.ctrl[i];
+			if (is_full_ctrl(c) && c == t) {
+				// The 7-bit tag matches; confirm with a full key
+				// comparison before touching the slot.
+				if (bytes::equal(g.keys[i], key)) {
+					let v = g.vals[i];
+					// Leave a tombstone rather than CTRL_EMPTY so
+					// that lookups for keys stored further along
+					// the probe sequence do not stop early here.
+					g.ctrl[i] = CTRL_DELETED;
+					g.keys[i] = [];
+					g.vals[i] = null;
+					m.used -= 1;
+					m.tombs += 1;
+					// Compact the table once tombstones occupy at
+					// least one third of all slots. v was read
+					// above, so it survives the rehash.
+					if (m.tombs * 3 >= capacity_slots(m)) {
+						rehash_in_place(m);
+					};
+					match (v) {
+					case null =>
+						abort("map: null internal state escaped");
+					case let p: *opaque =>
+						return p;
+					};
+				};
+			} else if (c == CTRL_EMPTY) {
+				// An empty slot ends the probe sequence: the key
+				// is not in the map.
+				return;
+			};
+		};
+		let next = probe_next(off, idx, mask);
+		off = next.0;
+		idx = next.1;
+	};
+};
diff --git a/ds/map/map_swiss_siphash/finish.ha b/ds/map/map_swiss_siphash/finish.ha
new file mode 100644
index 0000000000000000000000000000000000000000..640dd673472c4a9817496e481eb97f2eded8d80f
--- /dev/null
+++ b/ds/map/map_swiss_siphash/finish.ha
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: Apache-2.0 AND MPL-2.0
+// SPDX-FileCopyrightText: 2024 The Cockroach Authors
+// SPDX-FileCopyrightText: 2025 Runxi Yu
+
+// Frees resources associated with a [[map]].
+export fn finish(m: *map) void = {
+	if (len(m.groups) != 0) {
+		free(m.groups);
+	};
+	free(m);
+};
diff --git a/ds/map/map_swiss_siphash/get.ha b/ds/map/map_swiss_siphash/get.ha
new file mode 100644
index 0000000000000000000000000000000000000000..daa9e0b3aa7c13ceebebbd614ec769b7cf200e5b
--- /dev/null
+++ b/ds/map/map_swiss_siphash/get.ha
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: Apache-2.0 AND MPL-2.0
+// SPDX-FileCopyrightText: 2024 The Cockroach Authors
+// SPDX-FileCopyrightText: 2025 Runxi Yu
+
+use bytes;
+
+// Gets an item from a [[map]] by key, returning void if not found.
+export fn get(m: *map, key: []u8) (*opaque | void) = {
+	// A map without any groups cannot contain the key.
+	if (len(m.groups) == 0) return;
+	let hv = hash64(m, key);
+	let t = h2(hv);
+	let mask = m.group_mask;
+	let off: size = (h1(hv): size) & mask;
+	let idx: size = 0;
+
+	for (true) {
+		let g = &m.groups[off];
+		for (let i = 0z; i < GROUP_SIZE; i += 1) {
+			let c = g.ctrl[i];
+			if (is_full_ctrl(c) && c == t) {
+				// Tag match only narrows the candidates; the key
+				// bytes decide.
+				if (bytes::equal(g.keys[i], key)) {
+					match (g.vals[i]) {
+					case null =>
+						abort("map: null internal state escaped");
+					case let p: *opaque =>
+						return p;
+					};
+				};
+			} else if (c == CTRL_EMPTY) {
+				// An empty slot ends the probe sequence: not found.
+				return;
+			};
+		};
+		let next = probe_next(off, idx, mask);
+		off = next.0;
+		idx = next.1;
+	};
+};
diff --git a/ds/map/map_swiss_siphash/internal.ha b/ds/map/map_swiss_siphash/internal.ha
new file mode 100644
index 0000000000000000000000000000000000000000..09fd5e9970e03c96ee3527bfbedaa39f80e3944b
--- /dev/null
+++ b/ds/map/map_swiss_siphash/internal.ha
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: Apache-2.0 AND MPL-2.0
+// SPDX-FileCopyrightText: 2024 The Cockroach Authors
+// SPDX-FileCopyrightText: 2025 Runxi Yu
+
+use bytes;
+use hash;
+use hash::siphash;
+
+// Number of slots in one [[group]].
+export def GROUP_SIZE: size = 8z;
+// Control byte of a slot that has never held an entry since the last
+// (re)build of the table. Lookups stop probing when they see it.
+export def CTRL_EMPTY: u8 = 0x80;
+// Control byte of a slot whose entry was deleted (a tombstone). Lookups probe
+// past it; inserts may reuse it.
+export def CTRL_DELETED: u8 = 0xFE;
+
+// A group of GROUP_SIZE slots. For slot i, ctrl[i] is either CTRL_EMPTY,
+// CTRL_DELETED, or (high bit clear) the 7-bit [[h2]] tag of the stored key;
+// keys[i] and vals[i] hold the entry itself.
+export type group = struct {
+	ctrl: [GROUP_SIZE]u8,
+	keys: [GROUP_SIZE][]u8,
+	vals: [GROUP_SIZE]nullable *opaque,
+};
+
+// Marks every slot of a group as empty and clears its key and value.
+fn group_set_empty(g: *group) void = {
+	for (let i = 0z; i < GROUP_SIZE; i += 1) {
+		g.ctrl[i] = CTRL_EMPTY;
+		g.keys[i] = [];
+		g.vals[i] = null;
+	};
+};
+
+// Reports whether a control byte denotes an occupied slot, i.e. its high bit
+// is clear. CTRL_DELETED (0xFE) already has the high bit set, so the second
+// comparison never changes the result; it is kept as an explicit guard.
+fn is_full_ctrl(c: u8) bool = (c & 0x80) == 0 && c != CTRL_DELETED;
+
+// Computes the SipHash-2-4 digest of key using the map's siphash_key.
+fn hash64(m: *map, key: []u8) u64 = {
+	let h = siphash::siphash(2, 4, &m.siphash_key);
+	defer hash::close(&h);
+	hash::write(&h, key);
+	return siphash::sum(&h);
+};
+
+// Upper 57 bits of the hash; masked with group_mask to pick the first group.
+fn h1(h: u64) u64 = h >> 7u64;
+// Lower 7 bits of the hash; stored in the control byte of a full slot.
+fn h2(h: u64) u8 = (h & 0x7Fu64): u8;
+
+// Returns the next (group offset, probe index) pair. The step grows by one on
+// every call (+1, +2, +3, ...), wrapped by mask; with a power-of-two number
+// of groups this sequence reaches every group.
+fn probe_next(off: size, idx: size, mask: size) (size, size) = {
+	let nidx = idx + 1;
+	let noff = (off + nidx) & mask;
+	return (noff, nidx);
+};
+
+// Total number of slots across all groups, derived from group_mask.
+fn capacity_slots(m: *map) size = (m.group_mask + 1) * GROUP_SIZE;
+
+// Largest value of used + tombs tolerated before the table must grow
+// (a 7/8 load factor).
+fn max_used_with_tombs(m: *map) size = {
+	return (capacity_slots(m) * 7z) / 8z;
+};
+
+// Makes sure one more entry can be inserted while leaving at least one
+// CTRL_EMPTY slot, which the probe loops rely on to terminate. Returns nomem
+// if growing the table fails; the map is left unchanged in that case.
+fn ensure_capacity_for_insert(m: *map) (void | nomem) = {
+	if (len(m.groups) == 0) {
+		// No storage yet. group_mask is meaningless here, so the
+		// load-factor test below would wrongly report free space and
+		// the caller would index an empty slice.
+		return resize(m, 1);
+	};
+	if (m.used + m.tombs < max_used_with_tombs(m)) {
+		return;
+	};
+	return resize(m, (m.group_mask + 1) * 2);
+};
+
+// Rebuilds the table at its current size, dropping all tombstones. This is
+// purely an optimisation: if the replacement storage cannot be allocated the
+// table is left exactly as it was.
+fn rehash_in_place(m: *map) void = {
+	if (len(m.groups) == 0) return;
+	let fresh: []group = match (alloc([group{...}...], m.group_mask + 1)) {
+	case let a: []group => yield a;
+	case nomem => return;
+	};
+	for (let i = 0z; i < len(fresh); i += 1) {
+		group_set_empty(&fresh[i]);
+	};
+	let old = m.groups;
+	m.groups = fresh;
+	// unchecked_put recounts used; the fresh table has no tombstones.
+	m.used = 0;
+	m.tombs = 0;
+
+	for (let gi = 0z; gi < len(old); gi += 1) {
+		let g = &old[gi];
+		for (let si = 0z; si < GROUP_SIZE; si += 1) {
+			if (!is_full_ctrl(g.ctrl[si])) continue;
+			unchecked_put(m, g.keys[si], g.vals[si]);
+		};
+	};
+	free(old);
+};
+
+// Replaces the table storage with new_groups_len groups (at least one) and
+// reinserts every live entry, dropping tombstones. new_groups_len must be a
+// power of two because new_groups_len - 1 becomes the probe mask. On nomem
+// the map is left unchanged.
+fn resize(m: *map, new_groups_len: size) (void | nomem) = {
+	let n = if (new_groups_len == 0) 1z else new_groups_len;
+	let gs: []group = match (alloc([group{...}...], n)) {
+	case let a: []group => yield a;
+	case nomem => return nomem;
+	};
+	for (let i = 0z; i < len(gs); i += 1) {
+		group_set_empty(&gs[i]);
+	};
+	let old = m.groups;
+	m.groups = gs;
+	m.group_mask = n - 1;
+	m.used = 0;
+	m.tombs = 0;
+
+	// Bounded by len(old) rather than the old mask so that a map which
+	// had no storage yet is not indexed out of bounds.
+	for (let gi = 0z; gi < len(old); gi += 1) {
+		let g = &old[gi];
+		for (let si = 0z; si < GROUP_SIZE; si += 1) {
+			if (!is_full_ctrl(g.ctrl[si])) continue;
+			unchecked_put(m, g.keys[si], g.vals[si]);
+		};
+	};
+	if (len(old) != 0) {
+		free(old);
+	};
+};
+
+// Stores key/val in the first free slot of key's probe sequence without
+// looking for an existing entry and without growing the table. The caller
+// must guarantee that key is not already present and that the table contains
+// at least one CTRL_EMPTY slot; otherwise the probe loop does not terminate.
+// The key slice is stored as given, not copied.
+fn unchecked_put(m: *map, key: []u8, val: nullable *opaque) void = {
+	let hv = hash64(m, key);
+	let t = h2(hv);
+	let mask = m.group_mask;
+	let off: size = (h1(hv): size) & mask;
+	let idx: size = 0;
+
+	for (true) {
+		let g = &m.groups[off];
+		// First tombstone seen in this group, if any.
+		let first_dead: (size | void) = void;
+		for (let i = 0z; i < GROUP_SIZE; i += 1) {
+			let c = g.ctrl[i];
+			if (is_full_ctrl(c)) {
+				continue;
+			} else if (c == CTRL_DELETED) {
+				if (first_dead is void) first_dead = i;
+			} else {
+				// Reached an empty slot: the probe sequence ends
+				// in this group. Prefer recycling a tombstone
+				// seen earlier in the same group.
+				let slot = match (first_dead) {
+				case void => yield i;
+				case let di: size => yield di;
+				};
+				g.keys[slot] = key;
+				g.vals[slot] = val;
+				g.ctrl[slot] = t;
+				m.used += 1;
+				if (slot != i) {
+					// A tombstone was reused.
+					m.tombs -= 1;
+				};
+				return;
+			};
+		};
+		let next = probe_next(off, idx, mask);
+		off = next.0;
+		idx = next.1;
+	};
+};
diff --git a/ds/map/map_swiss_siphash/map.ha b/ds/map/map_swiss_siphash/map.ha
new file mode 100644
index 0000000000000000000000000000000000000000..e70ef4ed44d0eed1067c55749d93bebac4ffeb19
--- /dev/null
+++ b/ds/map/map_swiss_siphash/map.ha
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: Apache-2.0 AND MPL-2.0
+// SPDX-FileCopyrightText: 2024 The Cockroach Authors
+// SPDX-FileCopyrightText: 2025 Runxi Yu
+
+use ds::map;
+
+// Swiss table based map from []u8 to *opaque.
+//
+// You are advised to create these with [[new]].
+export type map = struct {
+	// Generic map interface; [[new]] points it at this module's vtable.
+	// The vt_* adapters cast a *map::map back to *map, which presumably
+	// relies on this being the first field.
+	vt: map::map,
+	// Number of groups minus one; ANDed with a hash or probe offset to
+	// select a group.
+	group_mask: size,
+	// Number of slots currently holding an entry.
+	used: size,
+	// Number of slots currently marked CTRL_DELETED.
+	tombs: size,
+	// Key for the SipHash function used to hash map keys.
+	siphash_key: [16]u8,
+	// Slot storage.
+	groups: []group,
+};
+
+// Dispatch table that exposes this implementation through ds::map.
+const _vt: map::vtable = map::vtable {
+	getter = &vt_get,
+	setter = &vt_set,
+	deleter = &vt_del,
+	finisher = &vt_finish,
+};
+
+// Adapters from the generic ds::map signatures to this module's functions.
+fn vt_get(m: *map::map, key: []u8) (*opaque | void) = get(m: *map, key);
+fn vt_set(m: *map::map, key: []u8, v: *opaque) (void | nomem) = set(m: *map, key, v);
+fn vt_del(m: *map::map, key: []u8) (*opaque | void) = del(m: *map, key);
+fn vt_finish(m: *map::map) void = finish(m: *map);
diff --git a/ds/map/map_swiss_siphash/new.ha b/ds/map/map_swiss_siphash/new.ha
new file mode 100644
index 0000000000000000000000000000000000000000..96e2dde3d2af665a179e8b305b0c321e03946b2a
--- /dev/null
+++ b/ds/map/map_swiss_siphash/new.ha
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: Apache-2.0 AND MPL-2.0
+// SPDX-FileCopyrightText: 2024 The Cockroach Authors
+// SPDX-FileCopyrightText: 2025 Runxi Yu
+
+use errors;
+use ds::map;
+
+// Creates a new [[map]] with an initial number of groups and SipHash key.
+//
+// n_groups must be greater than zero.
+//
+// n_groups is rounded up to the next power of two. Returns errors::invalid
+// if n_groups is zero and nomem if the storage cannot be allocated. The
+// result should be released with [[finish]].
+export fn new(n_groups: size, siphash_key: [16]u8) (*map | errors::invalid | nomem) = {
+	if (n_groups == 0) {
+		return errors::invalid;
+	};
+
+	// Round up to a power of two so that groups_count - 1 works as a mask.
+	let groups_count: size = 1;
+	for (groups_count < n_groups) {
+		let doubled = groups_count << 1;
+		if (doubled == 0) {
+			// The top bit was shifted out: no power of two that fits
+			// in a size is >= n_groups. Such a table could never be
+			// allocated, and continuing would loop forever.
+			return nomem;
+		};
+		groups_count = doubled;
+	};
+
+	let gs: []group = match (alloc([group{...}...]: []group, groups_count)) {
+	case let a: []group => yield a;
+	case nomem => return nomem;
+	};
+	for (let i = 0z; i < len(gs); i += 1) {
+		group_set_empty(&gs[i]);
+	};
+
+	let m = match (alloc(map {
+		vt = &_vt,
+		group_mask = groups_count - 1,
+		used = 0,
+		tombs = 0,
+		siphash_key = siphash_key,
+		groups = gs,
+	})) {
+	case let p: *map => yield p;
+	case nomem =>
+		// Do not leak the group storage allocated above.
+		free(gs);
+		return nomem;
+	};
+	return m;
+};
diff --git a/ds/map/map_swiss_siphash/set.ha b/ds/map/map_swiss_siphash/set.ha
new file mode 100644
index 0000000000000000000000000000000000000000..6cc817d8ea99196cbf62efc2ec19592f42ea8d2c
--- /dev/null
+++ b/ds/map/map_swiss_siphash/set.ha
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: Apache-2.0 AND MPL-2.0
+// SPDX-FileCopyrightText: 2024 The Cockroach Authors
+// SPDX-FileCopyrightText: 2025 Runxi Yu
+
+use bytes;
+
+// Sets an item in a [[map]], replacing any existing item with the same key.
+//
+// When a new entry is created the key slice is stored as given, not copied,
+// so its backing memory must stay valid and unmodified for as long as the
+// entry is in the map. Returns nomem if the table needs to grow and the
+// allocation fails; the map is left unchanged in that case.
+export fn set(m: *map, key: []u8, value: *opaque) (void | nomem) = {
+	// Pass 1: if the key is already present, overwrite its value in place.
+	if (len(m.groups) != 0) {
+		let hv = hash64(m, key);
+		let t = h2(hv);
+		let mask = m.group_mask;
+		let off: size = (h1(hv): size) & mask;
+		let idx: size = 0;
+
+		// Set once an empty slot proves that the key is not stored.
+		let absent = false;
+		for (!absent) {
+			let g = &m.groups[off];
+
+			for (let i = 0z; i < GROUP_SIZE; i += 1) {
+				let c = g.ctrl[i];
+				if (is_full_ctrl(c) && c == t) {
+					if (bytes::equal(g.keys[i], key)) {
+						g.vals[i] = value;
+						return;
+					};
+				} else if (c == CTRL_EMPTY) {
+					absent = true;
+					break;
+				};
+			};
+
+			if (!absent) {
+				let next = probe_next(off, idx, mask);
+				off = next.0;
+				idx = next.1;
+			};
+		};
+	};
+
+	// Pass 2: the key is known to be absent. Make room (this may rebuild
+	// the table), then insert without searching for the key again.
+	ensure_capacity_for_insert(m)?;
+	unchecked_put(m, key, value);
+};
diff --git a/ds/map/map_swiss_siphash/test.ha b/ds/map/map_swiss_siphash/test.ha
new file mode 100644
index 0000000000000000000000000000000000000000..7d146a4454b6f54f8ade08bc4bc7da6a1711b3c8
--- /dev/null
+++ b/ds/map/map_swiss_siphash/test.ha
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: Apache-2.0 AND MPL-2.0
+// SPDX-FileCopyrightText: 2024 The Cockroach Authors
+// SPDX-FileCopyrightText: 2025 Runxi Yu
+
+use crypto::random;
+use errors;
+use strings;
+use ds::map;
+
+@test fn roundtrip() void = {
+	let key: [16]u8 = [0...];
+	random::buffer(&key);
+	let m: *map = match (new(1, key)) {
+	case let p: *map => yield p;
+	case errors::invalid => abort("unexpected errors::invalid");
+	case nomem => abort("unexpected nomem");
+	};
+	defer finish(m);
+
+	let v1 = 1, v2 = 2, v3 = 3;
+	let p1: *opaque = (&v1: *opaque);
+	let p2: *opaque = (&v2: *opaque);
+	let p3: *opaque = (&v3: *opaque);
+
+	let k1 = strings::toutf8("alpha");
+	let k2 = strings::toutf8("beta");
+	let k3 = strings::toutf8("gamma");
+
+	match (map::set(m, k1, p1)) {
+	case void => yield;
+	case nomem => abort("unexpected nomem in set(k1,p1)");
+	};
+
+	match (map::get(m, k1)) {
+	case let got: *opaque =>
+		assert(got == p1, "get(k1) must return p1");
+	case void =>
+		abort("get(k1) unexpectedly void");
+	};
+
+	match (map::set(m, k1, p2)) {
+	case void => yield;
+	case nomem => abort("unexpected nomem in replace");
+	};
+	match (map::get(m, k1)) {
+	case let got: *opaque =>
+		assert(got == p2, "replace must overwrite prior value");
+	case void =>
+		abort("get(k1) void after replace");
+	};
+
+	match (map::set(m, k2, p3)) {
+	case void => yield;
+	case nomem => abort("unexpected nomem in set(k2,p3)");
+	};
+
+	match (map::get(m, k3)) {
+	case void => yield;
+	case *opaque =>
+		abort("get(k3) must be void for missing key");
+	};
+
+	match (map::del(m, k2)) {
+	case let got: *opaque =>
+		assert(got == p3, "del(k2) must return stored value");
+	case void =>
+		abort("del(k2) unexpectedly void");
+	};
+	match (map::del(m, k2)) {
+	case void => yield;
+	case *opaque =>
+		abort("del(k2) must be void after prior delete");
+	};
+};
-- 
2.48.1