Lindenii Project Forge
Add generic swiss tables implementation
swiss: generic swiss tables
// SPDX-License-Identifier: Apache-2.0 AND MPL-2.0
// SPDX-FileCopyrightText: 2024 The Cockroach Authors
// SPDX-FileCopyrightText: 2025 Runxi Yu

use bytes;

// Deletes an item from a [[map]]. Returns the removed value, or void if the
// key is not present.
//
// The slot is marked with CTRL_DELETED instead of being emptied, so probe
// sequences that pass through it still reach later entries. Once tombstones
// amount to a third of all slots, rehash_in_place is called to drop them.
export fn del(m: *map, key: []u8) (*opaque | void) = {
	if (len(m.groups) == 0) {
		return;
	};

	const hash = m.hash64(m.hash_params, key): u64;
	const tag = h2(hash);
	const mask = m.group_mask;
	let off: size = (h1(hash): size) & mask;
	let step: size = 0;

	for (true) {
		let grp = &m.groups[off];
		for (let slot = 0z; slot < GROUP_SIZE; slot += 1) {
			const ctrl = grp.ctrl[slot];
			// An empty slot ends the probe sequence: the key is absent.
			if (ctrl == CTRL_EMPTY) {
				return;
			};
			// Skip tombstones and entries whose tag does not match.
			if (!is_full_ctrl(ctrl) || ctrl != tag) {
				continue;
			};
			if (!bytes::equal(grp.keys[slot], key)) {
				continue;
			};

			const removed = grp.vals[slot];
			grp.ctrl[slot] = CTRL_DELETED;
			grp.keys[slot] = [];
			grp.vals[slot] = null;
			m.used -= 1;
			m.tombs += 1;

			// elide the tombstones if they reach 1/3 of the capacity
			if (m.tombs * 3 >= capacity_slots(m)) {
				rehash_in_place(m);
			};

			match (removed) {
			case null =>
				abort("map: null internal state escaped");
			case let p: *opaque =>
				return p;
			};
		};

		const next = probe_next(off, step, mask);
		off = next.0;
		step = next.1;
	};
};
// SPDX-License-Identifier: Apache-2.0 AND MPL-2.0
// SPDX-FileCopyrightText: 2024 The Cockroach Authors
// SPDX-FileCopyrightText: 2025 Runxi Yu

// Frees resources associated with a [[map]].
//
// Only the group array and the map object itself are released; the key slices
// and value pointers stored in the map are not freed here.
export fn finish(m: *map) void = {
	if (len(m.groups) > 0) {
		free(m.groups);
	};
	free(m);
};
// SPDX-License-Identifier: Apache-2.0 AND MPL-2.0
// SPDX-FileCopyrightText: 2024 The Cockroach Authors
// SPDX-FileCopyrightText: 2025 Runxi Yu

use bytes;

// Gets an item from a [[map]] by key, returning void if not found.
//
// Probing starts at the group selected by the upper hash bits and stops at
// the first empty slot; tombstones are skipped.
export fn get(m: *map, key: []u8) (*opaque | void) = {
	if (len(m.groups) == 0) {
		return;
	};

	const hash = m.hash64(m.hash_params, key): u64;
	const tag = h2(hash);
	const mask = m.group_mask;
	let off: size = (h1(hash): size) & mask;
	let step: size = 0;

	for (true) {
		let grp = &m.groups[off];
		for (let slot = 0z; slot < GROUP_SIZE; slot += 1) {
			const ctrl = grp.ctrl[slot];
			// An empty slot ends the probe sequence: the key is absent.
			if (ctrl == CTRL_EMPTY) {
				return;
			};
			if (!is_full_ctrl(ctrl) || ctrl != tag) {
				continue;
			};
			if (!bytes::equal(grp.keys[slot], key)) {
				continue;
			};
			match (grp.vals[slot]) {
			case null =>
				abort("map: null internal state escaped");
			case let p: *opaque =>
				return p;
			};
		};

		const next = probe_next(off, step, mask);
		off = next.0;
		step = next.1;
	};
};
// SPDX-License-Identifier: Apache-2.0 AND MPL-2.0
// SPDX-FileCopyrightText: 2024 The Cockroach Authors
// SPDX-FileCopyrightText: 2025 Runxi Yu

// Number of slots in each [[group]].
export def GROUP_SIZE: size = 8z;

// Control byte of a free slot. Lookups stop probing when they see it.
export def CTRL_EMPTY: u8 = 0x80;

// Control byte of a tombstone: a slot whose entry was deleted.
export def CTRL_DELETED: u8 = 0xFE;

// A group of GROUP_SIZE slots. ctrl[i] is either CTRL_EMPTY, CTRL_DELETED, or
// the 7-bit hash tag (see h2) of the entry held in keys[i] / vals[i].
export type group = struct {
	ctrl: [GROUP_SIZE]u8,
	keys: [GROUP_SIZE][]u8,
	vals: [GROUP_SIZE]nullable *opaque,
};

// Resets every slot of a group to the empty state.
fn group_set_empty(g: *group) void = {
	for (let i = 0z; i < GROUP_SIZE; i += 1) {
		g.ctrl[i] = CTRL_EMPTY;
		g.keys[i] = [];
		g.vals[i] = null;
	};
};

// Reports whether a control byte marks an occupied slot (high bit clear).
fn is_full_ctrl(c: u8) bool = (c & 0x80) == 0 && c != CTRL_DELETED;

// Upper hash bits; masked by the callers to pick the starting group.
fn h1(h: u64) u64 = h >> 7u64;

// Lower seven hash bits; stored as the control byte of an occupied slot.
fn h2(h: u64) u8 = (h & 0x7Fu64): u8;

// Advances a probe sequence. Returns the next group offset and the next step
// index; the stride grows by one on every call.
fn probe_next(off: size, idx: size, mask: size) (size, size) = {
	let nidx = idx + 1;
	let noff = (off + nidx) & mask;
	return (noff, nidx);
};

// Total number of slots implied by the group mask.
fn capacity_slots(m: *map) size = (m.group_mask + 1) * GROUP_SIZE;

// Maximum number of occupied slots plus tombstones before the table grows
// (7/8 of the capacity).
fn max_used_with_tombs(m: *map) size = {
	return (capacity_slots(m) * 7z) / 8z;
};

// Makes sure at least one more entry can be inserted, growing the table when
// needed. Returns nomem if the larger table cannot be allocated; the map is
// left unchanged in that case.
fn ensure_capacity_for_insert(m: *map) (void | nomem) = {
	// A map without any groups has nowhere to insert into, whatever
	// group_mask says, so allocate the first group before checking load.
	if (len(m.groups) == 0) {
		return resize(m, 1);
	};
	if (m.used + m.tombs < max_used_with_tombs(m)) {
		return;
	};
	return resize(m, (m.group_mask + 1) * 2);
};

// Rebuilds the table at its current size, dropping all tombstones.
//
// This is a best-effort cleanup: if the replacement group array cannot be
// allocated, the table is left as it is (tombstones included) instead of
// aborting. The table stays valid, and a later insertion still grows it
// through ensure_capacity_for_insert once used + tombs reaches the limit.
fn rehash_in_place(m: *map) void = {
	if (len(m.groups) == 0) return;

	let fresh: []group = match (alloc([group{...}...]: []group,
			m.group_mask + 1)) {
	case let a: []group =>
		yield a;
	case nomem =>
		return;
	};
	for (let i = 0z; i < len(fresh); i += 1) {
		group_set_empty(&fresh[i]);
	};

	let old = m.groups;
	m.groups = fresh;
	// unchecked_put recounts the entries as it reinserts them.
	m.used = 0;
	m.tombs = 0;

	for (let gi = 0z; gi < len(old); gi += 1) {
		let g = &old[gi];
		for (let si = 0z; si < GROUP_SIZE; si += 1) {
			if (!is_full_ctrl(g.ctrl[si])) continue;
			unchecked_put(m, g.keys[si], g.vals[si]);
		};
	};

	free(old);
};

// Replaces the group array with one of new_groups_len groups and reinserts
// every live entry. new_groups_len is used as mask + 1, so callers pass a
// power of two; zero is treated as one. Returns nomem, leaving the map
// untouched, if the new array cannot be allocated.
fn resize(m: *map, new_groups_len: size) (void | nomem) = {
	if (new_groups_len == 0) new_groups_len = 1;

	let gs: []group = match (alloc([group{...}...]: []group,
			new_groups_len)) {
	case let a: []group =>
		yield a;
	case nomem =>
		return nomem;
	};
	for (let i = 0z; i < len(gs); i += 1) {
		group_set_empty(&gs[i]);
	};

	let old = m.groups;
	m.groups = gs;
	m.group_mask = new_groups_len - 1;
	m.used = 0;
	m.tombs = 0;

	// Walk the old array by its real length so that an empty old array is
	// never indexed.
	for (let gi = 0z; gi < len(old); gi += 1) {
		let g = &old[gi];
		for (let si = 0z; si < GROUP_SIZE; si += 1) {
			if (!is_full_ctrl(g.ctrl[si])) continue;
			unchecked_put(m, g.keys[si], g.vals[si]);
		};
	};

	if (len(old) != 0) {
		free(old);
	};
};

// Inserts an entry without looking for an existing key and without checking
// capacity. Within the group that holds the first free slot on the probe
// path, a tombstone seen earlier in that group is reused in preference to the
// free slot. The key slice is stored as given, not copied.
fn unchecked_put(m: *map, key: []u8, val: nullable *opaque) void = {
	let hv = m.hash64(m.hash_params, key): u64;
	let t = h2(hv);
	let mask = m.group_mask;
	let off: size = (h1(hv): size) & mask;
	let idx: size = 0;

	for (true) {
		let g = &m.groups[off];
		let first_dead: (size | void) = void;
		for (let i = 0z; i < GROUP_SIZE; i += 1) {
			let c = g.ctrl[i];
			if (is_full_ctrl(c)) {
				continue;
			};
			if (c == CTRL_DELETED) {
				if (first_dead is void) first_dead = i;
				continue;
			};

			let slot = match (first_dead) {
			case void =>
				yield i;
			case let di: size =>
				yield di;
			};
			g.keys[slot] = key;
			g.vals[slot] = val;
			g.ctrl[slot] = t;
			m.used += 1;
			// A reused tombstone is no longer a tombstone.
			if (slot != i) {
				m.tombs -= 1;
			};
			return;
		};

		let next = probe_next(off, idx, mask);
		off = next.0;
		idx = next.1;
	};
};
// SPDX-FileCopyrightText: 2024 The Cockroach Authors
// SPDX-FileCopyrightText: 2025 Runxi Yu

use ds::map;

// Generic map based on swiss tables from []u8 to *opaque.
//
// You are advised to create these with [[new]].
export type map = struct {
	// Generic map header. The vt_* thunks below cast a *map::map back to
	// *map, so this field has to stay first.
	vt: map::map,
	// Number of groups minus one; used to mask hash bits into a group index.
	group_mask: size,
	// Number of occupied slots.
	used: size,
	// Number of tombstone (deleted) slots.
	tombs: size,
	// User-supplied hash function; it is called with hash_params and the key.
	hash64: *fn(hash_params: nullable *opaque, key: []u8) size,
	// Opaque argument passed to every hash64 call.
	hash_params: nullable *opaque,
	// The slot groups making up the table.
	groups: []group,
};

// Dispatch table connecting the generic map interface to this implementation.
const _vt: map::vtable = map::vtable {
	getter = &vt_get,
	setter = &vt_set,
	deleter = &vt_del,
	finisher = &vt_finish,
};

// Thunk for [[get]].
fn vt_get(m: *map::map, key: []u8) (*opaque | void) = {
	return get(m: *map, key);
};

// Thunk for [[set]].
fn vt_set(m: *map::map, key: []u8, v: *opaque) (void | nomem) = {
	return set(m: *map, key, v);
};

// Thunk for [[del]].
fn vt_del(m: *map::map, key: []u8) (*opaque | void) = {
	return del(m: *map, key);
};

// Thunk for [[finish]].
fn vt_finish(m: *map::map) void = {
	finish(m: *map);
};
// SPDX-License-Identifier: Apache-2.0 AND MPL-2.0
// SPDX-FileCopyrightText: 2024 The Cockroach Authors
// SPDX-FileCopyrightText: 2025 Runxi Yu

use errors;

// Creates a new [[map]] with an initial number of groups and hash function.
//
// n_groups must be greater than zero, otherwise [[errors::invalid]] is
// returned. It is rounded up to the next power of two. hash_params is passed
// unchanged to every call of hash64. Returns nomem if either the group array
// or the map object cannot be allocated; nothing is leaked in that case.
export fn new(
	n_groups: size,
	hash64: *fn(hash_params: nullable *opaque, key: []u8) size,
	hash_params: nullable *opaque,
) (*map | errors::invalid | nomem) = {
	if (n_groups == 0) {
		return errors::invalid;
	};

	// The group count must be a power of two because group_mask
	// (count - 1) is used as a bit mask on the hash.
	let groups_count: size = 1;
	for (groups_count < n_groups) {
		groups_count *= 2;
	};

	let gs: []group = match (alloc([group{...}...]: []group,
			groups_count)) {
	case let a: []group =>
		yield a;
	case nomem =>
		return nomem;
	};
	for (let i = 0z; i < len(gs); i += 1) {
		group_set_empty(&gs[i]);
	};

	match (alloc(map {
		vt = &_vt,
		group_mask = groups_count - 1,
		used = 0,
		tombs = 0,
		hash64 = hash64,
		hash_params = hash_params,
		groups = gs,
	})) {
	case let m: *map =>
		return m;
	case nomem =>
		// The group array is not owned by anything yet; release it
		// before reporting the failure.
		free(gs);
		return nomem;
	};
};
// SPDX-License-Identifier: Apache-2.0 AND MPL-2.0
// SPDX-FileCopyrightText: 2024 The Cockroach Authors
// SPDX-FileCopyrightText: 2025 Runxi Yu

use bytes;

// Sets an item in a [[map]], replacing any existing item with the same key.
//
// The key slice is stored as given, not copied. Returns nomem if the table
// has to grow and the allocation fails.
export fn set(m: *map, key: []u8, value: *opaque) (void | nomem) = {
	// First pass: if the key is already present, overwrite its value in
	// place. The pass ends at the first empty slot on the probe path.
	if (len(m.groups) != 0) {
		const probe_hash = m.hash64(m.hash_params, key);
		const probe_tag = h2(probe_hash: u64);
		const probe_mask = m.group_mask;
		let probe_off: size = (h1(probe_hash: u64): size) & probe_mask;
		let probe_step: size = 0;
		let absent = false;

		for (!absent) {
			let grp = &m.groups[probe_off];
			for (let slot = 0z; slot < GROUP_SIZE; slot += 1) {
				const ctrl = grp.ctrl[slot];
				if (ctrl == CTRL_EMPTY) {
					absent = true;
					break;
				};
				if (is_full_ctrl(ctrl) && ctrl == probe_tag
						&& bytes::equal(grp.keys[slot], key)) {
					grp.vals[slot] = value;
					return;
				};
			};
			if (!absent) {
				const next = probe_next(probe_off, probe_step,
					probe_mask);
				probe_off = next.0;
				probe_step = next.1;
			};
		};
	};

	// The key is new: make room first. This may replace the group array,
	// so the probe below starts again from a fresh hash.
	ensure_capacity_for_insert(m)?;

	const hash = m.hash64(m.hash_params, key): u64;
	const tag = h2(hash);
	const mask = m.group_mask;
	let off: size = (h1(hash): size) & mask;
	let step: size = 0;

	for (true) {
		let grp = &m.groups[off];
		// First tombstone seen in this group, if any.
		let tomb: (size | void) = void;
		for (let slot = 0z; slot < GROUP_SIZE; slot += 1) {
			const ctrl = grp.ctrl[slot];
			if (is_full_ctrl(ctrl)) {
				if (ctrl == tag && bytes::equal(grp.keys[slot], key)) {
					grp.vals[slot] = value;
					return;
				};
				continue;
			};
			if (ctrl == CTRL_DELETED) {
				if (tomb is void) tomb = slot;
				continue;
			};

			// Free slot reached: prefer a tombstone seen earlier in
			// this group over the free slot itself.
			const target = match (tomb) {
			case let reused: size =>
				yield reused;
			case void =>
				yield slot;
			};
			grp.keys[target] = key;
			grp.vals[target] = value;
			grp.ctrl[target] = tag;
			m.used += 1;
			if (target != slot) {
				m.tombs -= 1;
			};
			return;
		};

		const next = probe_next(off, step, mask);
		off = next.0;
		step = next.1;
	};
};
// SPDX-License-Identifier: Apache-2.0 AND MPL-2.0
// SPDX-FileCopyrightText: 2024 The Cockroach Authors // SPDX-FileCopyrightText: 2025 Runxi Yu
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>
use bytes;
use ds::map;
// Deletes an item from a [[map]]. Returns the removed value or void.
// Deletes an item from a [[map]].
export fn del(m: *map, key: []u8) (*opaque | void) = {
	if (len(m.groups) == 0) {
		return;
	};

	const hash = hash64(m, key): u64;
	const tag = h2(hash);
	const mask = m.group_mask;
	let off: size = (h1(hash): size) & mask;
	let step: size = 0;

	for (true) {
		let grp = &m.groups[off];
		for (let slot = 0z; slot < GROUP_SIZE; slot += 1) {
			const ctrl = grp.ctrl[slot];
			// An empty slot ends the probe sequence: the key is absent.
			if (ctrl == CTRL_EMPTY) {
				return;
			};
			// Skip tombstones and entries whose tag does not match.
			if (!is_full_ctrl(ctrl) || ctrl != tag) {
				continue;
			};
			if (!bytes::equal(grp.keys[slot], key)) {
				continue;
			};

			// Turn the slot into a tombstone and hand back its value.
			const removed = grp.vals[slot];
			grp.ctrl[slot] = CTRL_DELETED;
			grp.keys[slot] = [];
			grp.vals[slot] = null;
			m.used -= 1;
			m.tombs += 1;

			// elide the tombstones if they reach 1/3 of the capacity
			if (m.tombs * 3 >= capacity_slots(m)) {
				rehash_in_place(m);
			};

			match (removed) {
			case null =>
				abort("map: null internal state escaped");
			case let p: *opaque =>
				return p;
			};
		};

		const next = probe_next(off, step, mask);
		off = next.0;
		step = next.1;
	};

	// NOTE(review): the loop above only exits through return, so this
	// delegation is never reached. It looks like the table walk above and
	// this call are two alternative bodies; confirm which one is intended.
	return map::del(m.inner, key);
};
// SPDX-License-Identifier: Apache-2.0 AND MPL-2.0
// SPDX-FileCopyrightText: 2024 The Cockroach Authors // SPDX-FileCopyrightText: 2025 Runxi Yu
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org> use ds::map;
// Frees resources associated with a [[map]].
//
// Releases, in order: the group array (when non-empty), the inner map via
// map::finish, the boxed key, and finally the map object itself.
export fn finish(m: *map) void = {
	if (len(m.groups) > 0) {
		free(m.groups);
	};
	map::finish(m.inner);
	free(m.key);
	free(m);
};
// SPDX-License-Identifier: Apache-2.0 AND MPL-2.0
// SPDX-FileCopyrightText: 2024 The Cockroach Authors // SPDX-FileCopyrightText: 2025 Runxi Yu
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>
use bytes;
use ds::map;
// Gets an item from a [[map]] by key, returning void if not found.
export fn get(m: *map, key: []u8) (*opaque | void) = {
	if (len(m.groups) == 0) {
		return;
	};

	const hash = hash64(m, key): u64;
	const tag = h2(hash);
	const mask = m.group_mask;
	let off: size = (h1(hash): size) & mask;
	let step: size = 0;

	for (true) {
		let grp = &m.groups[off];
		for (let slot = 0z; slot < GROUP_SIZE; slot += 1) {
			const ctrl = grp.ctrl[slot];
			// An empty slot ends the probe sequence: the key is absent.
			if (ctrl == CTRL_EMPTY) {
				return;
			};
			if (!is_full_ctrl(ctrl) || ctrl != tag) {
				continue;
			};
			if (!bytes::equal(grp.keys[slot], key)) {
				continue;
			};
			match (grp.vals[slot]) {
			case null =>
				abort("map: null internal state escaped");
			case let p: *opaque =>
				return p;
			};
		};

		const next = probe_next(off, step, mask);
		off = next.0;
		step = next.1;
	};

	// NOTE(review): the loop above only exits through return, so this
	// delegation is never reached. It looks like the table walk above and
	// this call are two alternative bodies; confirm which one is intended.
	return map::get(m.inner, key);
};
// SPDX-License-Identifier: Apache-2.0 AND MPL-2.0
// SPDX-FileCopyrightText: 2024 The Cockroach Authors // SPDX-FileCopyrightText: 2025 Runxi Yu
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>
use bytes;
use hash; use hash::siphash;
export def GROUP_SIZE: size = 8z; export def CTRL_EMPTY: u8 = 0x80; export def CTRL_DELETED: u8 = 0xFE; export type group = struct { ctrl: [GROUP_SIZE]u8, keys: [GROUP_SIZE][]u8, vals: [GROUP_SIZE]nullable *opaque, }; fn group_set_empty(g: *group) void = { for (let i = 0z; i < GROUP_SIZE; i += 1) { g.ctrl[i] = CTRL_EMPTY; g.keys[i] = []; g.vals[i] = null;
fn hash64(params: nullable *opaque, key: []u8) size = { let keyptr = match (params) { case null => abort("ds::map::swiss_siphash: missing key"); case let p: *opaque => yield (p: *[16]u8);
};
}; fn is_full_ctrl(c: u8) bool = (c & 0x80) == 0 && c != CTRL_DELETED;
fn hash64(m: *map, key: []u8) size = { let h = siphash::siphash(2, 4, &m.siphash_key);
let h = siphash::siphash(2, 4, keyptr);
defer hash::close(&h); hash::write(&h, key); return siphash::sum(&h): size; };
fn h1(h: u64) u64 = h >> 7u64; fn h2(h: u64) u8 = (h & 0x7Fu64): u8; fn probe_next(off: size, idx: size, mask: size) (size, size) = { let nidx = idx + 1; let noff = (off + nidx) & mask; return (noff, nidx); }; fn capacity_slots(m: *map) size = (m.group_mask + 1) * GROUP_SIZE; fn max_used_with_tombs(m: *map) size = { return (capacity_slots(m) * 7z) / 8z; }; fn ensure_capacity_for_insert(m: *map) (void | nomem) = { if (m.used + m.tombs < max_used_with_tombs(m)) { return; }; return resize(m, (m.group_mask + 1) * 2); }; fn rehash_in_place(m: *map) void = { if (len(m.groups) == 0) return; let new_groups: []group = alloc([group{...}...], (m.group_mask + 1))!; for (let i = 0z; i < len(new_groups); i += 1) { group_set_empty(&new_groups[i]); }; let old = m.groups; m.groups = new_groups; let old_groups = old; let old_mask = m.group_mask; m.used = 0; m.tombs = 0; for (let gi = 0z; gi <= old_mask; gi += 1) { let g = &old_groups[gi]; for (let si = 0z; si < GROUP_SIZE; si += 1) { let c = g.ctrl[si]; if (!is_full_ctrl(c)) continue; let k = g.keys[si]; let v = g.vals[si]; unchecked_put(m, k, v); }; }; free(old_groups); }; fn resize(m: *map, new_groups_len: size) (void | nomem) = { if (new_groups_len == 0) new_groups_len = 1; let gs: []group = match (alloc([group{...}...], new_groups_len)) { case let a: []group => yield a; case nomem => return nomem; }; for (let i = 0z; i < len(gs); i += 1) { group_set_empty(&gs[i]); }; let old = m.groups; let old_mask = m.group_mask; m.groups = gs; m.group_mask = new_groups_len - 1; m.used = 0; m.tombs = 0; for (let gi = 0z; gi <= old_mask; gi += 1) { let g = &old[gi]; for (let si = 0z; si < GROUP_SIZE; si += 1) { let c = g.ctrl[si]; if (!is_full_ctrl(c)) continue; unchecked_put(m, g.keys[si], g.vals[si]); }; }; if (len(old) != 0) { free(old); }; }; fn unchecked_put(m: *map, key: []u8, val: nullable *opaque) void = { let hv = hash64(m, key): u64; let t = h2(hv); let mask = m.group_mask; let off: size = (h1(hv): size) & mask; let idx: size = 0; for 
(true) { let g = &m.groups[off]; let first_dead: (size | void) = void; for (let i = 0z; i < GROUP_SIZE; i += 1) { let c = g.ctrl[i]; if (is_full_ctrl(c)) { continue; } else if (c == CTRL_DELETED) { if (first_dead is void) first_dead = i; } else { let slot = match (first_dead) { case void => yield i; case let di: size => yield di; }; g.keys[slot] = key; g.vals[slot] = val; g.ctrl[slot] = t; m.used += 1; if (slot == i) { void; } else { m.tombs -= 1; }; return; }; }; let next = probe_next(off, idx, mask); off = next.0; idx = next.1; }; };
// SPDX-License-Identifier: Apache-2.0 AND MPL-2.0
// SPDX-FileCopyrightText: 2024 The Cockroach Authors // SPDX-FileCopyrightText: 2025 Runxi Yu
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>
use ds::map;
// Swiss table based map from []u8 to *opaque.
// Swiss-table based map from []u8 to *opaque, using SipHash.
//
// You are advised to create these with [[new]].
export type map = struct {
	// Generic map header. The vt_* thunks below cast a *map::map back to
	// *map, so this field has to stay first.
	vt: map::map,
	// NOTE(review): the struct carries both table state (group_mask
	// through groups) and a wrapped map (inner, key); confirm that both
	// sets of fields are meant to coexist.
	group_mask: size,
	used: size,
	tombs: size,
	siphash_key: [16]u8,
	groups: []group,
	inner: *map::map,
	key: *[16]u8,
};

// Dispatch table connecting the generic map interface to this implementation.
const _vt: map::vtable = map::vtable {
	getter = &vt_get,
	setter = &vt_set,
	deleter = &vt_del,
	finisher = &vt_finish,
};

// Thunk for [[get]].
fn vt_get(m: *map::map, key: []u8) (*opaque | void) = {
	return get(m: *map, key);
};

// Thunk for [[set]].
fn vt_set(m: *map::map, key: []u8, v: *opaque) (void | nomem) = {
	return set(m: *map, key, v);
};

// Thunk for [[del]].
fn vt_del(m: *map::map, key: []u8) (*opaque | void) = {
	return del(m: *map, key);
};

// Thunk for [[finish]].
fn vt_finish(m: *map::map) void = {
	finish(m: *map);
};
// SPDX-License-Identifier: Apache-2.0 AND MPL-2.0
// SPDX-FileCopyrightText: 2024 The Cockroach Authors // SPDX-FileCopyrightText: 2025 Runxi Yu
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>
use errors; use ds::map;
use ds::map::swiss;
// Creates a new [[map]] with an initial number of groups and SipHash key.
// Creates a new [[map]] with an initial number of groups and the SipHash key.
// // n_groups must be greater than zero.
export fn new(n_groups: size, siphash_key: [16]u8) (*map | errors::invalid | nomem) = { if (n_groups == 0) { return errors::invalid; }; let v: size = 1; for (v < n_groups) { v *= 2; }; let groups_count = v; let gs: []group = match (alloc([group{...}...]: []group, groups_count)) { case let a: []group => yield a;
export fn new( n_groups: size, siphash_key: [16]u8, ) (*map | errors::invalid | nomem) = { let keybox = match (alloc(siphash_key)) { case let kp: *[16]u8 => yield kp;
case nomem => return nomem; };
for (let i = 0z; i < len(gs); i += 1) { group_set_empty(&gs[i]);
let inner = match (swiss::new( n_groups, &hash64, (keybox: *opaque), )) { case let sm: *swiss::map => yield (sm: *map::map); case errors::invalid => free(keybox); return errors::invalid; case nomem => free(keybox); return nomem;
};
let m = alloc(map {
let m = match (alloc(map {
vt = &_vt,
group_mask = groups_count - 1, used = 0, tombs = 0, siphash_key = siphash_key, groups = gs, })?;
inner = inner, key = keybox, })) { case let p: *map => yield p; case nomem => map::finish(inner); free(keybox); return nomem; };
return m; };
// SPDX-License-Identifier: Apache-2.0 AND MPL-2.0
// SPDX-FileCopyrightText: 2024 The Cockroach Authors // SPDX-FileCopyrightText: 2025 Runxi Yu
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>
use bytes;
use ds::map;
// Sets an item in a [[map]], replacing any existing item with the same key.
export fn set(m: *map, key: []u8, value: *opaque) (void | nomem) = {
	// First pass: if the key is already present, overwrite its value in
	// place. The pass ends at the first empty slot on the probe path.
	if (len(m.groups) != 0) {
		const probe_hash = hash64(m, key);
		const probe_tag = h2(probe_hash);
		const probe_mask = m.group_mask;
		let probe_off: size = (h1(probe_hash): size) & probe_mask;
		let probe_step: size = 0;
		let absent = false;

		for (!absent) {
			let grp = &m.groups[probe_off];
			for (let slot = 0z; slot < GROUP_SIZE; slot += 1) {
				const ctrl = grp.ctrl[slot];
				if (ctrl == CTRL_EMPTY) {
					absent = true;
					break;
				};
				if (is_full_ctrl(ctrl) && ctrl == probe_tag
						&& bytes::equal(grp.keys[slot], key)) {
					grp.vals[slot] = value;
					return;
				};
			};
			if (!absent) {
				const next = probe_next(probe_off, probe_step,
					probe_mask);
				probe_off = next.0;
				probe_step = next.1;
			};
		};
	};

	// The key is new: make room first. This may replace the group array,
	// so the probe below starts again from a fresh hash.
	ensure_capacity_for_insert(m)?;

	const hash = hash64(m, key);
	const tag = h2(hash);
	const mask = m.group_mask;
	let off: size = (h1(hash): size) & mask;
	let step: size = 0;

	for (true) {
		let grp = &m.groups[off];
		// First tombstone seen in this group, if any.
		let tomb: (size | void) = void;
		for (let slot = 0z; slot < GROUP_SIZE; slot += 1) {
			const ctrl = grp.ctrl[slot];
			if (is_full_ctrl(ctrl)) {
				if (ctrl == tag && bytes::equal(grp.keys[slot], key)) {
					grp.vals[slot] = value;
					return;
				};
				continue;
			};
			if (ctrl == CTRL_DELETED) {
				if (tomb is void) tomb = slot;
				continue;
			};

			// Free slot reached: prefer a tombstone seen earlier in
			// this group over the free slot itself.
			const target = match (tomb) {
			case let reused: size =>
				yield reused;
			case void =>
				yield slot;
			};
			grp.keys[target] = key;
			grp.vals[target] = value;
			grp.ctrl[target] = tag;
			m.used += 1;
			if (target != slot) {
				m.tombs -= 1;
			};
			return;
		};

		const next = probe_next(off, step, mask);
		off = next.0;
		step = next.1;
	};

	// NOTE(review): the loop above only exits through return, so this
	// delegation is never reached. It looks like the table walk above and
	// this call are two alternative bodies; confirm which one is intended.
	return map::set(m.inner, key, value);
};