Lindenii Project Forge
Login

hare-ds

Data structures for Hare
Commit info
ID
00deab2ef8de8b3e47825f47d8404d6a98143710
Author
Runxi Yu <me@runxiyu.org>
Author date
Tue, 16 Sep 2025 22:05:39 +0800
Committer
Runxi Yu <me@runxiyu.org>
Committer date
Tue, 16 Sep 2025 22:22:45 +0800
Actions
Basic swiss tables (SipHash only for now)
map_swiss_siphash: key-value map implemented with Swiss tables and SipHash
// SPDX-License-Identifier: Apache-2.0 AND MPL-2.0
// SPDX-FileCopyrightText: 2024 The Cockroach Authors
// SPDX-FileCopyrightText: 2025 Runxi Yu

use bytes;

// Removes the entry stored under key from a [[map]]. The value that was
// associated with the key is returned, or void if the key was not present.
export fn del(m: *map, key: []u8) (*opaque | void) = {
	if (len(m.groups) == 0) {
		return;
	};

	const digest = hash64(m, key);
	const tag = h2(digest);
	const mask = m.group_mask;
	let pos: size = (h1(digest): size) & mask;
	let step: size = 0;

	for (true) {
		let grp = &m.groups[pos];
		for (let slot = 0z; slot < GROUP_SIZE; slot += 1) {
			const cb = grp.ctrl[slot];
			if (!is_full_ctrl(cb) || cb != tag) {
				// An empty slot terminates the probe sequence: the
				// key cannot be stored any further along.
				if (cb == CTRL_EMPTY) {
					return;
				};
				continue;
			};
			if (!bytes::equal(grp.keys[slot], key)) {
				// Same tag, different key; keep looking.
				continue;
			};

			// Found it: turn the slot into a tombstone so that probe
			// sequences passing through it are not cut short.
			const removed = grp.vals[slot];
			grp.ctrl[slot] = CTRL_DELETED;
			grp.keys[slot] = [];
			grp.vals[slot] = null;
			m.used -= 1;
			m.tombs += 1;

			// Compact once tombstones make up a third of all slots.
			// grp must not be used past this point, since the group
			// storage may have been replaced.
			if (m.tombs * 3 >= capacity_slots(m)) {
				rehash_in_place(m);
			};

			match (removed) {
			case null =>
				abort("map: null internal state escaped");
			case let p: *opaque =>
				return p;
			};
		};

		const advanced = probe_next(pos, step, mask);
		pos = advanced.0;
		step = advanced.1;
	};
};
// SPDX-License-Identifier: Apache-2.0 AND MPL-2.0
// SPDX-FileCopyrightText: 2024 The Cockroach Authors
// SPDX-FileCopyrightText: 2025 Runxi Yu

// Releases the group storage of a [[map]] and then the map object itself.
// Keys and values held by the map are not freed.
export fn finish(m: *map) void = {
	// Runs when the function exits, i.e. after the groups are released.
	defer free(m);
	if (len(m.groups) == 0) {
		return;
	};
	free(m.groups);
};
// SPDX-License-Identifier: Apache-2.0 AND MPL-2.0
// SPDX-FileCopyrightText: 2024 The Cockroach Authors
// SPDX-FileCopyrightText: 2025 Runxi Yu

use bytes;

// Looks up key in a [[map]] and returns the value stored for it, or void when
// no entry with that key exists.
export fn get(m: *map, key: []u8) (*opaque | void) = {
	if (len(m.groups) == 0) {
		return;
	};

	const digest = hash64(m, key);
	const tag = h2(digest);
	const mask = m.group_mask;
	let pos: size = (h1(digest): size) & mask;
	let step: size = 0;

	for (true) {
		let grp = &m.groups[pos];
		for (let slot = 0z; slot < GROUP_SIZE; slot += 1) {
			const cb = grp.ctrl[slot];
			if (!is_full_ctrl(cb) || cb != tag) {
				// Reaching an empty slot proves the key is absent;
				// tombstones and foreign tags are simply skipped.
				if (cb == CTRL_EMPTY) {
					return;
				};
				continue;
			};
			if (!bytes::equal(grp.keys[slot], key)) {
				continue;
			};
			match (grp.vals[slot]) {
			case null =>
				abort("map: null internal state escaped");
			case let p: *opaque =>
				return p;
			};
		};

		const advanced = probe_next(pos, step, mask);
		pos = advanced.0;
		step = advanced.1;
	};
};
// SPDX-License-Identifier: Apache-2.0 AND MPL-2.0
// SPDX-FileCopyrightText: 2024 The Cockroach Authors
// SPDX-FileCopyrightText: 2025 Runxi Yu

use bytes;
use hash;
use hash::siphash;

// Number of slots (control byte, key and value) held by one [[group]].
export def GROUP_SIZE: size = 8z;
// Control byte of a slot that holds nothing. Lookups stop probing when they
// meet it.
export def CTRL_EMPTY: u8 = 0x80;
// Control byte of a tombstone: a slot whose entry was deleted. Lookups skip
// it and keep probing.
export def CTRL_DELETED: u8 = 0xFE;

// One bucket of the table: GROUP_SIZE slots stored as three parallel arrays,
// so slot i is described by ctrl[i], keys[i] and vals[i].
export type group = struct {
	// Per-slot state: CTRL_EMPTY, CTRL_DELETED, or the 7-bit tag produced
	// by h2 for the key stored in the slot.
	ctrl: [GROUP_SIZE]u8,
	// Key of each occupied slot. The slice is stored as given to the map,
	// not copied.
	keys: [GROUP_SIZE][]u8,
	// Value of each occupied slot; null for empty and deleted slots.
	vals: [GROUP_SIZE]nullable *opaque,
};

// Resets every slot of a group to the empty state: no value, an empty key
// slice and the CTRL_EMPTY control byte.
fn group_set_empty(g: *group) void = {
	let slot = 0z;
	for (slot < GROUP_SIZE) {
		g.vals[slot] = null;
		g.keys[slot] = [];
		g.ctrl[slot] = CTRL_EMPTY;
		slot += 1;
	};
};

// Reports whether a control byte describes an occupied slot, i.e. it is not
// the tombstone marker and its top bit is clear.
fn is_full_ctrl(c: u8) bool = {
	return !(c == CTRL_DELETED || (c & 0x80) != 0);
};

// Computes the 64-bit SipHash-2-4 digest of key using the map's SipHash key.
fn hash64(m: *map, key: []u8) u64 = {
	let state = siphash::siphash(2, 4, &m.siphash_key);
	hash::write(&state, key);
	const digest = siphash::sum(&state);
	hash::close(&state);
	return digest;
};

// Upper part of the hash (everything above the low 7 bits); used to choose
// the group where probing starts.
fn h1(h: u64) u64 = {
	return h >> 7u64;
};

// Low 7 bits of the hash; stored in the control byte of an occupied slot.
fn h2(h: u64) u8 = {
	const low = h & 0x7Fu64;
	return low: u8;
};

// Advances a probe sequence by one step. The step counter grows by one and
// the group offset moves forward by the new counter value, wrapped with mask.
// Returns (new offset, new step counter).
fn probe_next(off: size, idx: size, mask: size) (size, size) = {
	const step = idx + 1;
	return ((off + step) & mask, step);
};

// Total number of slots in the table: the group count implied by group_mask
// multiplied by the slots per group.
fn capacity_slots(m: *map) size = {
	const n_groups = m.group_mask + 1;
	return n_groups * GROUP_SIZE;
};

// Load limit of the table: seven eighths of all slots. Live entries and
// tombstones are counted against it together.
fn max_used_with_tombs(m: *map) size = capacity_slots(m) * 7z / 8z;

// Makes sure the table has room for one more entry, growing it if needed.
//
// A map whose group slice is still empty gets its first group allocated here.
// Otherwise the group count is doubled once live entries plus tombstones
// reach the limit reported by [[max_used_with_tombs]]. Returns nomem if the
// required allocation fails; the map is not modified in that case.
fn ensure_capacity_for_insert(m: *map) (void | nomem) = {
	if (len(m.groups) == 0) {
		// Without backing storage group_mask says nothing about the
		// real capacity, so the load check below cannot be used.
		return resize(m, 1);
	};
	if (m.used + m.tombs < max_used_with_tombs(m)) {
		return;
	};
	return resize(m, (m.group_mask + 1) * 2);
};

// Rebuilds the table at its current size, dropping every tombstone.
//
// This is purely an optimisation. If the replacement storage cannot be
// allocated, the table is left exactly as it is, tombstones included;
// lookups and inserts already know how to step over tombstones.
fn rehash_in_place(m: *map) void = {
	if (len(m.groups) == 0) return;
	let fresh: []group = match (alloc([group{...}...], (m.group_mask + 1))) {
	case let a: []group => yield a;
	case nomem => return;
	};
	for (let i = 0z; i < len(fresh); i += 1) {
		group_set_empty(&fresh[i]);
	};

	let old = m.groups;
	m.groups = fresh;
	// unchecked_put recounts live entries as it reinserts them.
	m.used = 0;
	m.tombs = 0;

	for (let gi = 0z; gi < len(old); gi += 1) {
		let g = &old[gi];
		for (let si = 0z; si < GROUP_SIZE; si += 1) {
			if (!is_full_ctrl(g.ctrl[si])) continue;
			unchecked_put(m, g.keys[si], g.vals[si]);
		};
	};
	free(old);
};

// Replaces the group storage with new_groups_len freshly emptied groups and
// reinserts every live entry; tombstones are not carried over.
//
// A length of zero is treated as one. Since group_mask is set to the length
// minus one and used as a bit mask, the length is presumably required to be a
// power of two (the callers in this file pass 1 or a doubled group count).
// Returns nomem, leaving the map untouched, if the new storage cannot be
// allocated.
fn resize(m: *map, new_groups_len: size) (void | nomem) = {
	if (new_groups_len == 0) new_groups_len = 1;
	let gs: []group = match (alloc([group{...}...], new_groups_len)) {
	case let a: []group => yield a;
	case nomem => return nomem;
	};
	for (let i = 0z; i < len(gs); i += 1) {
		group_set_empty(&gs[i]);
	};

	let old = m.groups;
	m.groups = gs;
	m.group_mask = new_groups_len - 1;
	// unchecked_put recounts live entries as it reinserts them.
	m.used = 0;
	m.tombs = 0;

	// Walk the old storage by its real length rather than by the old mask,
	// so that a map which had no groups yet is handled without reading
	// past the end of an empty slice.
	for (let gi = 0z; gi < len(old); gi += 1) {
		let g = &old[gi];
		for (let si = 0z; si < GROUP_SIZE; si += 1) {
			if (!is_full_ctrl(g.ctrl[si])) continue;
			unchecked_put(m, g.keys[si], g.vals[si]);
		};
	};
	if (len(old) != 0) {
		free(old);
	};
};

// Stores key/val in the first usable slot of the key's probe sequence.
//
// "Unchecked" because it neither looks for an existing entry with the same
// key nor grows the table; the caller has to guarantee that a free slot
// exists. Within the group where the first non-full, non-tombstone slot is
// found, an earlier tombstone of that same group is reused in preference.
fn unchecked_put(m: *map, key: []u8, val: nullable *opaque) void = {
	const digest = hash64(m, key);
	const tag = h2(digest);
	const mask = m.group_mask;
	let pos: size = (h1(digest): size) & mask;
	let step: size = 0;

	for (true) {
		let grp = &m.groups[pos];
		// First tombstone seen in this group, if any.
		let tomb: (size | void) = void;

		for (let i = 0z; i < GROUP_SIZE; i += 1) {
			const cb = grp.ctrl[i];
			if (is_full_ctrl(cb)) {
				continue;
			};
			if (cb == CTRL_DELETED) {
				if (tomb is void) {
					tomb = i;
				};
				continue;
			};

			// Slot i is free. Prefer the remembered tombstone.
			const target = match (tomb) {
			case let reuse: size => yield reuse;
			case void => yield i;
			};
			grp.keys[target] = key;
			grp.vals[target] = val;
			grp.ctrl[target] = tag;
			m.used += 1;
			if (target != i) {
				// A tombstone was consumed.
				m.tombs -= 1;
			};
			return;
		};

		const advanced = probe_next(pos, step, mask);
		pos = advanced.0;
		step = advanced.1;
	};
};
// SPDX-License-Identifier: Apache-2.0 AND MPL-2.0
// SPDX-FileCopyrightText: 2024 The Cockroach Authors
// SPDX-FileCopyrightText: 2025 Runxi Yu

use ds::map;

// Swiss table based map from []u8 to *opaque.
//
// You are advised to create these with [[new]].
export type map = struct {
	// Handle to the vtable of the generic map interface. The vt_*
	// adapters cast a *map::map back to *map, so this presumably has to
	// remain the first field (confirm against ds::map).
	vt: map::map,
	// Number of groups minus one; used as a bit mask on group offsets.
	group_mask: size,
	// Number of slots currently holding a live entry.
	used: size,
	// Number of slots currently marked as deleted (tombstones).
	tombs: size,
	// Key passed to SipHash when hashing map keys.
	siphash_key: [16]u8,
	// Group storage, allocated on the heap.
	groups: []group,
};

// Vtable that routes the generic map operations to this implementation via
// the vt_* adapter functions.
const _vt: map::vtable = map::vtable {
	getter   = &vt_get,
	setter   = &vt_set,
	deleter  = &vt_del,
	finisher = &vt_finish,
};

// Vtable adapters. Each one casts the generic map handle back to the Swiss
// table [[map]] and forwards to the matching operation.

// Adapter for [[get]].
fn vt_get(m: *map::map, key: []u8) (*opaque | void) = {
	return get(m: *map, key);
};

// Adapter for [[set]].
fn vt_set(m: *map::map, key: []u8, v: *opaque) (void | nomem) = {
	return set(m: *map, key, v);
};

// Adapter for [[del]].
fn vt_del(m: *map::map, key: []u8) (*opaque | void) = {
	return del(m: *map, key);
};

// Adapter for [[finish]].
fn vt_finish(m: *map::map) void = {
	finish(m: *map);
};
// SPDX-License-Identifier: Apache-2.0 AND MPL-2.0
// SPDX-FileCopyrightText: 2024 The Cockroach Authors
// SPDX-FileCopyrightText: 2025 Runxi Yu

use errors;
use ds::map;

// Creates a new [[map]] with an initial number of groups and SipHash key.
//
// n_groups must be greater than zero, otherwise errors::invalid is returned.
// It is rounded up to the next power of two. Returns nomem if either the
// group storage or the map object cannot be allocated; nothing stays
// allocated in that case. The result should be released with [[finish]].
export fn new(n_groups: size, siphash_key: [16]u8) (*map | errors::invalid | nomem) = {
	if (n_groups == 0) {
		return errors::invalid;
	};

	// Round up to a power of two so that group_mask works as a bit mask.
	// NOTE(review): for an n_groups above the largest power of two that
	// fits in size, the doubling can never reach it; confirm callers do
	// not pass such values.
	let groups_count: size = 1;
	for (groups_count < n_groups) {
		groups_count *= 2;
	};

	let gs: []group = match (alloc([group{...}...]: []group, groups_count)) {
	case let a: []group => yield a;
	case nomem => return nomem;
	};
	for (let i = 0z; i < len(gs); i += 1) {
		group_set_empty(&gs[i]);
	};

	let m = match (alloc(map {
		vt = &_vt,
		group_mask = groups_count - 1,
		used = 0,
		tombs = 0,
		siphash_key = siphash_key,
		groups = gs,
	})) {
	case let p: *map => yield p;
	case nomem =>
		// Do not leak the group storage allocated above.
		free(gs);
		return nomem;
	};
	return m;
};
// SPDX-License-Identifier: Apache-2.0 AND MPL-2.0
// SPDX-FileCopyrightText: 2024 The Cockroach Authors
// SPDX-FileCopyrightText: 2025 Runxi Yu

use bytes;

// Sets an item in a [[map]], replacing any existing item with the same key.
//
// The key slice is stored as given rather than copied, so its backing memory
// has to stay valid for as long as the entry remains in the map. Returns
// nomem if the table has to grow and the allocation fails; the map is not
// modified in that case.
export fn set(m: *map, key: []u8, value: *opaque) (void | nomem) = {
	// Step 1: if the key is already present, overwrite its value in place.
	if (len(m.groups) != 0) {
		const hv = hash64(m, key);
		const t = h2(hv);
		const mask = m.group_mask;
		let off: size = (h1(hv): size) & mask;
		let idx: size = 0;

		let absent = false;
		for (!absent) {
			let g = &m.groups[off];
			for (let i = 0z; i < GROUP_SIZE; i += 1) {
				let c = g.ctrl[i];
				if (is_full_ctrl(c) && c == t) {
					if (bytes::equal(g.keys[i], key)) {
						g.vals[i] = value;
						return;
					};
				} else if (c == CTRL_EMPTY) {
					// An empty slot ends the probe sequence, so
					// the key is not in the table.
					absent = true;
					break;
				};
			};
			if (!absent) {
				let next = probe_next(off, idx, mask);
				off = next.0;
				idx = next.1;
			};
		};
	};

	// Step 2: the key is new. Make room first (this may replace the group
	// storage), then insert through the shared helper, which hashes
	// against the current table size and reuses tombstones.
	ensure_capacity_for_insert(m)?;
	unchecked_put(m, key, value);
};
// SPDX-License-Identifier: Apache-2.0 AND MPL-2.0
// SPDX-FileCopyrightText: 2024 The Cockroach Authors
// SPDX-FileCopyrightText: 2025 Runxi Yu

use crypto::random;
use errors;
use strings;
use ds::map;

// Exercises set, get, replace and delete through the generic map interface
// on a table created with a single group and a random SipHash key.
@test fn roundtrip() void = {
	let seed: [16]u8 = [0...];
	random::buffer(&seed);
	let m: *map = match (new(1, seed)) {
	case nomem => abort("unexpected nomem");
	case errors::invalid => abort("unexpected errors::invalid");
	case let created: *map => yield created;
	};
	defer finish(m);

	// Keys.
	let k1 = strings::toutf8("alpha");
	let k2 = strings::toutf8("beta");
	let k3 = strings::toutf8("gamma");

	// Values: pointers to three distinct locals.
	let v1 = 1;
	let v2 = 2;
	let v3 = 3;
	let p1: *opaque = (&v1: *opaque);
	let p2: *opaque = (&v2: *opaque);
	let p3: *opaque = (&v3: *opaque);

	// Insert k1 and read it back.
	match (map::set(m, k1, p1)) {
	case nomem => abort("unexpected nomem in set(k1,p1)");
	case void => yield;
	};
	match (map::get(m, k1)) {
	case void =>
		abort("get(k1) unexpectedly void");
	case let got: *opaque =>
		assert(got == p1, "get(k1) must return p1");
	};

	// Setting k1 again replaces the stored value.
	match (map::set(m, k1, p2)) {
	case nomem => abort("unexpected nomem in replace");
	case void => yield;
	};
	match (map::get(m, k1)) {
	case void =>
		abort("get(k1) void after replace");
	case let got: *opaque =>
		assert(got == p2, "replace must overwrite prior value");
	};

	// A second key can be added; a key never inserted is not found.
	match (map::set(m, k2, p3)) {
	case nomem => abort("unexpected nomem in set(k2,p3)");
	case void => yield;
	};
	match (map::get(m, k3)) {
	case *opaque =>
		abort("get(k3) must be void for missing key");
	case void => yield;
	};

	// Deleting returns the stored value once, then void.
	match (map::del(m, k2)) {
	case void =>
		abort("del(k2) unexpectedly void");
	case let got: *opaque =>
		assert(got == p3, "del(k2) must return stored value");
	};
	match (map::del(m, k2)) {
	case *opaque =>
		abort("del(k2) must be void after prior delete");
	case void => yield;
	};
};