Lindenii Project Forge
Add maps via FNV, SipHash, and B-tree
ds: data structures The "ds" module provides a collection of data structures implemented in pure Hare.
map: general-purpose map data structures You should create a map with the `new` function in each submodule. Then, you may use functions defined in this module to manipulate the map.
// A map is a pointer to a [[vtable]]. The functions in this module dispatch
// every operation through that table, so any implementation that supplies one
// can be used through the same interface.
export type map = *vtable;

// The set of virtual functions backing a [[map]]: one each for lookup,
// insertion, removal and teardown.
export type vtable = struct {
	getter: *getter,
	setter: *setter,
	deleter: *deleter,
	finisher: *finisher,
};

// Signature of the lookup operation of a map. It yields a pointer to the value
// stored under key, or void when the key is absent.
export type getter = fn(m: *map, key: []u8) (*opaque | void);

// Looks up key in a [[map]]. Returns a pointer to the value, or void if the
// key does not exist.
export fn get(m: *map, key: []u8) (*opaque | void) = m.getter(m, key);

// Signature of the insertion operation of a map. It yields void on success or
// nomem if memory allocation failed. A value already stored under the same key
// is replaced.
export type setter = fn(m: *map, key: []u8, value: *opaque) (void | nomem);

// Stores value under key in a [[map]], replacing any existing item with the
// same key.
export fn set(m: *map, key: []u8, value: *opaque) (void | nomem) =
	m.setter(m, key, value);

// Signature of the removal operation of a map. It yields a pointer to the
// removed value, or void when the key is absent.
export type deleter = fn(m: *map, key: []u8) (*opaque | void);

// Removes key from a [[map]]. Returns the removed value, or void if the key
// does not exist.
export fn del(m: *map, key: []u8) (*opaque | void) = m.deleter(m, key);

// Signature of the teardown operation of a map.
export type finisher = fn(m: *map) void;

// Frees the map and all of its resources. The map must not be used after this
// call.
export fn finish(m: *map) void = m.finisher(m);
map_btree: key-value map implemented with a b-tree
// SPDX-License-Identifier: MPL-2.0

use bytes;
use sort;

// Deletes an item from a [[map]]. Returns the removed value, or void if the
// key is not present.
export fn del(m: *map, key: []u8) (*opaque | void) = {
	const r = delete_rec(m, m.root, key);

	// Deletion can merge the root's last two children, which leaves the
	// root with no keys and a single child. That child becomes the new
	// root and the tree loses one level.
	if (len(m.root.keys) == 0 && !m.root.leaf) {
		const old = m.root;
		m.root = old.children[0];

		// Release only the emptied node and its own slices; the child
		// it pointed to lives on as the new root.
		free(old.keys);
		free(old.vals);
		free(old.children);
		free(old);
	};
	return r;
};
// SPDX-License-Identifier: MPL-2.0

// Frees resources associated with a [[map]]: every node of the tree and the
// map object itself. The key bytes and the values the map points to are not
// freed.
export fn finish(m: *map) void = {
	node_finish(m.root);
	free(m);
};

// Recursively releases a node: first all of its descendants (internal nodes
// only), then its own slices, then the node itself.
fn node_finish(n: *node) void = {
	if (!n.leaf) {
		for (let child .. n.children) {
			node_finish(child);
		};
		free(n.children);
	};
	free(n.vals);
	free(n.keys);
	free(n);
};
// SPDX-License-Identifier: MPL-2.0

use bytes;
use sort;

// Gets an item from a [[map]] by key, returning void if not found.
export fn get(m: *map, key: []u8) (*opaque | void) = {
	let cur = m.root;
	for (true) {
		// Index of the first key in this node that is not ordered
		// before the requested key.
		const idx = sort::lbisect((cur.keys: []const opaque),
			size([]u8), (&key: const *opaque), &cmp_u8slice);

		const hit = idx < len(cur.keys)
			&& bytes::equal(cur.keys[idx], key);
		if (hit) {
			return cur.vals[idx];
		};

		// Nothing below a leaf: the key is not in the tree.
		if (cur.leaf) {
			return;
		};
		cur = cur.children[idx];
	};
};
// SPDX-License-Identifier: MPL-2.0

use bytes;
use sort;

// Lexicographic three-way comparison of two byte strings. Returns -1, 0 or 1
// when a orders before, equal to, or after b. A proper prefix orders before
// the longer string.
fn keycmp(a: []u8, b: []u8) int = {
	let n = if (len(a) < len(b)) len(a) else len(b);
	for (let i = 0z; i < n; i += 1) {
		if (a[i] < b[i]) return -1;
		if (a[i] > b[i]) return 1;
	};
	if (len(a) < len(b)) return -1;
	if (len(a) > len(b)) return 1;
	return 0;
};

// Adapter with the comparison signature passed to sort::lbisect: both
// arguments point to []u8 values, which are compared with [[keycmp]].
fn cmp_u8slice(a: const *opaque, b: const *opaque) int = {
	let sa = *(a: *[]u8);
	let sb = *(b: *[]u8);
	return keycmp(sa, sb);
};

// Allocates an empty node for a tree of minimum degree t. The key and value
// slices get capacity 2t - 1; internal nodes additionally get a child slice of
// capacity 2t. Returns nomem if any allocation fails, in which case everything
// allocated so far is released again.
fn node_new(t: size, leaf: bool) (*node | nomem) = {
	const capk = 2 * t - 1;
	const capc = if (leaf) 0z else 2z * t;

	let empty_keys: [][]u8 = [];
	let keys = alloc(empty_keys, capk)?;

	let empty_vals: []*opaque = [];
	let vals = match (alloc(empty_vals, capk)) {
	case let v: []*opaque =>
		yield v;
	case nomem =>
		free(keys);
		return nomem;
	};

	let children: []*node = [];
	if (!leaf) {
		let empty_children: []*node = [];
		children = match (alloc(empty_children, capc)) {
		case let c: []*node =>
			yield c;
		case nomem =>
			free(keys);
			free(vals);
			return nomem;
		};
	};

	match (alloc(node {
		leaf = leaf,
		keys = keys,
		vals = vals,
		children = children,
	})) {
	case let nd: *node =>
		return nd;
	case nomem =>
		free(keys);
		free(vals);
		// Freeing the empty slice of a leaf is a no-op.
		free(children);
		return nomem;
	};
};

// Splits the full child x.children[i] around its median key. The upper half
// moves into a new right sibling, the median (key and value) moves up into x at
// index i, and the new sibling is linked in at index i + 1.
fn split_child(m: *map, x: *node, i: size) (void | nomem) = {
	const t = m.t;
	let y = x.children[i];
	let z = node_new(t, y.leaf)?;

	let medk = y.keys[t - 1];
	let medv = y.vals[t - 1];

	// Everything after the median goes to the new sibling.
	append(z.keys, y.keys[t..]...)?;
	append(z.vals, y.vals[t..]...)?;
	if (!y.leaf) {
		append(z.children, y.children[t..]...)?;
	};

	// The original child keeps everything before the median.
	y.keys = y.keys[..t - 1];
	y.vals = y.vals[..t - 1];
	if (!y.leaf) {
		y.children = y.children[..t];
	};

	insert(x.keys[i], medk)?;
	insert(x.vals[i], medv)?;
	insert(x.children[i + 1], z)?;
};

// Inserts key/val into the subtree rooted at x, which the caller guarantees is
// not full. An existing entry with the same key has its value replaced. Full
// children are split before descending into them.
fn insert_nonfull(m: *map, x: *node, key: []u8, val: *opaque) (void | nomem) = {
	let i = sort::lbisect((x.keys: []const opaque), size([]u8),
		(&key: const *opaque), &cmp_u8slice);
	if (i < len(x.keys) && bytes::equal(x.keys[i], key)) {
		x.vals[i] = val;
		return;
	};

	if (x.leaf) {
		insert(x.keys[i], key)?;
		insert(x.vals[i], val)?;
		return;
	};

	if (len(x.children[i].keys) == 2 * m.t - 1) {
		split_child(m, x, i)?;

		// The split promoted the child's median into x.keys[i]. If that
		// median is the key being set, replace its value here;
		// descending would insert a second copy of the key below.
		const c = keycmp(key, x.keys[i]);
		if (c == 0) {
			x.vals[i] = val;
			return;
		};
		if (c > 0) {
			i += 1;
		};
	};
	insert_nonfull(m, x.children[i], key, val)?;
};
// Merges x.children[i + 1] into x.children[i]: the separator key/value
// x.keys[i] moves down between the two halves, the right node's contents are
// appended to the left node, and the separator and right child are removed
// from x. The emptied right node is freed. Aborts if memory allocation fails.
fn merge_children(m: *map, x: *node, i: size) void = {
	let left = x.children[i];
	let right = x.children[i + 1];

	append(left.keys, x.keys[i])!;
	append(left.vals, x.vals[i])!;

	append(left.keys, right.keys...)!;
	append(left.vals, right.vals...)!;
	if (!left.leaf) {
		append(left.children, right.children...)!;
	};

	delete(x.keys[i]);
	delete(x.vals[i]);
	delete(x.children[i + 1]);

	// Everything the right node held now belongs to the left node, so only
	// the node itself and its slices are released.
	free(right.keys);
	free(right.vals);
	if (!right.leaf) {
		free(right.children);
	};
	free(right);
};

// Makes sure x.children[i] holds at least t keys so that one key can be
// removed from it. In order of preference: borrow a key through the parent
// from the left sibling, borrow from the right sibling, or merge with a
// sibling.
//
// When the child is the last one and has to be merged, it is merged into its
// left sibling; its contents then live at index i - 1 and x has one child
// fewer. Callers must not assume index i is still valid afterwards.
fn ensure_child_has_space(m: *map, x: *node, i: size) void = {
	const t = m.t;
	let c = x.children[i];
	if (len(c.keys) >= t) return;

	// Rotate right: the separator moves down to the front of c and the
	// left sibling's last key moves up in its place.
	if (i > 0 && len(x.children[i - 1].keys) >= t) {
		let ls = x.children[i - 1];

		insert(c.keys[0], x.keys[i - 1])!;
		insert(c.vals[0], x.vals[i - 1])!;
		if (!c.leaf) {
			let moved = ls.children[len(ls.children) - 1];
			insert(c.children[0], moved)!;
			delete(ls.children[len(ls.children) - 1]);
		};

		x.keys[i - 1] = ls.keys[len(ls.keys) - 1];
		x.vals[i - 1] = ls.vals[len(ls.vals) - 1];
		delete(ls.keys[len(ls.keys) - 1]);
		delete(ls.vals[len(ls.vals) - 1]);
		return;
	};

	// Rotate left: the separator moves down to the end of c and the right
	// sibling's first key moves up in its place.
	if (i + 1 < len(x.children) && len(x.children[i + 1].keys) >= t) {
		let rs = x.children[i + 1];

		append(c.keys, x.keys[i])!;
		append(c.vals, x.vals[i])!;
		if (!c.leaf) {
			let moved = rs.children[0];
			append(c.children, moved)!;
			delete(rs.children[0]);
		};

		x.keys[i] = rs.keys[0];
		x.vals[i] = rs.vals[0];
		delete(rs.keys[0]);
		delete(rs.vals[0]);
		return;
	};

	// Neither sibling can spare a key: merge with the right sibling if
	// there is one, otherwise with the left sibling.
	if (i + 1 < len(x.children)) {
		merge_children(m, x, i);
	} else {
		merge_children(m, x, i - 1);
	};
};

// Removes and returns the largest key and its value from the subtree rooted
// at x.
fn pop_max(m: *map, x: *node) ([]u8, *opaque) = {
	let cur = x;
	for (!cur.leaf) {
		ensure_child_has_space(m, cur, len(cur.children) - 1);
		// Recompute the index: if the last child was merged into its
		// left sibling, the child count shrank by one.
		cur = cur.children[len(cur.children) - 1];
	};
	let k = cur.keys[len(cur.keys) - 1];
	let v = cur.vals[len(cur.vals) - 1];
	delete(cur.keys[len(cur.keys) - 1]);
	delete(cur.vals[len(cur.vals) - 1]);
	return (k, v);
};

// Removes and returns the smallest key and its value from the subtree rooted
// at x.
fn pop_min(m: *map, x: *node) ([]u8, *opaque) = {
	let cur = x;
	for (!cur.leaf) {
		ensure_child_has_space(m, cur, 0);
		cur = cur.children[0];
	};
	let k = cur.keys[0];
	let v = cur.vals[0];
	delete(cur.keys[0]);
	delete(cur.vals[0]);
	return (k, v);
};

// Removes key from the subtree rooted at x. Returns the value that was stored
// under it, or void if the key is not in the subtree.
fn delete_rec(m: *map, x: *node, key: []u8) (*opaque | void) = {
	let i = sort::lbisect((x.keys: []const opaque), size([]u8),
		(&key: const *opaque), &cmp_u8slice);

	if (i < len(x.keys) && bytes::equal(x.keys[i], key)) {
		if (x.leaf) {
			let ret = x.vals[i];
			delete(x.keys[i]);
			delete(x.vals[i]);
			return ret;
		};

		// The key sits in an internal node. Replace it with its
		// predecessor or successor when the neighbouring child can
		// spare a key; otherwise merge both children around it and
		// continue the deletion inside the merged child.
		const t = m.t;
		let y = x.children[i];
		let z = x.children[i + 1];
		if (len(y.keys) >= t) {
			let (pk, pv) = pop_max(m, y);
			let ret = x.vals[i];
			x.keys[i] = pk;
			x.vals[i] = pv;
			return ret;
		} else if (len(z.keys) >= t) {
			let (sk, sv) = pop_min(m, z);
			let ret = x.vals[i];
			x.keys[i] = sk;
			x.vals[i] = sv;
			return ret;
		} else {
			// z is freed by the merge; only y is used afterwards.
			merge_children(m, x, i);
			return delete_rec(m, y, key);
		};
	};

	if (x.leaf) {
		return;
	};

	ensure_child_has_space(m, x, i);
	// If the last child was merged into its left sibling, the subtree that
	// may hold the key moved to the new last index.
	if (i >= len(x.children)) {
		i = len(x.children) - 1;
	};
	return delete_rec(m, x.children[i], key);
};
// SPDX-License-Identifier: MPL-2.0

use ds::map;

// B-tree-based map from []u8 to *opaque.
//
// The vtable pointer is the first field, which is what lets a pointer to this
// struct be passed to the generic functions in ds::map.
//
// You are advised to create these with [[new]].
export type map = struct {
	vt: map::map,
	// Min degree
	t: size,
	root: *node,
};

// Dispatch table shared by all B-tree maps.
const _vt: map::vtable = map::vtable {
	finisher = &vt_finish,
	deleter = &vt_del,
	setter = &vt_set,
	getter = &vt_get,
};

// Adapter from the generic getter signature to [[get]].
fn vt_get(m: *map::map, key: []u8) (*opaque | void) = {
	return get(m: *map, key);
};

// Adapter from the generic setter signature to [[set]].
fn vt_set(m: *map::map, key: []u8, v: *opaque) (void | nomem) = {
	return set(m: *map, key, v);
};

// Adapter from the generic deleter signature to [[del]].
fn vt_del(m: *map::map, key: []u8) (*opaque | void) = {
	return del(m: *map, key);
};

// Adapter from the generic finisher signature to [[finish]].
fn vt_finish(m: *map::map) void = {
	finish(m: *map);
};
// SPDX-License-Identifier: MPL-2.0

use errors;

// Creates a new [[map]] with minimum degree t.
//
// t must be greater than or equal to 2; otherwise errors::invalid is returned.
// Returns nomem if memory allocation fails, in which case nothing stays
// allocated. The caller should pass the result to [[finish]] when done.
export fn new(t: size) (*map | errors::invalid | nomem) = {
	if (t < 2) {
		return errors::invalid;
	};

	// A new tree consists of a single empty leaf.
	const root = node_new(t, true)?;

	const m = match (alloc(map {
		vt = &_vt,
		t = t,
		root = root,
	})) {
	case let p: *map =>
		yield p;
	case nomem =>
		// Do not leak the root node when the map itself cannot be
		// allocated.
		node_finish(root);
		return nomem;
	};
	return m;
};
// A single node of the B-tree.
export type node = struct {
	// True if this node has no children.
	leaf: bool,
	// Keys held by this node. Lookups binary-search this slice, so it is
	// expected to be kept in ascending byte-wise order.
	keys: [][]u8,
	// Values; vals[i] is the value stored under keys[i].
	vals: []*opaque,
	// Child nodes; left empty for leaves. Lookups for a key that orders
	// before keys[i] descend into children[i].
	children: []*node,
};
// SPDX-License-Identifier: MPL-2.0

use bytes;
use sort;

// Sets an item in a [[map]], replacing any existing item with the same key.
// Returns nomem if memory allocation fails.
export fn set(m: *map, key: []u8, value: *opaque) (void | nomem) = {
	const full = 2 * m.t - 1;
	if (len(m.root.keys) == full) {
		// A full root cannot take another key: put a new, empty root
		// above it and split the old root as its first child. This
		// makes the tree one level taller.
		let grown = node_new(m.t, false)?;
		append(grown.children, m.root)?;
		m.root = grown;
		split_child(m, grown, 0)?;
	};
	return insert_nonfull(m, m.root, key, value);
};
use errors;
use strings;
use ds::map;

// Drives a B-tree map through the generic ds::map interface: insert, look up,
// replace, look up a missing key, delete, and delete again.
@test fn roundtrip() void = {
	let m: *map = match (new(2)) {
	case nomem =>
		abort("unexpected nomem");
	case errors::invalid =>
		abort("unexpected errors::invalid");
	case let created: *map =>
		yield created;
	};
	defer finish(m);

	let k1 = strings::toutf8("alpha");
	let k2 = strings::toutf8("beta");
	let k3 = strings::toutf8("gamma");

	// The addresses of three distinct locals serve as the stored values.
	let v1 = 1, v2 = 2, v3 = 3;
	let p1: *opaque = (&v1: *opaque);
	let p2: *opaque = (&v2: *opaque);
	let p3: *opaque = (&v3: *opaque);

	// Insert, then read back.
	match (map::set(m, k1, p1)) {
	case nomem =>
		abort("unexpected nomem in set(k1,p1)");
	case void =>
		yield;
	};
	match (map::get(m, k1)) {
	case void =>
		abort("get(k1) unexpectedly void");
	case let found: *opaque =>
		assert(found == p1, "get(k1) must return p1");
	};

	// Setting the same key again replaces the value.
	match (map::set(m, k1, p2)) {
	case nomem =>
		abort("unexpected nomem in replace");
	case void =>
		yield;
	};
	match (map::get(m, k1)) {
	case void =>
		abort("get(k1) void after replace");
	case let found: *opaque =>
		assert(found == p2, "replace must overwrite prior value");
	};

	// A second key; a third key that was never inserted stays absent.
	match (map::set(m, k2, p3)) {
	case nomem =>
		abort("unexpected nomem in set(k2,p3)");
	case void =>
		yield;
	};
	match (map::get(m, k3)) {
	case *opaque =>
		abort("get(k3) must be void for missing key");
	case void =>
		yield;
	};

	// Deleting returns the stored value once, then void.
	match (map::del(m, k2)) {
	case void =>
		abort("del(k2) unexpectedly void");
	case let removed: *opaque =>
		assert(removed == p3, "del(k2) must return stored value");
	};
	match (map::del(m, k2)) {
	case *opaque =>
		abort("del(k2) must be void after prior delete");
	case void =>
		yield;
	};
};
map_fnv: key-value map implemented as a Fowler-Noll-Vo hashmap This module provides a simple implementation of a hashmap using the Fowler-Noll-Vo (FNV) hashing algorithm. FNV is not collision-resistant, so it should only be used for trusted keys (i.e., not user input that could be deliberately chosen to cause collisions).
// SPDX-License-Identifier: MPL-2.0
// SPDX-FileCopyrightText: 2024 Drew DeVault <drew@ddevault.org>
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>

use bytes;
use hash;
use hash::fnv;

// Removes the entry stored under key from a [[map]]. Returns the value that
// was stored, or void if the map holds no entry with that key.
export fn del(m: *map, key: []u8) (*opaque | void) = {
	let hasher = fnv::fnv64a();
	hash::write(&hasher, key);
	const index = fnv::sum64(&hasher): size % m.n;
	let bucket = &m.buckets[index];

	for (let i = 0z; i < len(bucket); i += 1) {
		if (!bytes::equal(bucket[i].0, key)) {
			continue;
		};
		// Remember the value before the entry is removed.
		const removed = bucket[i].1;
		delete(bucket[i]);
		return removed;
	};
};
// SPDX-License-Identifier: MPL-2.0
// SPDX-FileCopyrightText: 2024 Drew DeVault <drew@ddevault.org>
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>

// Frees resources associated with a [[map]]: each bucket, the bucket array,
// and the map object itself. The key bytes and the values the map points to
// are not freed.
export fn finish(m: *map) void = {
	for (let bucket .. m.buckets[..m.n]) {
		free(bucket);
	};
	free(m.buckets);
	free(m);
};
// SPDX-License-Identifier: MPL-2.0
// SPDX-FileCopyrightText: 2024 Drew DeVault <drew@ddevault.org>
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>

use bytes;
use hash;
use hash::fnv;

// Gets an item from a [[map]] by key, returning void if not found.
export fn get(m: *map, key: []u8) (*opaque | void) = {
	let hasher = fnv::fnv64a();
	hash::write(&hasher, key);
	const index = fnv::sum64(&hasher): size % m.n;

	// Scan the selected bucket for an entry whose key has the same bytes.
	for (let entry .. m.buckets[index]) {
		if (bytes::equal(entry.0, key)) {
			return entry.1;
		};
	};
};
// SPDX-License-Identifier: MPL-2.0
// SPDX-FileCopyrightText: 2024 Drew DeVault <drew@ddevault.org>
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>

use ds::map;

// A simple hash map from byte strings to opaque pointers, using 64-bit FNV-1a
// for hashing. Collisions are resolved by chaining: each bucket is a slice of
// (key, value) pairs that is searched linearly.
//
// You are advised to create these with [[new]].
export type map = struct {
	// Dispatch table for the generic ds::map interface; must stay the
	// first field.
	vt: map::map,
	// Number of buckets.
	n: size,
	// The buckets; a key's hash modulo n selects its bucket.
	buckets: [][]([]u8, *opaque),
};

// Dispatch table shared by all FNV maps.
const _vt: map::vtable = map::vtable {
	getter = &vt_get,
	setter = &vt_set,
	deleter = &vt_del,
	finisher = &vt_finish,
};

// Adapters from the generic ds::map signatures to this module's functions.
fn vt_get(m: *map::map, key: []u8) (*opaque | void) = get(m: *map, key);
fn vt_set(m: *map::map, key: []u8, v: *opaque) (void | nomem) = set(m: *map, key, v);
fn vt_del(m: *map::map, key: []u8) (*opaque | void) = del(m: *map, key);
fn vt_finish(m: *map::map) void = finish(m: *map);
// SPDX-License-Identifier: MPL-2.0
// SPDX-FileCopyrightText: 2024 Drew DeVault <drew@ddevault.org>
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>

use bytes;
use errors;
use hash;
use hash::fnv;

// Creates a new [[map]] with the given number of buckets.
//
// n must be non-zero; otherwise errors::invalid is returned. Returns nomem if
// memory allocation fails, in which case nothing stays allocated. The caller
// should pass the result to [[finish]] when done.
export fn new(n: size) (*map | errors::invalid | nomem) = {
	if (n == 0) {
		return errors::invalid;
	};

	// Start with n empty buckets.
	let empty_bucket: []([]u8, *opaque) = [];
	let buckets: [][]([]u8, *opaque) = alloc([empty_bucket...], n)?;

	const m = match (alloc(map {
		vt = &_vt,
		n = n,
		buckets = buckets,
	})) {
	case let p: *map =>
		yield p;
	case nomem =>
		// Do not leak the bucket array when the map itself cannot be
		// allocated.
		free(buckets);
		return nomem;
	};
	return m;
};
// SPDX-License-Identifier: MPL-2.0
// SPDX-FileCopyrightText: 2024 Drew DeVault <drew@ddevault.org>
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>

use bytes;
use hash;
use hash::fnv;

// Sets an item in a [[map]], replacing any existing item with the same key.
// Returns nomem if memory allocation fails.
//
// The key slice is stored as given, not copied, so its backing memory must
// remain valid for as long as the entry is in the map.
export fn set(m: *map, key: []u8, value: *opaque) (void | nomem) = {
	let hasher = fnv::fnv64a();
	hash::write(&hasher, key);
	const index = fnv::sum64(&hasher): size % m.n;
	let bucket = &m.buckets[index];

	// Overwrite in place when the key is already present.
	for (let i = 0z; i < len(bucket); i += 1) {
		if (!bytes::equal(bucket[i].0, key)) {
			continue;
		};
		bucket[i].1 = value;
		return;
	};

	return append(bucket, (key, value));
};
use errors;
use strings;
use ds::map;

// Drives an FNV map through the generic ds::map interface: insert, look up,
// replace, look up a missing key, delete, and delete again.
@test fn roundtrip() void = {
	let m: *map = match (new(16)) {
	case nomem =>
		abort("unexpected nomem");
	case errors::invalid =>
		abort("unexpected errors::invalid");
	case let created: *map =>
		yield created;
	};
	defer finish(m);

	let k1 = strings::toutf8("alpha");
	let k2 = strings::toutf8("beta");
	let k3 = strings::toutf8("gamma");

	// The addresses of three distinct locals serve as the stored values.
	let v1 = 1, v2 = 2, v3 = 3;
	let p1: *opaque = (&v1: *opaque);
	let p2: *opaque = (&v2: *opaque);
	let p3: *opaque = (&v3: *opaque);

	// Insert, then read back.
	match (map::set(m, k1, p1)) {
	case nomem =>
		abort("unexpected nomem in set(k1,p1)");
	case void =>
		yield;
	};
	match (map::get(m, k1)) {
	case void =>
		abort("get(k1) unexpectedly void");
	case let found: *opaque =>
		assert(found == p1, "get(k1) must return p1");
	};

	// Setting the same key again replaces the value.
	match (map::set(m, k1, p2)) {
	case nomem =>
		abort("unexpected nomem in replace");
	case void =>
		yield;
	};
	match (map::get(m, k1)) {
	case void =>
		abort("get(k1) void after replace");
	case let found: *opaque =>
		assert(found == p2, "replace must overwrite prior value");
	};

	// A second key; a third key that was never inserted stays absent.
	match (map::set(m, k2, p3)) {
	case nomem =>
		abort("unexpected nomem in set(k2,p3)");
	case void =>
		yield;
	};
	match (map::get(m, k3)) {
	case *opaque =>
		abort("get(k3) must be void for missing key");
	case void =>
		yield;
	};

	// Deleting returns the stored value once, then void.
	match (map::del(m, k2)) {
	case void =>
		abort("del(k2) unexpectedly void");
	case let removed: *opaque =>
		assert(removed == p3, "del(k2) must return stored value");
	};
	match (map::del(m, k2)) {
	case *opaque =>
		abort("del(k2) must be void after prior delete");
	case void =>
		yield;
	};
};
map_siphash: key-value map implemented as a SipHash hashmap This module provides a simple implementation of a hashmap using the SipHash hashing algorithm for collision-resistant mapping. It is designed for situations where keys may be untrusted.
// SPDX-License-Identifier: MPL-2.0
// SPDX-FileCopyrightText: 2024 Drew DeVault <drew@ddevault.org>
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>

use bytes;
use hash;
use hash::siphash;

// Removes the entry stored under key from a [[map]]. Returns the value that
// was stored, or void if the map holds no entry with that key.
export fn del(m: *map, key: []u8) (*opaque | void) = {
	let hasher = siphash::siphash(2, 4, &m.siphash_key);
	defer hash::close(&hasher);
	hash::write(&hasher, key);
	const index = siphash::sum(&hasher): size % m.n;
	let bucket = &m.buckets[index];

	for (let i = 0z; i < len(bucket); i += 1) {
		if (!bytes::equal(bucket[i].0, key)) {
			continue;
		};
		// Remember the value before the entry is removed.
		const removed = bucket[i].1;
		delete(bucket[i]);
		return removed;
	};
};
// SPDX-License-Identifier: MPL-2.0
// SPDX-FileCopyrightText: 2024 Drew DeVault <drew@ddevault.org>
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>

// Frees resources associated with a [[map]]: each bucket, the bucket array,
// and the map object itself. The key bytes and the values the map points to
// are not freed.
export fn finish(m: *map) void = {
	for (let bucket .. m.buckets[..m.n]) {
		free(bucket);
	};
	free(m.buckets);
	free(m);
};
// SPDX-License-Identifier: MPL-2.0
// SPDX-FileCopyrightText: 2024 Drew DeVault <drew@ddevault.org>
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>

use bytes;
use hash;
use hash::siphash;

// Gets an item from a [[map]] by key, returning void if not found.
export fn get(m: *map, key: []u8) (*opaque | void) = {
	let hasher = siphash::siphash(2, 4, &m.siphash_key);
	defer hash::close(&hasher);
	hash::write(&hasher, key);
	const index = siphash::sum(&hasher): size % m.n;

	// Scan the selected bucket for an entry whose key has the same bytes.
	for (let entry .. m.buckets[index]) {
		if (bytes::equal(entry.0, key)) {
			return entry.1;
		};
	};
};
// SPDX-License-Identifier: MPL-2.0
// SPDX-FileCopyrightText: 2024 Drew DeVault <drew@ddevault.org>
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>

use ds::map;

// A simple hash map from byte strings to opaque pointers, using SipHash for
// hashing. Collisions are resolved by chaining: each bucket is a slice of
// (key, value) pairs that is searched linearly.
//
// You are advised to create these with [[new]].
export type map = struct {
	// Dispatch table for the generic ds::map interface; must stay the
	// first field.
	vt: map::map,
	// Number of buckets.
	n: size,
	// Key passed to SipHash for every hash computation on this map.
	siphash_key: [16]u8,
	// The buckets; a key's hash modulo n selects its bucket.
	buckets: [][]([]u8, *opaque),
};

// Dispatch table shared by all SipHash maps.
const _vt: map::vtable = map::vtable {
	getter = &vt_get,
	setter = &vt_set,
	deleter = &vt_del,
	finisher = &vt_finish,
};

// Adapters from the generic ds::map signatures to this module's functions.
fn vt_get(m: *map::map, key: []u8) (*opaque | void) = get(m: *map, key);
fn vt_set(m: *map::map, key: []u8, v: *opaque) (void | nomem) = set(m: *map, key, v);
fn vt_del(m: *map::map, key: []u8) (*opaque | void) = del(m: *map, key);
fn vt_finish(m: *map::map) void = finish(m: *map);
// SPDX-License-Identifier: MPL-2.0
// SPDX-FileCopyrightText: 2024 Drew DeVault <drew@ddevault.org>
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>

use bytes;
use errors;
use hash;
use hash::siphash;

// Creates a new [[map]] with the given number of buckets and SipHash key.
//
// n must be non-zero; otherwise errors::invalid is returned. Returns nomem if
// memory allocation fails, in which case nothing stays allocated. The caller
// should pass the result to [[finish]] when done.
export fn new(n: size, siphash_key: [16]u8) (*map | errors::invalid | nomem) = {
	if (n == 0) {
		return errors::invalid;
	};

	// Start with n empty buckets.
	let empty_bucket: []([]u8, *opaque) = [];
	let buckets: [][]([]u8, *opaque) = alloc([empty_bucket...], n)?;

	const m = match (alloc(map {
		vt = &_vt,
		n = n,
		siphash_key = siphash_key,
		buckets = buckets,
	})) {
	case let p: *map =>
		yield p;
	case nomem =>
		// Do not leak the bucket array when the map itself cannot be
		// allocated.
		free(buckets);
		return nomem;
	};
	return m;
};
// SPDX-License-Identifier: MPL-2.0
// SPDX-FileCopyrightText: 2024 Drew DeVault <drew@ddevault.org>
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>

use bytes;
use hash;
use hash::siphash;

// Sets an item in a [[map]], replacing any existing item with the same key.
// Returns nomem if memory allocation fails.
//
// The key slice is stored as given, not copied, so its backing memory must
// remain valid for as long as the entry is in the map.
export fn set(m: *map, key: []u8, value: *opaque) (void | nomem) = {
	let hasher = siphash::siphash(2, 4, &m.siphash_key);
	defer hash::close(&hasher);
	hash::write(&hasher, key);
	const index = siphash::sum(&hasher): size % m.n;
	let bucket = &m.buckets[index];

	// Overwrite in place when the key is already present.
	for (let i = 0z; i < len(bucket); i += 1) {
		if (!bytes::equal(bucket[i].0, key)) {
			continue;
		};
		bucket[i].1 = value;
		return;
	};

	return append(bucket, (key, value));
};
use errors;
use strings;
use ds::map;

// Drives a SipHash map through the generic ds::map interface: insert, look up,
// replace, look up a missing key, delete, and delete again.
@test fn roundtrip() void = {
	// An all-zero SipHash key.
	const key: [16]u8 = [0...];
	let m: *map = match (new(16, key)) {
	case nomem =>
		abort("unexpected nomem");
	case errors::invalid =>
		abort("unexpected errors::invalid");
	case let created: *map =>
		yield created;
	};
	defer finish(m);

	let k1 = strings::toutf8("alpha");
	let k2 = strings::toutf8("beta");
	let k3 = strings::toutf8("gamma");

	// The addresses of three distinct locals serve as the stored values.
	let v1 = 1, v2 = 2, v3 = 3;
	let p1: *opaque = (&v1: *opaque);
	let p2: *opaque = (&v2: *opaque);
	let p3: *opaque = (&v3: *opaque);

	// Insert, then read back.
	match (map::set(m, k1, p1)) {
	case nomem =>
		abort("unexpected nomem in set(k1,p1)");
	case void =>
		yield;
	};
	match (map::get(m, k1)) {
	case void =>
		abort("get(k1) unexpectedly void");
	case let found: *opaque =>
		assert(found == p1, "get(k1) must return p1");
	};

	// Setting the same key again replaces the value.
	match (map::set(m, k1, p2)) {
	case nomem =>
		abort("unexpected nomem in replace");
	case void =>
		yield;
	};
	match (map::get(m, k1)) {
	case void =>
		abort("get(k1) void after replace");
	case let found: *opaque =>
		assert(found == p2, "replace must overwrite prior value");
	};

	// A second key; a third key that was never inserted stays absent.
	match (map::set(m, k2, p3)) {
	case nomem =>
		abort("unexpected nomem in set(k2,p3)");
	case void =>
		yield;
	};
	match (map::get(m, k3)) {
	case *opaque =>
		abort("get(k3) must be void for missing key");
	case void =>
		yield;
	};

	// Deleting returns the stored value once, then void.
	match (map::del(m, k2)) {
	case void =>
		abort("del(k2) unexpectedly void");
	case let removed: *opaque =>
		assert(removed == p3, "del(k2) must return stored value");
	};
	match (map::del(m, k2)) {
	case *opaque =>
		abort("del(k2) must be void after prior delete");
	case void =>
		yield;
	};
};