From 5a1db0bed83a24d46f579cee8d8934113614013d Mon Sep 17 00:00:00 2001 From: Runxi Yu Date: Tue, 16 Sep 2025 16:56:02 +0800 Subject: [PATCH] Add maps via FNV, SipHash, and B-tree --- ds/README | 4 ++++ ds/map/README | 4 ++++ ds/map/map.ha | 47 +++++++++++++++++++++++++++++++++++++++++++++++ ds/map/map_btree/README | 1 + ds/map/map_btree/del.ha | 14 ++++++++++++++ ds/map/map_btree/finish.ha | 19 +++++++++++++++++++ ds/map/map_btree/get.ha | 19 +++++++++++++++++++ ds/map/map_btree/internal.ha | 236 +++++++++++++++++++++++++++++++++++++++++++++++++++++ ds/map/map_btree/map.ha | 25 +++++++++++++++++++++++++ ds/map/map_btree/new.ha | 19 +++++++++++++++++++ ds/map/map_btree/node.ha | 7 +++++++ ds/map/map_btree/set.ha | 15 +++++++++++++++ ds/map/map_btree/test.ha | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++ ds/map/map_fnv/README | 7 +++++++ ds/map/map_fnv/del.ha | 22 ++++++++++++++++++++++ ds/map/map_fnv/finish.ha | 13 +++++++++++++ ds/map/map_fnv/get.ha | 19 +++++++++++++++++++ ds/map/map_fnv/map.ha | 27 +++++++++++++++++++++++++++ ds/map/map_fnv/new.ha | 26 ++++++++++++++++++++++++++ ds/map/map_fnv/set.ha | 21 +++++++++++++++++++++ ds/map/map_fnv/test.ha | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++ ds/map/map_siphash/README | 5 +++++ ds/map/map_siphash/del.ha | 23 +++++++++++++++++++++++ ds/map/map_siphash/finish.ha | 13 +++++++++++++ ds/map/map_siphash/get.ha | 20 ++++++++++++++++++++ ds/map/map_siphash/map.ha | 28 ++++++++++++++++++++++++++++ ds/map/map_siphash/new.ha | 27 +++++++++++++++++++++++++++ ds/map/map_siphash/set.ha | 22 ++++++++++++++++++++++ ds/map/map_siphash/test.ha | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++ diff --git a/ds/README b/ds/README new file mode 100644 index 0000000000000000000000000000000000000000..81591ed57b9a78c59cafc58d7944790ed0d51a7b --- /dev/null +++ b/ds/README @@ -0,0 +1,4 @@ +ds: data structures + +The "ds" module provides a collection of data structures implemented in +pure Hare. 
diff --git a/ds/map/README b/ds/map/README
new file mode 100644
index 0000000000000000000000000000000000000000..bd63a93daa4f04f6bbe8043cd069f308a4a34bdb
--- /dev/null
+++ b/ds/map/README
@@ -0,0 +1,4 @@
+map: general-purpose map data structures
+
+You should create a map with the `new` function in each submodule.
+Then, you may use functions defined in this module to manipulate the map.
diff --git a/ds/map/map.ha b/ds/map/map.ha
new file mode 100644
index 0000000000000000000000000000000000000000..8b6fc22b95bc894131617be51805a0651900c759
--- /dev/null
+++ b/ds/map/map.ha
@@ -0,0 +1,47 @@
+// A map is a pointer to a [[vtable]] which allows for map types to implement
+// common operations.
+export type map = *vtable;
+
+// The vtable type defines a set of virtual functions for a [[map]].
+export type vtable = struct {
+	getter: *getter,
+	setter: *setter,
+	deleter: *deleter,
+	finisher: *finisher,
+};
+
+// The interface for a map which could be used to get values. Returns either a
+// pointer to the value, or void if the key does not exist.
+export type getter = fn(m: *map, key: []u8) (*opaque | void);
+
+// Gets an item from a [[map]]. Returns a pointer to the value or void.
+export fn get(m: *map, key: []u8) (*opaque | void) = {
+	return m.getter(m, key);
+};
+
+// The interface for a map which could be used to set values. Returns void on
+// success, or nomem if memory allocation failed. If the value already exists,
+// it is replaced.
+export type setter = fn(m: *map, key: []u8, value: *opaque) (void | nomem);
+
+// Sets an item in a [[map]], replacing any existing item with the same key.
+export fn set(m: *map, key: []u8, value: *opaque) (void | nomem) = {
+	return m.setter(m, key, value);
+};
+
+// The interface for a map which could be used to delete values. Returns a
+// pointer to the deleted value, or void if the key does not exist.
+export type deleter = fn(m: *map, key: []u8) (*opaque | void);
+
+// Deletes an item from a [[map]]. Returns the removed value or void.
+export fn del(m: *map, key: []u8) (*opaque | void) = {
+	return m.deleter(m, key);
+};
+
+// The interface for a map which requires a finisher function to free it.
+export type finisher = fn(m: *map) void;
+
+// Frees the map and all of its resources. Do not use the map after calling.
+export fn finish(m: *map) void = {
+	m.finisher(m);
+};
diff --git a/ds/map/map_btree/README b/ds/map/map_btree/README
new file mode 100644
index 0000000000000000000000000000000000000000..fd40e753e4485fef0f4f236732133f601224a9f6
--- /dev/null
+++ b/ds/map/map_btree/README
@@ -0,0 +1 @@
+map_btree: key-value map implemented with a b-tree
diff --git a/ds/map/map_btree/del.ha b/ds/map/map_btree/del.ha
new file mode 100644
index 0000000000000000000000000000000000000000..edc6a8e7d22da33512f58f68ab146768fc802f64
--- /dev/null
+++ b/ds/map/map_btree/del.ha
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: MPL-2.0
+
+use bytes;
+use sort;
+
+// Deletes an item from a [[map]]. Returns the removed value, or void if the
+// key is not present.
+export fn del(m: *map, key: []u8) (*opaque | void) = {
+	const r = delete_rec(m, m.root, key);
+	// Deleting may merge the root's last two children, leaving an internal
+	// root with no keys and a single child. Promote that child and release
+	// the old root's own storage (not the child, which is still in use).
+	if (len(m.root.keys) == 0 && !m.root.leaf) {
+		const old = m.root;
+		m.root = old.children[0];
+		free(old.keys);
+		free(old.vals);
+		free(old.children);
+		free(old);
+	};
+	return r;
+};
+
diff --git a/ds/map/map_btree/finish.ha b/ds/map/map_btree/finish.ha
new file mode 100644
index 0000000000000000000000000000000000000000..e8a6f231a1e25d99ad642bbd01848ca438a454f5
--- /dev/null
+++ b/ds/map/map_btree/finish.ha
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: MPL-2.0
+
+// Frees resources associated with a [[map]].
+export fn finish(m: *map) void = {
+	node_finish(m.root);
+	free(m);
+};
+
+fn node_finish(n: *node) void = {
+	if (!n.leaf) {
+		for (let i = 0z; i < len(n.children); i += 1) {
+			node_finish(n.children[i]);
+		};
+		free(n.children);
+	};
+	free(n.keys);
+	free(n.vals);
+	free(n);
+};
diff --git a/ds/map/map_btree/get.ha b/ds/map/map_btree/get.ha
new file mode 100644
index 0000000000000000000000000000000000000000..a5b8a9728877451c9013523357c9e65730d785a6
--- /dev/null
+++ b/ds/map/map_btree/get.ha
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: MPL-2.0
+
+use bytes;
+use sort;
+
+// Gets an item from a [[map]] by key, returning void if not found.
+export fn get(m: *map, key: []u8) (*opaque | void) = {
+	let x = m.root;
+	for (true) {
+		let i = sort::lbisect((x.keys: []const opaque), size([]u8), (&key: const *opaque), &cmp_u8slice);
+		if (i < len(x.keys) && bytes::equal(x.keys[i], key)) {
+			return x.vals[i];
+		};
+		if (x.leaf) {
+			return;
+		};
+		x = x.children[i];
+	};
+};
diff --git a/ds/map/map_btree/internal.ha b/ds/map/map_btree/internal.ha
new file mode 100644
index 0000000000000000000000000000000000000000..f16cbd67dc5dde90ef5986204157120af2aed0c5
--- /dev/null
+++ b/ds/map/map_btree/internal.ha
@@ -0,0 +1,293 @@
+// SPDX-License-Identifier: MPL-2.0
+
+use bytes;
+use sort;
+
+// Three-way lexicographic comparison of two byte strings: the first differing
+// byte decides, and a string orders before any longer string it prefixes.
+fn keycmp(a: []u8, b: []u8) int = {
+	let n = if (len(a) < len(b)) len(a) else len(b);
+	for (let i = 0z; i < n; i += 1) {
+		if (a[i] < b[i]) return -1;
+		if (a[i] > b[i]) return 1;
+	};
+	if (len(a) < len(b)) return -1;
+	if (len(a) > len(b)) return 1;
+	return 0;
+};
+
+// Comparison callback for [[sort::lbisect]]: both arguments are treated as
+// pointers to []u8 keys and compared with [[keycmp]].
+fn cmp_u8slice(a: const *opaque, b: const *opaque) int = {
+	let sa = *(a: *[]u8);
+	let sb = *(b: *[]u8);
+	return keycmp(sa, sb);
+};
+
+// Allocates an empty node with capacity for 2t - 1 keys and values and, for
+// internal nodes, 2t children. If a later allocation fails, the storage
+// allocated so far is freed before nomem is returned.
+fn node_new(t: size, leaf: bool) (*node | nomem) = {
+	let capk = 2 * t - 1;
+	let capc = if (leaf) 0z else 2z * t;
+
+	let empty_keys: [][]u8 = [];
+	let keys = alloc(empty_keys, capk)?;
+
+	let empty_vals: []*opaque = [];
+	let vals = match (alloc(empty_vals, capk)) {
+	case let v: []*opaque =>
+		yield v;
+	case nomem =>
+		free(keys);
+		return nomem;
+	};
+
+	let children: []*node = [];
+	if (!leaf) {
+		let empty_children: []*node = [];
+		children = match (alloc(empty_children, capc)) {
+		case let c: []*node =>
+			yield c;
+		case nomem =>
+			free(keys);
+			free(vals);
+			return nomem;
+		};
+	};
+
+	match (alloc(node {
+		leaf = leaf,
+		keys = keys,
+		vals = vals,
+		children = children,
+	})) {
+	case let nd: *node =>
+		return nd;
+	case nomem =>
+		free(keys);
+		free(vals);
+		free(children);
+		return nomem;
+	};
+};
+
+// Splits the full child x.children[i] around its median key: the keys, values
+// and children above the median move to a new right sibling, and the median
+// entry is inserted into x at index i.
+fn split_child(m: *map, x: *node, i: size) (void | nomem) = {
+	const t = m.t;
+	let y = x.children[i];
+	let z = node_new(t, y.leaf)?;
+
+	let medk = y.keys[t - 1];
+	let medv = y.vals[t - 1];
+
+	append(z.keys, y.keys[t..]...)?;
+	append(z.vals, y.vals[t..]...)?;
+	if (!y.leaf) {
+		append(z.children, y.children[t..]...)?;
+	};
+
+	y.keys = y.keys[..t - 1];
+	y.vals = y.vals[..t - 1];
+	if (!y.leaf) {
+		y.children = y.children[..t];
+	};
+
+	insert(x.keys[i], medk)?;
+	insert(x.vals[i], medv)?;
+	insert(x.children[i + 1], z)?;
+};
+
+// Inserts key/val into the subtree rooted at x, replacing the value if the key
+// already exists. A full child is split before descending into it.
+fn insert_nonfull(m: *map, x: *node, key: []u8, val: *opaque) (void | nomem) = {
+	let i = sort::lbisect((x.keys: []const opaque), size([]u8),
+		(&key: const *opaque), &cmp_u8slice);
+
+	if (i < len(x.keys) && bytes::equal(x.keys[i], key)) {
+		x.vals[i] = val;
+		return;
+	};
+
+	if (x.leaf) {
+		insert(x.keys[i], key)?;
+		insert(x.vals[i], val)?;
+		return;
+	};
+
+	if (len(x.children[i].keys) == 2 * m.t - 1) {
+		split_child(m, x, i)?;
+		if (cmp_u8slice((&key: const *opaque),
+				(&x.keys[i]: const *opaque)) > 0) {
+			insert_nonfull(m, x.children[i + 1], key, val)?;
+			return;
+		};
+	};
+	insert_nonfull(m, x.children[i], key, val)?;
+};
+
+// Merges x.children[i], the separator entry x.keys[i] and x.children[i + 1]
+// into the left child, removes the separator and the right child from x, and
+// frees the right node's own storage.
+fn merge_children(m: *map, x: *node, i: size) void = {
+	let left = x.children[i];
+	let right = x.children[i + 1];
+
+	insert(left.keys[len(left.keys)], x.keys[i])!;
+	insert(left.vals[len(left.vals)], x.vals[i])!;
+
+	append(left.keys, right.keys...)!;
+	append(left.vals, right.vals...)!;
+	if (!left.leaf) {
+		append(left.children, right.children...)!;
+	};
+
+	delete(x.keys[i]);
+	delete(x.vals[i]);
+	delete(x.children[i + 1]);
+
+	// Everything the right node held now lives in left; release only its
+	// own storage, not its former children, which left has adopted.
+	if (!right.leaf) {
+		free(right.children);
+	};
+	free(right.keys);
+	free(right.vals);
+	free(right);
+};
+
+// Makes sure x.children[i] holds at least t keys before a deletion descends
+// into it, by borrowing an entry from a sibling or merging with one. When the
+// last child is merged into its left sibling, x ends up with one child fewer
+// and the merged node sits at index i - 1; callers must account for that.
+fn ensure_child_has_space(m: *map, x: *node, i: size) void = {
+	const t = m.t;
+	let c = x.children[i];
+
+	if (len(c.keys) >= t) return;
+
+	if (i > 0 && len(x.children[i - 1].keys) >= t) {
+		let ls = x.children[i - 1];
+
+		insert(c.keys[0], x.keys[i - 1])!;
+		insert(c.vals[0], x.vals[i - 1])!;
+
+		if (!c.leaf) {
+			let moved = ls.children[len(ls.children) - 1];
+			insert(c.children[0], moved)!;
+			delete(ls.children[len(ls.children) - 1]);
+		};
+
+		x.keys[i - 1] = ls.keys[len(ls.keys) - 1];
+		x.vals[i - 1] = ls.vals[len(ls.vals) - 1];
+		delete(ls.keys[len(ls.keys) - 1]);
+		delete(ls.vals[len(ls.vals) - 1]);
+		return;
+	};
+
+	if (i + 1 < len(x.children) && len(x.children[i + 1].keys) >= t) {
+		let rs = x.children[i + 1];
+
+		insert(c.keys[len(c.keys)], x.keys[i])!;
+		insert(c.vals[len(c.vals)], x.vals[i])!;
+
+		if (!c.leaf) {
+			let moved = rs.children[0];
+			insert(c.children[len(c.children)], moved)!;
+			delete(rs.children[0]);
+		};
+
+		x.keys[i] = rs.keys[0];
+		x.vals[i] = rs.vals[0];
+		delete(rs.keys[0]);
+		delete(rs.vals[0]);
+		return;
+	};
+
+	if (i + 1 < len(x.children)) {
+		merge_children(m, x, i);
+	} else {
+		merge_children(m, x, i - 1);
+	};
+};
+
+// Removes and returns the largest entry of the subtree rooted at x.
+fn pop_max(m: *map, x: *node) ([]u8, *opaque) = {
+	let cur = x;
+	for (!cur.leaf) {
+		ensure_child_has_space(m, cur, len(cur.children) - 1);
+		// Rebalancing may have merged away one child, so look the
+		// last index up again instead of reusing the old one.
+		cur = cur.children[len(cur.children) - 1];
+	};
+	let k = cur.keys[len(cur.keys) - 1];
+	let v = cur.vals[len(cur.vals) - 1];
+	delete(cur.keys[len(cur.keys) - 1]);
+	delete(cur.vals[len(cur.vals) - 1]);
+	return (k, v);
+};
+
+// Removes and returns the smallest entry of the subtree rooted at x.
+fn pop_min(m: *map, x: *node) ([]u8, *opaque) = {
+	let cur = x;
+	for (!cur.leaf) {
+		ensure_child_has_space(m, cur, 0);
+		cur = cur.children[0];
+	};
+	let k = cur.keys[0];
+	let v = cur.vals[0];
+	delete(cur.keys[0]);
+	delete(cur.vals[0]);
+	return (k, v);
+};
+
+// Removes key from the subtree rooted at x and returns its value, or void if
+// the key is not present.
+fn delete_rec(m: *map, x: *node, key: []u8) (*opaque | void) = {
+	let i = sort::lbisect((x.keys: []const opaque), size([]u8),
+		(&key: const *opaque), &cmp_u8slice);
+
+	if (i < len(x.keys) && bytes::equal(x.keys[i], key)) {
+		if (x.leaf) {
+			let ret = x.vals[i];
+			delete(x.keys[i]);
+			delete(x.vals[i]);
+			return ret;
+		};
+
+		const t = m.t;
+		let y = x.children[i];
+		let z = x.children[i + 1];
+
+		if (len(y.keys) >= t) {
+			let (pk, pv) = pop_max(m, y);
+			let ret = x.vals[i];
+			x.keys[i] = pk;
+			x.vals[i] = pv;
+			return ret;
+		} else if (len(z.keys) >= t) {
+			let (sk, sv) = pop_min(m, z);
+			let ret = x.vals[i];
+			x.keys[i] = sk;
+			x.vals[i] = sv;
+			return ret;
+		} else {
+			merge_children(m, x, i);
+			return delete_rec(m, y, key);
+		};
+	};
+
+	if (x.leaf) {
+		return;
+	};
+
+	ensure_child_has_space(m, x, i);
+	// If the last child was merged into its left sibling, x has one child
+	// fewer and the node to descend into now sits at i - 1.
+	if (i >= len(x.children)) {
+		i -= 1;
+	};
+	return delete_rec(m, x.children[i], key);
+};
diff --git a/ds/map/map_btree/map.ha b/ds/map/map_btree/map.ha
new file mode 100644
index 0000000000000000000000000000000000000000..f838b77d77b164b615d0da2d552c4e29a76948d7
--- /dev/null
+++ b/ds/map/map_btree/map.ha
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: MPL-2.0
+
+use ds::map;
+
+// B-tree-based map from []u8 to *opaque.
+//
+// You are advised to create these with [[new]].
+export type map = struct {
+	vt: map::map,
+	// Min degree
+	t: size,
+	root: *node,
+};
+
+const _vt: map::vtable = map::vtable {
+	getter = &vt_get,
+	setter = &vt_set,
+	deleter = &vt_del,
+	finisher = &vt_finish,
+};
+
+fn vt_get(m: *map::map, key: []u8) (*opaque | void) = get(m: *map, key);
+fn vt_set(m: *map::map, key: []u8, v: *opaque) (void | nomem) = set(m: *map, key, v);
+fn vt_del(m: *map::map, key: []u8) (*opaque | void) = del(m: *map, key);
+fn vt_finish(m: *map::map) void = finish(m: *map);
diff --git a/ds/map/map_btree/new.ha b/ds/map/map_btree/new.ha
new file mode 100644
index 0000000000000000000000000000000000000000..4b015025eff4ad1b84e9601288ee17a70fc0c670
--- /dev/null
+++ b/ds/map/map_btree/new.ha
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: MPL-2.0
+
+use errors;
+
+// Creates a new [[map]] with minimum degree t.
+//
+// t must be greater than or equal to 2.
+export fn new(t: size) (*map | errors::invalid | nomem) = {
+	if (t < 2) return errors::invalid;
+	const r = node_new(t, true)?;
+	// Do not leak the root node if the map itself cannot be allocated.
+	match (alloc(map { vt = &_vt, t = t, root = r })) {
+	case let m: *map =>
+		return m;
+	case nomem =>
+		node_finish(r);
+		return nomem;
+	};
+};
diff --git a/ds/map/map_btree/node.ha b/ds/map/map_btree/node.ha
new file mode 100644
index 0000000000000000000000000000000000000000..a3511c500326e38233f0db0190e50625a5f01464
--- /dev/null
+++ b/ds/map/map_btree/node.ha
@@ -0,0 +1,7 @@
+export type node = struct {
+	leaf: bool,
+	keys: [][]u8,
+	vals: []*opaque,
+	children: []*node,
+};
+
diff --git a/ds/map/map_btree/set.ha b/ds/map/map_btree/set.ha
new file mode 100644
index 0000000000000000000000000000000000000000..4178a437b2dd3bf289578b6e17c543c9d8b5fdc3
--- /dev/null
+++ b/ds/map/map_btree/set.ha
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: MPL-2.0
+
+use bytes;
+use sort;
+
+// Sets an item in a [[map]], replacing any existing item with the same key.
+export fn set(m: *map, key: []u8, value: *opaque) (void | nomem) = {
+	if (len(m.root.keys) == 2 * m.t - 1) {
+		let s = node_new(m.t, false)?;
+		append(s.children, m.root)?;
+		m.root = s;
+		split_child(m, s, 0)?;
+	};
+	insert_nonfull(m, m.root, key, value)?;
+};
diff --git a/ds/map/map_btree/test.ha b/ds/map/map_btree/test.ha
new file mode 100644
index 0000000000000000000000000000000000000000..386320d0821abe9c110aa5640ba4466b442f010a
--- /dev/null
+++ b/ds/map/map_btree/test.ha
@@ -0,0 +1,67 @@
+use errors;
+use strings;
+use ds::map;
+
+@test fn roundtrip() void = {
+	let m: *map = match (new(2)) {
+	case let p: *map => yield p;
+	case errors::invalid => abort("unexpected errors::invalid");
+	case nomem => abort("unexpected nomem");
+	};
+	defer finish(m);
+
+	let v1 = 1, v2 = 2, v3 = 3;
+	let p1: *opaque = (&v1: *opaque);
+	let p2: *opaque = (&v2: *opaque);
+	let p3: *opaque = (&v3: *opaque);
+
+	let k1 = strings::toutf8("alpha");
+	let k2 = strings::toutf8("beta");
+	let k3 = strings::toutf8("gamma");
+
+	match (map::set(m, k1, p1)) {
+	case void => yield;
+	case nomem => abort("unexpected nomem in set(k1,p1)");
+	};
+
+	match (map::get(m, k1)) {
+	case let got: *opaque =>
+		assert(got == p1, "get(k1) must return p1");
+	case void =>
+		abort("get(k1) unexpectedly void");
+	};
+
+	match (map::set(m, k1, p2)) {
+	case void => yield;
+	case nomem => abort("unexpected nomem in replace");
+	};
+	match (map::get(m, k1)) {
+	case let got: *opaque =>
+		assert(got == p2, "replace must overwrite prior value");
+	case void =>
+		abort("get(k1) void after replace");
+	};
+
+	match (map::set(m, k2, p3)) {
+	case void => yield;
+	case nomem => abort("unexpected nomem in set(k2,p3)");
+	};
+
+	match (map::get(m, k3)) {
+	case void => yield;
+	case *opaque =>
+		abort("get(k3) must be void for missing key");
+	};
+
+	match (map::del(m, k2)) {
+	case let got: *opaque =>
+		assert(got == p3, "del(k2) must return stored value");
+	case void =>
+		abort("del(k2) unexpectedly void");
+	};
+	match (map::del(m, k2)) {
+	case void => yield;
+	case *opaque =>
+		abort("del(k2) must be void after prior delete");
+	};
+};
diff --git a/ds/map/map_fnv/README b/ds/map/map_fnv/README
new file mode 100644
index 0000000000000000000000000000000000000000..4105f9f80eb29babe876f7c26a0320ea46a4eac0
--- /dev/null
+++ b/ds/map/map_fnv/README
@@ -0,0 +1,7 @@
+map_fnv: key-value map implemented as a Fowler-Noll-Vo hashmap
+
+This module provides a simple implementation of a hashmap using the
+Fowler-Noll-Vo (FNV) hashing algorithm.
+
+FNV is not collision-resistant, so it should only be used for trusted keys
+(i.e., not user input that could be deliberately chosen to cause collisions).
diff --git a/ds/map/map_fnv/del.ha b/ds/map/map_fnv/del.ha
new file mode 100644
index 0000000000000000000000000000000000000000..de03d94d0a54b35dd25e8686b7b12dcf4780971c
--- /dev/null
+++ b/ds/map/map_fnv/del.ha
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: MPL-2.0
+// SPDX-FileCopyrightText: 2024 Drew DeVault
+// SPDX-FileCopyrightText: 2025 Runxi Yu
+
+use bytes;
+use hash;
+use hash::fnv;
+
+// Removes the entry stored under key from a [[map]] and returns its value;
+// returns void when the key is not present.
+export fn del(m: *map, key: []u8) (*opaque | void) = {
+	let hasher = fnv::fnv64a();
+	hash::write(&hasher, key);
+	const slot = fnv::sum64(&hasher): size % m.n;
+	let chain = &m.buckets[slot];
+	for (let pos = 0z; pos < len(chain); pos += 1) {
+		if (!bytes::equal(chain[pos].0, key)) {
+			continue;
+		};
+		const removed = chain[pos].1;
+		delete(chain[pos]);
+		return removed;
+	};
+};
+
diff --git a/ds/map/map_fnv/finish.ha b/ds/map/map_fnv/finish.ha
new file mode 100644
index 0000000000000000000000000000000000000000..7573e1ffce717596a14e6514b6b80339a2c98a8d
--- /dev/null
+++ b/ds/map/map_fnv/finish.ha
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: MPL-2.0
+// SPDX-FileCopyrightText: 2024 Drew DeVault
+// SPDX-FileCopyrightText: 2025 Runxi Yu
+
+// Releases every bucket, the bucket array and the [[map]] itself. The keys
+// and values stored in the map are not freed.
+export fn finish(m: *map) void = {
+	let idx = 0z;
+	for (idx < m.n) {
+		free(m.buckets[idx]);
+		idx += 1;
+	};
+	free(m.buckets);
+	free(m);
+};
+
diff --git a/ds/map/map_fnv/get.ha b/ds/map/map_fnv/get.ha
new file mode 100644
index 0000000000000000000000000000000000000000..b935ea1704e10fabe147aa6f7e9be661da8e60f2
--- /dev/null
+++ b/ds/map/map_fnv/get.ha
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: MPL-2.0
+// SPDX-FileCopyrightText: 2024 Drew DeVault
+// SPDX-FileCopyrightText: 2025 Runxi Yu
+
+use bytes;
+use hash;
+use hash::fnv;
+
+// Gets an item from a [[map]] by key, returning void if not found.
+export fn get(m: *map, key: []u8) (*opaque | void) = {
+	let hash = fnv::fnv64a();
+	hash::write(&hash, key);
+	let bucket = &m.buckets[fnv::sum64(&hash): size % m.n];
+	for (let i = 0z; i < len(bucket); i += 1) {
+		if (bytes::equal(bucket[i].0, key)) {
+			return bucket[i].1;
+		};
+	};
+};
diff --git a/ds/map/map_fnv/map.ha b/ds/map/map_fnv/map.ha
new file mode 100644
index 0000000000000000000000000000000000000000..1089429ecd04293998406a3b36f09f52890674b5
--- /dev/null
+++ b/ds/map/map_fnv/map.ha
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: MPL-2.0
+// SPDX-FileCopyrightText: 2024 Drew DeVault
+// SPDX-FileCopyrightText: 2025 Runxi Yu
+
+use ds::map;
+
+// A simple hash map from byte strings to opaque pointers, using 64-bit FNV-1a
+// for hashing and a per-bucket slice of entries to resolve collisions.
+//
+// You are advised to create these with [[new]].
+export type map = struct {
+	vt: map::map,
+	n: size,
+	buckets: [][]([]u8, *opaque),
+};
+
+const _vt: map::vtable = map::vtable {
+	getter = &vt_get,
+	setter = &vt_set,
+	deleter = &vt_del,
+	finisher = &vt_finish,
+};
+
+fn vt_get(m: *map::map, key: []u8) (*opaque | void) = get(m: *map, key);
+fn vt_set(m: *map::map, key: []u8, v: *opaque) (void | nomem) = set(m: *map, key, v);
+fn vt_del(m: *map::map, key: []u8) (*opaque | void) = del(m: *map, key);
+fn vt_finish(m: *map::map) void = finish(m: *map);
diff --git a/ds/map/map_fnv/new.ha b/ds/map/map_fnv/new.ha
new file mode 100644
index 0000000000000000000000000000000000000000..695489ae0052cdddced9b19ece78753d661aab15
--- /dev/null
+++ b/ds/map/map_fnv/new.ha
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: MPL-2.0
+// SPDX-FileCopyrightText: 2024 Drew DeVault
+// SPDX-FileCopyrightText: 2025 Runxi Yu
+
+use bytes;
+use errors;
+use hash;
+use hash::fnv;
+
+// Creates a new [[map]] with the given number of buckets.
+export fn new(n: size) (*map | errors::invalid | nomem) = {
+	if (n == 0) {
+		return errors::invalid;
+	};
+
+	let empty_bucket: []([]u8, *opaque) = [];
+	let buckets: [][]([]u8, *opaque) = alloc([empty_bucket...], n)?;
+
+	// Give the bucket array back if the map itself cannot be allocated.
+	match (alloc(map { vt = &_vt, n = n, buckets = buckets })) {
+	case let m: *map => return m;
+	case nomem =>
+		free(buckets);
+		return nomem;
+	};
+};
diff --git a/ds/map/map_fnv/set.ha b/ds/map/map_fnv/set.ha
new file mode 100644
index 0000000000000000000000000000000000000000..e03337ae9de7fabf1165cf5b09875a5d1109d3d3
--- /dev/null
+++ b/ds/map/map_fnv/set.ha
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: MPL-2.0
+// SPDX-FileCopyrightText: 2024 Drew DeVault
+// SPDX-FileCopyrightText: 2025 Runxi Yu
+
+use bytes;
+use hash;
+use hash::fnv;
+
+// Sets an item in a [[map]], replacing any existing item with the same key.
+export fn set(m: *map, key: []u8, value: *opaque) (void | nomem) = {
+	let hash = fnv::fnv64a();
+	hash::write(&hash, key);
+	let bucket = &m.buckets[fnv::sum64(&hash): size % m.n];
+	for (let i = 0z; i < len(bucket); i += 1) {
+		if (bytes::equal(bucket[i].0, key)) {
+			bucket[i].1 = value;
+			return;
+		};
+	};
+	append(bucket, (key, value))?;
+};
diff --git a/ds/map/map_fnv/test.ha b/ds/map/map_fnv/test.ha
new file mode 100644
index 0000000000000000000000000000000000000000..3676ea7597df9a105e806f8cc03f81d6ac07278d
--- /dev/null
+++ b/ds/map/map_fnv/test.ha
@@ -0,0 +1,67 @@
+use errors;
+use strings;
+use ds::map;
+
+@test fn roundtrip() void = {
+	let m: *map = match (new(16)) {
+	case let p: *map => yield p;
+	case errors::invalid => abort("unexpected errors::invalid");
+	case nomem => abort("unexpected nomem");
+	};
+	defer finish(m);
+
+	let v1 = 1, v2 = 2, v3 = 3;
+	let p1: *opaque = (&v1: *opaque);
+	let p2: *opaque = (&v2: *opaque);
+	let p3: *opaque = (&v3: *opaque);
+
+	let k1 = strings::toutf8("alpha");
+	let k2 = strings::toutf8("beta");
+	let k3 = strings::toutf8("gamma");
+
+	match (map::set(m, k1, p1)) {
+	case void => yield;
+	case nomem => abort("unexpected nomem in set(k1,p1)");
+	};
+
+	match (map::get(m, k1)) {
+	case let got: *opaque =>
+		assert(got == p1, "get(k1) must return p1");
+	case void =>
+		abort("get(k1) unexpectedly void");
+	};
+
+	match (map::set(m, k1, p2)) {
+	case void => yield;
+	case nomem => abort("unexpected nomem in replace");
+	};
+	match (map::get(m, k1)) {
+	case let got: *opaque =>
+		assert(got == p2, "replace must overwrite prior value");
+	case void =>
+		abort("get(k1) void after replace");
+	};
+
+	match (map::set(m, k2, p3)) {
+	case void => yield;
+	case nomem => abort("unexpected nomem in set(k2,p3)");
+	};
+
+	match (map::get(m, k3)) {
+	case void => yield;
+	case *opaque =>
+		abort("get(k3) must be void for missing key");
+	};
+
+	match (map::del(m, k2)) {
+	case let got: *opaque =>
+		assert(got == p3, "del(k2) must return stored value");
+	case void =>
+		abort("del(k2) unexpectedly void");
+	};
+	match (map::del(m, k2)) {
+	case void => yield;
+	case *opaque =>
+		abort("del(k2) must be void after prior delete");
+	};
+};
diff --git a/ds/map/map_siphash/README b/ds/map/map_siphash/README
new file mode 100644
index 0000000000000000000000000000000000000000..f9f6fb5f561de5defd1c90b006483d3ebd5a72b2
--- /dev/null
+++ b/ds/map/map_siphash/README
@@ -0,0 +1,5 @@
+map_siphash: key-value map implemented as a SipHash hashmap
+
+This module provides a simple implementation of a hashmap using the SipHash
+hashing algorithm for collision-resistant mapping. It is designed for
+situations where keys may be untrusted.
diff --git a/ds/map/map_siphash/del.ha b/ds/map/map_siphash/del.ha
new file mode 100644
index 0000000000000000000000000000000000000000..863e9020ceb9d7559d28218f7c26b3db1e8b9f57
--- /dev/null
+++ b/ds/map/map_siphash/del.ha
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: MPL-2.0
+// SPDX-FileCopyrightText: 2024 Drew DeVault
+// SPDX-FileCopyrightText: 2025 Runxi Yu
+
+use bytes;
+use hash;
+use hash::siphash;
+
+// Deletes an item from a [[map]].
+export fn del(m: *map, key: []u8) (*opaque | void) = {
+	let hasher = siphash::siphash(2, 4, &m.siphash_key);
+	defer hash::close(&hasher);
+	hash::write(&hasher, key);
+	const slot = siphash::sum(&hasher): size % m.n;
+	let chain = &m.buckets[slot];
+	for (let pos = 0z; pos < len(chain); pos += 1) {
+		if (!bytes::equal(chain[pos].0, key)) continue;
+		const removed = chain[pos].1;
+		delete(chain[pos]);
+		return removed;
+	};
+};
+
diff --git a/ds/map/map_siphash/finish.ha b/ds/map/map_siphash/finish.ha
new file mode 100644
index 0000000000000000000000000000000000000000..7573e1ffce717596a14e6514b6b80339a2c98a8d
--- /dev/null
+++ b/ds/map/map_siphash/finish.ha
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: MPL-2.0
+// SPDX-FileCopyrightText: 2024 Drew DeVault
+// SPDX-FileCopyrightText: 2025 Runxi Yu
+
+// Releases every bucket, the bucket array and the [[map]] itself. The keys
+// and values stored in the map are not freed.
+export fn finish(m: *map) void = {
+	let idx = 0z;
+	for (idx < m.n) {
+		free(m.buckets[idx]);
+		idx += 1;
+	};
+	free(m.buckets);
+	free(m);
+};
+
diff --git a/ds/map/map_siphash/get.ha b/ds/map/map_siphash/get.ha
new file mode 100644
index 0000000000000000000000000000000000000000..ecedcb04e8e88d5e61f8ea6741000ae0ba50897d
--- /dev/null
+++ b/ds/map/map_siphash/get.ha
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: MPL-2.0
+// SPDX-FileCopyrightText: 2024 Drew DeVault
+// SPDX-FileCopyrightText: 2025 Runxi Yu
+
+use bytes;
+use hash;
+use hash::siphash;
+
+// Gets an item from a [[map]] by key, returning void if not found.
+export fn get(m: *map, key: []u8) (*opaque | void) = {
+	let hash = siphash::siphash(2, 4, &m.siphash_key);
+	defer hash::close(&hash);
+	hash::write(&hash, key);
+	let bucket = &m.buckets[siphash::sum(&hash): size % m.n];
+	for (let i = 0z; i < len(bucket); i += 1) {
+		if (bytes::equal(bucket[i].0, key)) {
+			return bucket[i].1;
+		};
+	};
+};
diff --git a/ds/map/map_siphash/map.ha b/ds/map/map_siphash/map.ha
new file mode 100644
index 0000000000000000000000000000000000000000..c3cff26ede9a162440c1420d11d88b444cd433ec
--- /dev/null
+++ b/ds/map/map_siphash/map.ha
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: MPL-2.0
+// SPDX-FileCopyrightText: 2024 Drew DeVault
+// SPDX-FileCopyrightText: 2025 Runxi Yu
+
+use ds::map;
+
+// A simple hash map from byte strings to opaque pointers, using keyed SipHash
+// for hashing and a per-bucket slice of entries to resolve collisions.
+//
+// You are advised to create these with [[new]].
+export type map = struct {
+	vt: map::map,
+	n: size,
+	siphash_key: [16]u8,
+	buckets: [][]([]u8, *opaque),
+};
+
+const _vt: map::vtable = map::vtable {
+	getter = &vt_get,
+	setter = &vt_set,
+	deleter = &vt_del,
+	finisher = &vt_finish,
+};
+
+fn vt_get(m: *map::map, key: []u8) (*opaque | void) = get(m: *map, key);
+fn vt_set(m: *map::map, key: []u8, v: *opaque) (void | nomem) = set(m: *map, key, v);
+fn vt_del(m: *map::map, key: []u8) (*opaque | void) = del(m: *map, key);
+fn vt_finish(m: *map::map) void = finish(m: *map);
diff --git a/ds/map/map_siphash/new.ha b/ds/map/map_siphash/new.ha
new file mode 100644
index 0000000000000000000000000000000000000000..c3c0349b5047b97e12a83bce8428ecfabaf70761
--- /dev/null
+++ b/ds/map/map_siphash/new.ha
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: MPL-2.0
+// SPDX-FileCopyrightText: 2024 Drew DeVault
+// SPDX-FileCopyrightText: 2025 Runxi Yu
+
+use bytes;
+use errors;
+use hash;
+use hash::siphash;
+
+// Creates a new [[map]] with the given number of buckets and SipHash key.
+export fn new(n: size, siphash_key: [16]u8) (*map | errors::invalid | nomem) = {
+	if (n == 0) return errors::invalid;
+	let empty_bucket: []([]u8, *opaque) = [];
+	let buckets: [][]([]u8, *opaque) = alloc([empty_bucket...], n)?;
+	// Give the bucket array back if the map itself cannot be allocated.
+	match (alloc(map {
+		vt = &_vt,
+		n = n,
+		siphash_key = siphash_key,
+		buckets = buckets,
+	})) {
+	case let m: *map => return m;
+	case nomem =>
+		free(buckets);
+		return nomem;
+	};
+};
diff --git a/ds/map/map_siphash/set.ha b/ds/map/map_siphash/set.ha
new file mode 100644
index 0000000000000000000000000000000000000000..f53f64a5af6e1334d3db4ec241d665a2687ecd77
--- /dev/null
+++ b/ds/map/map_siphash/set.ha
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: MPL-2.0
+// SPDX-FileCopyrightText: 2024 Drew DeVault
+// SPDX-FileCopyrightText: 2025 Runxi Yu
+
+use bytes;
+use hash;
+use hash::siphash;
+
+// Sets an item in a [[map]], replacing any existing item with the same key.
+export fn set(m: *map, key: []u8, value: *opaque) (void | nomem) = {
+	let hash = siphash::siphash(2, 4, &m.siphash_key);
+	defer hash::close(&hash);
+	hash::write(&hash, key);
+	let bucket = &m.buckets[siphash::sum(&hash): size % m.n];
+	for (let i = 0z; i < len(bucket); i += 1) {
+		if (bytes::equal(bucket[i].0, key)) {
+			bucket[i].1 = value;
+			return;
+		};
+	};
+	append(bucket, (key, value))?;
+};
diff --git a/ds/map/map_siphash/test.ha b/ds/map/map_siphash/test.ha
new file mode 100644
index 0000000000000000000000000000000000000000..f9e97e4cb24be8cd10c837f519768673df92cdf1
--- /dev/null
+++ b/ds/map/map_siphash/test.ha
@@ -0,0 +1,69 @@
+use errors;
+use strings;
+use ds::map;
+
+@test fn roundtrip() void = {
+	const key: [16]u8 = [0...];
+
+	let m: *map = match (new(16, key)) {
+	case let p: *map => yield p;
+	case errors::invalid => abort("unexpected errors::invalid");
+	case nomem => abort("unexpected nomem");
+	};
+	defer finish(m);
+
+	let v1 = 1, v2 = 2, v3 = 3;
+	let p1: *opaque = (&v1: *opaque);
+	let p2: *opaque = (&v2: *opaque);
+	let p3: *opaque = (&v3: *opaque);
+
+	let k1 = strings::toutf8("alpha");
+	let k2 = strings::toutf8("beta");
+	let k3 = strings::toutf8("gamma");
+
+	match (map::set(m, k1, p1)) {
+	case void => yield;
+	case nomem => abort("unexpected nomem in set(k1,p1)");
+	};
+
+	match (map::get(m, k1)) {
+	case let got: *opaque =>
+		assert(got == p1, "get(k1) must return p1");
+	case void =>
+		abort("get(k1) unexpectedly void");
+	};
+
+	match (map::set(m, k1, p2)) {
+	case void => yield;
+	case nomem => abort("unexpected nomem in replace");
+	};
+	match (map::get(m, k1)) {
+	case let got: *opaque =>
+		assert(got == p2, "replace must overwrite prior value");
+	case void =>
+		abort("get(k1) void after replace");
+	};
+
+	match (map::set(m, k2, p3)) {
+	case void => yield;
+	case nomem => abort("unexpected nomem in set(k2,p3)");
+	};
+
+	match (map::get(m, k3)) {
+	case void => yield;
+	case *opaque =>
+		abort("get(k3) must be void for missing key");
+	};
+
+	match (map::del(m, k2)) {
+	case let got: *opaque =>
+		assert(got == p3, "del(k2) must return stored value");
+	case void =>
+		abort("del(k2) unexpectedly void");
+	};
+	match (map::del(m, k2)) {
+	case void => yield;
+	case *opaque =>
+		abort("del(k2) must be void after prior delete");
+	};
+};
-- 
2.48.1