Lindenii Project Forge
Add bloom filters
set: general-purpose set data structures. Create a set with the `new` function of the relevant submodule; you may then use the functions defined in this module to manipulate the set.
bloom: Bloom filter set
// SPDX-License-Identifier: MPL-2.0
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>

// Adds an item to a [[set]].
//
// The key is hashed once with the set's hash function. One bit is then set for
// each of the s.k probes, with the probe value advanced by STEP between
// probes. No code path in this function returns nomem.
export fn add(s: *set, key: []u8) (void | nomem) = {
	let probe = s.hash64(s.hash_params, key): u64;
	const nbits = s.m: u64;
	for (let n = 0z; n < s.k; n += 1) {
		// Reduce the probe to a bit index, then split it into a byte
		// offset and a bit offset within that byte.
		const idx = (probe % nbits): size;
		s.bits[idx / 8] |= (1u8 << ((idx % 8): u8));
		probe += STEP;
	};
};
// SPDX-License-Identifier: MPL-2.0
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>

// Tests whether an item is present in a [[set]].
//
// The same s.k bit positions that [[add]] would set for this key are
// inspected. Returns false as soon as one of them is clear, and true if all of
// them are set.
export fn contains(s: *set, key: []u8) bool = {
	let probe = s.hash64(s.hash_params, key): u64;
	const nbits = s.m: u64;
	for (let n = 0z; n < s.k; n += 1) {
		const idx = (probe % nbits): size;
		const flag = 1u8 << ((idx % 8): u8);
		if ((s.bits[idx / 8] & flag) == 0) {
			// One clear bit proves the key was never added.
			return false;
		};
		probe += STEP;
	};
	return true;
};
// SPDX-License-Identifier: MPL-2.0
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>

// Frees resources associated with a [[set]].
//
// Both the bit array and the set object itself are released, so the pointer
// must not be used after this call.
export fn finish(s: *set) void = {
	// The bit array is released first because it is reached through s.
	free(s.bits);
	free(s);
};
// SPDX-License-Identifier: MPL-2.0
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>

// Increment added to the running hash value between successive probes in
// [[add]] and [[contains]].
//
// NOTE(review): the value looks like the 64-bit golden-ratio constant
// (2^64 divided by the golden ratio); confirm the intended origin.
const STEP: u64 = 0x9E3779B97F4A7C15u64;
// SPDX-License-Identifier: MPL-2.0
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>

use errors;
use ds::set;

// Creates a new [[set]] with the given number of bits and hash functions.
//
// m controls how many bits are available in the filter. k controls how many
// hash probes are used per element. Both must be greater than zero, otherwise
// errors::invalid is returned.
//
// hash64 is called once per operation with hash_params and the key; its result
// seeds the probe sequence. hash_params is stored as-is and is not freed by
// this module.
//
// Returns nomem if either the bit array or the set object cannot be allocated.
// The caller must release the returned set with [[finish]].
export fn new(
	m: size,
	k: size,
	hash64: *fn(hash_params: nullable *opaque, key: []u8) size,
	hash_params: nullable *opaque,
) (*set | errors::invalid | nomem) = {
	if (m == 0 || k == 0) {
		return errors::invalid;
	};

	// Round m up to whole bytes without computing m + 7: for values of m
	// close to the largest size, that sum wraps around and would yield a
	// bit array far smaller than the m bits the set claims to have.
	let nbytes = m / 8;
	if (m % 8 != 0) {
		nbytes += 1;
	};

	// The bit array starts out with every bit cleared.
	let bits = match (alloc([0u8...], nbytes)) {
	case let b: []u8 =>
		yield b;
	case nomem =>
		return nomem;
	};

	let s = match (alloc(set {
		vt = &_vt,
		bits = bits,
		m = m,
		k = k,
		hash64 = hash64,
		hash_params = hash_params,
	})) {
	case let sp: *set =>
		yield sp;
	case nomem =>
		// Do not leak the bit array when the set itself cannot be
		// allocated.
		free(bits);
		return nomem;
	};
	return s;
};
// SPDX-License-Identifier: MPL-2.0
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>

use ds::set;

// A Bloom filter over byte-string keys.
//
// You are advised to create these with [[new]].
export type set = struct {
	// Generic set interface. The thunks below cast the *set::set they
	// receive back to *set, which presumably relies on this being the
	// first field.
	vt: set::set,
	// Backing storage for the filter's bits.
	bits: []u8,
	// Number of bits in the filter.
	m: size,
	// Number of probes performed per key.
	k: size,
	// Hash function used to derive the first probe for a key.
	hash64: *fn(hash_params: nullable *opaque, key: []u8) size,
	// Passed as the first argument on every call to hash64.
	hash_params: nullable *opaque,
};

// Dispatch table that routes the generic set operations to this module.
const _vt: set::vtable = set::vtable {
	adder = &vt_add,
	tester = &vt_contains,
	finisher = &vt_finish,
};

// Generic entry point for [[add]].
fn vt_add(s: *set::set, key: []u8) (void | nomem) = {
	return add(s: *set, key);
};

// Generic entry point for [[contains]].
fn vt_contains(s: *set::set, key: []u8) bool = {
	return contains(s: *set, key);
};

// Generic entry point for [[finish]].
fn vt_finish(s: *set::set) void = {
	finish(s: *set);
};
bloom_fnv: FNV Bloom filter set
// SPDX-License-Identifier: MPL-2.0
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>

use ds::set;

// Adds an item to a [[set]].
//
// The call is forwarded to the wrapped inner set, and its result is returned
// unchanged.
export fn add(s: *set, key: []u8) (void | nomem) =
	set::add(s.inner, key);
// SPDX-License-Identifier: MPL-2.0
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>

use ds::set;

// Tests whether an item is present in a [[set]].
//
// The query is forwarded to the wrapped inner set, and its answer is returned
// unchanged.
export fn contains(s: *set, key: []u8) bool =
	set::contains(s.inner, key);
// SPDX-License-Identifier: MPL-2.0
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>

use ds::set;

// Frees resources associated with a [[set]].
//
// The wrapped inner set is finished first, then the wrapper itself is freed.
// The pointer must not be used after this call.
export fn finish(s: *set) void = {
	// The inner set is reached through s, so it has to go first.
	set::finish(s.inner);
	free(s);
};
// SPDX-License-Identifier: MPL-2.0
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>

use hash;
use hash::fnv;

// Hashes key with 64-bit FNV-1a and returns the digest converted to size.
//
// The parameter pointer is part of the hash callback signature but is not used
// here.
fn hash64(_params: nullable *opaque, key: []u8) size = {
	let state = fnv::fnv64a();
	hash::write(&state, key);
	const digest = fnv::sum64(&state);
	return digest: size;
};
// SPDX-License-Identifier: MPL-2.0
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>

use errors;
use ds::set;
use ds::set::bloom;

// Creates a new [[set]] with the given number of bits and hash count.
//
// A Bloom filter using this module's FNV hash is created through bloom::new
// and wrapped. errors::invalid and nomem from bloom::new are passed through,
// and nomem is also returned if the wrapper cannot be allocated. The caller
// must release the result with [[finish]].
export fn new(
	m: size,
	k: size,
) (*set | errors::invalid | nomem) = {
	let filter = match (bloom::new(m, k, &hash64, null)) {
	case errors::invalid =>
		return errors::invalid;
	case nomem =>
		return nomem;
	case let made: *bloom::set =>
		yield (made: *set::set);
	};

	let wrapper = match (alloc(set {
		vt = &_vt,
		inner = filter,
	})) {
	case nomem =>
		// Release the filter that was already built so it is not
		// leaked.
		set::finish(filter);
		return nomem;
	case let boxed: *set =>
		yield boxed;
	};
	return wrapper;
};
// SPDX-License-Identifier: MPL-2.0
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>

use ds::set;

// A Bloom filter set using FNV for hashing.
//
// You are advised to create these with [[new]].
export type set = struct {
	// Generic set interface. The thunks below cast the *set::set they
	// receive back to *set, which presumably relies on this being the
	// first field.
	vt: set::set,
	// The wrapped set that every operation is forwarded to. It is finished
	// together with this object by [[finish]].
	inner: *set::set,
};

// Dispatch table that routes the generic set operations to this module.
const _vt: set::vtable = set::vtable {
	adder = &vt_add,
	tester = &vt_contains,
	finisher = &vt_finish,
};

// Generic entry point for [[add]].
fn vt_add(s: *set::set, key: []u8) (void | nomem) = {
	return add(s: *set, key);
};

// Generic entry point for [[contains]].
fn vt_contains(s: *set::set, key: []u8) bool = {
	return contains(s: *set, key);
};

// Generic entry point for [[finish]].
fn vt_finish(s: *set::set) void = {
	finish(s: *set);
};
use ds::set;
use errors;

// Stores v into dst in little-endian byte order and returns dst as a slice.
fn put_le64(dst: *[8]u8, v: u64) []u8 = {
	for (let n = 0z; n < 8z; n += 1) {
		const shift = 8u64 * (n: u64);
		dst[n] = ((v >> shift) & 0xFFu64): u8;
	};
	return dst[..];
};

// A zero bit count and a zero probe count must each be rejected with
// errors::invalid.
@test fn invalid() void = {
	match (new(0, 1)) {
	case *set =>
		abort("bloom_fnv: accepted m=0");
	case nomem =>
		abort("bloom_fnv: nomem for m=0");
	case errors::invalid =>
		void;
	};

	match (new(64, 0)) {
	case *set =>
		abort("bloom_fnv: accepted k=0");
	case nomem =>
		abort("bloom_fnv: nomem for k=0");
	case errors::invalid =>
		void;
	};
};

// Drives the filter through the generic set interface for every combination of
// the sizes and probe counts below. Added keys must be reported present, and
// the chosen absent keys must be reported missing.
@test fn test() void = {
	const ms: [2]size = [256z, 512z];
	const ks: [2]size = [2z, 3z];
	const inserted: [4]u64 = [1u64, 5u64, 21u64, 45u64];
	const missing: [3]u64 = [2u64, 7u64, 88u64];
	let buf: [8]u8 = [0...];

	for (let m .. ms) {
		for (let k .. ks) {
			let s = match (new(m, k)) {
			case errors::invalid =>
				abort("bloom_fnv: invalid parameters");
			case nomem =>
				abort("bloom_fnv: nomem");
			case let sp: *set =>
				yield sp;
			};
			defer finish(s);

			let iface: *set::set = (s: *set::set);

			for (let value .. inserted) {
				let key = put_le64(&buf, value);
				match (set::add(iface, key)) {
				case nomem =>
					abort("bloom_fnv: add nomem");
				case void =>
					void;
				};
				assert(set::contains(iface, key),
					"bloom_fnv: contains after add");
			};

			for (let value .. missing) {
				let key = put_le64(&buf, value);
				assert(!set::contains(iface, key),
					"bloom_fnv: false positive");
			};
		};
	};
};
bloom_siphash: SipHash Bloom filter set
// SPDX-License-Identifier: MPL-2.0
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>

use ds::set;

// Adds an item to a [[set]].
//
// The call is forwarded to the wrapped inner set, and its result is returned
// unchanged.
export fn add(s: *set, key: []u8) (void | nomem) =
	set::add(s.inner, key);
// SPDX-License-Identifier: MPL-2.0
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>

use ds::set;

// Tests whether an item is present in a [[set]].
//
// The query is forwarded to the wrapped inner set, and its answer is returned
// unchanged.
export fn contains(s: *set, key: []u8) bool =
	set::contains(s.inner, key);
// SPDX-License-Identifier: MPL-2.0
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>

use ds::set;

// Frees resources associated with a [[set]].
//
// The wrapped inner set is finished, then the heap copy of the SipHash key and
// the wrapper itself are freed. The pointer must not be used after this call.
export fn finish(s: *set) void = {
	// Both the inner set and the key are reached through s, so s itself
	// is released last.
	set::finish(s.inner);
	free(s.key);
	free(s);
};
// SPDX-License-Identifier: MPL-2.0
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>

use hash;
use hash::siphash;

// Hashes key with SipHash-2-4 and returns the digest converted to size.
//
// params must point to the 16-byte SipHash key; the program aborts if it is
// null.
fn hash64(params: nullable *opaque, key: []u8) size = {
	const sipkey = match (params) {
	case let p: *opaque =>
		yield (p: *[16]u8);
	case null =>
		abort("ds::set::bloom_siphash: missing key");
	};

	let state = siphash::siphash(2, 4, sipkey);
	defer hash::close(&state);
	hash::write(&state, key);
	return siphash::sum(&state): size;
};
// SPDX-License-Identifier: MPL-2.0
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>

use errors;
use ds::set;
use ds::set::bloom;

// Creates a new [[set]] with the given number of bits, hash count, and SipHash
// key.
//
// The key is copied to the heap so that the hash callback can reach it through
// its parameter pointer. errors::invalid and nomem from bloom::new are passed
// through, and nomem is also returned if the key copy or the wrapper cannot be
// allocated. Everything allocated so far is released on each failure path. The
// caller must release the result with [[finish]].
export fn new(
	m: size,
	k: size,
	siphash_key: [16]u8,
) (*set | errors::invalid | nomem) = {
	let heapkey = match (alloc(siphash_key)) {
	case nomem =>
		return nomem;
	case let copy: *[16]u8 =>
		yield copy;
	};

	let filter = match (bloom::new(m, k, &hash64, (heapkey: *opaque))) {
	case errors::invalid =>
		free(heapkey);
		return errors::invalid;
	case nomem =>
		free(heapkey);
		return nomem;
	case let made: *bloom::set =>
		yield (made: *set::set);
	};

	let wrapper = match (alloc(set {
		vt = &_vt,
		inner = filter,
		key = heapkey,
	})) {
	case nomem =>
		set::finish(filter);
		free(heapkey);
		return nomem;
	case let boxed: *set =>
		yield boxed;
	};
	return wrapper;
};
// SPDX-License-Identifier: MPL-2.0
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>

use ds::set;

// A Bloom filter set using SipHash for hashing.
//
// You are advised to create these with [[new]].
export type set = struct {
	// Generic set interface. The thunks below cast the *set::set they
	// receive back to *set, which presumably relies on this being the
	// first field.
	vt: set::set,
	// The wrapped set that every operation is forwarded to. It is finished
	// together with this object by [[finish]].
	inner: *set::set,
	// Heap-allocated SipHash key; freed by [[finish]].
	key: *[16]u8,
};

// Dispatch table that routes the generic set operations to this module.
const _vt: set::vtable = set::vtable {
	adder = &vt_add,
	tester = &vt_contains,
	finisher = &vt_finish,
};

// Generic entry point for [[add]].
fn vt_add(s: *set::set, key: []u8) (void | nomem) = {
	return add(s: *set, key);
};

// Generic entry point for [[contains]].
fn vt_contains(s: *set::set, key: []u8) bool = {
	return contains(s: *set, key);
};

// Generic entry point for [[finish]].
fn vt_finish(s: *set::set) void = {
	finish(s: *set);
};
use crypto::random;
use ds::set;
use errors;

// Writes v into dst in little-endian byte order and returns dst as a slice.
fn put_le64(dst: *[8]u8, v: u64) []u8 = {
	for (let i = 0z; i < 8z; i += 1) {
		dst[i] = ((v >> (8u64 * (i: u64))) & 0xFFu64): u8;
	};
	return dst[..];
};

// A zero bit count and a zero probe count must each be rejected with
// errors::invalid.
@test fn invalid() void = {
	let key: [16]u8 = [0...];
	match (new(0, 1, key)) {
	case errors::invalid =>
		void;
	case *set =>
		abort("bloom_siphash: accepted m=0");
	case nomem =>
		abort("bloom_siphash: nomem for m=0");
	};

	match (new(64, 0, key)) {
	case errors::invalid =>
		void;
	case *set =>
		abort("bloom_siphash: accepted k=0");
	case nomem =>
		abort("bloom_siphash: nomem for k=0");
	};
};

// Drives the filter through the generic set interface for every combination of
// the sizes, probe counts and (random) SipHash keys below.
//
// Only the absence of false negatives is asserted. A Bloom filter is allowed
// to report false positives, and because the SipHash keys are random, whether
// a key that was never added collides with the added ones differs from run to
// run. Asserting on that would make the test fail intermittently.
@test fn test() void = {
	let key1: [16]u8 = [0...];
	let key2: [16]u8 = [0...];
	random::buffer(&key1);
	random::buffer(&key2);

	const keys: [2]*[16]u8 = [&key1, &key2];
	const ms: [2]size = [256z, 512z];
	const ks: [2]size = [2z, 3z];
	let buf: [8]u8 = [0...];
	const inserted: [4]u64 = [4u64, 12u64, 30u64, 102u64];
	const missing: [3]u64 = [3u64, 7u64, 55u64];

	for (let mi = 0z; mi < len(ms); mi += 1) {
		for (let ki = 0z; ki < len(ks); ki += 1) {
			for (let keyi = 0z; keyi < len(keys); keyi += 1) {
				let s = match (new(ms[mi], ks[ki], *keys[keyi])) {
				case let sp: *set =>
					yield sp;
				case errors::invalid =>
					abort("bloom_siphash: invalid parameters");
				case nomem =>
					abort("bloom_siphash: nomem");
				};
				defer finish(s);

				let iface: *set::set = (s: *set::set);

				for (let i = 0z; i < len(inserted); i += 1) {
					let key = put_le64(&buf, inserted[i]);
					match (set::add(iface, key)) {
					case void =>
						void;
					case nomem =>
						abort("bloom_siphash: add nomem");
					};
					assert(set::contains(iface, key),
						"bloom_siphash: contains after add");
				};

				// Exercise lookups of keys that were never
				// added. Either answer is valid here, so the
				// result is deliberately not asserted on.
				for (let i = 0z; i < len(missing); i += 1) {
					let key = put_le64(&buf, missing[i]);
					set::contains(iface, key);
				};
			};
		};
	};
};
// SPDX-License-Identifier: MPL-2.0
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>

// A set is a pointer to a [[vtable]], which lets different set types implement
// the common operations.
export type set = *vtable;

// The vtable type holds the virtual functions that implement a [[set]].
export type vtable = struct {
	adder: *adder,
	tester: *tester,
	finisher: *finisher,
};

// The interface for adding a value to a set. Returns void on success or nomem
// if memory allocation failed.
export type adder = fn(s: *set, key: []u8) (void | nomem);

// The interface for testing membership in a set. Returns true if the item may
// be present, false otherwise.
export type tester = fn(s: *set, key: []u8) bool;

// The interface for a set which requires a finisher function to free it.
export type finisher = fn(s: *set) void;

// Adds an item to a [[set]] by calling the set's adder.
export fn add(s: *set, key: []u8) (void | nomem) =
	s.adder(s, key);

// Tests whether an item is present in a [[set]] by calling the set's tester.
export fn contains(s: *set, key: []u8) bool =
	s.tester(s, key);

// Frees the set and all of its resources by calling the set's finisher.
export fn finish(s: *set) void =
	s.finisher(s);