Lindenii Project Forge
Login

hare-ds

Data structures for Hare
Commit info
ID
c4e1ef3acca807d30fb28356565b09c4d9bd2d96
Author
Runxi Yu <me@runxiyu.org>
Author date
Wed, 17 Sep 2025 03:48:56 +0800
Committer
Runxi Yu <me@runxiyu.org>
Committer date
Wed, 17 Sep 2025 03:59:58 +0800
Actions
Add bloom filters
set: general-purpose set data structures

You should create a set with the `new` function in each submodule.
Then, you may use functions defined in this module to manipulate the set.
bloom: Bloom filter set
// SPDX-License-Identifier: MPL-2.0
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>

// Inserts key into the Bloom filter s.
//
// The key is hashed once with the filter's hash function; the resulting value
// is then advanced by STEP for each of the s.k probes, and the bit selected by
// each probe (value modulo s.m) is set. Nothing in this function allocates, so
// it always completes with void.
export fn add(s: *set, key: []u8) (void | nomem) = {
	const nbits = s.m: u64;
	let h = s.hash64(s.hash_params, key): u64;
	for (let probe = 0z; probe < s.k; probe += 1) {
		const idx = (h % nbits): size;
		// idx / 8 selects the byte, idx % 8 the bit within that byte.
		const mask = 1u8 << ((idx % 8): u8);
		s.bits[idx / 8] |= mask;
		h += STEP;
	};
};
// SPDX-License-Identifier: MPL-2.0
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>

// Reports whether key may be present in the Bloom filter s.
//
// The same probe sequence as [[add]] is walked: the key's hash is advanced by
// STEP for each of the s.k probes. The function returns false as soon as one
// probed bit is clear, and true only if every probed bit is set.
export fn contains(s: *set, key: []u8) bool = {
	const nbits = s.m: u64;
	let h = s.hash64(s.hash_params, key): u64;
	for (let probe = 0z; probe < s.k; probe += 1) {
		const idx = (h % nbits): size;
		// idx / 8 selects the byte, idx % 8 the bit within that byte.
		const mask = 1u8 << ((idx % 8): u8);
		if ((s.bits[idx / 8] & mask) == 0) {
			return false;
		};
		h += STEP;
	};
	return true;
};
// SPDX-License-Identifier: MPL-2.0
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>

// Frees resources associated with a [[set]].
//
// The bit array is released first, then the set object itself. The pointer
// must not be used after this call.
export fn finish(s: *set) void = {
	free(s.bits);
	free(s);
};
// SPDX-License-Identifier: MPL-2.0
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>

// Increment added to the running hash value between successive probes in
// [[add]] and [[contains]].
// NOTE(review): the value looks like the 64-bit golden-ratio constant
// (2^64 divided by the golden ratio); confirm the intended derivation.
const STEP: u64 = 0x9E3779B97F4A7C15u64;
// SPDX-License-Identifier: MPL-2.0
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>

use errors;
use ds::set;

// Creates a new [[set]] with the given number of bits and hash functions.
//
// m controls how many bits are available in the filter. k controls how many
// hash probes are used per element. Both must be greater than zero, otherwise
// [[errors::invalid]] is returned.
//
// hash64 is called with hash_params and the key to obtain the base hash for
// every [[add]] and [[contains]]; hash_params is stored as-is and is not freed
// by [[finish]].
//
// Returns nomem if the bit array or the set object cannot be allocated. On
// success the caller should eventually pass the result to [[finish]].
export fn new(
	m: size,
	k: size,
	hash64: *fn(hash_params: nullable *opaque, key: []u8) size,
	hash_params: nullable *opaque,
) (*set | errors::invalid | nomem) = {
	if (m == 0 || k == 0) {
		return errors::invalid;
	};

	// Round m up to whole bytes without computing m + 7: that sum wraps
	// around for m close to the maximum size, which would produce a bit
	// array far smaller than m and out-of-bounds probes later on.
	let nbytes = m / 8;
	if (m % 8 != 0) {
		nbytes += 1;
	};

	let bits = match (alloc([0u8...], nbytes)) {
	case let b: []u8 => yield b;
	case nomem => return nomem;
	};

	let s = match (alloc(set {
		vt = &_vt,
		bits = bits,
		m = m,
		k = k,
		hash64 = hash64,
		hash_params = hash_params,
	})) {
	case let sp: *set => yield sp;
	case nomem =>
		// Do not leak the bit array when the set itself cannot be
		// allocated.
		free(bits);
		return nomem;
	};
	return s;
};
// SPDX-License-Identifier: MPL-2.0
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>

use ds::set;

// A Bloom filter set of byte-string keys, stored as an array of membership
// bits.
//
// You are advised to create these with [[new]].
export type set = struct {
	// Pointer to the dispatch table. The vt_* wrappers cast a *set::set back
	// to a *set, so this presumably has to remain the first field.
	vt: set::set,
	// Bit array; bit position p lives in byte p / 8, bit p % 8.
	bits: []u8,
	// Number of bits available in the filter.
	m: size,
	// Number of probes performed per key.
	k: size,
	// Hash callback invoked once per add/contains with hash_params and the key.
	hash64: *fn(hash_params: nullable *opaque, key: []u8) size,
	// Opaque value handed to hash64 unchanged.
	hash_params: nullable *opaque,
};

// Dispatch table that routes the generic set operations to this module's
// implementations.
const _vt: set::vtable = set::vtable {
	adder    = &vt_add,
	tester   = &vt_contains,
	finisher = &vt_finish,
};

// Thin adapters: each casts the generic *set::set back to this module's *set
// and forwards to the matching exported function.
fn vt_add(s: *set::set, key: []u8) (void | nomem) = add(s: *set, key);
fn vt_contains(s: *set::set, key: []u8) bool = contains(s: *set, key);
fn vt_finish(s: *set::set) void = finish(s: *set);
bloom_fnv: FNV Bloom filter set
// SPDX-License-Identifier: MPL-2.0
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>

use ds::set;

// Adds an item to a [[set]].
//
// Forwards to the wrapped filter through the generic set interface and returns
// whatever that call returns.
export fn add(s: *set, key: []u8) (void | nomem) = {
	return set::add(s.inner, key);
};
// SPDX-License-Identifier: MPL-2.0
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>

use ds::set;

// Tests whether an item is present in a [[set]].
//
// Forwards to the wrapped filter through the generic set interface and returns
// its answer unchanged.
export fn contains(s: *set, key: []u8) bool = {
	return set::contains(s.inner, key);
};
// SPDX-License-Identifier: MPL-2.0
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>

use ds::set;

// Frees resources associated with a [[set]].
//
// The wrapped filter is finished first, then the wrapper object is freed. The
// pointer must not be used after this call.
export fn finish(s: *set) void = {
	set::finish(s.inner);
	free(s);
};
// SPDX-License-Identifier: MPL-2.0
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>

use hash;
use hash::fnv;

// Hash callback handed to the underlying Bloom filter: feeds key into a
// 64-bit FNV-1a hash and returns the sum. _params is ignored.
// NOTE(review): the result is cast to size, so on a target where size is
// narrower than 64 bits the upper bits would be dropped; confirm this is
// acceptable.
fn hash64(_params: nullable *opaque, key: []u8) size = {
	let h = fnv::fnv64a();
	hash::write(&h, key);
	return fnv::sum64(&h): size;
};
// SPDX-License-Identifier: MPL-2.0
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>

use errors;
use ds::set;
use ds::set::bloom;

// Creates a new FNV-backed [[set]] with m filter bits and k probes per key.
//
// The parameters are validated by the underlying Bloom filter constructor;
// its [[errors::invalid]] and nomem results are passed through unchanged.
// nomem is also returned if the wrapper object cannot be allocated, in which
// case the already-created inner filter is released first.
export fn new(
	m: size,
	k: size,
) (*set | errors::invalid | nomem) = {
	// Any failure from the inner constructor propagates to the caller.
	const inner = (bloom::new(m, k, &hash64, null)?): *set::set;

	return match (alloc(set {
		vt = &_vt,
		inner = inner,
	})) {
	case let sp: *set =>
		yield sp;
	case nomem =>
		set::finish(inner);
		return nomem;
	};
};
// SPDX-License-Identifier: MPL-2.0
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>

use ds::set;

// A Bloom filter set using FNV for hashing.
//
// You are advised to create these with [[new]].
export type set = struct {
	// Pointer to the dispatch table. The vt_* wrappers cast a *set::set back
	// to a *set, so this presumably has to remain the first field.
	vt: set::set,
	// The underlying Bloom filter, accessed through the generic set interface.
	inner: *set::set,
};

// Dispatch table that routes the generic set operations to this module's
// implementations.
const _vt: set::vtable = set::vtable {
	adder    = &vt_add,
	tester   = &vt_contains,
	finisher = &vt_finish,
};

// Thin adapters: each casts the generic *set::set back to this module's *set
// and forwards to the matching exported function.
fn vt_add(s: *set::set, key: []u8) (void | nomem) = add(s: *set, key);
fn vt_contains(s: *set::set, key: []u8) bool = contains(s: *set, key);
fn vt_finish(s: *set::set) void = finish(s: *set);
use ds::set;
use errors;

// Writes v into dst as eight bytes, least significant byte first, and returns
// a slice covering all of dst.
fn put_le64(dst: *[8]u8, v: u64) []u8 = {
	let rest = v;
	for (let i = 0z; i < 8z; i += 1) {
		// Emit the current low byte, then shift the next one into place.
		dst[i] = (rest & 0xFFu64): u8;
		rest >>= 8u64;
	};
	return dst[..];
};

// Checks that [[new]] rejects a zero bit count and a zero probe count with
// errors::invalid rather than returning a set or nomem.
@test fn invalid() void = {
	match (new(0, 1)) {
	case errors::invalid => void;
	case *set => abort("bloom_fnv: accepted m=0");
	case nomem => abort("bloom_fnv: nomem for m=0");
	};
	match (new(64, 0)) {
	case errors::invalid => void;
	case *set => abort("bloom_fnv: accepted k=0");
	case nomem => abort("bloom_fnv: nomem for k=0");
	};
};

// Exercises the filter through the generic set interface for every
// combination of the bit counts in ms and probe counts in ks: each inserted
// key must be reported present right after it is added, and each key in
// missing must be reported absent.
// NOTE(review): the absence check relies on these specific keys not colliding
// in the filter; since the hash is unkeyed the outcome should be the same on
// every run, but changing keys or sizes may require revisiting it.
@test fn test() void = {
	const ms: [2]size = [256z, 512z];
	const ks: [2]size = [2z, 3z];
	let buf: [8]u8 = [0...];
	const inserted: [4]u64 = [1u64, 5u64, 21u64, 45u64];
	const missing: [3]u64 = [2u64, 7u64, 88u64];

	for (let mi = 0z; mi < len(ms); mi += 1) {
		for (let ki = 0z; ki < len(ks); ki += 1) {
			let s = match (new(ms[mi], ks[ki])) {
			case let sp: *set => yield sp;
			case errors::invalid => abort("bloom_fnv: invalid parameters");
			case nomem => abort("bloom_fnv: nomem");
			};
			defer finish(s);
			// Drive the filter through the generic interface so the
			// vtable wiring is covered as well.
			let iface: *set::set = (s: *set::set);

			for (let i = 0z; i < len(inserted); i += 1) {
				let key = put_le64(&buf, inserted[i]);
				match (set::add(iface, key)) {
				case void => void;
				case nomem => abort("bloom_fnv: add nomem");
				};
				assert(set::contains(iface, key), "bloom_fnv: contains after add");
			};

			for (let i = 0z; i < len(missing); i += 1) {
				let key = put_le64(&buf, missing[i]);
				assert(!set::contains(iface, key), "bloom_fnv: false positive");
			};
		};
	};
};
bloom_siphash: SipHash Bloom filter set
// SPDX-License-Identifier: MPL-2.0
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>

use ds::set;

// Adds an item to a [[set]].
//
// Forwards to the wrapped filter through the generic set interface and returns
// whatever that call returns.
export fn add(s: *set, key: []u8) (void | nomem) = {
	return set::add(s.inner, key);
};
// SPDX-License-Identifier: MPL-2.0
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>

use ds::set;

// Tests whether an item is present in a [[set]].
//
// Forwards to the wrapped filter through the generic set interface and returns
// its answer unchanged.
export fn contains(s: *set, key: []u8) bool = {
	return set::contains(s.inner, key);
};
// SPDX-License-Identifier: MPL-2.0
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>

use ds::set;

// Frees resources associated with a [[set]].
//
// The wrapped filter is finished first, then the heap copy of the SipHash key,
// and finally the wrapper object itself. The pointer must not be used after
// this call.
export fn finish(s: *set) void = {
	set::finish(s.inner);
	free(s.key);
	free(s);
};
// SPDX-License-Identifier: MPL-2.0
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>

use hash;
use hash::siphash;

// Hash callback handed to the underlying Bloom filter.
//
// params must point to the 16-byte SipHash key; a null value aborts the
// program. The key bytes are hashed with SipHash using 2 and 4 as the round
// parameters and the resulting sum is returned, cast to size.
fn hash64(params: nullable *opaque, key: []u8) size = {
	const sipkey = match (params) {
	case let p: *opaque =>
		yield (p: *[16]u8);
	case null =>
		abort("ds::set::bloom_siphash: missing key");
	};

	let state = siphash::siphash(2, 4, sipkey);
	defer hash::close(&state);
	hash::write(&state, key);
	return siphash::sum(&state): size;
};
// SPDX-License-Identifier: MPL-2.0
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>

use errors;
use ds::set;
use ds::set::bloom;

// Creates a new SipHash-backed [[set]] with m filter bits, k probes per key,
// and the given SipHash key.
//
// The key is copied to the heap so that the hash callback can reach it through
// the filter's hash parameters; the copy is owned by the returned set and is
// released by [[finish]]. [[errors::invalid]] and nomem from the underlying
// Bloom filter constructor are passed through, and nomem is also returned if
// the key copy or the wrapper object cannot be allocated. Everything allocated
// so far is released on every failure path.
export fn new(
	m: size,
	k: size,
	siphash_key: [16]u8,
) (*set | errors::invalid | nomem) = {
	// Nothing else has been allocated yet, so nomem can propagate directly.
	const keycopy = alloc(siphash_key)?;

	const filter = match (bloom::new(m, k, &hash64, (keycopy: *opaque))) {
	case let bs: *bloom::set =>
		yield (bs: *set::set);
	case nomem =>
		free(keycopy);
		return nomem;
	case errors::invalid =>
		free(keycopy);
		return errors::invalid;
	};

	return match (alloc(set {
		vt = &_vt,
		inner = filter,
		key = keycopy,
	})) {
	case let sp: *set =>
		yield sp;
	case nomem =>
		set::finish(filter);
		free(keycopy);
		return nomem;
	};
};
// SPDX-License-Identifier: MPL-2.0
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>

use ds::set;

// A Bloom filter set using SipHash for hashing.
//
// You are advised to create these with [[new]].
export type set = struct {
	// Pointer to the dispatch table. The vt_* wrappers cast a *set::set back
	// to a *set, so this presumably has to remain the first field.
	vt: set::set,
	// The underlying Bloom filter, accessed through the generic set interface.
	inner: *set::set,
	// Heap copy of the SipHash key; allocated by new and freed by finish.
	key: *[16]u8,
};

// Dispatch table that routes the generic set operations to this module's
// implementations.
const _vt: set::vtable = set::vtable {
	adder    = &vt_add,
	tester   = &vt_contains,
	finisher = &vt_finish,
};

// Thin adapters: each casts the generic *set::set back to this module's *set
// and forwards to the matching exported function.
fn vt_add(s: *set::set, key: []u8) (void | nomem) = add(s: *set, key);
fn vt_contains(s: *set::set, key: []u8) bool = contains(s: *set, key);
fn vt_finish(s: *set::set) void = finish(s: *set);
use crypto::random;
use ds::set;
use errors;

// Writes v into dst as eight bytes, least significant byte first, and returns
// a slice covering all of dst.
fn put_le64(dst: *[8]u8, v: u64) []u8 = {
	let rest = v;
	for (let i = 0z; i < 8z; i += 1) {
		// Emit the current low byte, then shift the next one into place.
		dst[i] = (rest & 0xFFu64): u8;
		rest >>= 8u64;
	};
	return dst[..];
};

// Checks that [[new]] rejects a zero bit count and a zero probe count with
// errors::invalid rather than returning a set or nomem. An all-zero SipHash
// key is used since the key plays no part in validation.
@test fn invalid() void = {
	let key: [16]u8 = [0...];
	match (new(0, 1, key)) {
	case errors::invalid => void;
	case *set => abort("bloom_siphash: accepted m=0");
	case nomem => abort("bloom_siphash: nomem for m=0");
	};
	match (new(64, 0, key)) {
	case errors::invalid => void;
	case *set => abort("bloom_siphash: accepted k=0");
	case nomem => abort("bloom_siphash: nomem for k=0");
	};
};

// Exercises the filter through the generic set interface for every
// combination of the bit counts in ms, the probe counts in ks, and two random
// SipHash keys.
//
// Every inserted key must be reported present right after it is added. Keys
// that were never inserted are checked with a tolerance: the SipHash keys are
// random, and a Bloom filter may legitimately report an occasional false
// positive, so asserting on each absent key individually would make the test
// fail intermittently. Instead the test fails only if every absent key of a
// filter is reported present, which still catches a filter that answers true
// for everything.
@test fn test() void = {
	let key1: [16]u8 = [0...];
	let key2: [16]u8 = [0...];
	random::buffer(&key1);
	random::buffer(&key2);
	const keys: [2]*[16]u8 = [&key1, &key2];
	const ms: [2]size = [256z, 512z];
	const ks: [2]size = [2z, 3z];

	let buf: [8]u8 = [0...];
	const inserted: [4]u64 = [4u64, 12u64, 30u64, 102u64];
	const missing: [3]u64 = [3u64, 7u64, 55u64];

	for (let mi = 0z; mi < len(ms); mi += 1) {
		for (let ki = 0z; ki < len(ks); ki += 1) {
			for (let keyi = 0z; keyi < len(keys); keyi += 1) {
				let s = match (new(ms[mi], ks[ki], *keys[keyi])) {
				case let sp: *set => yield sp;
				case errors::invalid => abort("bloom_siphash: invalid parameters");
				case nomem => abort("bloom_siphash: nomem");
				};
				defer finish(s);
				// Drive the filter through the generic interface so
				// the vtable wiring is covered as well.
				let iface: *set::set = (s: *set::set);

				for (let i = 0z; i < len(inserted); i += 1) {
					let key = put_le64(&buf, inserted[i]);
					match (set::add(iface, key)) {
					case void => void;
					case nomem => abort("bloom_siphash: add nomem");
					};
					assert(set::contains(iface, key), "bloom_siphash: contains after add");
				};

				// Count false positives instead of failing on the
				// first one; see the comment above this function.
				let false_positives = 0z;
				for (let i = 0z; i < len(missing); i += 1) {
					let key = put_le64(&buf, missing[i]);
					if (set::contains(iface, key)) {
						false_positives += 1;
					};
				};
				assert(false_positives < len(missing),
					"bloom_siphash: every absent key reported present");
			};
		};
	};
};
// SPDX-License-Identifier: MPL-2.0
// SPDX-FileCopyrightText: 2025 Runxi Yu <me@runxiyu.org>

// A set is a pointer to a [[vtable]] which allows for set types to implement
// common operations.
//
// The concrete set types in the submodules store a value of this type in their
// vt field and are passed around as *set when used through this interface.
export type set = *vtable;

// The vtable type defines a set of virtual functions for a [[set]].
export type vtable = struct {
	// Called by [[add]] to insert a key.
	adder: *adder,
	// Called by [[contains]] to test membership of a key.
	tester: *tester,
	// Called by [[finish]] to release the set.
	finisher: *finisher,
};

// The interface for a set which could be used to add values. Returns void on
// success or nomem if memory allocation failed.
export type adder = fn(s: *set, key: []u8) (void | nomem);

// Adds an item to a [[set]].
//
// Dispatches to the adder in the set's vtable, passing the same set pointer
// and key, and returns that function's result.
export fn add(s: *set, key: []u8) (void | nomem) = {
	return s.adder(s, key);
};

// The interface for a set which could be used to test membership. Returns true
// if the item may be present, false otherwise.
export type tester = fn(s: *set, key: []u8) bool;

// Tests whether an item is present in a [[set]].
//
// Dispatches to the tester in the set's vtable, passing the same set pointer
// and key, and returns that function's result.
export fn contains(s: *set, key: []u8) bool = {
	return s.tester(s, key);
};

// The interface for a set which requires a finisher function to free it.
export type finisher = fn(s: *set) void;

// Frees the set and all of its resources.
//
// Dispatches to the finisher in the set's vtable, passing the same set
// pointer. The implementations in the submodules free the set object, so the
// pointer must not be used afterwards.
export fn finish(s: *set) void = {
	s.finisher(s);
};