From bebe0271b418c7f8ac6aa598695e88c9f4d7cb25 Mon Sep 17 00:00:00 2001 From: Runxi Yu Date: Wed, 17 Sep 2025 00:37:47 +0800 Subject: [PATCH] Add generic swiss tables implementation --- ds/map/swiss/README | 1 + ds/map/swiss/del.ha | 47 +++++++++++++++++++++++++++++++++++++++++++++++ ds/map/swiss/finish.ha | 11 +++++++++++ ds/map/swiss/get.ha | 37 +++++++++++++++++++++++++++++++++++++ ds/map/swiss/internal.ha | 139 +++++++++++++++++++++++++++++++++++++++++++++++++++++ ds/map/swiss/map.ha | 29 +++++++++++++++++++++++++++++ ds/map/swiss/new.ha | 43 +++++++++++++++++++++++++++++++++++++++++++ ds/map/swiss/set.ha | 96 +++++++++++++++++++++++++++++++++++++++++++++++++++++ ds/map/swiss_siphash/del.ha | 46 ++++------------------------------------------ ds/map/swiss_siphash/finish.ha | 10 +++++----- ds/map/swiss_siphash/get.ha | 34 +++------------------------------- ds/map/swiss_siphash/internal.ha | 147 +++-------------------------------------------------- ds/map/swiss_siphash/map.ha | 12 ++++-------- ds/map/swiss_siphash/new.ha | 56 +++++++++++++++++++++++++++++------------------------ ds/map/swiss_siphash/set.ha | 93 ++--------------------------------------------------- diff --git a/ds/map/swiss/README b/ds/map/swiss/README new file mode 100644 index 0000000000000000000000000000000000000000..003c65098f452ce799cf8439dd1ae875eb5e8e32 --- /dev/null +++ b/ds/map/swiss/README @@ -0,0 +1 @@ +swiss: generic swiss tables diff --git a/ds/map/swiss/del.ha b/ds/map/swiss/del.ha new file mode 100644 index 0000000000000000000000000000000000000000..653066fa12edcd0f9b256a2063744e4bfb4a5ca7 --- /dev/null +++ b/ds/map/swiss/del.ha @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: Apache-2.0 AND MPL-2.0 +// SPDX-FileCopyrightText: 2024 The Cockroach Authors +// SPDX-FileCopyrightText: 2025 Runxi Yu + +use bytes; + +// Deletes an item from a [[map]]. Returns the removed value or void. 
+export fn del(m: *map, key: []u8) (*opaque | void) = {
+	if (len(m.groups) == 0) return; // no group storage: nothing can be stored
+	let hv = m.hash64(m.hash_params, key): u64;
+	let t = h2(hv); // low 7 bits of the hash: the tag kept in the control byte
+	let mask = m.group_mask;
+	let off: size = (h1(hv): size) & mask; // remaining hash bits pick the first group
+	let idx: size = 0; // probe step counter, advanced by probe_next
+
+	for (true) {
+		let g = &m.groups[off];
+		for (let i = 0z; i < GROUP_SIZE; i += 1) {
+			let c = g.ctrl[i];
+			if (is_full_ctrl(c) && c == t) { // tag match; confirm with a key compare
+				if (bytes::equal(g.keys[i], key)) {
+					let v = g.vals[i]; // saved before the slot is cleared
+					g.ctrl[i] = CTRL_DELETED; // tombstone, not CTRL_EMPTY: lookups stop at empty slots
+					g.keys[i] = [];
+					g.vals[i] = null;
+					m.used -= 1;
+					m.tombs += 1;
+					// compact once tombstones reach 1/3 of the slot capacity
+					if (m.tombs * 3 >= capacity_slots(m)) {
+						rehash_in_place(m); // replaces m.groups; g is not used afterwards
+					};
+					match (v) {
+					case null =>
+						abort("map: null internal state escaped");
+					case let p: *opaque =>
+						return p;
+					};
+				};
+			} else if (c == CTRL_EMPTY) {
+				return; // an empty slot ends the probe sequence: key not present
+			};
+		};
+		let next = probe_next(off, idx, mask);
+		off = next.0;
+		idx = next.1;
+	};
+};
diff --git a/ds/map/swiss/finish.ha b/ds/map/swiss/finish.ha
new file mode 100644
index 0000000000000000000000000000000000000000..640dd673472c4a9817496e481eb97f2eded8d80f
--- /dev/null
+++ b/ds/map/swiss/finish.ha
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: Apache-2.0 AND MPL-2.0
+// SPDX-FileCopyrightText: 2024 The Cockroach Authors
+// SPDX-FileCopyrightText: 2025 Runxi Yu
+
+// Frees a [[map]] and its group storage. Stored keys and values are not freed.
+export fn finish(m: *map) void = {
+	if (len(m.groups) != 0) { // skip the free when no group storage exists
+		free(m.groups);
+	};
+	free(m); // m must not be used after this call
+};
diff --git a/ds/map/swiss/get.ha b/ds/map/swiss/get.ha
new file mode 100644
index 0000000000000000000000000000000000000000..d1040a45e01741d4de1205fac0c99906689a4a5a
--- /dev/null
+++ b/ds/map/swiss/get.ha
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: Apache-2.0 AND MPL-2.0
+// SPDX-FileCopyrightText: 2024 The Cockroach Authors
+// SPDX-FileCopyrightText: 2025 Runxi Yu
+
+use bytes;
+
+// Gets an item from a [[map]] by key, returning void if not found.
+export fn get(m: *map, key: []u8) (*opaque | void) = {
+	if (len(m.groups) == 0) return; // no group storage: nothing can be stored
+	let hv = m.hash64(m.hash_params, key): u64;
+	let t = h2(hv); // low 7 bits of the hash: the tag kept in the control byte
+	let mask = m.group_mask;
+	let off: size = (h1(hv): size) & mask; // remaining hash bits pick the first group
+	let idx: size = 0; // probe step counter, advanced by probe_next
+
+	for (true) {
+		let g = &m.groups[off];
+		for (let i = 0z; i < GROUP_SIZE; i += 1) {
+			let c = g.ctrl[i];
+			if (is_full_ctrl(c) && c == t) { // tag match; confirm with a key compare
+				if (bytes::equal(g.keys[i], key)) {
+					match (g.vals[i]) {
+					case null =>
+						abort("map: null internal state escaped");
+					case let p: *opaque =>
+						return p;
+					};
+				};
+			} else if (c == CTRL_EMPTY) {
+				return; // an empty slot ends the probe sequence: key not present
+			};
+		};
+		let next = probe_next(off, idx, mask);
+		off = next.0;
+		idx = next.1;
+	};
+};
diff --git a/ds/map/swiss/internal.ha b/ds/map/swiss/internal.ha
new file mode 100644
index 0000000000000000000000000000000000000000..125d034d0a24993150dcc2ddba968efd2fae14fb
--- /dev/null
+++ b/ds/map/swiss/internal.ha
@@ -0,0 +1,171 @@
+// SPDX-License-Identifier: Apache-2.0 AND MPL-2.0
+// SPDX-FileCopyrightText: 2024 The Cockroach Authors
+// SPDX-FileCopyrightText: 2025 Runxi Yu
+
+// Number of slots in a [[group]].
+export def GROUP_SIZE: size = 8z;
+// Control byte of a slot that holds neither an entry nor a tombstone.
+export def CTRL_EMPTY: u8 = 0x80;
+// Control byte of a tombstone, i.e. a slot whose entry was deleted.
+export def CTRL_DELETED: u8 = 0xFE;
+
+// GROUP_SIZE slots. ctrl[i] is CTRL_EMPTY, CTRL_DELETED, or the 7-bit tag
+// (see h2) of the entry held in keys[i] and vals[i].
+export type group = struct {
+	ctrl: [GROUP_SIZE]u8,
+	keys: [GROUP_SIZE][]u8,
+	vals: [GROUP_SIZE]nullable *opaque,
+};
+
+// Resets every slot of a group to the empty state.
+fn group_set_empty(g: *group) void = {
+	for (let i = 0z; i < GROUP_SIZE; i += 1) {
+		g.ctrl[i] = CTRL_EMPTY;
+		g.keys[i] = [];
+		g.vals[i] = null;
+	};
+};
+
+// Reports whether a control byte marks an occupied slot. Tags have the high
+// bit clear; both CTRL_EMPTY and CTRL_DELETED have it set, so the second
+// comparison is redundant but harmless.
+fn is_full_ctrl(c: u8) bool = (c & 0x80) == 0 && c != CTRL_DELETED;
+
+// Hash bits above the tag; masked by the callers to pick the first group.
+fn h1(h: u64) u64 = h >> 7u64;
+// Low 7 bits of the hash; stored as the control byte of an occupied slot.
+fn h2(h: u64) u8 = (h & 0x7Fu64): u8;
+
+// Advances a probe sequence. The step grows by one on every call, and the
+// resulting group index is wrapped with mask. Returns (offset, step).
+fn probe_next(off: size, idx: size, mask: size) (size, size) = {
+	let nidx = idx + 1;
+	let noff = (off + nidx) & mask;
+	return (noff, nidx);
+};
+
+// Total number of slots implied by group_mask.
+fn capacity_slots(m: *map) size = (m.group_mask + 1) * GROUP_SIZE;
+
+// Exclusive upper bound on used + tombstone slots before an insert grows the
+// table: 7/8 of the slots.
+fn max_used_with_tombs(m: *map) size = {
+	return (capacity_slots(m) * 7z) / 8z;
+};
+
+// Makes sure one more entry can be inserted, growing the table if needed.
+fn ensure_capacity_for_insert(m: *map) (void | nomem) = {
+	if (len(m.groups) == 0) {
+		// No storage yet. group_mask says nothing about an empty slice,
+		// so the load check below must not be consulted.
+		return resize(m, 1);
+	};
+	if (m.used + m.tombs < max_used_with_tombs(m)) {
+		return;
+	};
+	return resize(m, (m.group_mask + 1) * 2);
+};
+
+// Rebuilds the table at its current size, dropping all tombstones. This is
+// an optimisation only: if the replacement storage cannot be allocated, the
+// table is left untouched, which is still a valid state.
+fn rehash_in_place(m: *map) void = {
+	if (len(m.groups) == 0) return;
+	let new_groups: []group = match (alloc([group{...}...]: []group, m.group_mask + 1)) {
+	case let a: []group => yield a;
+	case nomem => return;
+	};
+	for (let i = 0z; i < len(new_groups); i += 1) {
+		group_set_empty(&new_groups[i]);
+	};
+	let old_groups = m.groups;
+	m.groups = new_groups;
+	// unchecked_put recounts the live entries as it reinserts them.
+	m.used = 0;
+	m.tombs = 0;
+
+	for (let gi = 0z; gi < len(old_groups); gi += 1) {
+		let g = &old_groups[gi];
+		for (let si = 0z; si < GROUP_SIZE; si += 1) {
+			if (!is_full_ctrl(g.ctrl[si])) continue;
+			unchecked_put(m, g.keys[si], g.vals[si]);
+		};
+	};
+	free(old_groups);
+};
+
+// Replaces the storage with new_groups_len empty groups (at least one) and
+// reinserts every live entry. new_groups_len is expected to be a power of
+// two, since group_mask is set to new_groups_len - 1. On nomem the map is
+// left unchanged.
+fn resize(m: *map, new_groups_len: size) (void | nomem) = {
+	if (new_groups_len == 0) new_groups_len = 1;
+	let gs: []group = match (alloc([group{...}...]: []group, new_groups_len)) {
+	case let a: []group => yield a;
+	case nomem => return nomem;
+	};
+	for (let i = 0z; i < len(gs); i += 1) {
+		group_set_empty(&gs[i]);
+	};
+	let old = m.groups;
+	m.groups = gs;
+	m.group_mask = new_groups_len - 1;
+	m.used = 0;
+	m.tombs = 0;
+
+	// Bounded by len(old) rather than the old mask, so that a map which
+	// had no groups yet is not indexed out of bounds.
+	for (let gi = 0z; gi < len(old); gi += 1) {
+		let g = &old[gi];
+		for (let si = 0z; si < GROUP_SIZE; si += 1) {
+			if (!is_full_ctrl(g.ctrl[si])) continue;
+			unchecked_put(m, g.keys[si], g.vals[si]);
+		};
+	};
+	if (len(old) != 0) {
+		free(old);
+	};
+};
+
+// Inserts key/val without looking for an existing equal key and without
+// checking the load; used when rebuilding. The table must contain at least
+// one empty slot, otherwise the probe loop does not terminate.
+fn unchecked_put(m: *map, key: []u8, val: nullable *opaque) void = {
+	let hv = m.hash64(m.hash_params, key): u64;
+	let t = h2(hv);
+	let mask = m.group_mask;
+	let off: size = (h1(hv): size) & mask;
+	let idx: size = 0;
+
+	for (true) {
+		let g = &m.groups[off];
+		// First tombstone seen in this group, if any.
+		let first_dead: (size | void) = void;
+		for (let i = 0z; i < GROUP_SIZE; i += 1) {
+			let c = g.ctrl[i];
+			if (is_full_ctrl(c)) {
+				continue;
+			} else if (c == CTRL_DELETED) {
+				if (first_dead is void) first_dead = i;
+			} else {
+				// Empty slot: the probe ends in this group. Prefer
+				// reusing a tombstone seen earlier in the group.
+				let slot = match (first_dead) {
+				case void => yield i;
+				case let di: size => yield di;
+				};
+				g.keys[slot] = key;
+				g.vals[slot] = val;
+				g.ctrl[slot] = t;
+				m.used += 1;
+				if (slot != i) {
+					m.tombs -= 1;
+				};
+				return;
+			};
+		};
+		let next = probe_next(off, idx, mask);
+		off = next.0;
+		idx = next.1;
+	};
+};
diff --git a/ds/map/swiss/map.ha b/ds/map/swiss/map.ha
new file mode 100644
index 0000000000000000000000000000000000000000..daf694690c3ea8876a429439b952c3c7d9b30b01
--- /dev/null
+++ b/ds/map/swiss/map.ha
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: Apache-2.0 AND MPL-2.0
+// SPDX-FileCopyrightText: 2024 The Cockroach Authors
+// SPDX-FileCopyrightText: 2025 Runxi Yu
+
+use ds::map;
+
+// Generic map based on swiss tables from []u8 to *opaque.
+//
+// Keys are stored as the slices passed to [[set]]; their contents are not
+// copied, so they must remain valid and unmodified while the entry exists.
+//
+// You are advised to create these with [[new]].
+export type map = struct {
+	vt: map::map,
+	group_mask: size, // number of groups minus one
+	used: size, // occupied slots
+	tombs: size, // tombstone (deleted) slots
+	hash64: *fn(hash_params: nullable *opaque, key: []u8) size,
+	hash_params: nullable *opaque, // passed as the first argument of hash64
+	groups: []group,
+};
+
+// Vtable whose entries forward to this module's get, set, del and finish.
+const _vt: map::vtable = map::vtable {
+	getter = &vt_get,
+	setter = &vt_set,
+	deleter = &vt_del,
+	finisher = &vt_finish,
+};
+
+// Vtable adapters: cast the *map::map handle to *map and forward the call.
+fn vt_get(m: *map::map, key: []u8) (*opaque | void) = get(m: *map, key);
+fn vt_set(m: *map::map, key: []u8, v: *opaque) (void | nomem) = set(m: *map, key, v);
+fn vt_del(m: *map::map, key: []u8) (*opaque | void) = del(m: *map, key);
+fn vt_finish(m: *map::map) void = finish(m: *map);
diff --git a/ds/map/swiss/new.ha b/ds/map/swiss/new.ha
new file mode 100644
index 0000000000000000000000000000000000000000..3a342c40296ed4480531407086c945826cdfadaf
--- /dev/null
+++ b/ds/map/swiss/new.ha
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: Apache-2.0 AND MPL-2.0
+// SPDX-FileCopyrightText: 2024 The Cockroach Authors
+// SPDX-FileCopyrightText: 2025 Runxi Yu
+
+use errors;
+
+// Creates a new [[map]] with an initial number of groups and hash function.
+//
+// n_groups must be greater than zero; it is rounded up to a power of two.
+export fn new(
+	n_groups: size,
+	hash64: *fn(hash_params: nullable *opaque, key: []u8) size,
+	hash_params: nullable *opaque,
+) (*map | errors::invalid | nomem) = {
+	if (n_groups == 0) return errors::invalid;
+
+	let groups_count: size = 1; // rounded up to a power of two: group_mask is count - 1
+	for (groups_count < n_groups) groups_count *= 2;
+
+	let gs: []group = match (alloc([group{...}...]: []group, groups_count)) {
+	case let a: []group => yield a;
+	case nomem => return nomem;
+	};
+	for (let i = 0z; i < len(gs); i += 1) {
+		group_set_empty(&gs[i]);
+	};
+
+	let m = match (alloc(map {
+		vt = &_vt,
+		group_mask = groups_count - 1,
+		used = 0,
+		tombs = 0,
+		hash64 = hash64, // called as hash64(hash_params, key) on every operation
+		hash_params = hash_params, // stored as given; not freed by finish
+		groups = gs,
+	})) {
+	case let p: *map => yield p;
+	case nomem =>
+		free(gs); // the group storage has no other owner yet: do not leak it
+		return nomem;
+	};
+	return m;
+};
diff --git a/ds/map/swiss/set.ha b/ds/map/swiss/set.ha
new file mode 100644
index 0000000000000000000000000000000000000000..c8cac47251f5b01049a1a0584574f71fdd1f542b
--- /dev/null
+++ b/ds/map/swiss/set.ha
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: Apache-2.0 AND MPL-2.0
+// SPDX-FileCopyrightText: 2024 The Cockroach Authors
+// SPDX-FileCopyrightText: 2025 Runxi Yu
+
+use bytes;
+
+// Sets an item in a [[map]], replacing any existing item with the same key.
+export fn set(m: *map, key: []u8, value: *opaque) (void | nomem) = {
+	// The hash depends only on the key and the hash parameters, never on
+	// the table size, so it is computed once and shared by both passes.
+	let hv = m.hash64(m.hash_params, key): u64;
+	let t = h2(hv);
+
+	// Pass 1: if the key is already present, overwrite its value in place.
+	// A table without groups holds nothing, so the pass is skipped.
+	if (len(m.groups) != 0) {
+		let mask0 = m.group_mask;
+		let off0: size = (h1(hv): size) & mask0;
+		let idx0: size = 0;
+		let absent = false;
+
+		// Walk the probe sequence until the key or an empty slot is found.
+		for (!absent) {
+			let g = &m.groups[off0];
+
+			for (let i = 0z; i < GROUP_SIZE; i += 1) {
+				let c = g.ctrl[i];
+				if (is_full_ctrl(c) && c == t) {
+					// Tag match; confirm with a key compare.
+					if (bytes::equal(g.keys[i], key)) {
+						g.vals[i] = value;
+						return;
+					};
+				} else if (c == CTRL_EMPTY) {
+					// An empty slot ends the probe sequence:
+					// the key is not in the table.
+					absent = true;
+					break;
+				};
+			};
+
+			let next = probe_next(off0, idx0, mask0);
+			off0 = next.0;
+			idx0 = next.1;
+		};
+	};
+
+	// Pass 2: the key is absent, so insert it. Growing the table may
+	// replace m.groups and m.group_mask, hence the probe start is derived
+	// only after the capacity check. The hash and the tag are unaffected
+	// by growth and are reused from above.
+	ensure_capacity_for_insert(m)?;
+
+	let mask = m.group_mask;
+	let off: size = (h1(hv): size) & mask;
+	let idx: size = 0;
+
+	// The loop only ends at an empty slot; the capacity check above is
+	// what is expected to guarantee that one exists.
+	for (true) {
+		let g = &m.groups[off];
+		// First tombstone seen in this group, if any.
+		let first_dead: (size | void) = void;
+
+		for (let i = 0z; i < GROUP_SIZE; i += 1) {
+			let c = g.ctrl[i];
+			if (is_full_ctrl(c)) {
+				// Occupied by another key (pass 1 ruled out ours).
+				continue;
+			} else if (c == CTRL_DELETED) {
+				if (first_dead is void) first_dead = i;
+			} else {
+				// Empty slot: the probe ends in this group. Prefer
+				// reusing a tombstone seen earlier in the group.
+				let slot = match (first_dead) {
+				case void => yield i;
+				case let di: size => yield di;
+				};
+				// The key slice is stored as given, not copied.
+				g.keys[slot] = key;
+				g.vals[slot] = value;
+				g.ctrl[slot] = t;
+				m.used += 1;
+				if (slot != i) {
+					// A tombstone was recycled.
+					m.tombs -= 1;
+				};
+				return;
+			};
+		};
+
+		let next = probe_next(off, idx, mask);
+		off = next.0;
+		idx = next.1;
+	};
+};
diff --git a/ds/map/swiss_siphash/del.ha b/ds/map/swiss_siphash/del.ha
index 24d45ff8a9dafddfe70a290663e49471b3f58128..150b37c0bb9224ccc89612fb9e09181fecb92ef5 100644
--- a/ds/map/swiss_siphash/del.ha
+++ b/ds/map/swiss_siphash/del.ha
@@ -1,47 +1,9 @@
 // 
SPDX-License-Identifier: Apache-2.0 AND MPL-2.0 -// SPDX-FileCopyrightText: 2024 The Cockroach Authors -// SPDX-FileCopyrightText: 2025 Runxi Yu +// SPDX-FileCopyrightText: 2025 Runxi Yu -use bytes; +use ds::map; -// Deletes an item from a [[map]]. Returns the removed value or void. +// Deletes an item from a [[map]]. export fn del(m: *map, key: []u8) (*opaque | void) = { - if (len(m.groups) == 0) return; - let hv = hash64(m, key): u64; - let t = h2(hv); - let mask = m.group_mask; - let off: size = (h1(hv): size) & mask; - let idx: size = 0; - - for (true) { - let g = &m.groups[off]; - for (let i = 0z; i < GROUP_SIZE; i += 1) { - let c = g.ctrl[i]; - if (is_full_ctrl(c) && c == t) { - if (bytes::equal(g.keys[i], key)) { - let v = g.vals[i]; - g.ctrl[i] = CTRL_DELETED; - g.keys[i] = []; - g.vals[i] = null; - m.used -= 1; - m.tombs += 1; - // elide the tombstones if exceed 1/3 of the capacity - if (m.tombs * 3 >= capacity_slots(m)) { - rehash_in_place(m); - }; - match (v) { - case null => - abort("map: null internal state escaped"); - case let p: *opaque => - return p; - }; - }; - } else if (c == CTRL_EMPTY) { - return; - }; - }; - let next = probe_next(off, idx, mask); - off = next.0; - idx = next.1; - }; + return map::del(m.inner, key); }; diff --git a/ds/map/swiss_siphash/finish.ha b/ds/map/swiss_siphash/finish.ha index 640dd673472c4a9817496e481eb97f2eded8d80f..36bdcdf3b54ef0cfa21b2cdc19db572ad6ca094c 100644 --- a/ds/map/swiss_siphash/finish.ha +++ b/ds/map/swiss_siphash/finish.ha @@ -1,11 +1,11 @@ // SPDX-License-Identifier: Apache-2.0 AND MPL-2.0 -// SPDX-FileCopyrightText: 2024 The Cockroach Authors -// SPDX-FileCopyrightText: 2025 Runxi Yu +// SPDX-FileCopyrightText: 2025 Runxi Yu + +use ds::map; // Frees resources associated with a [[map]]. 
export fn finish(m: *map) void = { - if (len(m.groups) != 0) { - free(m.groups); - }; + map::finish(m.inner); + free(m.key); free(m); }; diff --git a/ds/map/swiss_siphash/get.ha b/ds/map/swiss_siphash/get.ha index e44a8dbcbb51da74b5f87f660c5f9c7109a30ac8..591eb2b19845bfc74f8c1ce01ebb8c69e99b0761 100644 --- a/ds/map/swiss_siphash/get.ha +++ b/ds/map/swiss_siphash/get.ha @@ -1,37 +1,9 @@ // SPDX-License-Identifier: Apache-2.0 AND MPL-2.0 -// SPDX-FileCopyrightText: 2024 The Cockroach Authors -// SPDX-FileCopyrightText: 2025 Runxi Yu +// SPDX-FileCopyrightText: 2025 Runxi Yu -use bytes; +use ds::map; // Gets an item from a [[map]] by key, returning void if not found. export fn get(m: *map, key: []u8) (*opaque | void) = { - if (len(m.groups) == 0) return; - let hv = hash64(m, key): u64; - let t = h2(hv); - let mask = m.group_mask; - let off: size = (h1(hv): size) & mask; - let idx: size = 0; - - for (true) { - let g = &m.groups[off]; - for (let i = 0z; i < GROUP_SIZE; i += 1) { - let c = g.ctrl[i]; - if (is_full_ctrl(c) && c == t) { - if (bytes::equal(g.keys[i], key)) { - match (g.vals[i]) { - case null => - abort("map: null internal state escaped"); - case let p: *opaque => - return p; - }; - }; - } else if (c == CTRL_EMPTY) { - return; - }; - }; - let next = probe_next(off, idx, mask); - off = next.0; - idx = next.1; - }; + return map::get(m.inner, key); }; diff --git a/ds/map/swiss_siphash/internal.ha b/ds/map/swiss_siphash/internal.ha index 13338f93cf912317adfa569c8246e0fe8ff6b175..c3c9bd1009b95e1c184cad09079f7590a91835a3 100644 --- a/ds/map/swiss_siphash/internal.ha +++ b/ds/map/swiss_siphash/internal.ha @@ -1,150 +1,19 @@ // SPDX-License-Identifier: Apache-2.0 AND MPL-2.0 -// SPDX-FileCopyrightText: 2024 The Cockroach Authors -// SPDX-FileCopyrightText: 2025 Runxi Yu +// SPDX-FileCopyrightText: 2025 Runxi Yu -use bytes; use hash; use hash::siphash; -export def GROUP_SIZE: size = 8z; -export def CTRL_EMPTY: u8 = 0x80; -export def CTRL_DELETED: u8 = 0xFE; - -export 
type group = struct { - ctrl: [GROUP_SIZE]u8, - keys: [GROUP_SIZE][]u8, - vals: [GROUP_SIZE]nullable *opaque, -}; - -fn group_set_empty(g: *group) void = { - for (let i = 0z; i < GROUP_SIZE; i += 1) { - g.ctrl[i] = CTRL_EMPTY; - g.keys[i] = []; - g.vals[i] = null; +fn hash64(params: nullable *opaque, key: []u8) size = { + let keyptr = match (params) { + case null => + abort("ds::map::swiss_siphash: missing key"); + case let p: *opaque => + yield (p: *[16]u8); }; -}; - -fn is_full_ctrl(c: u8) bool = (c & 0x80) == 0 && c != CTRL_DELETED; -fn hash64(m: *map, key: []u8) size = { - let h = siphash::siphash(2, 4, &m.siphash_key); + let h = siphash::siphash(2, 4, keyptr); defer hash::close(&h); hash::write(&h, key); return siphash::sum(&h): size; }; - -fn h1(h: u64) u64 = h >> 7u64; -fn h2(h: u64) u8 = (h & 0x7Fu64): u8; - -fn probe_next(off: size, idx: size, mask: size) (size, size) = { - let nidx = idx + 1; - let noff = (off + nidx) & mask; - return (noff, nidx); -}; - -fn capacity_slots(m: *map) size = (m.group_mask + 1) * GROUP_SIZE; - -fn max_used_with_tombs(m: *map) size = { - return (capacity_slots(m) * 7z) / 8z; -}; - -fn ensure_capacity_for_insert(m: *map) (void | nomem) = { - if (m.used + m.tombs < max_used_with_tombs(m)) { - return; - }; - return resize(m, (m.group_mask + 1) * 2); -}; - -fn rehash_in_place(m: *map) void = { - if (len(m.groups) == 0) return; - let new_groups: []group = alloc([group{...}...], (m.group_mask + 1))!; - for (let i = 0z; i < len(new_groups); i += 1) { - group_set_empty(&new_groups[i]); - }; - let old = m.groups; - m.groups = new_groups; - let old_groups = old; - let old_mask = m.group_mask; - m.used = 0; - m.tombs = 0; - - for (let gi = 0z; gi <= old_mask; gi += 1) { - let g = &old_groups[gi]; - for (let si = 0z; si < GROUP_SIZE; si += 1) { - let c = g.ctrl[si]; - if (!is_full_ctrl(c)) continue; - let k = g.keys[si]; - let v = g.vals[si]; - unchecked_put(m, k, v); - }; - }; - free(old_groups); -}; - -fn resize(m: *map, new_groups_len: 
size) (void | nomem) = { - if (new_groups_len == 0) new_groups_len = 1; - let gs: []group = match (alloc([group{...}...], new_groups_len)) { - case let a: []group => yield a; - case nomem => return nomem; - }; - for (let i = 0z; i < len(gs); i += 1) { - group_set_empty(&gs[i]); - }; - let old = m.groups; - let old_mask = m.group_mask; - m.groups = gs; - m.group_mask = new_groups_len - 1; - m.used = 0; - m.tombs = 0; - - for (let gi = 0z; gi <= old_mask; gi += 1) { - let g = &old[gi]; - for (let si = 0z; si < GROUP_SIZE; si += 1) { - let c = g.ctrl[si]; - if (!is_full_ctrl(c)) continue; - unchecked_put(m, g.keys[si], g.vals[si]); - }; - }; - if (len(old) != 0) { - free(old); - }; -}; - -fn unchecked_put(m: *map, key: []u8, val: nullable *opaque) void = { - let hv = hash64(m, key): u64; - let t = h2(hv); - let mask = m.group_mask; - let off: size = (h1(hv): size) & mask; - let idx: size = 0; - - for (true) { - let g = &m.groups[off]; - let first_dead: (size | void) = void; - for (let i = 0z; i < GROUP_SIZE; i += 1) { - let c = g.ctrl[i]; - if (is_full_ctrl(c)) { - continue; - } else if (c == CTRL_DELETED) { - if (first_dead is void) first_dead = i; - } else { - let slot = match (first_dead) { - case void => yield i; - case let di: size => yield di; - }; - g.keys[slot] = key; - g.vals[slot] = val; - g.ctrl[slot] = t; - m.used += 1; - if (slot == i) { - void; - } else { - m.tombs -= 1; - }; - return; - }; - }; - let next = probe_next(off, idx, mask); - off = next.0; - idx = next.1; - }; -}; diff --git a/ds/map/swiss_siphash/map.ha b/ds/map/swiss_siphash/map.ha index e70ef4ed44d0eed1067c55749d93bebac4ffeb19..15d21ff2abddb7f462d1096c02a25c713d242710 100644 --- a/ds/map/swiss_siphash/map.ha +++ b/ds/map/swiss_siphash/map.ha @@ -1,19 +1,15 @@ // SPDX-License-Identifier: Apache-2.0 AND MPL-2.0 -// SPDX-FileCopyrightText: 2024 The Cockroach Authors -// SPDX-FileCopyrightText: 2025 Runxi Yu +// SPDX-FileCopyrightText: 2025 Runxi Yu use ds::map; -// Swiss table based map from 
[]u8 to *opaque. +// Swiss-table based map from []u8 to *opaque, using SipHash. // // You are advised to create these with [[new]]. export type map = struct { vt: map::map, - group_mask: size, - used: size, - tombs: size, - siphash_key: [16]u8, - groups: []group, + inner: *map::map, + key: *[16]u8, }; const _vt: map::vtable = map::vtable { diff --git a/ds/map/swiss_siphash/new.ha b/ds/map/swiss_siphash/new.ha index 96e2dde3d2af665a179e8b305b0c321e03946b2a..a9a694616185359b3a74d1e02342071c29ad6663 100644 --- a/ds/map/swiss_siphash/new.ha +++ b/ds/map/swiss_siphash/new.ha @@ -1,39 +1,45 @@ // SPDX-License-Identifier: Apache-2.0 AND MPL-2.0 -// SPDX-FileCopyrightText: 2024 The Cockroach Authors -// SPDX-FileCopyrightText: 2025 Runxi Yu +// SPDX-FileCopyrightText: 2025 Runxi Yu use errors; use ds::map; +use ds::map::swiss; -// Creates a new [[map]] with an initial number of groups and SipHash key. +// Creates a new [[map]] with an initial number of groups and the SipHash key. // // n_groups must be greater than zero. 
-export fn new(n_groups: size, siphash_key: [16]u8) (*map | errors::invalid | nomem) = { - if (n_groups == 0) { - return errors::invalid; - }; - - let v: size = 1; - for (v < n_groups) { - v *= 2; - }; - let groups_count = v; - - let gs: []group = match (alloc([group{...}...]: []group, groups_count)) { - case let a: []group => yield a; +export fn new( + n_groups: size, + siphash_key: [16]u8, +) (*map | errors::invalid | nomem) = { + let keybox = match (alloc(siphash_key)) { + case let kp: *[16]u8 => yield kp; case nomem => return nomem; }; - for (let i = 0z; i < len(gs); i += 1) { - group_set_empty(&gs[i]); + + let inner = match (swiss::new( + n_groups, &hash64, (keybox: *opaque), + )) { + case let sm: *swiss::map => + yield (sm: *map::map); + case errors::invalid => + free(keybox); + return errors::invalid; + case nomem => + free(keybox); + return nomem; }; - let m = alloc(map { + let m = match (alloc(map { vt = &_vt, - group_mask = groups_count - 1, - used = 0, - tombs = 0, - siphash_key = siphash_key, - groups = gs, - })?; + inner = inner, + key = keybox, + })) { + case let p: *map => yield p; + case nomem => + map::finish(inner); + free(keybox); + return nomem; + }; return m; }; diff --git a/ds/map/swiss_siphash/set.ha b/ds/map/swiss_siphash/set.ha index 6cc817d8ea99196cbf62efc2ec19592f42ea8d2c..7751c5afba1129d964fb208049f6961372712004 100644 --- a/ds/map/swiss_siphash/set.ha +++ b/ds/map/swiss_siphash/set.ha @@ -1,96 +1,9 @@ // SPDX-License-Identifier: Apache-2.0 AND MPL-2.0 -// SPDX-FileCopyrightText: 2024 The Cockroach Authors -// SPDX-FileCopyrightText: 2025 Runxi Yu +// SPDX-FileCopyrightText: 2025 Runxi Yu -use bytes; +use ds::map; // Sets an item in a [[map]], replacing any existing item with the same key. 
export fn set(m: *map, key: []u8, value: *opaque) (void | nomem) = { - let need_insert = true; - - if (len(m.groups) != 0) { - let hv0 = hash64(m, key); - let t0 = h2(hv0); - let mask0 = m.group_mask; - let off0: size = (h1(hv0): size) & mask0; - let idx0: size = 0; - - need_insert = false; - for (true) { - let g = &m.groups[off0]; - - for (let i = 0z; i < GROUP_SIZE; i += 1) { - let c = g.ctrl[i]; - if (is_full_ctrl(c) && c == t0) { - if (bytes::equal(g.keys[i], key)) { - g.vals[i] = value; - return; - }; - } else if (c == CTRL_EMPTY) { - need_insert = true; - break; - }; - }; - - if (need_insert) { - break; - }; - - let next = probe_next(off0, idx0, mask0); - off0 = next.0; - idx0 = next.1; - }; - } else { - need_insert = true; - }; - - if (!need_insert) { - return; - }; - - match (ensure_capacity_for_insert(m)) { - case void => yield; - case nomem => return nomem; - }; - - let hv = hash64(m, key); - let t = h2(hv); - let mask = m.group_mask; - let off: size = (h1(hv): size) & mask; - let idx: size = 0; - - for (true) { - let g = &m.groups[off]; - let first_dead: (size | void) = void; - - for (let i = 0z; i < GROUP_SIZE; i += 1) { - let c = g.ctrl[i]; - if (is_full_ctrl(c)) { - if (c == t && bytes::equal(g.keys[i], key)) { - g.vals[i] = value; - return; - }; - continue; - } else if (c == CTRL_DELETED) { - if (first_dead is void) first_dead = i; - } else { - let slot = match (first_dead) { - case void => yield i; - case let di: size => yield di; - }; - g.keys[slot] = key; - g.vals[slot] = value; - g.ctrl[slot] = t; - m.used += 1; - if (slot != i) { - m.tombs -= 1; - }; - return; - }; - }; - - let next = probe_next(off, idx, mask); - off = next.0; - idx = next.1; - }; + return map::set(m.inner, key, value); }; -- 2.48.1