From 669cca494fd456dbfcf42801c791796c286c18ad Mon Sep 17 00:00:00 2001
From: Runxi Yu
Date: Thu, 03 Apr 2025 10:25:53 +0800
Subject: [PATCH] git2d: Import BARE and UTF-8 utilities

---
 .gitignore   |   1 +
 Makefile     |   4 ++--
 git2d/bare.c | 481 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 git2d/bare.h |  80 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 git2d/utf8.h |  74 +++++++++++++++++++++++++++++++++++++++++++++++++++++

diff --git a/.gitignore b/.gitignore
index 10ad875a7c2f9601f7f132939c163cc4f76c6c19..a57474dd83205641e91e3fbeee926b821fd5e0eb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,4 @@ /version.go
 /vendor
 /source.tar.gz
 *.c.BAK
+*.o
diff --git a/Makefile b/Makefile
--- a/Makefile
+++ b/Makefile
@@ -21,7 +21,7 @@ utils/colb:
 
 hookc/hookc:
 
-git2d/git2d: git2d/*.c
-	$(CC) $(CFLAGS) `pkg-config --cflags --libs libgit2` -lpthread -o git2d/git2d $<
+git2d/git2d: git2d/main.o git2d/bare.o
+	$(CC) $(CFLAGS) `pkg-config --cflags --libs libgit2` -lpthread -o git2d/git2d $^
 
 version.go:
diff --git a/git2d/bare.c b/git2d/bare.c
new file mode 100644
--- /dev/null
+++ b/git2d/bare.c
@@ -0,0 +1,481 @@
+/*-
+ * SPDX-License-Identifier: MIT
+ * SPDX-FileCopyrightText: Copyright (c) 2022 Frank Smit
+ */
+
+#include <string.h>
+#include <stdbool.h>
+
+#include "bare.h"
+#include "utf8.h"
+
+#define UNUSED(x) (void)(x)
+
+enum {
+	U8SZ = 1,
+	U16SZ = 2,
+	U32SZ = 4,
+	U64SZ = 8,
+	MAXVARINTSZ = 10,
+};
+
+/*
+ * Report whether the sz bytes at x form well-formed UTF-8.
+ *
+ * utf8_decode() always loads four bytes, so it is applied to the caller's
+ * buffer only while at least four bytes remain. The last one to three
+ * bytes are validated from a zero-padded scratch copy instead, which also
+ * makes a truncated trailing sequence fail (NUL is not a continuation
+ * byte). A NULL pointer or a zero length counts as valid.
+ */
+static bool
+checkstr(const char *x, uint64_t sz)
+{
+	if (x == NULL || sz == 0) {
+		return true;
+	}
+
+	int err = 0;
+	uint32_t cp = 0;
+	const uint8_t *p = (const uint8_t *)x;
+	const uint8_t *end = p + sz;
+
+	/* Advance by what the decoder consumed, not by a fixed stride. */
+	while ((uint64_t)(end - p) >= 4) {
+		p = utf8_decode((void *)p, &cp, &err);
+		if (err != 0) {
+			return false;
+		}
+	}
+
+	if (p < end) {
+		/* Up to 3 tail bytes plus room for a 4-byte load at each. */
+		uint8_t pad[8] = {0};
+		size_t tail = (size_t)(end - p);
+		const uint8_t *q = pad;
+
+		memcpy(pad, p, tail);
+		while (q < pad + tail) {
+			q = utf8_decode((void *)q, &cp, &err);
+			if (err != 0) {
+				return false;
+			}
+		}
+	}
+
+	return true;
+}
+
+/*
+ * Write x as a varuint: seven bits per byte, least significant group
+ * first, with the high bit set on every byte except the last.
+ */
+bare_error
+bare_put_uint(struct bare_writer *ctx, uint64_t x)
+{
+	uint64_t i = 0;
+	uint8_t b[MAXVARINTSZ];
+
+	while (x >= 0x80) {
+		b[i] = (uint8_t)x | 0x80;
+		x >>= 7;
+		i++;
+	}
+
+	b[i] = (uint8_t)x;
+	i++;
+
+	return ctx->write(ctx->buffer, b, i);
+}
+
+/*
+ * Read a varuint into *x.
+ *
+ * An encoding is rejected with BARE_ERROR_READ_FAILED when it does not
+ * terminate within MAXVARINTSZ bytes or when its final byte carries bits
+ * that do not fit in 64 bits. *x is always written; after a failure it
+ * holds only the bits decoded so far.
+ */
+bare_error
+bare_get_uint(struct bare_reader *ctx, uint64_t *x)
+{
+	bare_error err = BARE_ERROR_NONE;
+	uint64_t result = 0;
+
+	for (unsigned i = 0; i < MAXVARINTSZ; i++) {
+		uint8_t b = 0;
+
+		err = ctx->read(ctx->buffer, &b, U8SZ);
+		if (err != BARE_ERROR_NONE) {
+			break;
+		}
+
+		/* Byte 10 lands at bit 63, so only its lowest bit is usable. */
+		if (i == MAXVARINTSZ - 1 && b > 1) {
+			err = BARE_ERROR_READ_FAILED;
+			break;
+		}
+
+		result |= (uint64_t)(b & 0x7f) << (7 * i);
+		if (b < 0x80) {
+			break;
+		}
+	}
+
+	*x = result;
+
+	return err;
+}
+
+/*
+ * Write x as a varint: zigzag-map the sign into bit 0, then encode the
+ * result as a varuint.
+ */
+bare_error
+bare_put_int(struct bare_writer *ctx, int64_t x)
+{
+	uint64_t ux = (uint64_t)x << 1;
+
+	if (x < 0) {
+		ux = ~ux;
+	}
+
+	return bare_put_uint(ctx, ux);
+}
+
+/*
+ * Read a varint into *x, undoing the zigzag mapping. *x is left untouched
+ * when the underlying varuint read fails.
+ */
+bare_error
+bare_get_int(struct bare_reader *ctx, int64_t *x)
+{
+	uint64_t ux = 0;
+	bare_error err = bare_get_uint(ctx, &ux);
+
+	if (err == BARE_ERROR_NONE) {
+		*x = (int64_t)(ux >> 1);
+
+		if ((ux & 1) != 0) {
+			*x = ~(*x);
+		}
+	}
+
+	return err;
+}
+
+/* Fixed-width integers travel in little-endian byte order. */
+
+bare_error
+bare_put_u8(struct bare_writer *ctx, uint8_t x)
+{
+	return ctx->write(ctx->buffer, &x, U8SZ);
+}
+
+bare_error
+bare_get_u8(struct bare_reader *ctx, uint8_t *x)
+{
+	return ctx->read(ctx->buffer, x, U8SZ);
+}
+
+bare_error
+bare_put_u16(struct bare_writer *ctx, uint16_t x)
+{
+	uint8_t buf[U16SZ];
+
+	buf[0] = (uint8_t)(x);
+	buf[1] = (uint8_t)(x >> 8);
+
+	return ctx->write(ctx->buffer, buf, U16SZ);
+}
+
+/* Read a little-endian u16; *x is written only on success. */
+bare_error
+bare_get_u16(struct bare_reader *ctx, uint16_t *x)
+{
+	uint8_t buf[U16SZ] = {0};
+	bare_error err = ctx->read(ctx->buffer, buf, U16SZ);
+
+	if (err == BARE_ERROR_NONE) {
+		*x = (uint16_t)(buf[0] | (uint16_t)buf[1] << 8);
+	}
+
+	return err;
+}
+
+bare_error
+bare_put_u32(struct bare_writer *ctx, uint32_t x)
+{
+	uint8_t buf[U32SZ];
+
+	buf[0] = (uint8_t)(x);
+	buf[1] = (uint8_t)(x >> 8);
+	buf[2] = (uint8_t)(x >> 16);
+	buf[3] = (uint8_t)(x >> 24);
+
+	return ctx->write(ctx->buffer, buf, U32SZ);
+}
+
+/*
+ * Read a little-endian u32; *x is written only on success. Each byte is
+ * widened before shifting so that a top byte >= 0x80 is never shifted
+ * into the sign bit of a promoted int.
+ */
+bare_error
+bare_get_u32(struct bare_reader *ctx, uint32_t *x)
+{
+	uint8_t buf[U32SZ] = {0};
+	bare_error err = ctx->read(ctx->buffer, buf, U32SZ);
+
+	if (err == BARE_ERROR_NONE) {
+		*x = (uint32_t)buf[0]
+		    | (uint32_t)buf[1] << 8
+		    | (uint32_t)buf[2] << 16
+		    | (uint32_t)buf[3] << 24;
+	}
+
+	return err;
+}
+
+bare_error
+bare_put_u64(struct bare_writer *ctx, uint64_t x)
+{
+	uint8_t buf[U64SZ];
+
+	for (unsigned i = 0; i < U64SZ; i++) {
+		buf[i] = (uint8_t)(x >> (8 * i));
+	}
+
+	return ctx->write(ctx->buffer, buf, U64SZ);
+}
+
+/* Read a little-endian u64; *x is written only on success. */
+bare_error
+bare_get_u64(struct bare_reader *ctx, uint64_t *x)
+{
+	uint8_t buf[U64SZ] = {0};
+	bare_error err = ctx->read(ctx->buffer, buf, U64SZ);
+
+	if (err == BARE_ERROR_NONE) {
+		uint64_t v = 0;
+
+		for (unsigned i = 0; i < U64SZ; i++) {
+			v |= (uint64_t)buf[i] << (8 * i);
+		}
+		*x = v;
+	}
+
+	return err;
+}
+
+/* Signed fixed-width integers reuse the unsigned encoders bit for bit. */
+
+bare_error
+bare_put_i8(struct bare_writer *ctx, int8_t x)
+{
+	return bare_put_u8(ctx, (uint8_t)x);
+}
+
+bare_error
+bare_get_i8(struct bare_reader *ctx, int8_t *x)
+{
+	return bare_get_u8(ctx, (uint8_t *)x);
+}
+
+bare_error
+bare_put_i16(struct bare_writer *ctx, int16_t x)
+{
+	return bare_put_u16(ctx, (uint16_t)x);
+}
+
+bare_error
+bare_get_i16(struct bare_reader *ctx, int16_t *x)
+{
+	return bare_get_u16(ctx, (uint16_t *)x);
+}
+
+bare_error
+bare_put_i32(struct bare_writer *ctx, int32_t x)
+{
+	return bare_put_u32(ctx, (uint32_t)x);
+}
+
+bare_error
+bare_get_i32(struct bare_reader *ctx, int32_t *x)
+{
+	return bare_get_u32(ctx, (uint32_t *)x);
+}
+
+bare_error
+bare_put_i64(struct bare_writer *ctx, int64_t x)
+{
+	return bare_put_u64(ctx, (uint64_t)x);
+}
+
+bare_error
+bare_get_i64(struct bare_reader *ctx, int64_t *x)
+{
+	return bare_get_u64(ctx, (uint64_t *)x);
+}
+
+/*
+ * Floats are moved as raw bit patterns through the u32/u64 codecs, so
+ * reading mirrors writing instead of copying wire bytes straight into the
+ * host representation.
+ */
+
+bare_error
+bare_put_f32(struct bare_writer *ctx, float x)
+{
+	uint32_t b;
+	memcpy(&b, &x, U32SZ);
+
+	return bare_put_u32(ctx, b);
+}
+
+bare_error
+bare_get_f32(struct bare_reader *ctx, float *x)
+{
+	uint32_t b = 0;
+	bare_error err = bare_get_u32(ctx, &b);
+
+	if (err == BARE_ERROR_NONE) {
+		memcpy(x, &b, U32SZ);
+	}
+
+	return err;
+}
+
+bare_error
+bare_put_f64(struct bare_writer *ctx, double x)
+{
+	uint64_t b;
+	memcpy(&b, &x, U64SZ);
+
+	return bare_put_u64(ctx, b);
+}
+
+bare_error
+bare_get_f64(struct bare_reader *ctx, double *x)
+{
+	uint64_t b = 0;
+	bare_error err = bare_get_u64(ctx, &b);
+
+	if (err == BARE_ERROR_NONE) {
+		memcpy(x, &b, U64SZ);
+	}
+
+	return err;
+}
+
+bare_error
+bare_put_bool(struct bare_writer *ctx, bool x)
+{
+	return bare_put_u8(ctx, (uint8_t)x);
+}
+
+/*
+ * Read one byte and store whether it is non-zero. Going through a
+ * uint8_t keeps bytes other than 0 and 1 from being written into a bool
+ * object directly.
+ */
+bare_error
+bare_get_bool(struct bare_reader *ctx, bool *x)
+{
+	uint8_t b = 0;
+	bare_error err = bare_get_u8(ctx, &b);
+
+	if (err == BARE_ERROR_NONE) {
+		*x = (b != 0);
+	}
+
+	return err;
+}
+
+bare_error
+bare_put_fixed_data(struct bare_writer *ctx, uint8_t *src, uint64_t sz)
+{
+	return ctx->write(ctx->buffer, (void *)src, sz);
+}
+
+bare_error
+bare_get_fixed_data(struct bare_reader *ctx, uint8_t *dst, uint64_t sz)
+{
+	return ctx->read(ctx->buffer, dst, sz);
+}
+
+/* Write sz as a varuint length prefix, then the sz bytes at src. */
+bare_error
+bare_put_data(struct bare_writer *ctx, uint8_t *src, uint64_t sz)
+{
+	bare_error err = bare_put_uint(ctx, sz);
+
+	if (err == BARE_ERROR_NONE) {
+		err = bare_put_fixed_data(ctx, src, sz);
+	}
+
+	return err;
+}
+
+/*
+ * Read a varuint length prefix, then that many bytes into dst.
+ *
+ * sz is the capacity of dst. A payload longer than sz is not consumed and
+ * yields BARE_ERROR_BUFFER_TOO_SMALL. The decoded length is not reported
+ * back to the caller.
+ */
+bare_error
+bare_get_data(struct bare_reader *ctx, uint8_t *dst, uint64_t sz)
+{
+	uint64_t ssz = 0;
+	bare_error err = bare_get_uint(ctx, &ssz);
+
+	if (err != BARE_ERROR_NONE) {
+		return err;
+	}
+
+	if (ssz > sz) {
+		return BARE_ERROR_BUFFER_TOO_SMALL;
+	}
+
+	return bare_get_fixed_data(ctx, dst, ssz);
+}
+
+/* Write src as length-prefixed data after checking it is valid UTF-8. */
+bare_error
+bare_put_str(struct bare_writer *ctx, char *src, uint64_t sz)
+{
+	if (!checkstr(src, sz)) {
+		return BARE_ERROR_INVALID_UTF8;
+	}
+
+	return bare_put_data(ctx, (uint8_t *)src, sz);
+}
+
+/*
+ * Read a length-prefixed string into dst (capacity sz) and validate it.
+ *
+ * Only the bytes actually received are checked; the rest of dst is never
+ * inspected. No terminating NUL is appended.
+ */
+bare_error
+bare_get_str(struct bare_reader *ctx, char *dst, uint64_t sz)
+{
+	uint64_t len = 0;
+	bare_error err = bare_get_uint(ctx, &len);
+
+	if (err != BARE_ERROR_NONE) {
+		return err;
+	}
+
+	if (len > sz) {
+		return BARE_ERROR_BUFFER_TOO_SMALL;
+	}
+
+	err = bare_get_fixed_data(ctx, (uint8_t *)dst, len);
+	if (err != BARE_ERROR_NONE) {
+		return err;
+	}
+
+	return checkstr(dst, len) ? BARE_ERROR_NONE : BARE_ERROR_INVALID_UTF8;
+}
diff --git a/git2d/bare.h b/git2d/bare.h
new file mode 100644
--- /dev/null
+++ b/git2d/bare.h
@@ -0,0 +1,80 @@
+/*-
+ * SPDX-License-Identifier: MIT
+ * SPDX-FileCopyrightText: Copyright (c) 2022 Frank Smit
+ */
+
+#ifndef BARE_H
+#define BARE_H
+
+#include <stdint.h>
+#include <stdbool.h>
+
+typedef enum {
+	BARE_ERROR_NONE,
+	BARE_ERROR_WRITE_FAILED,
+	BARE_ERROR_READ_FAILED,
+	BARE_ERROR_BUFFER_TOO_SMALL,
+	BARE_ERROR_INVALID_UTF8,
+} bare_error;
+
+/*
+ * I/O callbacks supplied by the user. Each receives the `buffer` pointer
+ * stored in the context, a source or destination, and a byte count, and
+ * returns BARE_ERROR_NONE on success.
+ */
+typedef bare_error (*bare_write_func)(void *buffer, void *src, uint64_t sz);
+typedef bare_error (*bare_read_func)(void *buffer, void *dst, uint64_t sz);
+
+struct bare_writer {
+	void *buffer;
+	bare_write_func write;
+};
+
+struct bare_reader {
+	void *buffer;
+	bare_read_func read;
+};
+
+bare_error bare_put_uint(struct bare_writer *ctx, uint64_t x); /* varuint */
+bare_error bare_get_uint(struct bare_reader *ctx, uint64_t *x); /* varuint */
+bare_error bare_put_u8(struct bare_writer *ctx, uint8_t x);
+bare_error bare_get_u8(struct bare_reader *ctx, uint8_t *x);
+bare_error bare_put_u16(struct bare_writer *ctx, uint16_t x);
+bare_error bare_get_u16(struct bare_reader *ctx, uint16_t *x);
+bare_error bare_put_u32(struct bare_writer *ctx, uint32_t x);
+bare_error bare_get_u32(struct bare_reader *ctx, uint32_t *x);
+bare_error bare_put_u64(struct bare_writer *ctx, uint64_t x);
+bare_error bare_get_u64(struct bare_reader *ctx, uint64_t *x);
+
+bare_error bare_put_int(struct bare_writer *ctx, int64_t x); /* varint */
+bare_error bare_get_int(struct bare_reader *ctx, int64_t *x); /* varint */
+bare_error bare_put_i8(struct bare_writer *ctx, int8_t x);
+bare_error bare_get_i8(struct bare_reader *ctx, int8_t *x);
+bare_error bare_put_i16(struct bare_writer *ctx, int16_t x);
+bare_error bare_get_i16(struct bare_reader *ctx, int16_t *x);
+bare_error bare_put_i32(struct bare_writer *ctx, int32_t x);
+bare_error bare_get_i32(struct bare_reader *ctx, int32_t *x);
+bare_error bare_put_i64(struct bare_writer *ctx, int64_t x);
+bare_error bare_get_i64(struct bare_reader *ctx, int64_t *x);
+
+bare_error bare_put_f32(struct bare_writer *ctx, float x);
+bare_error bare_get_f32(struct bare_reader *ctx, float *x);
+bare_error bare_put_f64(struct bare_writer *ctx, double x);
+bare_error bare_get_f64(struct bare_reader *ctx, double *x);
+
+bare_error bare_put_bool(struct bare_writer *ctx, bool x);
+bare_error bare_get_bool(struct bare_reader *ctx, bool *x);
+
+/*
+ * bare_get_data and bare_get_str take the capacity of dst in sz, read a
+ * length prefix first, and fail with BARE_ERROR_BUFFER_TOO_SMALL when the
+ * payload would not fit.
+ */
+bare_error bare_put_fixed_data(struct bare_writer *ctx, uint8_t *src, uint64_t sz);
+bare_error bare_get_fixed_data(struct bare_reader *ctx, uint8_t *dst, uint64_t sz);
+bare_error bare_put_data(struct bare_writer *ctx, uint8_t *src, uint64_t sz);
+bare_error bare_get_data(struct bare_reader *ctx, uint8_t *dst, uint64_t sz);
+bare_error bare_put_str(struct bare_writer *ctx, char *src, uint64_t sz);
+bare_error bare_get_str(struct bare_reader *ctx, char *dst, uint64_t sz);
+
+#endif /* BARE_H */
diff --git a/git2d/utf8.h b/git2d/utf8.h
new file mode 100644
index 0000000000000000000000000000000000000000..894cbd508dd5acc90a3129592f607b87e420295e
--- /dev/null
+++ b/git2d/utf8.h
@@ -0,0 +1,74 @@
+/*-
+ * SPDX-License-Identifier: Unlicense
+ * SPDX-FileContributor: Chris Wellons
+ *
+ * From: https://nullprogram.com/blog/2017/10/06/
+ */
+
+#ifndef UTF8_H
+#define UTF8_H
+
+#include <stdint.h>
+
+/*
+ * Decode the next character, C, from BUF, reporting errors in E.
+ *
+ * Since this is a branchless decoder, four bytes will be read from the
+ * buffer regardless of the actual length of the next character. This
+ * means the buffer _must_ have at least three bytes of zero padding
+ * following the end of the data stream.
+ *
+ * Errors are reported in E, which will be non-zero if the parsed
+ * character was somehow invalid: invalid byte sequence, non-canonical
+ * encoding, or a surrogate half.
+ *
+ * The function returns a pointer to the next character. When an error
+ * occurs, this pointer will be a guess that depends on the particular
+ * error, but it will always advance at least one byte.
+ */
+static void *
+utf8_decode(void *buf, uint32_t *c, int *e)
+{
+	static const char lengths[] = {
+		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+		0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 3, 3, 4, 0
+	};
+	static const int masks[] = {0x00, 0x7f, 0x1f, 0x0f, 0x07};
+	static const uint32_t mins[] = {4194304, 0, 128, 2048, 65536};
+	static const int shiftc[] = {0, 18, 12, 6, 0};
+	static const int shifte[] = {0, 6, 4, 2, 0};
+
+	uint8_t *s = buf;
+	int len = lengths[s[0] >> 3];
+
+	/*
+	 * Compute the pointer to the next character early so that the next
+	 * iteration can start working on the next character. Neither Clang
+	 * nor GCC figure out this reordering on their own.
+	 */
+	uint8_t *next = s + len + !len;
+
+	/*
+	 * Assume a four-byte character and load four bytes. Unused bits are
+	 * shifted out.
+	 */
+	*c = (uint32_t)(s[0] & masks[len]) << 18;
+	*c |= (uint32_t)(s[1] & 0x3f) << 12;
+	*c |= (uint32_t)(s[2] & 0x3f) << 6;
+	*c |= (uint32_t)(s[3] & 0x3f) << 0;
+	*c >>= shiftc[len];
+
+	/* Accumulate the various error conditions. */
+	*e = (*c < mins[len]) << 6; /* non-canonical encoding */
+	*e |= ((*c >> 11) == 0x1b) << 7; /* surrogate half? */
+	*e |= (*c > 0x10FFFF) << 8; /* out of range? */
+	*e |= (s[1] & 0xc0) >> 2;
+	*e |= (s[2] & 0xc0) >> 4;
+	*e |= (s[3]) >> 6;
+	*e ^= 0x2a; /* top two bits of each tail byte correct? */
+	*e >>= shifte[len];
+
+	return next;
+}
+
+#endif
-- 
2.48.1