Hi… I am well aware that this diff view is very suboptimal. It will be fixed when the refactored server comes along!
git2d: Remove UTF-8 checks
/*-
 * SPDX-License-Identifier: MIT
 * SPDX-FileCopyrightText: Copyright (c) 2022 Frank Smit <https://61924.nl/>
 */
#include <string.h>
#include <stdbool.h>
#include "bare.h"
#include "utf8.h"
/* Evaluate X in a void context so an otherwise unused name draws no warning. */
#define UNUSED(x) (void)(x)
/* Byte counts used by the readers and writers in this file. */
enum {
/* Size passed to the callbacks for single-byte transfers. */
U8SZ = 1,
/* Size of a 16-bit fixed-width value. */
U16SZ = 2,
/* Size of a 32-bit fixed-width value (also used for f32). */
U32SZ = 4,
/* Size of a 64-bit fixed-width value (also used for f64). */
U64SZ = 8,
/* Capacity of the scratch buffer bare_put_uint encodes into. */
MAXVARINTSZ = 10,
};
/*
 * Report whether the SZ bytes at X form valid UTF-8.
 *
 * A NULL pointer or a zero length is treated as valid.  Every code point
 * is decoded with utf8_decode(); the first decode error makes the whole
 * string invalid.
 *
 * utf8_decode() always loads four bytes, so the input is decoded in place
 * only while at least four bytes remain.  The final (fewer than four)
 * bytes are copied into a zero-padded scratch buffer before decoding so
 * that nothing past the end of X is ever read.
 *
 * The position is advanced by the number of bytes each code point really
 * occupied, so every byte of the input is examined exactly once.
 */
static bool
checkstr(const char *x, uint64_t sz)
{
	if (x == NULL || sz == 0) {
		return true;
	}

	int err = 0;
	uint32_t cp = 0;
	const char *cur = x;
	uint64_t left = sz;

	/* In-place decoding: four readable bytes are guaranteed here. */
	while (left >= 4) {
		const char *next = utf8_decode((void *)cur, &cp, &err);
		if (err != 0) {
			return false;
		}
		/* The decoder advances between one and four bytes. */
		left -= (uint64_t)(next - cur);
		cur = next;
	}

	/* Tail: decode from a padded copy, one code point at a time. */
	while (left > 0) {
		char pad[4] = {0, 0, 0, 0};
		memcpy(pad, cur, (size_t)left);

		const char *next = utf8_decode(pad, &cp, &err);
		if (err != 0) {
			return false;
		}

		uint64_t used = (uint64_t)(next - pad);
		if (used > left) {
			/* Sequence would extend past the end of the input. */
			return false;
		}
		cur += used;
		left -= used;
	}

	return true;
}
/*
 * Write X as a variable-length unsigned integer: seven payload bits per
 * byte, least-significant group first, with the top bit of a byte set
 * whenever another byte follows.
 *
 * Returns the result of the writer callback for the encoded bytes.
 */
bare_error
bare_put_uint(struct bare_writer *ctx, uint64_t x)
{
	uint8_t encoded[MAXVARINTSZ];
	uint64_t len = 0;

	/* Emit continuation bytes while more than seven bits remain. */
	for (; x >= 0x80; x >>= 7) {
		encoded[len] = (uint8_t)x | 0x80;
		len++;
	}

	/* Final byte: top bit clear. */
	encoded[len] = (uint8_t)x;
	len++;

	return ctx->write(ctx->buffer, encoded, len);
}
/*
 * Read a variable-length unsigned integer into *X.
 *
 * Bytes are fetched one at a time through the reader callback; each
 * contributes its low seven bits, least-significant group first.  Reading
 * stops at the first byte whose top bit is clear, at the first read error,
 * or after ten bytes.
 *
 * *X is always written, even when a read fails (it then holds the bits
 * gathered so far).  Returns the status of the last read performed.
 *
 * Note: if all ten bytes carry the continuation bit the loop simply ends
 * and the status of the tenth read is returned; bits of the tenth byte
 * that do not fit in 64 bits are shifted out.
 */
bare_error
bare_get_uint(struct bare_reader *ctx, uint64_t *x)
{
	bare_error status = BARE_ERROR_NONE;
	uint64_t acc = 0;
	uint8_t nread = 0;

	while (nread < 10) {
		uint8_t octet;

		status = ctx->read(ctx->buffer, &octet, U8SZ);
		if (status != BARE_ERROR_NONE) {
			break;
		}

		acc |= (uint64_t)(octet & 0x7f) << (7 * nread);
		if (octet < 0x80) {
			/* Top bit clear: this was the last byte. */
			break;
		}
		nread++;
	}

	*x = acc;
	return status;
}
/*
 * Write X as a variable-length signed integer.
 *
 * The value is mapped onto an unsigned one (shifted left by one bit and,
 * for negative inputs, bitwise inverted) so that the sign ends up in the
 * lowest bit, then handed to bare_put_uint().
 */
bare_error
bare_put_int(struct bare_writer *ctx, int64_t x)
{
	const uint64_t doubled = (uint64_t)x << 1;

	return bare_put_uint(ctx, x < 0 ? ~doubled : doubled);
}
/*
 * Read a variable-length signed integer into *X.
 *
 * The unsigned value is fetched with bare_get_uint(); its lowest bit
 * selects whether the remaining bits are stored as-is or bitwise
 * inverted.  *X is left untouched when the read fails.
 */
bare_error
bare_get_int(struct bare_reader *ctx, int64_t *x)
{
	uint64_t raw;
	const bare_error status = bare_get_uint(ctx, &raw);

	if (status != BARE_ERROR_NONE) {
		return status;
	}

	const int64_t half = (int64_t)(raw >> 1);
	*x = (raw & 1) != 0 ? ~half : half;
	return status;
}
/* Write the single byte X; returns the writer callback's result. */
bare_error
bare_put_u8(struct bare_writer *ctx, uint8_t x)
{
	const uint8_t octet = x;

	return ctx->write(ctx->buffer, &octet, U8SZ);
}
/* Read a single byte into *X; returns the reader callback's result. */
bare_error
bare_get_u8(struct bare_reader *ctx, uint8_t *x)
{
	const bare_error status = ctx->read(ctx->buffer, x, U8SZ);

	return status;
}
/* Write X as two bytes, low byte first. */
bare_error
bare_put_u16(struct bare_writer *ctx, uint16_t x)
{
	uint8_t le[U16SZ];

	le[0] = (uint8_t)x;
	le[1] = (uint8_t)(x >> 8);
	return ctx->write(ctx->buffer, le, U16SZ);
}
/*
 * Read two bytes into *X and interpret them low byte first.
 *
 * The bytes are read straight into *X and then reassembled in place, so
 * the result does not depend on host byte order.  When the read fails the
 * reassembly is skipped and the error is returned.
 */
bare_error
bare_get_u16(struct bare_reader *ctx, uint16_t *x)
{
	const bare_error status = ctx->read(ctx->buffer, x, U16SZ);

	if (status != BARE_ERROR_NONE) {
		return status;
	}

	/* Capture both bytes before overwriting the storage they live in. */
	const uint8_t *raw = (const uint8_t *)x;
	const uint16_t lo = raw[0];
	const uint16_t hi = raw[1];

	*x = (uint16_t)(lo | (hi << 8));
	return status;
}
/* Write X as four bytes, least-significant byte first. */
bare_error
bare_put_u32(struct bare_writer *ctx, uint32_t x)
{
	uint8_t le[U32SZ];

	for (int i = 0; i < U32SZ; i++) {
		le[i] = (uint8_t)(x >> (8 * i));
	}
	return ctx->write(ctx->buffer, le, U32SZ);
}
/*
 * Read four bytes into *X and interpret them least-significant byte first.
 *
 * The bytes are read straight into *X and then reassembled in place, so
 * the result does not depend on host byte order.  When the read fails the
 * reassembly is skipped and the error is returned.
 */
bare_error
bare_get_u32(struct bare_reader *ctx, uint32_t *x)
{
	bare_error err = ctx->read(ctx->buffer, x, U32SZ);
	if (err == BARE_ERROR_NONE) {
		const uint8_t *raw = (const uint8_t *)x;

		/*
		 * Widen each byte to uint32_t BEFORE shifting.  Shifting the
		 * int-promoted byte by 24 would move a set top bit into the
		 * sign bit of int, which is undefined behaviour.
		 */
		*x = (uint32_t)raw[0]
			| (uint32_t)raw[1] << 8
			| (uint32_t)raw[2] << 16
			| (uint32_t)raw[3] << 24;
	}
	return err;
}
/* Write X as eight bytes, least-significant byte first. */
bare_error
bare_put_u64(struct bare_writer *ctx, uint64_t x)
{
	uint8_t le[U64SZ];

	for (int i = 0; i < U64SZ; i++) {
		le[i] = (uint8_t)(x >> (8 * i));
	}
	return ctx->write(ctx->buffer, le, U64SZ);
}
/*
 * Read eight bytes into *X and interpret them least-significant byte
 * first.
 *
 * The bytes are read straight into *X and then reassembled, so the result
 * does not depend on host byte order.  When the read fails the reassembly
 * is skipped and the error is returned.
 */
bare_error
bare_get_u64(struct bare_reader *ctx, uint64_t *x)
{
	const bare_error status = ctx->read(ctx->buffer, x, U64SZ);

	if (status == BARE_ERROR_NONE) {
		const uint8_t *raw = (const uint8_t *)x;
		uint64_t value = 0;

		/* Accumulate in a local so *x is only overwritten at the end. */
		for (int i = 0; i < U64SZ; i++) {
			value |= (uint64_t)raw[i] << (8 * i);
		}
		*x = value;
	}
	return status;
}
/* Write the signed byte X by converting it to uint8_t and using bare_put_u8(). */
bare_error
bare_put_i8(struct bare_writer *ctx, int8_t x)
{
	const uint8_t as_unsigned = (uint8_t)x;

	return bare_put_u8(ctx, as_unsigned);
}
/* Read one byte into the storage of *X via bare_get_u8(). */
bare_error
bare_get_i8(struct bare_reader *ctx, int8_t *x)
{
	uint8_t *storage = (uint8_t *)x;

	return bare_get_u8(ctx, storage);
}
/* Write the signed 16-bit X by converting it to uint16_t and using bare_put_u16(). */
bare_error
bare_put_i16(struct bare_writer *ctx, int16_t x)
{
	const uint16_t as_unsigned = (uint16_t)x;

	return bare_put_u16(ctx, as_unsigned);
}
/* Read a 16-bit value into the storage of *X via bare_get_u16(). */
bare_error
bare_get_i16(struct bare_reader *ctx, int16_t *x)
{
	uint16_t *storage = (uint16_t *)x;

	return bare_get_u16(ctx, storage);
}
/* Write the signed 32-bit X by converting it to uint32_t and using bare_put_u32(). */
bare_error
bare_put_i32(struct bare_writer *ctx, int32_t x)
{
	const uint32_t as_unsigned = (uint32_t)x;

	return bare_put_u32(ctx, as_unsigned);
}
/* Read a 32-bit value into the storage of *X via bare_get_u32(). */
bare_error
bare_get_i32(struct bare_reader *ctx, int32_t *x)
{
	uint32_t *storage = (uint32_t *)x;

	return bare_get_u32(ctx, storage);
}
/* Write the signed 64-bit X by converting it to uint64_t and using bare_put_u64(). */
bare_error
bare_put_i64(struct bare_writer *ctx, int64_t x)
{
	const uint64_t as_unsigned = (uint64_t)x;

	return bare_put_u64(ctx, as_unsigned);
}
/* Read a 64-bit value into the storage of *X via bare_get_u64(). */
bare_error
bare_get_i64(struct bare_reader *ctx, int64_t *x)
{
	uint64_t *storage = (uint64_t *)x;

	return bare_get_u64(ctx, storage);
}
/*
 * Write the float X: its object representation is copied into a 32-bit
 * integer, which is then written with bare_put_u32().
 */
bare_error
bare_put_f32(struct bare_writer *ctx, float x)
{
	uint32_t bits;

	memcpy(&bits, &x, sizeof bits);
	return bare_put_u32(ctx, bits);
}
/*
 * Read a float into *X.
 *
 * bare_put_f32() emits the value's 32-bit pattern least-significant byte
 * first, so the four bytes are reassembled in that order before being
 * copied into *X.  Copying the raw bytes directly would only be correct on
 * a little-endian host.
 *
 * Returns the reader callback's result.  *X is written in every case; on
 * failure its value is built from whatever the callback left in the
 * zero-initialised scratch buffer and must not be relied upon.
 */
bare_error
bare_get_f32(struct bare_reader *ctx, float *x)
{
	uint8_t raw[U32SZ] = {0};
	bare_error err = ctx->read(ctx->buffer, raw, U32SZ);

	/* Widen before shifting so no bit is shifted into an int sign bit. */
	uint32_t bits = (uint32_t)raw[0]
		| (uint32_t)raw[1] << 8
		| (uint32_t)raw[2] << 16
		| (uint32_t)raw[3] << 24;

	memcpy(x, &bits, sizeof bits);
	return err;
}
/*
 * Write the double X: its object representation is copied into a 64-bit
 * integer, which is then written with bare_put_u64().
 */
bare_error
bare_put_f64(struct bare_writer *ctx, double x)
{
	uint64_t bits;

	memcpy(&bits, &x, sizeof bits);
	return bare_put_u64(ctx, bits);
}
/*
 * Read a double into *X.
 *
 * bare_put_f64() emits the value's 64-bit pattern least-significant byte
 * first, so the eight bytes are reassembled in that order before being
 * copied into *X.  Copying the raw bytes directly would only be correct on
 * a little-endian host.
 *
 * Returns the reader callback's result.  *X is written in every case; on
 * failure its value is built from whatever the callback left in the
 * zero-initialised scratch buffer and must not be relied upon.
 */
bare_error
bare_get_f64(struct bare_reader *ctx, double *x)
{
	uint8_t raw[U64SZ] = {0};
	bare_error err = ctx->read(ctx->buffer, raw, U64SZ);

	uint64_t bits = 0;
	for (int i = 0; i < U64SZ; i++) {
		bits |= (uint64_t)raw[i] << (8 * i);
	}

	memcpy(x, &bits, sizeof bits);
	return err;
}
/* Write X as a single byte (the result of converting the bool to uint8_t). */
bare_error
bare_put_bool(struct bare_writer *ctx, bool x)
{
	const uint8_t octet = (uint8_t)x;

	return bare_put_u8(ctx, octet);
}
/*
 * Read one byte and store its truth value in *X.
 *
 * The byte is read into a scratch uint8_t rather than directly into the
 * bool object: writing an arbitrary byte value into a bool through a
 * uint8_t pointer can leave it holding something that is neither true nor
 * false.  Any non-zero byte is stored as true.
 *
 * Returns the result of bare_get_u8().  *X is written in every case; on
 * failure it reflects whatever the read left in the zero-initialised
 * scratch byte and must not be relied upon.
 */
bare_error
bare_get_bool(struct bare_reader *ctx, bool *x)
{
	uint8_t raw = 0;
	bare_error err = bare_get_u8(ctx, &raw);

	*x = (raw != 0);
	return err;
}
/* Write the SZ bytes at SRC verbatim, with no length prefix. */
bare_error
bare_put_fixed_data(struct bare_writer *ctx, const uint8_t *src, uint64_t sz)
{
	const void *payload = src;

	return ctx->write(ctx->buffer, payload, sz);
}
/* Read exactly SZ bytes into DST, with no length prefix expected. */
bare_error
bare_get_fixed_data(struct bare_reader *ctx, uint8_t *dst, uint64_t sz)
{
	void *out = dst;

	return ctx->read(ctx->buffer, out, sz);
}
/*
 * Write a length-prefixed byte string: SZ as a variable-length unsigned
 * integer, followed by the SZ bytes at SRC.  The payload is not written
 * if writing the length fails.
 */
bare_error
bare_put_data(struct bare_writer *ctx, const uint8_t *src, uint64_t sz)
{
	const bare_error status = bare_put_uint(ctx, sz);

	if (status != BARE_ERROR_NONE) {
		return status;
	}
	return bare_put_fixed_data(ctx, src, sz);
}
/*
 * Read a length-prefixed byte string into DST, which has room for SZ
 * bytes.
 *
 * The length prefix is read first.  If it exceeds SZ,
 * BARE_ERROR_BUFFER_TOO_SMALL is returned and the payload is not read.
 * Otherwise exactly that many bytes are read into DST.  The decoded
 * length itself is not reported back to the caller.
 */
bare_error
bare_get_data(struct bare_reader *ctx, uint8_t *dst, uint64_t sz)
{
	uint64_t encoded_len = 0;
	const bare_error status = bare_get_uint(ctx, &encoded_len);

	if (status != BARE_ERROR_NONE) {
		return status;
	}
	if (encoded_len > sz) {
		return BARE_ERROR_BUFFER_TOO_SMALL;
	}
	return bare_get_fixed_data(ctx, dst, encoded_len);
}
/*
 * Write the SZ bytes at SRC as a length-prefixed string.
 *
 * The bytes are first passed to checkstr(); if it rejects them nothing is
 * written and BARE_ERROR_INVALID_UTF8 is returned.  Otherwise the result
 * of bare_put_data() is returned.
 */
bare_error
bare_put_str(struct bare_writer *ctx, const char *src, uint64_t sz)
{
	const bool valid = checkstr(src, sz);

	if (valid) {
		return bare_put_data(ctx, (uint8_t *)src, sz);
	}
	return BARE_ERROR_INVALID_UTF8;
}
/*
 * Read a length-prefixed string into DST, which has room for SZ bytes.
 *
 * This is bare_get_data() with a char buffer: the same length check and
 * error codes apply.  No NUL terminator is appended and the decoded
 * length is not reported.
 *
 * The bytes are not checked for UTF-8 validity.  A check over SZ bytes
 * would be wrong here, because SZ is the capacity of DST rather than the
 * number of bytes actually received, so it would inspect bytes that were
 * never written.
 */
bare_error
bare_get_str(struct bare_reader *ctx, char *dst, uint64_t sz)
{
	return bare_get_data(ctx, (uint8_t *)dst, sz);
}
/*-
* SPDX-License-Identifier: MIT
* SPDX-FileCopyrightText: Copyright (c) 2022 Frank Smit <https://61924.nl/>
*/
#ifndef BARE_H
#define BARE_H
#include <stdint.h>
#include <stdbool.h>
/* Status code returned by every bare_put_* / bare_get_* function. */
typedef enum {
/* Success; the functions in this library compare against this value. */
BARE_ERROR_NONE,
/* Not produced by the library code itself; presumably for write callbacks to report failure - confirm. */
BARE_ERROR_WRITE_FAILED,
/* Not produced by the library code itself; presumably for read callbacks to report failure - confirm. */
BARE_ERROR_READ_FAILED,
/* bare_get_data: the encoded length exceeds the caller's buffer size. */
BARE_ERROR_BUFFER_TOO_SMALL,
/* String bytes are not valid UTF-8 (see the note on the _str functions at the end of this header). */
BARE_ERROR_INVALID_UTF8,
} bare_error;
/*
 * I/O callbacks supplied by the user.  Each is called with the context's
 * `buffer` pointer, a source/destination pointer and a byte count; the
 * library returns the callback's result to its own caller.
 */
typedef bare_error (*bare_write_func)(void *buffer, const void *src, uint64_t sz);
typedef bare_error (*bare_read_func)(void *buffer, void *dst, uint64_t sz);
/* Output context handed to every bare_put_* function. */
struct bare_writer {
/* Opaque pointer passed as the first argument of `write`. */
void *buffer;
/* Callback that receives the encoded bytes. */
bare_write_func write;
};
/* Input context handed to every bare_get_* function. */
struct bare_reader {
/* Opaque pointer passed as the first argument of `read`. */
void *buffer;
/* Callback that supplies the requested number of encoded bytes. */
bare_read_func read;
};
/*
 * Every function returns BARE_ERROR_NONE on success; put functions encode
 * their argument through ctx->write, get functions decode through
 * ctx->read into the pointed-to object.
 */

/* Variable-length unsigned integer: 7 bits per byte, low group first. */
bare_error bare_put_uint(struct bare_writer *ctx, uint64_t x); /* varuint */
bare_error bare_get_uint(struct bare_reader *ctx, uint64_t *x); /* varuint */
/* Fixed-width unsigned integers, least-significant byte first. */
bare_error bare_put_u8(struct bare_writer *ctx, uint8_t x);
bare_error bare_get_u8(struct bare_reader *ctx, uint8_t *x);
bare_error bare_put_u16(struct bare_writer *ctx, uint16_t x);
bare_error bare_get_u16(struct bare_reader *ctx, uint16_t *x);
bare_error bare_put_u32(struct bare_writer *ctx, uint32_t x);
bare_error bare_get_u32(struct bare_reader *ctx, uint32_t *x);
bare_error bare_put_u64(struct bare_writer *ctx, uint64_t x);
bare_error bare_get_u64(struct bare_reader *ctx, uint64_t *x);
/* Variable-length signed integer, layered on the varuint encoding. */
bare_error bare_put_int(struct bare_writer *ctx, int64_t x); /* varint */
bare_error bare_get_int(struct bare_reader *ctx, int64_t *x); /* varint */
/* Fixed-width signed integers; implemented via the unsigned functions of the same width. */
bare_error bare_put_i8(struct bare_writer *ctx, int8_t x);
bare_error bare_get_i8(struct bare_reader *ctx, int8_t *x);
bare_error bare_put_i16(struct bare_writer *ctx, int16_t x);
bare_error bare_get_i16(struct bare_reader *ctx, int16_t *x);
bare_error bare_put_i32(struct bare_writer *ctx, int32_t x);
bare_error bare_get_i32(struct bare_reader *ctx, int32_t *x);
bare_error bare_put_i64(struct bare_writer *ctx, int64_t x);
bare_error bare_get_i64(struct bare_reader *ctx, int64_t *x);
/* Floating point: 4 bytes for float, 8 bytes for double. */
bare_error bare_put_f32(struct bare_writer *ctx, float x);
bare_error bare_get_f32(struct bare_reader *ctx, float *x);
bare_error bare_put_f64(struct bare_writer *ctx, double x);
bare_error bare_get_f64(struct bare_reader *ctx, double *x);
/* Boolean, one byte. */
bare_error bare_put_bool(struct bare_writer *ctx, bool x);
bare_error bare_get_bool(struct bare_reader *ctx, bool *x);
/* Raw bytes of a length both sides already know: exactly sz bytes, no prefix. */
bare_error bare_put_fixed_data(struct bare_writer *ctx, const uint8_t *src, uint64_t sz);
bare_error bare_get_fixed_data(struct bare_reader *ctx, uint8_t *dst, uint64_t sz);
/*
 * Length-prefixed bytes: a varuint length followed by the payload.  For
 * the get side, sz is the capacity of dst; BARE_ERROR_BUFFER_TOO_SMALL is
 * returned when the encoded length exceeds it.
 */
bare_error bare_put_data(struct bare_writer *ctx, const uint8_t *src, uint64_t sz);
bare_error bare_get_data(struct bare_reader *ctx, uint8_t *dst, uint64_t sz);
/* Strings: encoded like length-prefixed data; sz counts bytes. */
bare_error bare_put_str(struct bare_writer *ctx, const char *src, uint64_t sz);
bare_error bare_get_str(struct bare_reader *ctx, char *dst, uint64_t sz);
/* Note that the _str implementation here does not check for UTF-8 validity. */
#endif /* BARE_H */
/*-
 * SPDX-License-Identifier: AGPL-3.0-only
 * SPDX-FileCopyrightText: Copyright (c) 2025 Runxi Yu <https://runxiyu.org>
 */
#include "utf8.h"

/*
 * utf8.h defines utf8_decode() as a plain `inline` function.  Declaring it
 * `extern inline` here makes this translation unit the one that provides
 * the function's external definition, for calls that are not inlined.
 *
 * The directive and the declaration must sit on separate lines: anything
 * following #include on the same line is not compiled as a declaration.
 */
extern inline void *utf8_decode(void *buf, uint32_t *c, int *e);
/*-
* SPDX-License-Identifier: Unlicense
* SPDX-FileContributor: Chris Wellons <wellons@nullprogram.com>
*
* From: https://nullprogram.com/blog/2017/10/06/
*/
#ifndef UTF8_H
#define UTF8_H
#include <stdint.h>
/*
 * Decode the next character, C, from BUF, reporting errors in E.
 *
 * Since this is a branchless decoder, four bytes will be read from the
 * buffer regardless of the actual length of the next character. This
 * means the buffer _must_ have at least three bytes of zero padding
 * following the end of the data stream.
 *
 * Errors are reported in E, which will be non-zero if the parsed
 * character was somehow invalid: invalid byte sequence, non-canonical
 * encoding, or a surrogate half.
 *
 * The function returns a pointer to the next character. When an error
 * occurs, this pointer will be a guess that depends on the particular
 * error, but it will always advance at least one byte.
 *
 * Both *C and *E are overwritten on every call.
 */
inline void *
utf8_decode(void *buf, uint32_t *c, int *e)
{
/*
 * Sequence length indexed by the top five bits of the lead byte.
 * 0 marks a byte that cannot start a sequence.
 */
static const char lengths[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 3, 3, 4, 0
};
/* Payload bits of the lead byte, indexed by sequence length. */
static const int masks[] = {0x00, 0x7f, 0x1f, 0x0f, 0x07};
/*
 * Smallest code point each length may encode.  The entry for length 0
 * is larger than any value the decode below can produce, so an invalid
 * lead byte always trips the non-canonical check.
 */
static const uint32_t mins[] = {4194304, 0, 128, 2048, 65536};
/* Right shift that drops the code point bits loaded from unused bytes. */
static const int shiftc[] = {0, 18, 12, 6, 0};
/* Right shift that drops the tail-byte checks for unused bytes. */
static const int shifte[] = {0, 6, 4, 2, 0};
uint8_t *s = buf;
int len = lengths[s[0] >> 3];
/*
 * Compute the pointer to the next character early so that the next
 * iteration can start working on the next character. Neither Clang
 * nor GCC figure out this reordering on their own.
 */
/* `!len` makes an invalid lead byte (len == 0) still advance one byte. */
uint8_t *next = s + len + !len;
/*
 * Assume a four-byte character and load four bytes. Unused bits are
 * shifted out.
 */
*c = (uint32_t)(s[0] & masks[len]) << 18;
*c |= (uint32_t)(s[1] & 0x3f) << 12;
*c |= (uint32_t)(s[2] & 0x3f) << 6;
*c |= (uint32_t)(s[3] & 0x3f) << 0;
*c >>= shiftc[len];
/* Accumulate the various error conditions. */
*e = (*c < mins[len]) << 6; /* non-canonical encoding */
*e |= ((*c >> 11) == 0x1b) << 7; /* surrogate half? */
*e |= (*c > 0x10FFFF) << 8; /* out of range? */
/*
 * Pack the top two bits of each tail byte into bits 5..0.  Each pair
 * should be binary 10, i.e. 0x2a in total, so the XOR below clears
 * them exactly when all three tail bytes are well-formed.
 */
*e |= (s[1] & 0xc0) >> 2;
*e |= (s[2] & 0xc0) >> 4;
*e |= (s[3] ) >> 6;
*e ^= 0x2a; /* top two bits of each tail byte correct? */
/* Discard the checks for tail bytes that are not part of this character. */
*e >>= shifte[len];
return next;
}
#endif