Lindenii Project Forge
Warning: Due to various recent migrations, viewing non-HEAD refs may be broken.
/src/utf8_size.c (raw)
#include <stdint.h>
#include <stddef.h>
#include "unicode.h"
struct {
uint8_t mask;
uint8_t result;
int octets;
} sizes[] = {
{ 0x80, 0x00, 1 },
{ 0xE0, 0xC0, 2 },
{ 0xF0, 0xE0, 3 },
{ 0xF8, 0xF0, 4 },
{ 0xFC, 0xF8, 5 },
{ 0xFE, 0xF8, 6 },
{ 0x80, 0x80, -1 },
};
int utf8_size(const char *s) {
uint8_t c = (uint8_t)*s;
for (size_t i = 0; i < sizeof(sizes) / 2; ++i) {
if ((c & sizes[i].mask) == sizes[i].result) {
return sizes[i].octets;
}
}
return -1;
}