Lindenii Project Forge
Initial commit
build
#ifndef _SCDOC_STRING_H
#define _SCDOC_STRING_H
#include <stddef.h>
#include <stdint.h>

/**
 * Growable, NUL-terminated byte string that is filled one code point at a
 * time.
 */
struct str {
	char *str;        // heap buffer; NULL until the first character is appended
	size_t len, size; // bytes in use (excluding the NUL) / bytes allocated
};

typedef struct str str_t;

/**
 * Allocates a new, empty string. Returns NULL if the allocation fails.
 * Release it with str_free().
 */
str_t *str_create(void);

/**
 * Frees the string and its buffer. Passing NULL is a no-op.
 */
void str_free(str_t *str);

/**
 * Truncates the string to zero length without releasing its buffer.
 */
void str_reset(str_t *str);

/**
 * Appends the UTF-8 encoding of ch. Returns the number of bytes appended, or
 * -1 on failure.
 */
int str_append_ch(str_t *str, uint32_t ch);

#endif
#ifndef _SCDOC_UNICODE_H
#define _SCDOC_UNICODE_H
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Technically UTF-8 supports up to 6 byte codepoints, but Unicode itself
 * doesn't really bother with more than 4.
 */
#define UTF8_MAX_SIZE 4

/* Value returned by the decoding functions when no character can be produced. */
#define UTF8_INVALID 0x80

/**
 * Decodes the character that *str points at and moves *str forward past the
 * bytes that were consumed. Yields UTF8_INVALID when *str does not point at a
 * valid lead byte.
 */
uint32_t utf8_decode(const char **str);

/**
 * Stores the UTF-8 encoding of ch at str (no NUL terminator is written) and
 * returns how many bytes were stored.
 */
size_t utf8_encode(char *str, uint32_t ch);

/**
 * Reports how many bytes the character starting at str occupies, judged from
 * its first byte alone, or -1 when that byte cannot start a character.
 */
int utf8_size(const char *str);

/**
 * Reports how many bytes are needed to encode ch as UTF-8.
 */
size_t utf8_chsize(uint32_t ch);

/**
 * Reads one character from f. Yields UTF8_INVALID at end of file or when the
 * character is cut short.
 */
uint32_t utf8_fgetch(FILE *f);

/**
 * Encodes ch and writes it to f, returning the number of bytes written.
 */
size_t utf8_fputch(FILE *f, uint32_t ch);

#endif
#ifndef _SCDOC_PARSER_H
#define _SCDOC_PARSER_H
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>

/**
 * Parser state: the streams being read and written plus the position that is
 * reported in error messages.
 */
struct parser {
	FILE *input, *output;
	int line, col;
};

/**
 * Prints err together with the current line and column to stderr, closes both
 * parser streams and exits the process with status 1.
 */
void parser_fatal(struct parser *parser, const char *err);

/**
 * Reads the next character from the parser's input and updates the tracked
 * line and column.
 */
uint32_t parser_getch(struct parser *parser);

/**
 * Writes the roff request ".cmd" to the parser's output, followed by each
 * argument wrapped in double quotes. The variadic arguments are C strings and
 * the list must end with NULL. Returns a count of the bytes emitted.
 */
int roff_macro(struct parser *p, char *cmd, ...);

#endif
# TODO: Just use a makefile
project(
	'scdoc',
	'c',
	license: 'MIT',
	meson_version: '>=0.43.0',
	default_options: [
		'c_std=c99',
		'warning_level=2',
		'werror=true',
	],
)

add_project_arguments('-Wno-unused-parameter', language: 'c')

# Every translation unit that is linked into the scdoc binary.
scdoc_sources = [
	'src/main.c',
	'src/string.c',
	'src/utf8_chsize.c',
	'src/utf8_decode.c',
	'src/utf8_encode.c',
	'src/utf8_fgetch.c',
	'src/utf8_fputch.c',
	'src/utf8_size.c',
	'src/util.c',
]

# Project headers are looked up in include/.
scdoc_includes = include_directories('include')

executable(
	'scdoc',
	scdoc_sources,
	include_directories: scdoc_includes
)
scdoc(5)

# NAME

scdoc - syntax description for scdoc markup language

# DESCRIPTION

scdoc is a tool designed to make the process of writing man pages more friendly. It converts scdoc files into roff macros, which can then be converted to man pages or a number of other formats. The syntax is inspired by, but not directly taken from, markdown. Input files *must* use the UTF-8 encoding.

# PREAMBLE

Each scdoc file must begin with the following preamble:

	*name*(_section_)

The *name* is the name of the man page you are writing, and _section_ is the section you're writing for (see *man*(1) for information on manual sections).

# SECTION HEADERS

Each section of your man page should begin with something similar to the following:

	# HEADER NAME

Subsection headers are also understood - use two hashes. Each header must have an empty line on either side.

# PARAGRAPHS

Begin a new paragraph with an empty line.

# FORMATTING

Text can be made *bold* or _underlined_ with asterisks and underscores: \*bold\* or \_underlined\_.

# INDENTATION

You may indent lines with tab characters ("\t") to indent them by 4 spaces in the output. Indented lines may not contain headers.

# LISTS

You may start bulleted lists with dashes, like so:

```
- Item 1
- Item 2
- Item 3
```

You may also use numbered lists like so:

```
1. Item 1
2. Item 2
3. Item 3
```

# LITERAL TEXT

You may turn off scdoc formatting and output literal text with escape codes and literal blocks. Inserting a \\ into your source will cause the subsequent symbol to be treated as a literal and copied directly to the output. You may also make blocks of literal syntax like so:

```
\`\`\`
_This formatting_ will *not* be interpreted by scdoc.
\`\`\`
```

These blocks will be indented one level. Note that literal text is shown literally in the man viewer - that is, it's not a means for inserting your own roff macros into the output.
#include <assert.h> #include <ctype.h> #include <stdio.h> #include <stdlib.h> #include <time.h> #include <unistd.h> #include "string.h" #include "unicode.h" #include "util.h" char date[256]; static int parse_section(struct parser *p) { str_t *section = str_create(); uint32_t ch; while ((ch = parser_getch(p)) != UTF8_INVALID) { if (isdigit(ch)) { assert(str_append_ch(section, ch) != -1); } else if (ch == ')') { if (!section->str) { break; } int sec = strtol(section->str, NULL, 10); if (sec < 1 || sec > 9) { parser_fatal(p, "Expected section between 1 and 9"); break; } str_free(section); return sec; } else { parser_fatal(p, "Expected digit or )"); break; } }; parser_fatal(p, "Expected manual section"); return -1; } static void parse_preamble(struct parser *p) { str_t *name = str_create(); int section = -1; uint32_t ch; do { ch = parser_getch(p); if (isalnum(ch)) { assert(str_append_ch(name, ch) != -1); } else if (ch == '(') { section = parse_section(p); } else if (ch == '\n') { if (name->len == 0) { parser_fatal(p, "Expected preamble"); } if (section == -1) { parser_fatal(p, "Expected manual section"); } char sec[2] = { '0' + section, 0 }; roff_macro(p, "TH", name->str, sec, date, NULL); break; } } while (ch != UTF8_INVALID); str_free(name); } static void output_preamble(struct parser *p) { // TODO: Add version here fprintf(p->output, ".\\\" Generated by scdoc\n"); fprintf(p->output, ".\\\" Fix weird qutation marks:\n"); fprintf(p->output, ".\\\" http://bugs.debian.org/507673\n"); fprintf(p->output, ".\\\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html\n"); fprintf(p->output, ".ie \\n(.g .ds Aq \\(aq\n"); fprintf(p->output, ".el .ds Aq '\n"); fprintf(p->output, ".\\\" Disable hyphenation:\n"); roff_macro(p, "nh", NULL); fprintf(p->output, ".\\\" Generated content:\n"); } int main(int argc, char **argv) { if (argc > 1) { fprintf(stderr, "Usage: scdoc < input.scd > output.roff"); return 1; } time_t now; time(&now); struct tm *now_tm = localtime(&now); 
strftime(date, sizeof(date), "%F", now_tm); struct parser p = { .input = stdin, .output = stdout, .line = 1, .col = 1 }; output_preamble(&p); parse_preamble(&p); return 0; }
#include <stdlib.h> #include <stdint.h> #include "string.h" #include "unicode.h" static void sanity_check(str_t *str) { if (str->str == NULL) { str->str = malloc(16); str->size = 16; str->len = 0; str->str[0] = '\0'; } } static int ensure_capacity(str_t *str, size_t len) { if (len + 1 >= str->size) { char *new = realloc(str->str, str->size * 2); if (!new) { return 0; } str->str = new; str->size *= 2; } return 1; } str_t *str_create() { return calloc(sizeof(str_t), 1); } void str_free(str_t *str) { if (!str) return; free(str->str); free(str); } void str_reset(str_t *str) { str->len = 0; str->str[0] = '\0'; } int str_append_ch(str_t *str, uint32_t ch) { int size = utf8_chsize(ch); if (size <= 0) { return -1; } sanity_check(str); if (!ensure_capacity(str, str->len + size)) { return -1; } utf8_encode(&str->str[str->len], ch); str->len += size; str->str[str->len] = '\0'; return size; }
#include <stdint.h> #include <stddef.h> #include "unicode.h" size_t utf8_chsize(uint32_t ch) { if (ch < 0x80) { return 1; } else if (ch < 0x800) { return 2; } else if (ch < 0x10000) { return 3; } return 4; }
#include <stdint.h> #include <stddef.h> #include "unicode.h" uint8_t masks[] = { 0x7F, 0x1F, 0x0F, 0x07, 0x03, 0x01 }; uint32_t utf8_decode(const char **char_str) { uint8_t **s = (uint8_t **)char_str; uint32_t cp = 0; if (**s < 128) { // shortcut cp = **s; ++*s; return cp; } int size = utf8_size((char *)*s); if (size == -1) { ++*s; return UTF8_INVALID; } uint8_t mask = masks[size - 1]; cp = **s & mask; ++*s; while (--size) { cp <<= 6; cp |= **s & 0x3f; ++*s; } return cp; }
#include <stdint.h> #include <stddef.h> #include "unicode.h" size_t utf8_encode(char *str, uint32_t ch) { size_t len = 0; uint8_t first; if (ch < 0x80) { first = 0; len = 1; } else if (ch < 0x800) { first = 0xc0; len = 2; } else if (ch < 0x10000) { first = 0xe0; len = 3; } else { first = 0xf0; len = 4; } for (size_t i = len - 1; i > 0; --i) { str[i] = (ch & 0x3f) | 0x80; ch >>= 6; } str[0] = ch | first; return len; }
#include <stdint.h> #include <stdio.h> #include "unicode.h" uint32_t utf8_fgetch(FILE *f) { char buffer[UTF8_MAX_SIZE]; int c = fgetc(f); if (c == EOF) { return UTF8_INVALID; } buffer[0] = (char)c; int size = utf8_size(buffer); if (size > 1) { int amt = fread(&buffer[1], 1, size - 1, f); if (amt != size - 1) { return UTF8_INVALID; } } const char *ptr = buffer; return utf8_decode(&ptr); }
#include <stdint.h> #include <stdio.h> #include "unicode.h" size_t utf8_fputch(FILE *f, uint32_t ch) { char buffer[UTF8_MAX_SIZE]; char *ptr = buffer; size_t size = utf8_encode(ptr, ch); return fwrite(&buffer, 1, size, f); }
#include <stdint.h> #include <stddef.h> #include "unicode.h" struct { uint8_t mask; uint8_t result; int octets; } sizes[] = { { 0x80, 0x00, 1 }, { 0xE0, 0xC0, 2 }, { 0xF0, 0xE0, 3 }, { 0xF8, 0xF0, 4 }, { 0xFC, 0xF8, 5 }, { 0xFE, 0xF8, 6 }, { 0x80, 0x80, -1 }, }; int utf8_size(const char *s) { uint8_t c = (uint8_t)*s; for (size_t i = 0; i < sizeof(sizes) / 2; ++i) { if ((c & sizes[i].mask) == sizes[i].result) { return sizes[i].octets; } } return -1; }
#include <stdarg.h> #include <stdlib.h> #include <stdint.h> #include <stdio.h> #include "unicode.h" #include "util.h" void parser_fatal(struct parser *parser, const char *err) { fprintf(stderr, "Error at %d:%d: %s\n", parser->line, parser->col, err); fclose(parser->input); fclose(parser->output); exit(1); } uint32_t parser_getch(struct parser *parser) { uint32_t ch = utf8_fgetch(parser->input); if (ch == '\n') { parser->col = 0; ++parser->line; } else { ++parser->col; } return ch; } int roff_macro(struct parser *p, char *cmd, ...) { FILE *f = p->output; int l = fprintf(f, ".%s", cmd); va_list ap; va_start(ap, cmd); const char *arg; while ((arg = va_arg(ap, const char *))) { fputc(' ', f); fputc('"', f); while (*arg) { uint32_t ch = utf8_decode(&arg); if (ch == '"') { fputc('\\', f); ++l; } l += utf8_fputch(f, ch); } fputc('"', f); l += 3; } va_end(ap); fputc('\n', f); return l + 1; }