Lindenii Project Forge
Login

scdoc

scdoc mirror for performance testing
Commit info
ID
a4193949ec755b3848a803a9b02364eeddbb1455
Author
Drew DeVault <sir@cmpwn.com>
Author date
Sat, 09 Dec 2017 23:18:57 -0500
Committer
Drew DeVault <sir@cmpwn.com>
Committer date
Sat, 09 Dec 2017 23:18:57 -0500
Actions
Initial commit
build
#ifndef _SCDOC_STRING_H
#define _SCDOC_STRING_H
#include <stdint.h>

/**
 * Growable, NUL-terminated byte string.
 */
struct str {
	/* Heap buffer holding the text; NULL until the first append. */
	char *str;
	/* Number of bytes in use, not counting the terminating NUL. */
	size_t len;
	/* Number of bytes allocated for the buffer. */
	size_t size;
};

typedef struct str str_t;

/**
 * Allocates a new, empty string. Returns NULL if the allocation fails.
 * Release the result with str_free().
 */
str_t *str_create(void);

/**
 * Releases a string and its buffer. Passing NULL is a no-op.
 */
void str_free(str_t *str);

/**
 * Truncates the string to zero length; the buffer itself is kept.
 */
void str_reset(str_t *str);

/**
 * Appends the code point ch, encoded as UTF-8, to the string.
 * Returns the number of bytes appended, or -1 on failure.
 */
int str_append_ch(str_t *str, uint32_t ch);

#endif
#ifndef _SCDOC_UNICODE_H
#define _SCDOC_UNICODE_H
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

// Technically UTF-8 supports up to 6 byte codepoints, but Unicode itself
// doesn't really bother with more than 4. This is also the size of the
// scratch buffers used by utf8_fgetch() and utf8_fputch().
#define UTF8_MAX_SIZE 4

// Sentinel returned by utf8_decode() and utf8_fgetch() when no character
// could be produced (bad lead byte, short read, end of file).
// NOTE(review): the value is also what a well-formed U+0080 decodes to, so
// callers cannot tell the two apart.
#define UTF8_INVALID 0x80

/**
 * Grabs the next UTF-8 character and advances the string pointer past the
 * bytes that were consumed.
 *
 * Returns the decoded code point. If utf8_size() rejects the lead byte, the
 * pointer is advanced by one byte and UTF8_INVALID is returned.
 */
uint32_t utf8_decode(const char **str);

/**
 * Encodes a character as UTF-8 and returns the length of that character.
 *
 * Between 1 and 4 bytes are written to str; no NUL terminator is added, so
 * str needs room for UTF8_MAX_SIZE bytes.
 */
size_t utf8_encode(char *str, uint32_t ch);

/**
 * Returns the size of the next UTF-8 character, judged from the first byte of
 * str only, or -1 if that byte cannot start a character.
 */
int utf8_size(const char *str);

/**
 * Returns the size of a UTF-8 character, i.e. the number of bytes (1 to 4)
 * that utf8_encode() produces for ch.
 */
size_t utf8_chsize(uint32_t ch);

/**
 * Reads and returns the next character from the file.
 *
 * Returns UTF8_INVALID at end of file or when the file ends in the middle of
 * a multi-byte character.
 */
uint32_t utf8_fgetch(FILE *f);

/**
 * Writes this character to the file and returns the number of bytes written
 * (the result of the underlying fwrite()).
 */
size_t utf8_fputch(FILE *f, uint32_t ch);

#endif
#ifndef _SCDOC_PARSER_H
#define _SCDOC_PARSER_H
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>

/**
 * State shared by the parsing routines.
 */
struct parser {
	/* Stream the scdoc source is read from. */
	FILE *input;
	/* Stream the generated roff is written to. */
	FILE *output;
	/* Current line, maintained by parser_getch(). */
	int line;
	/* Current column, maintained by parser_getch(). */
	int col;
};

/**
 * Prints err to stderr together with the parser's current line and column,
 * closes both parser streams and terminates the process with exit status 1.
 * Does not return.
 */
void parser_fatal(struct parser *parser, const char *err);

/**
 * Reads the next character from the parser's input and updates the line and
 * column counters. Returns whatever utf8_fgetch() returned, including
 * UTF8_INVALID.
 */
uint32_t parser_getch(struct parser *parser);

/**
 * Writes the roff request ".cmd" to the parser's output, followed by each
 * argument in double quotes, and ends the line.
 *
 * The variable arguments are const char * strings; the list must be
 * terminated with a NULL pointer. Returns the number of bytes accounted for.
 */
int roff_macro(struct parser *p, char *cmd, ...);

#endif
# TODO: Just use a makefile
# Project definition: C99, warnings at level 2 and treated as errors.
project(
	'scdoc',
	'c',
	license: 'MIT',
	meson_version: '>=0.43.0',
	default_options: [
		'c_std=c99',
		'warning_level=2',
		'werror=true',
	],
)

# Unused parameters are tolerated; every other warning still fails the build
# because of werror above.
add_project_arguments('-Wno-unused-parameter', language: 'c')

# The single scdoc binary. Headers are looked up in include/.
executable(
	'scdoc', [
		'src/main.c',
		'src/string.c',
		'src/utf8_chsize.c',
		'src/utf8_decode.c',
		'src/utf8_encode.c',
		'src/utf8_fgetch.c',
		'src/utf8_fputch.c',
		'src/utf8_size.c',
		'src/util.c',
	],
	include_directories: include_directories('include')
)
scdoc(5)

# NAME

scdoc - syntax description for scdoc markup language

# DESCRIPTION

scdoc is a tool designed to make the process of writing man pages more
friendly. It converts scdoc files into roff macros, which can then be converted
to man pages or a number of other formats. The syntax is inspired by, but not
directly taken from, markdown. Input files *must* use the UTF-8 encoding.

# PREAMBLE

Each scdoc file must begin with the following preamble:

	*name*(_section_)

The *name* is the name of the man page you are writing, and _section_ is the
section you're writing for (see *man*(1) for information on manual sections).

# SECTION HEADERS

Each section of your man page should begin with something similar to the
following:

	# HEADER NAME

Subsection headers are also understood - use two hashes. Each header must have
an empty line on either side.

# PARAGRAPHS

Begin a new paragraph with an empty line.

# FORMATTING

Text can be made *bold* or _underlined_ with asterisks and underscores: \*bold\*
or \_underlined\_.

# INDENTATION

You may indent lines with tab characters ("\t") to indent them by 4 spaces in
the output. Indented lines may not contain headers.

# LISTS

You may start bulleted lists with dashes, like so:

```
- Item 1
- Item 2
- Item 3
```

You may also use numbered lists like so:

```
1. Item 1
2. Item 2
3. Item 3
```

# LITERAL TEXT

You may turn off scdoc formatting and output literal text with escape codes and
literal blocks. Inserting a \\ into your source will cause the subsequent symbol
to be treated as a literal and copied directly to the output. You may also make
blocks of literal syntax like so:

```
\`\`\`
_This formatting_ will *not* be interpreted by scdoc.
\`\`\`
```

These blocks will be indented one level. Note that literal text is shown
literally in the man viewer - that is, it's not a means for inserting your own
roff macros into the output.
#include <assert.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <unistd.h>
#include "string.h"
#include "unicode.h"
#include "util.h"

// Current date as text. Filled in by main() with strftime("%F") and passed by
// parse_preamble() as the last argument of the TH macro.
char date[256];

/**
 * Parses the manual section that follows the "(" of the preamble.
 *
 * Consumes characters up to and including the closing ")" and returns the
 * section number, which must lie between 1 and 9. Every error is reported
 * through parser_fatal(), which terminates the process; the "return -1"
 * statements only exist for the compiler's benefit.
 */
static int parse_section(struct parser *p) {
	str_t *section = str_create();
	if (!section) {
		parser_fatal(p, "Out of memory");
		return -1;
	}
	uint32_t ch;
	while ((ch = parser_getch(p)) != UTF8_INVALID) {
		// The <ctype.h> classifiers are only defined for unsigned char
		// values and EOF, so non-ASCII code points must not reach isdigit().
		if (ch < 0x80 && isdigit((int)ch)) {
			// Deliberately not wrapped in assert(): the append has to
			// happen in NDEBUG builds as well.
			if (str_append_ch(section, ch) == -1) {
				str_free(section);
				parser_fatal(p, "Out of memory");
				return -1;
			}
		} else if (ch == ')') {
			if (!section->str) {
				// "()" - no digits were seen at all.
				break;
			}
			// Range-check as long so that an overlong digit run cannot be
			// truncated into the valid range.
			long sec = strtol(section->str, NULL, 10);
			str_free(section);
			if (sec < 1 || sec > 9) {
				parser_fatal(p, "Expected section between 1 and 9");
				return -1;
			}
			return (int)sec;
		} else {
			str_free(section);
			parser_fatal(p, "Expected digit or )");
			return -1;
		}
	}
	str_free(section);
	parser_fatal(p, "Expected manual section");
	return -1;
}

/**
 * Parses the "name(section)" preamble line and emits the matching TH macro.
 *
 * ASCII letters and digits are collected into the page name, "(" hands over
 * to parse_section(), and the first newline finishes the preamble. Other
 * characters are skipped. A missing name or section is reported through
 * parser_fatal(), which terminates the process.
 */
static void parse_preamble(struct parser *p) {
	str_t *name = str_create();
	if (!name) {
		parser_fatal(p, "Out of memory");
		return;
	}
	int section = -1;
	uint32_t ch;
	do {
		ch = parser_getch(p);
		// The <ctype.h> classifiers are only defined for unsigned char
		// values and EOF, so non-ASCII code points must not reach isalnum().
		if (ch < 0x80 && isalnum((int)ch)) {
			// Deliberately not wrapped in assert(): the append has to
			// happen in NDEBUG builds as well.
			if (str_append_ch(name, ch) == -1) {
				parser_fatal(p, "Out of memory");
			}
		} else if (ch == '(') {
			section = parse_section(p);
		} else if (ch == '\n') {
			if (name->len == 0) {
				parser_fatal(p, "Expected preamble");
			}
			if (section == -1) {
				parser_fatal(p, "Expected manual section");
			}
			// parse_section() only returns 1..9, so a single digit is enough.
			char sec[2] = { '0' + section, 0 };
			roff_macro(p, "TH", name->str, sec, date, NULL);
			break;
		}
	} while (ch != UTF8_INVALID);
	str_free(name);
}

/**
 * Writes the fixed roff header that precedes the generated content: the
 * "generated by" banner, the quotation mark workaround and the request that
 * turns hyphenation off.
 */
static void output_preamble(struct parser *p) {
	// TODO: Add version here
	static const char *const banner[] = {
		".\\\" Generated by scdoc\n",
		".\\\" Fix weird qutation marks:\n",
		".\\\" http://bugs.debian.org/507673\n",
		".\\\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html\n",
		".ie \\n(.g .ds Aq \\(aq\n",
		".el       .ds Aq '\n",
		".\\\" Disable hyphenation:\n",
	};
	FILE *out = p->output;
	for (size_t i = 0; i < sizeof(banner) / sizeof(banner[0]); ++i) {
		fputs(banner[i], out);
	}
	roff_macro(p, "nh", NULL);
	fputs(".\\\" Generated content:\n", out);
}

/**
 * Entry point: reads scdoc source from stdin and writes roff to stdout.
 *
 * No command line arguments are accepted. Returns 0 on success and 1 when
 * arguments were given or the current date could not be formatted; parse
 * errors terminate the process from within parser_fatal().
 */
int main(int argc, char **argv) {
	if (argc > 1) {
		fprintf(stderr, "Usage: scdoc < input.scd > output.roff\n");
		return 1;
	}
	time_t now;
	time(&now);
	// localtime() may return NULL, which must not be handed to strftime().
	struct tm *now_tm = localtime(&now);
	if (!now_tm || strftime(date, sizeof(date), "%F", now_tm) == 0) {
		fprintf(stderr, "Unable to determine the current date\n");
		return 1;
	}
	struct parser p = {
		.input = stdin,
		.output = stdout,
		.line = 1,
		.col = 1
	};
	output_preamble(&p);
	parse_preamble(&p);
	return 0;
}
#include <stdlib.h>
#include <stdint.h>
#include "string.h"
#include "unicode.h"

// Capacity, in bytes, given to a string's buffer on first use.
enum { STR_INITIAL_SIZE = 16 };

/**
 * Lazily allocates the buffer of a string that does not have one yet.
 * Returns 1 if the string has a usable buffer afterwards, 0 if the allocation
 * failed (the string is left untouched in that case).
 */
static int sanity_check(str_t *str) {
	if (str->str != NULL) {
		return 1;
	}
	char *buf = malloc(STR_INITIAL_SIZE);
	if (!buf) {
		return 0;
	}
	buf[0] = '\0';
	str->str = buf;
	str->size = STR_INITIAL_SIZE;
	str->len = 0;
	return 1;
}

/**
 * Makes sure the buffer can hold len bytes plus the terminating NUL, doubling
 * the capacity as often as necessary. Expects the buffer to exist already.
 * Returns 1 on success, 0 if the buffer could not be grown; the existing
 * contents stay valid on failure.
 */
static int ensure_capacity(str_t *str, size_t len) {
	if (len + 1 < str->size) {
		return 1;
	}
	size_t new_size = str->size;
	while (len + 1 >= new_size) {
		if (new_size == 0 || new_size > SIZE_MAX / 2) {
			return 0;
		}
		new_size *= 2;
	}
	// Keep the old pointer until realloc() has succeeded.
	char *new = realloc(str->str, new_size);
	if (!new) {
		return 0;
	}
	str->str = new;
	str->size = new_size;
	return 1;
}

/**
 * Allocates a new, empty string without a buffer; the buffer is created by
 * the first append. Returns NULL if the allocation fails.
 */
str_t *str_create(void) {
	return calloc(1, sizeof(str_t));
}

/**
 * Releases the string and its buffer. Passing NULL is a no-op.
 */
void str_free(str_t *str) {
	if (!str) return;
	free(str->str);
	free(str);
}

/**
 * Truncates the string to zero length while keeping its buffer.
 */
void str_reset(str_t *str) {
	str->len = 0;
	// A string that was never appended to has no buffer to terminate.
	if (str->str) {
		str->str[0] = '\0';
	}
}

/**
 * Appends the code point ch, encoded as UTF-8, and keeps the buffer
 * NUL-terminated. Returns the number of bytes appended, or -1 if memory
 * could not be obtained.
 */
int str_append_ch(str_t *str, uint32_t ch) {
	size_t size = utf8_chsize(ch);
	if (size == 0) {
		return -1;
	}
	if (!sanity_check(str)) {
		return -1;
	}
	if (!ensure_capacity(str, str->len + size)) {
		return -1;
	}
	utf8_encode(&str->str[str->len], ch);
	str->len += size;
	str->str[str->len] = '\0';
	return (int)size;
}
#include <stdint.h>
#include <stddef.h>
#include "unicode.h"

/**
 * Returns the number of bytes (1 to 4) used to encode ch as UTF-8.
 */
size_t utf8_chsize(uint32_t ch) {
	// Start at one byte and add one for every encoding boundary ch has crossed.
	size_t octets = 1;
	if (ch >= 0x80) {
		++octets;
	}
	if (ch >= 0x800) {
		++octets;
	}
	if (ch >= 0x10000) {
		++octets;
	}
	return octets;
}
#include <stdint.h>
#include <stddef.h>
#include "unicode.h"

// Payload bits of the lead byte, indexed by sequence length minus one.
static const uint8_t masks[] = {
	0x7F,
	0x1F,
	0x0F,
	0x07,
	0x03,
	0x01
};

/**
 * Decodes the UTF-8 sequence at *char_str and advances *char_str past the
 * bytes that were consumed.
 *
 * Returns the decoded code point. Returns UTF8_INVALID when the lead byte is
 * rejected by utf8_size() (one byte is consumed) or when a continuation byte
 * is missing (the pointer is left on the offending byte). Stopping there
 * keeps the decoder from walking over the NUL terminator of a string that
 * ends in the middle of a sequence.
 */
uint32_t utf8_decode(const char **char_str) {
	const uint8_t *s = (const uint8_t *)*char_str;

	if (*s < 128) {
		// shortcut
		*char_str = (const char *)(s + 1);
		return *s;
	}
	int size = utf8_size((const char *)s);
	if (size < 1 || (size_t)size > sizeof(masks) / sizeof(masks[0])) {
		*char_str = (const char *)(s + 1);
		return UTF8_INVALID;
	}
	uint32_t cp = *s & masks[size - 1];
	++s;
	for (int i = 1; i < size; ++i) {
		if ((*s & 0xC0) != 0x80) {
			// Not a continuation byte (possibly the terminator): do not
			// consume it.
			*char_str = (const char *)s;
			return UTF8_INVALID;
		}
		cp = (cp << 6) | (*s & 0x3F);
		++s;
	}
	*char_str = (const char *)s;
	return cp;
}
#include <stdint.h>
#include <stddef.h>
#include "unicode.h"

/**
 * Encodes ch as UTF-8 into str and returns the number of bytes written
 * (1 to 4). No NUL terminator is written.
 */
size_t utf8_encode(char *str, uint32_t ch) {
	uint8_t lead;
	size_t octets;

	// Choose the lead byte marker and the length from the code point range.
	if (ch >= 0x10000) {
		lead = 0xf0;
		octets = 4;
	} else if (ch >= 0x800) {
		lead = 0xe0;
		octets = 3;
	} else if (ch >= 0x80) {
		lead = 0xc0;
		octets = 2;
	} else {
		lead = 0;
		octets = 1;
	}

	// Fill the continuation bytes back to front, six payload bits at a time.
	char *tail = str + octets - 1;
	while (tail != str) {
		*tail = (ch & 0x3f) | 0x80;
		ch >>= 6;
		--tail;
	}

	// The bits that are left belong in the lead byte.
	*str = ch | lead;
	return octets;
}
#include <stdint.h>
#include <stdio.h>
#include "unicode.h"

/**
 * Reads the next UTF-8 character from f and returns its code point.
 *
 * Returns UTF8_INVALID at end of file, when the lead byte does not announce a
 * sequence of 1 to UTF8_MAX_SIZE bytes, or when the file ends before the
 * sequence is complete.
 */
uint32_t utf8_fgetch(FILE *f) {
	char buffer[UTF8_MAX_SIZE];
	int c = fgetc(f);
	if (c == EOF) {
		return UTF8_INVALID;
	}
	buffer[0] = (char)c;
	int size = utf8_size(buffer);
	// Anything longer than the buffer must be rejected before fread() is
	// asked to store it.
	if (size < 1 || size > UTF8_MAX_SIZE) {
		return UTF8_INVALID;
	}
	if (size > 1) {
		size_t want = (size_t)size - 1;
		if (fread(&buffer[1], 1, want, f) != want) {
			return UTF8_INVALID;
		}
	}
	const char *ptr = buffer;
	return utf8_decode(&ptr);
}
#include <stdint.h>
#include <stdio.h>
#include "unicode.h"

/**
 * Encodes ch as UTF-8, writes the bytes to f and returns the number of bytes
 * fwrite() reported as written.
 */
size_t utf8_fputch(FILE *f, uint32_t ch) {
	char encoded[UTF8_MAX_SIZE];
	const size_t octets = utf8_encode(encoded, ch);
	return fwrite(encoded, 1, octets, f);
}
#include <stdint.h>
#include <stddef.h>
#include "unicode.h"

// Lead byte patterns: a byte b starts a sequence of `octets` bytes when
// (b & mask) == result. Only the 1 to 4 byte forms are listed; the obsolete
// 5 and 6 byte forms are not valid UTF-8 and are longer than the buffers the
// rest of the program uses, so their lead bytes are reported as invalid.
static const struct {
	uint8_t mask;
	uint8_t result;
	int octets;
} sizes[] = {
	{ 0x80, 0x00, 1 },
	{ 0xE0, 0xC0, 2 },
	{ 0xF0, 0xE0, 3 },
	{ 0xF8, 0xF0, 4 },
};

/**
 * Returns the length in bytes (1 to 4) of the UTF-8 sequence introduced by
 * the first byte of s, or -1 if that byte cannot start a sequence (a
 * continuation byte, or a lead byte in the range 0xF8-0xFF).
 */
int utf8_size(const char *s) {
	uint8_t c = (uint8_t)*s;
	for (size_t i = 0; i < sizeof(sizes) / sizeof(sizes[0]); ++i) {
		if ((c & sizes[i].mask) == sizes[i].result) {
			return sizes[i].octets;
		}
	}
	return -1;
}
#include <stdarg.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>
#include "unicode.h"
#include "util.h"

/**
 * Reports err on stderr together with the current line and column, closes
 * both parser streams and terminates the process with exit status 1.
 */
void parser_fatal(struct parser *parser, const char *err) {
	const int line = parser->line;
	const int col = parser->col;
	fprintf(stderr, "Error at %d:%d: %s\n", line, col, err);
	fclose(parser->input);
	fclose(parser->output);
	exit(1);
}

/**
 * Reads the next character from the parser's input and keeps the position
 * counters in step: a newline starts a new line at column 0, anything else
 * moves one column forward. The character is returned unchanged.
 */
uint32_t parser_getch(struct parser *parser) {
	const uint32_t ch = utf8_fgetch(parser->input);
	if (ch != '\n') {
		parser->col += 1;
		return ch;
	}
	parser->line += 1;
	parser->col = 0;
	return ch;
}

/**
 * Writes the roff request ".cmd" to the parser's output, followed by each
 * argument wrapped in double quotes, and ends the line.
 *
 * The variable arguments are const char * strings terminated by a NULL
 * pointer. Characters that roff would interpret inside a quoted argument are
 * replaced by escapes: a double quote becomes \(dq (the sequence \" would
 * start a comment and swallow the rest of the line) and a backslash becomes
 * \e.
 *
 * Returns the number of bytes accounted for, including the newline.
 */
int roff_macro(struct parser *p, char *cmd, ...) {
	FILE *f = p->output;
	int l = fprintf(f, ".%s", cmd);
	va_list ap;
	va_start(ap, cmd);
	const char *arg;
	while ((arg = va_arg(ap, const char *))) {
		fputs(" \"", f);
		while (*arg) {
			uint32_t ch = utf8_decode(&arg);
			if (ch == '"') {
				fputs("\\(dq", f);
				l += 4;
			} else if (ch == '\\') {
				fputs("\\e", f);
				l += 2;
			} else {
				l += (int)utf8_fputch(f, ch);
			}
		}
		fputc('"', f);
		// Space, opening quote and closing quote.
		l += 3;
	}
	va_end(ap);
	fputc('\n', f);
	return l + 1;
}