creole/creole.c

#include "creole.h"

/*************************************************************************
 * Static information
 ************************************************************************/

/* Arguments to opcodes can accept the following:
   * immediate values only (as of now, no values are like this)
   * register values only (push, pop, etc.)
   * either values (math operations)
   * labels (jumps)
   * none (do not give an argument)
 */
enum creole_arg_type {
	TYPE_NONE,
	TYPE_IMM,
	TYPE_REG,
	TYPE_VAL,
	TYPE_LAB,
	CREOLE_ARG_TYPE_LEN
};

/* C99+ allows for designating the array index when initializing arrays:
      [i] = v,
 * in C89 indicies are implicit from 0 to the maximum filled-in value.
 */
#define defop(s, n, a1, a2, a3) {n, {a1, a2, a3}}
static const struct {
	unsigned arglen;
	enum creole_arg_type argtype[CREOLE_MAX_ARG];
} opcode_info[CREOLE_OPCODE_LEN] = {
	defop(NOOP, 0, TYPE_NONE, TYPE_NONE, TYPE_NONE),
	defop(PUSH, 1, TYPE_VAL, TYPE_NONE, TYPE_NONE),
	defop(POP, 1, TYPE_REG, TYPE_NONE, TYPE_NONE),
	defop(ADD, 3, TYPE_REG, TYPE_VAL, TYPE_VAL),
	defop(MUL, 3, TYPE_REG, TYPE_VAL, TYPE_VAL),
	defop(DIV, 3, TYPE_REG, TYPE_VAL, TYPE_VAL),
	defop(JL, 3, TYPE_LAB, TYPE_VAL, TYPE_VAL),
	defop(CLB, 1, TYPE_LAB, TYPE_NONE, TYPE_NONE),
	defop(SYS, 1, TYPE_VAL, TYPE_NONE, TYPE_NONE)
};

/*************************************************************************
 * Reading from the buffer
 ************************************************************************/

static int read(struct creole_reader *r)
{
	if (r->left == 0)
		return -1;
	r->left--;
	return *r->p++;
}

static int read_eof(struct creole_reader *r)
{
	return r->left == 0;
}

/*************************************************************************
 * Pseudo-UTF-8 lexing
 *
 * Pseudo-UTF-8 is based off of UTF-8 but adds more
 * bytes and allows (requires!) overlong encodings.
 *
 * Possible values:
 *   0xxxxxxx                                              (7 bits)
 *   110HHHHx 10xxxxxx                                     (11 bits)
 *   1110HHHH 10xxxxxx 10xxxxxx                            (16 bits)
 *   11110HHH 10Hxxxxx 10xxxxxx 10xxxxxx                   (21 bits)
 *   111110HH 10HHxxxx 10xxxxxx 10xxxxxx 10xxxxxx          (26 bits)
 *   1111110H 10HHHxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx (31 bits)
 *   11111110 10HHHHxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx (36 bits)
 *            10xxxxxx
 ************************************************************************/

/* A Psuedo-UTF-8 sequence can be either
 *
 * * A 1 byte sequence, where the lower 7 bits are the encoded
 * * word (no high bits).

 * * A multi-byte sequence where the 4 MSB are flags, and the
 * * lower bits are the encoded word.
 */
#define MAX_HIGH_BITS 15
struct word {
	int len;
	int high_bits;
	creole_word word;
};

/* Decode a set of continuation bytes directly into the word. This assumes
 * that each continuation byte contains no high words.
 */
static int read_continue(struct creole_reader *r, struct word *w,
                         int to_read)
{
	int i;
	int r_ret;
	unsigned char c;

	for (i = 0; i < to_read; i++) {
		r_ret = read(r);
		if (r_ret < 0)
			return 0;
		/* Characters might not be 8 bits! */
		c = (unsigned char)(r_ret & 0xFF);
		if (c >> 6 != 0x2)
			return 0;
		w->word = w->word << 6 | (c & 0x3F);
	}

	return 1;
}

/* Start bytes must be treated differently. Depending on the scenario,
 * start bytes will contain parts of the encoded word and high-bit flags.
 * In some cases, not all of the high-bit flags are part of the start
 * byte.
 */
#define START_BYTE_NUM 7
static int parse_start_byte(unsigned char c, struct word *w)
{
	static const struct {
		/* The algorithm compares the mask to the start byte
		 * by shifting both to the right by the amount of 'x's
		 * (blank spaces). The array is arranged in reverse
		 * order so that the index indicates the amount of
		 * bits to shift.
		 */
		unsigned char mask;

		/* The word bits, if they exist, always start from the
		 * LSB, so there is no need to shift the bits away. The
		 * word_mask gets the low bits. If there are no bits, set
		 * to 0.
		 */
		unsigned char word_mask;

		/* The high bits may not start from the LSB. There needs
		 * to be a shift to get the bits to the LSB, and a mask
		 * to discard the higher bits.
		 */
		unsigned char high_bit_mask;
		int high_bit_shift;

		/* The amount of NORMAL continuation bytes to read.
		 * This does NOT include continuation bytes that have
		 * high-bit flags in them.
		 */
		int to_read;
	} start_data[START_BYTE_NUM-1] = {
		{0xFE, 0x00, 0, 0x0, 5}, /* 11111110 */
		{0xFC, 0x00, 0, 0x1, 4}, /* 1111110x */
		{0xF8, 0x00, 0, 0x3, 3}, /* 111110xx */
		{0xF0, 0x00, 0, 0x7, 2}, /* 11110xxx */
		{0xE0, 0x00, 0, 0xF, 2}, /* 1110xxxx */
		{0xC0, 0x01, 1, 0xF, 1}  /* 110xxxxx */
	};

	int i;

	for (i = 0; i < START_BYTE_NUM-1; i++) {
		if (c >> i == start_data[i].mask >> i) {
			w->len = START_BYTE_NUM - i;
			w->word = c & start_data[i].word_mask;
			w->high_bits = (c >> start_data[i].high_bit_shift)
			             & start_data[i].high_bit_mask;
			return start_data[i].to_read;
		}
	}
	/* i == 7 */
	if (c >> 7 == 0) {
		w->len = 1;
		w->word = c;
		w->high_bits = 0;
		return 0;
	}

	return -1;
}

/* This parses the first continuation byte if it is special. */
#define SPECIAL_CONTINUE_BYTE_NUM (START_BYTE_NUM - 3)
static void parse_special_byte(unsigned char c, struct word *w)
{
	/* The index denotes the amount of high bits that were in
	 * the start byte. This is the amount that the stored value
	 * must be shifted.
	 *
	 * The amount of bits that must be shifted out in the continue
	 * byte increase with the index. The amount shifted is (i + 2).
	 *
 	 * Each value stored in the array is the mask applied after
	 * shifting the continue byte bits.
	 */
	static const unsigned char mask[SPECIAL_CONTINUE_BYTE_NUM] = {
		0xF, /* 1111110 10HHHHxx */
		0x7, /* 111110H 10HHHxxx */
		0x3, /* 11110HH 10HHxxxx */
		0x1  /* 1110HHH 10Hxxxxx */
	};
	int i = w->len - START_BYTE_NUM;
	w->high_bits = (w->high_bits << i) | ((c >> (2 + i)) & mask[i]);
}

/* Parse an entire Pseudo-UTF8 sequence. */
static int decode_seq(struct creole_reader *r, struct word *w)
{
	int r_ret;
	int to_read;

	r_ret = read(r);
	if (r_ret < 0)
		return 0;

	to_read = parse_start_byte((unsigned char)(r_ret & 0xFF), w);
	if (to_read < 0)
		return 0;

	/* If to_read is not one less than w->len, that means there are
	 * high bits in the first continuation byte.
	 */
	if (w->len - to_read > 1) {
		r_ret = read(r);
		if (r_ret < 0)
			return 0;
		parse_special_byte((unsigned char)(r_ret & 0xFF), w);
	}

	return read_continue(r, w, to_read);
}

int creole_encode(creole_word i, unsigned encode_to, unsigned high_bits,
                  unsigned char buf[7])
{
	static const struct {
		creole_word max;
		unsigned char b1_mask;
		int high_bit_shift_b1;
		int high_bit_shift_to_right_b1;
		int data_shift_b1;

		int high_bit_mask_b2;
		int high_bit_shift_b2;
		unsigned char b2_data_mask;
	} d[] = {
		{0x7F,       0xC0, 0, 1,  6, 0x0, 0, 0x3F}, /* 2 */
		{0xFFF,      0xE0, 0, 0, 12, 0x0, 0, 0x3F}, /* 3 */
		{0x1FFFF,    0xF0, 1, 0, 17, 0x1, 5, 0x1F}, /* 4 */
		{0x3FFFFF,   0xF8, 2, 0, 22, 0x3, 4, 0x0F}, /* 5 */
		{0x7FFFFFF,  0xFC, 3, 0, 27, 0x7, 3, 0x07}, /* 6 */
		{0xFFFFFFFF, 0xFE, 4, 0, 32, 0xF, 2, 0x03}  /* 7 */
	};
	unsigned lb;
	unsigned j;

	if (encode_to > 8)
		return 0;

	if (encode_to == 1) {
		if (i < 0x80) {
			buf[0] = i;
			return 1;
		}
		return 0;
	}

	lb = encode_to - 2;
	if (i > d[lb].max)
		return 0;

	buf[0] = (d[lb].b1_mask | (high_bits >> d[lb].high_bit_shift_b1
	                           << d[lb].high_bit_shift_to_right_b1));
	/* shifts greater than or equal to the bit size of a type are
	 * undefined. Data in the first byte is always aligned with the LSB.
	 */
	if (d[lb].data_shift_b1 < sizeof(i) * CHAR_BIT)
	       buf[0] |= i >> d[lb].data_shift_b1;

	buf[1] = 0x80 | ((high_bits & d[lb].high_bit_mask_b2)
	                 << d[lb].high_bit_shift_b2)
	              | ((i >> ((encode_to - 2) * 6))
	                 & d[lb].b2_data_mask);

	for (j = 2; j < encode_to; j++) {
		buf[j] = 0x80 | ((i >> ((encode_to - j - 1) * 6)) & 0x3F);
	}

	return 1;
}

#if 0

/*************************************************************************
 * Parsing instructions
 *
 * This parses an entire instruction, which is
 *  a single byte sequence,
 *  zero or more multibyte sequences,
 *  one single byte of all zeros.
 *************************************************************************/

int creole_parse_line(struct creole_ins *ins, struct creole_reader *r)
{
	struct word w = {0};
	unsigned arg = 0;

	if (!decode_seq(r, &w))
		return 0;

	ins->opcode = w.word;
	if (w.word < CREOLE_ARG_TYPE_LEN || w.len != 1)
		return 0;

	for (arg = 0; arg < arglen; arg++) {
		if (!decode_seq(r, &w))
			return 0;
		if (w.len == 1)
			return 0;
		ins->w[arg] = w.word;
		ins->w_flags[arg] = w.high_bits;
	}

	if (!decode_seq(r, &w))
		return 0;
	if (w.word != 0 || w.len != 1)
		return 0;
	return 1;
}

/**************************************************************************
 * High level compiling interface
 *************************************************************************/

static void clear_instruction(struct creole_env *env,
                              struct creole_ins *ins)
{
	memset(ins, 0, sizeof(ins));
	env->prgptr--;
}

static int typecheck(enum creole_word_flag fl, enum creole_arg_type typ)
{
	switch (typ) {
	case TYPE_NONE: return 0;
	case TYPE_IMM: return fl == CREOLE_IMMEDIATE;
	case TYPE_REG: return fl == CREOLE_REGISTER;
	case TYPE_VAL: return fl == CREOLE_IMMEDIATE
	                    | fl == CREOLE_REGISTER;
	case TYPE_LAB: return fl == CREOLE_IMMEDIATE;
	default: return 0;
	}
}

static enum creole_compiler_ret typecheck_ins(struct creole_env *env,
                                              struct creole_ins *ins)
{
	unsigned i;

	for (i = 0; i < opcode_info[env->opcode].arglen; i++) {
		if (!typecheck(ins->w[i],
		               opcode_info[env->opcode].argtype[i]))
			return CREOLE_TYPE_ERROR;
	}
	return CREOLE_COMPILE_OK;
}

static enum creole_compiler_ret
handle_compiletime_immediate(struct creole_env *env,
                             struct creole_ins *ins)
{
	switch (ins->opcode) {
	case CREOLE_CLB:
		if (ins->w[0] >= ins->lablen)
			return CREOLE_LABEL_OVERFLOW;
		ins->lab[ins->w[0]] = env->prgptr;
		/* Delete instruction because it is a compile time
		 * instruction. Place next instruction in its place. */
		clear_instruction(env, ins);
		return CREOLE_COMPILE_OK;
	case CREOLE_NOOP:
		clear_instruction(env, ins);
		return CREOLE_COMPILE_OK;
	default:
		return typecheck_ins(env, ins);
	}
}

int creole_compile(struct creole_env *env, struct creole_reader *r)
{
	struct creole_ins *cur_ins = env->prg;
	int rcode;

	while (env->prgptr < env->prglen) {
		if (!creole_parse_line(cur_ins, r))
			return CREOLE_PARSE_ERROR;
		/* Increase prgptr here. If the instruction is a compile
		 * time instruction, then this will be decremented since
		 * the instruction will not be executed.
		 */
		env->prgptr++;

		rcode = handle_compiletime_immediate(env, cur_ins);
		if (rcode != CREOLE_COMPILE_OK)
			return rcode;

		if (read_eof(r))
			break;
		cur_ins += 1;
	}

	if (env->prgptr == env->prglen && *line)
		return CREOLE_PROGRAM_OVERFLOW;
	env->prgend = env->prgptr;
	env->prgptr = 0;
	return CREOLE_COMPILE_OK;
}

static creole_word read_word(struct creole_ins *ins, int i)
{
	if (env->w_flags[i] == CREOLE_REGISTER)
		return env->reg[env->w[i]];
	else
		return env->w[i];
}

int creole_step(struct creole_env *env)
{
	struct creole_ins *ins = env->prg + env->prgptr;
	creole_word a1, a2;

	if (env->prgptr == env->prgend)
		return CREOLE_STEP_STOP;
	env->prgptr++;

	switch (ins->opcode) {
	case CREOLE_PUSH:
		if (env->stkptr == env->stklen)
			return CREOLE_STACK_OVERFLOW;
		env->stk[env->stkptr++] = env->reg[env->w[0]];
		break;
	case CREOLE_POP:
		if (env->stkptr == 0)
			return CREOLE_STACK_OVERFLOW;
		env->reg[env->w[0]] = env->stk[--env->stkptr];
		break;
	case CREOLE_ADD:
		a1 = read_word(ins, 1);
		a2 = read_word(ins, 2);
		env->reg[env->w[0]] = a1 + a2;
		break;
	case CREOLE_MUL:
		a1 = read_word(ins, 1);
		a2 = read_word(ins, 2);
		env->reg[env->w[0]] = a1 * a2;
		break;
	case CREOLE_DIV:
		a1 = read_word(ins, 1);
		a2 = read_word(ins, 2);
		env->reg[env->w[0]] = a1 / a2;
		break;
	case CREOLE_JL:
		a1 = read_word(ins, 1);
		a2 = read_word(ins, 2);
		if (a1 < a2)
			env->prgptr = env->lab[env->w[0]];
		break;
	case SYS:
		a1 = read_word(ins, 1);
		/* do syscall */
		break;
	}
}
#endif
prototype bytecode interpreter 2023-02-05 06:44:37 -05:00			`#include "creole.h"`

			`/*************************************************************************`
			`* Static information`
			`************************************************************************/`

			`/* Arguments to opcodes can accept the following:`
			`* immediate values only (as of now, no values are like this)`
			`* register values only (push, pop, etc.)`
			`* either values (math operations)`
			`* labels (jumps)`
			`* none (do not give an argument)`
			`*/`
			`enum creole_arg_type {`
			`TYPE_NONE,`
			`TYPE_IMM,`
			`TYPE_REG,`
			`TYPE_VAL,`
			`TYPE_LAB,`
			`CREOLE_ARG_TYPE_LEN`
			`};`

			`/* C99+ allows for designating the array index when initializing arrays:`
			`[i] = v,`
			`* in C89 indicies are implicit from 0 to the maximum filled-in value.`
			`*/`
			`#define defop(s, n, a1, a2, a3) {n, {a1, a2, a3}}`
			`static const struct {`
			`unsigned arglen;`
			`enum creole_arg_type argtype[CREOLE_MAX_ARG];`
			`} opcode_info[CREOLE_OPCODE_LEN] = {`
			`defop(NOOP, 0, TYPE_NONE, TYPE_NONE, TYPE_NONE),`
			`defop(PUSH, 1, TYPE_VAL, TYPE_NONE, TYPE_NONE),`
			`defop(POP, 1, TYPE_REG, TYPE_NONE, TYPE_NONE),`
			`defop(ADD, 3, TYPE_REG, TYPE_VAL, TYPE_VAL),`
			`defop(MUL, 3, TYPE_REG, TYPE_VAL, TYPE_VAL),`
			`defop(DIV, 3, TYPE_REG, TYPE_VAL, TYPE_VAL),`
			`defop(JL, 3, TYPE_LAB, TYPE_VAL, TYPE_VAL),`
			`defop(CLB, 1, TYPE_LAB, TYPE_NONE, TYPE_NONE),`
			`defop(SYS, 1, TYPE_VAL, TYPE_NONE, TYPE_NONE)`
			`};`

			`/*************************************************************************`
			`* Reading from the buffer`
			`************************************************************************/`

			`static int read(struct creole_reader *r)`
			`{`
			`if (r->left == 0)`
			`return -1;`
			`r->left--;`
			`return *r->p++;`
			`}`

			`static int read_eof(struct creole_reader *r)`
			`{`
			`return r->left == 0;`
			`}`

			`/*************************************************************************`
			`* Pseudo-UTF-8 lexing`
			`*`
			`* Pseudo-UTF-8 is based off of UTF-8 but adds more`
			`* bytes and allows (requires!) overlong encodings.`
			`*`
			`* Possible values:`
			`* 0xxxxxxx (7 bits)`
encode 2023-02-06 23:38:39 -05:00			`* 110HHHHx 10xxxxxx (11 bits)`
			`* 1110HHHH 10xxxxxx 10xxxxxx (16 bits)`
			`* 11110HHH 10Hxxxxx 10xxxxxx 10xxxxxx (21 bits)`
			`* 111110HH 10HHxxxx 10xxxxxx 10xxxxxx 10xxxxxx (26 bits)`
			`* 1111110H 10HHHxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx (31 bits)`
			`* 11111110 10HHHHxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx (36 bits)`
prototype bytecode interpreter 2023-02-05 06:44:37 -05:00			`* 10xxxxxx`
			`************************************************************************/`

			`/* A Psuedo-UTF-8 sequence can be either`
			`*`
			`* * A 1 byte sequence, where the lower 7 bits are the encoded`
			`* * word (no high bits).`

			`* * A multi-byte sequence where the 4 MSB are flags, and the`
			`* * lower bits are the encoded word.`
			`*/`
encode 2023-02-06 23:38:39 -05:00			`#define MAX_HIGH_BITS 15`
prototype bytecode interpreter 2023-02-05 06:44:37 -05:00			`struct word {`
			`int len;`
			`int high_bits;`
			`creole_word word;`
			`};`

			`/* Decode a set of continuation bytes directly into the word. This assumes`
			`* that each continuation byte contains no high words.`
			`*/`
encode 2023-02-06 23:38:39 -05:00			`static int read_continue(struct creole_reader r, struct word w,`
prototype bytecode interpreter 2023-02-05 06:44:37 -05:00			`int to_read)`
			`{`
			`int i;`
			`int r_ret;`
			`unsigned char c;`

encode 2023-02-06 23:38:39 -05:00			`for (i = 0; i < to_read; i++) {`
prototype bytecode interpreter 2023-02-05 06:44:37 -05:00			`r_ret = read(r);`
			`if (r_ret < 0)`
			`return 0;`
			`/* Characters might not be 8 bits! */`
			`c = (unsigned char)(r_ret & 0xFF);`
			`if (c >> 6 != 0x2)`
			`return 0;`
encode 2023-02-06 23:38:39 -05:00			`w->word = w->word << 6 \| (c & 0x3F);`
prototype bytecode interpreter 2023-02-05 06:44:37 -05:00			`}`

			`return 1;`
			`}`

			`/* Start bytes must be treated differently. Depending on the scenario,`
			`* start bytes will contain parts of the encoded word and high-bit flags.`
			`* In some cases, not all of the high-bit flags are part of the start`
			`* byte.`
			`*/`
			`#define START_BYTE_NUM 7`
			`static int parse_start_byte(unsigned char c, struct word *w)`
			`{`
			`static const struct {`
			`/* The algorithm compares the mask to the start byte`
			`* by shifting both to the right by the amount of 'x's`
			`* (blank spaces). The array is arranged in reverse`
			`* order so that the index indicates the amount of`
			`* bits to shift.`
			`*/`
			`unsigned char mask;`
encode 2023-02-06 23:38:39 -05:00
prototype bytecode interpreter 2023-02-05 06:44:37 -05:00			`/* The word bits, if they exist, always start from the`
			`* LSB, so there is no need to shift the bits away. The`
			`* word_mask gets the low bits. If there are no bits, set`
			`* to 0.`
			`*/`
			`unsigned char word_mask;`

			`/* The high bits may not start from the LSB. There needs`
			`* to be a shift to get the bits to the LSB, and a mask`
			`* to discard the higher bits.`
			`*/`
			`unsigned char high_bit_mask;`
			`int high_bit_shift;`

			`/* The amount of NORMAL continuation bytes to read.`
			`* This does NOT include continuation bytes that have`
			`* high-bit flags in them.`
			`*/`
			`int to_read;`
encode 2023-02-06 23:38:39 -05:00			`} start_data[START_BYTE_NUM-1] = {`
prototype bytecode interpreter 2023-02-05 06:44:37 -05:00			`{0xFE, 0x00, 0, 0x0, 5}, /* 11111110 */`
encode 2023-02-06 23:38:39 -05:00			`{0xFC, 0x00, 0, 0x1, 4}, /* 1111110x */`
			`{0xF8, 0x00, 0, 0x3, 3}, /* 111110xx */`
			`{0xF0, 0x00, 0, 0x7, 2}, /* 11110xxx */`
			`{0xE0, 0x00, 0, 0xF, 2}, /* 1110xxxx */`
			`{0xC0, 0x01, 1, 0xF, 1} /* 110xxxxx */`
prototype bytecode interpreter 2023-02-05 06:44:37 -05:00			`};`

			`int i;`

encode 2023-02-06 23:38:39 -05:00			`for (i = 0; i < START_BYTE_NUM-1; i++) {`
prototype bytecode interpreter 2023-02-05 06:44:37 -05:00			`if (c >> i == start_data[i].mask >> i) {`
			`w->len = START_BYTE_NUM - i;`
			`w->word = c & start_data[i].word_mask;`
			`w->high_bits = (c >> start_data[i].high_bit_shift)`
			`& start_data[i].high_bit_mask;`
			`return start_data[i].to_read;`
			`}`
			`}`
encode 2023-02-06 23:38:39 -05:00			`/* i == 7 */`
			`if (c >> 7 == 0) {`
			`w->len = 1;`
			`w->word = c;`
			`w->high_bits = 0;`
			`return 0;`
			`}`
prototype bytecode interpreter 2023-02-05 06:44:37 -05:00
			`return -1;`
			`}`

			`/* This parses the first continuation byte if it is special. */`
			`#define SPECIAL_CONTINUE_BYTE_NUM (START_BYTE_NUM - 3)`
			`static void parse_special_byte(unsigned char c, struct word *w)`
			`{`
			`/* The index denotes the amount of high bits that were in`
			`* the start byte. This is the amount that the stored value`
			`* must be shifted.`
			`*`
			`* The amount of bits that must be shifted out in the continue`
			`* byte increase with the index. The amount shifted is (i + 2).`
			`*`
			`* Each value stored in the array is the mask applied after`
			`* shifting the continue byte bits.`
			`*/`
			`static const unsigned char mask[SPECIAL_CONTINUE_BYTE_NUM] = {`
			`0xF, /* 1111110 10HHHHxx */`
			`0x7, /* 111110H 10HHHxxx */`
			`0x3, /* 11110HH 10HHxxxx */`
			`0x1 /* 1110HHH 10Hxxxxx */`
			`};`
			`int i = w->len - START_BYTE_NUM;`
			`w->high_bits = (w->high_bits << i) \| ((c >> (2 + i)) & mask[i]);`
			`}`

			`/* Parse an entire Pseudo-UTF8 sequence. */`
			`static int decode_seq(struct creole_reader r, struct word w)`
			`{`
			`int r_ret;`
			`int to_read;`

			`r_ret = read(r);`
			`if (r_ret < 0)`
			`return 0;`

			`to_read = parse_start_byte((unsigned char)(r_ret & 0xFF), w);`
			`if (to_read < 0)`
			`return 0;`

			`/* If to_read is not one less than w->len, that means there are`
			`* high bits in the first continuation byte.`
			`*/`
			`if (w->len - to_read > 1) {`
			`r_ret = read(r);`
			`if (r_ret < 0)`
			`return 0;`
			`parse_special_byte((unsigned char)(r_ret & 0xFF), w);`
			`}`

encode 2023-02-06 23:38:39 -05:00			`return read_continue(r, w, to_read);`
prototype bytecode interpreter 2023-02-05 06:44:37 -05:00			`}`

encode 2023-02-06 23:38:39 -05:00			`int creole_encode(creole_word i, unsigned encode_to, unsigned high_bits,`
			`unsigned char buf[7])`
			`{`
			`static const struct {`
			`creole_word max;`
			`unsigned char b1_mask;`
			`int high_bit_shift_b1;`
			`int high_bit_shift_to_right_b1;`
			`int data_shift_b1;`

			`int high_bit_mask_b2;`
			`int high_bit_shift_b2;`
			`unsigned char b2_data_mask;`
			`} d[] = {`
			`{0x7F, 0xC0, 0, 1, 6, 0x0, 0, 0x3F}, /* 2 */`
			`{0xFFF, 0xE0, 0, 0, 12, 0x0, 0, 0x3F}, /* 3 */`
			`{0x1FFFF, 0xF0, 1, 0, 17, 0x1, 5, 0x1F}, /* 4 */`
			`{0x3FFFFF, 0xF8, 2, 0, 22, 0x3, 4, 0x0F}, /* 5 */`
			`{0x7FFFFFF, 0xFC, 3, 0, 27, 0x7, 3, 0x07}, /* 6 */`
			`{0xFFFFFFFF, 0xFE, 4, 0, 32, 0xF, 2, 0x03} /* 7 */`
			`};`
			`unsigned lb;`
			`unsigned j;`

			`if (encode_to > 8)`
			`return 0;`

			`if (encode_to == 1) {`
			`if (i < 0x80) {`
			`buf[0] = i;`
			`return 1;`
			`}`
			`return 0;`
			`}`

			`lb = encode_to - 2;`
			`if (i > d[lb].max)`
			`return 0;`

			`buf[0] = (d[lb].b1_mask \| (high_bits >> d[lb].high_bit_shift_b1`
			`<< d[lb].high_bit_shift_to_right_b1));`
			`/* shifts greater than or equal to the bit size of a type are`
			`* undefined. Data in the first byte is always aligned with the LSB.`
			`*/`
			`if (d[lb].data_shift_b1 < sizeof(i) * CHAR_BIT)`
			`buf[0] \|= i >> d[lb].data_shift_b1;`

			`buf[1] = 0x80 \| ((high_bits & d[lb].high_bit_mask_b2)`
			`<< d[lb].high_bit_shift_b2)`
			`\| ((i >> ((encode_to - 2) * 6))`
			`& d[lb].b2_data_mask);`

			`for (j = 2; j < encode_to; j++) {`
			`buf[j] = 0x80 \| ((i >> ((encode_to - j - 1) * 6)) & 0x3F);`
			`}`

			`return 1;`
			`}`

			`#if 0`

prototype bytecode interpreter 2023-02-05 06:44:37 -05:00			`/*************************************************************************`
			`* Parsing instructions`
			`*`
			`* This parses an entire instruction, which is`
			`* a single byte sequence,`
			`* zero or more multibyte sequences,`
			`* one single byte of all zeros.`
			`*************************************************************************/`

			`int creole_parse_line(struct creole_ins ins, struct creole_reader r)`
			`{`
			`struct word w = {0};`
			`unsigned arg = 0;`

			`if (!decode_seq(r, &w))`
			`return 0;`

			`ins->opcode = w.word;`
			`if (w.word < CREOLE_ARG_TYPE_LEN \|\| w.len != 1)`
			`return 0;`

			`for (arg = 0; arg < arglen; arg++) {`
			`if (!decode_seq(r, &w))`
			`return 0;`
			`if (w.len == 1)`
			`return 0;`
			`ins->w[arg] = w.word;`
			`ins->w_flags[arg] = w.high_bits;`
			`}`

			`if (!decode_seq(r, &w))`
			`return 0;`
			`if (w.word != 0 \|\| w.len != 1)`
			`return 0;`
			`return 1;`
			`}`

			`/**************************************************************************`
			`* High level compiling interface`
			`*************************************************************************/`

			`static void clear_instruction(struct creole_env *env,`
			`struct creole_ins *ins)`
			`{`
			`memset(ins, 0, sizeof(ins));`
			`env->prgptr--;`
			`}`

			`static int typecheck(enum creole_word_flag fl, enum creole_arg_type typ)`
			`{`
			`switch (typ) {`
			`case TYPE_NONE: return 0;`
			`case TYPE_IMM: return fl == CREOLE_IMMEDIATE;`
			`case TYPE_REG: return fl == CREOLE_REGISTER;`
			`case TYPE_VAL: return fl == CREOLE_IMMEDIATE`
			`\| fl == CREOLE_REGISTER;`
			`case TYPE_LAB: return fl == CREOLE_IMMEDIATE;`
			`default: return 0;`
			`}`
			`}`

			`static enum creole_compiler_ret typecheck_ins(struct creole_env *env,`
			`struct creole_ins *ins)`
			`{`
			`unsigned i;`

			`for (i = 0; i < opcode_info[env->opcode].arglen; i++) {`
			`if (!typecheck(ins->w[i],`
			`opcode_info[env->opcode].argtype[i]))`
			`return CREOLE_TYPE_ERROR;`
			`}`
			`return CREOLE_COMPILE_OK;`
			`}`

			`static enum creole_compiler_ret`
			`handle_compiletime_immediate(struct creole_env *env,`
			`struct creole_ins *ins)`
			`{`
			`switch (ins->opcode) {`
			`case CREOLE_CLB:`
			`if (ins->w[0] >= ins->lablen)`
			`return CREOLE_LABEL_OVERFLOW;`
			`ins->lab[ins->w[0]] = env->prgptr;`
			`/* Delete instruction because it is a compile time`
			`* instruction. Place next instruction in its place. */`
			`clear_instruction(env, ins);`
			`return CREOLE_COMPILE_OK;`
			`case CREOLE_NOOP:`
			`clear_instruction(env, ins);`
			`return CREOLE_COMPILE_OK;`
			`default:`
			`return typecheck_ins(env, ins);`
			`}`
			`}`

			`int creole_compile(struct creole_env env, struct creole_reader r)`
			`{`
			`struct creole_ins *cur_ins = env->prg;`
			`int rcode;`

			`while (env->prgptr < env->prglen) {`
			`if (!creole_parse_line(cur_ins, r))`
			`return CREOLE_PARSE_ERROR;`
			`/* Increase prgptr here. If the instruction is a compile`
			`* time instruction, then this will be decremented since`
			`* the instruction will not be executed.`
			`*/`
			`env->prgptr++;`

			`rcode = handle_compiletime_immediate(env, cur_ins);`
			`if (rcode != CREOLE_COMPILE_OK)`
			`return rcode;`

			`if (read_eof(r))`
			`break;`
			`cur_ins += 1;`
			`}`

			`if (env->prgptr == env->prglen && *line)`
			`return CREOLE_PROGRAM_OVERFLOW;`
			`env->prgend = env->prgptr;`
			`env->prgptr = 0;`
			`return CREOLE_COMPILE_OK;`
			`}`

			`static creole_word read_word(struct creole_ins *ins, int i)`
			`{`
			`if (env->w_flags[i] == CREOLE_REGISTER)`
			`return env->reg[env->w[i]];`
			`else`
			`return env->w[i];`
			`}`

			`int creole_step(struct creole_env *env)`
			`{`
			`struct creole_ins *ins = env->prg + env->prgptr;`
			`creole_word a1, a2;`

			`if (env->prgptr == env->prgend)`
			`return CREOLE_STEP_STOP;`
			`env->prgptr++;`

			`switch (ins->opcode) {`
			`case CREOLE_PUSH:`
			`if (env->stkptr == env->stklen)`
			`return CREOLE_STACK_OVERFLOW;`
			`env->stk[env->stkptr++] = env->reg[env->w[0]];`
			`break;`
			`case CREOLE_POP:`
			`if (env->stkptr == 0)`
			`return CREOLE_STACK_OVERFLOW;`
			`env->reg[env->w[0]] = env->stk[--env->stkptr];`
			`break;`
			`case CREOLE_ADD:`
			`a1 = read_word(ins, 1);`
			`a2 = read_word(ins, 2);`
			`env->reg[env->w[0]] = a1 + a2;`
			`break;`
			`case CREOLE_MUL:`
			`a1 = read_word(ins, 1);`
			`a2 = read_word(ins, 2);`
			`env->reg[env->w[0]] = a1 * a2;`
			`break;`
			`case CREOLE_DIV:`
			`a1 = read_word(ins, 1);`
			`a2 = read_word(ins, 2);`
			`env->reg[env->w[0]] = a1 / a2;`
			`break;`
			`case CREOLE_JL:`
			`a1 = read_word(ins, 1);`
			`a2 = read_word(ins, 2);`
			`if (a1 < a2)`
			`env->prgptr = env->lab[env->w[0]];`
			`break;`
			`case SYS:`
			`a1 = read_word(ins, 1);`
			`/* do syscall */`
			`break;`
			`}`
			`}`
			`#endif`