aboutsummaryrefslogtreecommitdiffstats
path: root/creole.c
diff options
context:
space:
mode:
authorGravatar Peter McGoron 2023-02-05 11:44:37 +0000
committerGravatar Peter McGoron 2023-02-05 11:44:37 +0000
commitf63d6cdd3df24d869437f9f689cbfff5d4bfb435 (patch)
treeee6c907242abe6d27edd87cf87979db6c928693a /creole.c
prototype bytecode interpreter
Diffstat (limited to 'creole.c')
-rw-r--r--creole.c410
1 files changed, 410 insertions, 0 deletions
diff --git a/creole.c b/creole.c
new file mode 100644
index 0000000..4259e42
--- /dev/null
+++ b/creole.c
@@ -0,0 +1,410 @@
+#include "creole.h"
+
+/*************************************************************************
+ * Static information
+ ************************************************************************/
+
+/* Arguments to opcodes can accept the following:
+ * immediate values only (as of now, no values are like this)
+ * register values only (push, pop, etc.)
+ * either values (math operations)
+ * labels (jumps)
+ * none (do not give an argument)
+ */
+enum creole_arg_type {
+ TYPE_NONE,
+ TYPE_IMM,
+ TYPE_REG,
+ TYPE_VAL,
+ TYPE_LAB,
+ CREOLE_ARG_TYPE_LEN
+};
+
+/* C99+ allows for designating the array index when initializing arrays:
+ [i] = v,
+ * in C89 indicies are implicit from 0 to the maximum filled-in value.
+ */
+#define defop(s, n, a1, a2, a3) {n, {a1, a2, a3}}
+static const struct {
+ unsigned arglen;
+ enum creole_arg_type argtype[CREOLE_MAX_ARG];
+} opcode_info[CREOLE_OPCODE_LEN] = {
+ defop(NOOP, 0, TYPE_NONE, TYPE_NONE, TYPE_NONE),
+ defop(PUSH, 1, TYPE_VAL, TYPE_NONE, TYPE_NONE),
+ defop(POP, 1, TYPE_REG, TYPE_NONE, TYPE_NONE),
+ defop(ADD, 3, TYPE_REG, TYPE_VAL, TYPE_VAL),
+ defop(MUL, 3, TYPE_REG, TYPE_VAL, TYPE_VAL),
+ defop(DIV, 3, TYPE_REG, TYPE_VAL, TYPE_VAL),
+ defop(JL, 3, TYPE_LAB, TYPE_VAL, TYPE_VAL),
+ defop(CLB, 1, TYPE_LAB, TYPE_NONE, TYPE_NONE),
+ defop(SYS, 1, TYPE_VAL, TYPE_NONE, TYPE_NONE)
+};
+
+/*************************************************************************
+ * Reading from the buffer
+ ************************************************************************/
+
+static int read(struct creole_reader *r)
+{
+ if (r->left == 0)
+ return -1;
+ r->left--;
+ return *r->p++;
+}
+
+static int read_eof(struct creole_reader *r)
+{
+ return r->left == 0;
+}
+
+#if 0
+
+/*************************************************************************
+ * Pseudo-UTF-8 lexing
+ *
+ * Pseudo-UTF-8 is based off of UTF-8 but adds more
+ * bytes and allows (requires!) overlong encodings.
+ *
+ * Possible values:
+ * 0xxxxxxx (7 bits)
+ * 110xxxxx 10xxxxxx (11 bits)
+ * 1110xxxx 10xxxxxx 10xxxxxx (16 bits)
+ * 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx (21 bits)
+ * 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx (26 bits)
+ * 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx (31 bits)
+ * 11111110 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx (36 bits)
+ * 10xxxxxx
+ ************************************************************************/
+
+/* A Psuedo-UTF-8 sequence can be either
+ *
+ * * A 1 byte sequence, where the lower 7 bits are the encoded
+ * * word (no high bits).
+
+ * * A multi-byte sequence where the 4 MSB are flags, and the
+ * * lower bits are the encoded word.
+ */
+struct word {
+ int len;
+ int high_bits;
+ creole_word word;
+};
+
+/* Decode a set of continuation bytes directly into the word. This assumes
+ * that each continuation byte contains no high words.
+ */
+static int read_continue(struct creole_reader *r, struct encoded_word *w,
+ int to_read)
+{
+ int i;
+ int r_ret;
+ unsigned char c;
+
+ for (i = 0; i < to_read) {
+ r_ret = read(r);
+ if (r_ret < 0)
+ return 0;
+ /* Characters might not be 8 bits! */
+ c = (unsigned char)(r_ret & 0xFF);
+ if (c >> 6 != 0x2)
+ return 0;
+ w->word = w->word << 6 | (c & 0x6);
+ }
+
+ return 1;
+}
+
+/* Start bytes must be treated differently. Depending on the scenario,
+ * start bytes will contain parts of the encoded word and high-bit flags.
+ * In some cases, not all of the high-bit flags are part of the start
+ * byte.
+ */
+#define START_BYTE_NUM 7
+static int parse_start_byte(unsigned char c, struct word *w)
+{
+ static const struct {
+ /* The algorithm compares the mask to the start byte
+ * by shifting both to the right by the amount of 'x's
+ * (blank spaces). The array is arranged in reverse
+ * order so that the index indicates the amount of
+ * bits to shift.
+ */
+ unsigned char mask;
+ /* The word bits, if they exist, always start from the
+ * LSB, so there is no need to shift the bits away. The
+ * word_mask gets the low bits. If there are no bits, set
+ * to 0.
+ */
+ unsigned char word_mask;
+
+ /* The high bits may not start from the LSB. There needs
+ * to be a shift to get the bits to the LSB, and a mask
+ * to discard the higher bits.
+ */
+ unsigned char high_bit_mask;
+ int high_bit_shift;
+
+ /* The amount of NORMAL continuation bytes to read.
+ * This does NOT include continuation bytes that have
+ * high-bit flags in them.
+ */
+ int to_read;
+ } start_data[START_BYTE_NUM] {
+ {0xFE, 0x00, 0, 0x0, 5}, /* 11111110 */
+ {0xFC, 0x00, 0, 0x1, 4}, /* 1111110x */,
+ {0xF8, 0x00, 0, 0x3, 3}, /* 111110xx */,
+ {0xF0, 0x00, 0, 0x7, 2}, /* 11110xxx */,
+ {0xE0, 0x00, 0, 0xF, 2}, /* 1110xxxx */,
+ {0xC0, 0x01, 1, 0xF, 1}, /* 110xxxxx */,
+ /* The single byte sequence has no high bits. */
+ {0x00, 0x7F, 0, 0x0, 0} /* 0xxxxxxx */,
+ };
+
+ int i;
+
+ for (i = 0; i < START_BYTE_NUM; i++) {
+ if (c >> i == start_data[i].mask >> i) {
+ w->len = START_BYTE_NUM - i;
+ w->word = c & start_data[i].word_mask;
+ w->high_bits = (c >> start_data[i].high_bit_shift)
+ & start_data[i].high_bit_mask;
+ return start_data[i].to_read;
+ }
+ }
+
+ return -1;
+}
+
+/* This parses the first continuation byte if it is special. */
+#define SPECIAL_CONTINUE_BYTE_NUM (START_BYTE_NUM - 3)
+static void parse_special_byte(unsigned char c, struct word *w)
+{
+ /* The index denotes the amount of high bits that were in
+ * the start byte. This is the amount that the stored value
+ * must be shifted.
+ *
+ * The amount of bits that must be shifted out in the continue
+ * byte increase with the index. The amount shifted is (i + 2).
+ *
+ * Each value stored in the array is the mask applied after
+ * shifting the continue byte bits.
+ */
+ static const unsigned char mask[SPECIAL_CONTINUE_BYTE_NUM] = {
+ 0xF, /* 1111110 10HHHHxx */
+ 0x7, /* 111110H 10HHHxxx */
+ 0x3, /* 11110HH 10HHxxxx */
+ 0x1 /* 1110HHH 10Hxxxxx */
+ };
+ int i = w->len - START_BYTE_NUM;
+ w->high_bits = (w->high_bits << i) | ((c >> (2 + i)) & mask[i]);
+}
+
+/* Parse an entire Pseudo-UTF8 sequence. */
+static int decode_seq(struct creole_reader *r, struct word *w)
+{
+ int r_ret;
+ unsigned char c;
+ int to_read;
+
+ r_ret = read(r);
+ if (r_ret < 0)
+ return 0;
+
+ to_read = parse_start_byte((unsigned char)(r_ret & 0xFF), w);
+ if (to_read < 0)
+ return 0;
+
+ /* If to_read is not one less than w->len, that means there are
+ * high bits in the first continuation byte.
+ */
+ if (w->len - to_read > 1) {
+ r_ret = read(r);
+ if (r_ret < 0)
+ return 0;
+ parse_special_byte((unsigned char)(r_ret & 0xFF), w);
+ }
+
+ return read_continue(r, decoded_word, to_read);
+}
+
+/*************************************************************************
+ * Parsing instructions
+ *
+ * This parses an entire instruction, which is
+ * a single byte sequence,
+ * zero or more multibyte sequences,
+ * one single byte of all zeros.
+ *************************************************************************/
+
+int creole_parse_line(struct creole_ins *ins, struct creole_reader *r)
+{
+ struct word w = {0};
+ unsigned arg = 0;
+
+ if (!decode_seq(r, &w))
+ return 0;
+
+ ins->opcode = w.word;
+ if (w.word < CREOLE_ARG_TYPE_LEN || w.len != 1)
+ return 0;
+
+ for (arg = 0; arg < arglen; arg++) {
+ if (!decode_seq(r, &w))
+ return 0;
+ if (w.len == 1)
+ return 0;
+ ins->w[arg] = w.word;
+ ins->w_flags[arg] = w.high_bits;
+ }
+
+ if (!decode_seq(r, &w))
+ return 0;
+ if (w.word != 0 || w.len != 1)
+ return 0;
+ return 1;
+}
+
+/**************************************************************************
+ * High level compiling interface
+ *************************************************************************/
+
+static void clear_instruction(struct creole_env *env,
+ struct creole_ins *ins)
+{
+ memset(ins, 0, sizeof(ins));
+ env->prgptr--;
+}
+
+static int typecheck(enum creole_word_flag fl, enum creole_arg_type typ)
+{
+ switch (typ) {
+ case TYPE_NONE: return 0;
+ case TYPE_IMM: return fl == CREOLE_IMMEDIATE;
+ case TYPE_REG: return fl == CREOLE_REGISTER;
+ case TYPE_VAL: return fl == CREOLE_IMMEDIATE
+ | fl == CREOLE_REGISTER;
+ case TYPE_LAB: return fl == CREOLE_IMMEDIATE;
+ default: return 0;
+ }
+}
+
+static enum creole_compiler_ret typecheck_ins(struct creole_env *env,
+ struct creole_ins *ins)
+{
+ unsigned i;
+
+ for (i = 0; i < opcode_info[env->opcode].arglen; i++) {
+ if (!typecheck(ins->w[i],
+ opcode_info[env->opcode].argtype[i]))
+ return CREOLE_TYPE_ERROR;
+ }
+ return CREOLE_COMPILE_OK;
+}
+
+static enum creole_compiler_ret
+handle_compiletime_immediate(struct creole_env *env,
+ struct creole_ins *ins)
+{
+ switch (ins->opcode) {
+ case CREOLE_CLB:
+ if (ins->w[0] >= ins->lablen)
+ return CREOLE_LABEL_OVERFLOW;
+ ins->lab[ins->w[0]] = env->prgptr;
+ /* Delete instruction because it is a compile time
+ * instruction. Place next instruction in its place. */
+ clear_instruction(env, ins);
+ return CREOLE_COMPILE_OK;
+ case CREOLE_NOOP:
+ clear_instruction(env, ins);
+ return CREOLE_COMPILE_OK;
+ default:
+ return typecheck_ins(env, ins);
+ }
+}
+
+int creole_compile(struct creole_env *env, struct creole_reader *r)
+{
+ struct creole_ins *cur_ins = env->prg;
+ int rcode;
+
+ while (env->prgptr < env->prglen) {
+ if (!creole_parse_line(cur_ins, r))
+ return CREOLE_PARSE_ERROR;
+ /* Increase prgptr here. If the instruction is a compile
+ * time instruction, then this will be decremented since
+ * the instruction will not be executed.
+ */
+ env->prgptr++;
+
+ rcode = handle_compiletime_immediate(env, cur_ins);
+ if (rcode != CREOLE_COMPILE_OK)
+ return rcode;
+
+ if (read_eof(r))
+ break;
+ cur_ins += 1;
+ }
+
+ if (env->prgptr == env->prglen && *line)
+ return CREOLE_PROGRAM_OVERFLOW;
+ env->prgend = env->prgptr;
+ env->prgptr = 0;
+ return CREOLE_COMPILE_OK;
+}
+
+static creole_word read_word(struct creole_ins *ins, int i)
+{
+ if (env->w_flags[i] == CREOLE_REGISTER)
+ return env->reg[env->w[i]];
+ else
+ return env->w[i];
+}
+
+int creole_step(struct creole_env *env)
+{
+ struct creole_ins *ins = env->prg + env->prgptr;
+ creole_word a1, a2;
+
+ if (env->prgptr == env->prgend)
+ return CREOLE_STEP_STOP;
+ env->prgptr++;
+
+ switch (ins->opcode) {
+ case CREOLE_PUSH:
+ if (env->stkptr == env->stklen)
+ return CREOLE_STACK_OVERFLOW;
+ env->stk[env->stkptr++] = env->reg[env->w[0]];
+ break;
+ case CREOLE_POP:
+ if (env->stkptr == 0)
+ return CREOLE_STACK_OVERFLOW;
+ env->reg[env->w[0]] = env->stk[--env->stkptr];
+ break;
+ case CREOLE_ADD:
+ a1 = read_word(ins, 1);
+ a2 = read_word(ins, 2);
+ env->reg[env->w[0]] = a1 + a2;
+ break;
+ case CREOLE_MUL:
+ a1 = read_word(ins, 1);
+ a2 = read_word(ins, 2);
+ env->reg[env->w[0]] = a1 * a2;
+ break;
+ case CREOLE_DIV:
+ a1 = read_word(ins, 1);
+ a2 = read_word(ins, 2);
+ env->reg[env->w[0]] = a1 / a2;
+ break;
+ case CREOLE_JL:
+ a1 = read_word(ins, 1);
+ a2 = read_word(ins, 2);
+ if (a1 < a2)
+ env->prgptr = env->lab[env->w[0]];
+ break;
+ case SYS:
+ a1 = read_word(ins, 1);
+ /* do syscall */
+ break;
+ }
+}
+#endif