diff options
| author | 2023-02-05 11:44:37 +0000 | |
|---|---|---|
| committer | 2023-02-05 11:44:37 +0000 | |
| commit | f63d6cdd3df24d869437f9f689cbfff5d4bfb435 (patch) | |
| tree | ee6c907242abe6d27edd87cf87979db6c928693a /creole.c | |
prototype bytecode interpreter
Diffstat (limited to 'creole.c')
| -rw-r--r-- | creole.c | 410 |
1 files changed, 410 insertions, 0 deletions
diff --git a/creole.c b/creole.c new file mode 100644 index 0000000..4259e42 --- /dev/null +++ b/creole.c @@ -0,0 +1,410 @@ +#include "creole.h" + +/************************************************************************* + * Static information + ************************************************************************/ + +/* Arguments to opcodes can accept the following: + * immediate values only (as of now, no values are like this) + * register values only (push, pop, etc.) + * either values (math operations) + * labels (jumps) + * none (do not give an argument) + */ +enum creole_arg_type { + TYPE_NONE, + TYPE_IMM, + TYPE_REG, + TYPE_VAL, + TYPE_LAB, + CREOLE_ARG_TYPE_LEN +}; + +/* C99+ allows for designating the array index when initializing arrays: + [i] = v, + * in C89 indicies are implicit from 0 to the maximum filled-in value. + */ +#define defop(s, n, a1, a2, a3) {n, {a1, a2, a3}} +static const struct { + unsigned arglen; + enum creole_arg_type argtype[CREOLE_MAX_ARG]; +} opcode_info[CREOLE_OPCODE_LEN] = { + defop(NOOP, 0, TYPE_NONE, TYPE_NONE, TYPE_NONE), + defop(PUSH, 1, TYPE_VAL, TYPE_NONE, TYPE_NONE), + defop(POP, 1, TYPE_REG, TYPE_NONE, TYPE_NONE), + defop(ADD, 3, TYPE_REG, TYPE_VAL, TYPE_VAL), + defop(MUL, 3, TYPE_REG, TYPE_VAL, TYPE_VAL), + defop(DIV, 3, TYPE_REG, TYPE_VAL, TYPE_VAL), + defop(JL, 3, TYPE_LAB, TYPE_VAL, TYPE_VAL), + defop(CLB, 1, TYPE_LAB, TYPE_NONE, TYPE_NONE), + defop(SYS, 1, TYPE_VAL, TYPE_NONE, TYPE_NONE) +}; + +/************************************************************************* + * Reading from the buffer + ************************************************************************/ + +static int read(struct creole_reader *r) +{ + if (r->left == 0) + return -1; + r->left--; + return *r->p++; +} + +static int read_eof(struct creole_reader *r) +{ + return r->left == 0; +} + +#if 0 + +/************************************************************************* + * Pseudo-UTF-8 lexing + * + * Pseudo-UTF-8 is based off of UTF-8 but adds more + * bytes and allows (requires!) overlong encodings. + * + * Possible values: + * 0xxxxxxx (7 bits) + * 110xxxxx 10xxxxxx (11 bits) + * 1110xxxx 10xxxxxx 10xxxxxx (16 bits) + * 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx (21 bits) + * 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx (26 bits) + * 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx (31 bits) + * 11111110 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx (36 bits) + * 10xxxxxx + ************************************************************************/ + +/* A Psuedo-UTF-8 sequence can be either + * + * * A 1 byte sequence, where the lower 7 bits are the encoded + * * word (no high bits). + + * * A multi-byte sequence where the 4 MSB are flags, and the + * * lower bits are the encoded word. + */ +struct word { + int len; + int high_bits; + creole_word word; +}; + +/* Decode a set of continuation bytes directly into the word. This assumes + * that each continuation byte contains no high words. + */ +static int read_continue(struct creole_reader *r, struct encoded_word *w, + int to_read) +{ + int i; + int r_ret; + unsigned char c; + + for (i = 0; i < to_read) { + r_ret = read(r); + if (r_ret < 0) + return 0; + /* Characters might not be 8 bits! */ + c = (unsigned char)(r_ret & 0xFF); + if (c >> 6 != 0x2) + return 0; + w->word = w->word << 6 | (c & 0x6); + } + + return 1; +} + +/* Start bytes must be treated differently. Depending on the scenario, + * start bytes will contain parts of the encoded word and high-bit flags. + * In some cases, not all of the high-bit flags are part of the start + * byte. + */ +#define START_BYTE_NUM 7 +static int parse_start_byte(unsigned char c, struct word *w) +{ + static const struct { + /* The algorithm compares the mask to the start byte + * by shifting both to the right by the amount of 'x's + * (blank spaces). The array is arranged in reverse + * order so that the index indicates the amount of + * bits to shift. + */ + unsigned char mask; + /* The word bits, if they exist, always start from the + * LSB, so there is no need to shift the bits away. The + * word_mask gets the low bits. If there are no bits, set + * to 0. + */ + unsigned char word_mask; + + /* The high bits may not start from the LSB. There needs + * to be a shift to get the bits to the LSB, and a mask + * to discard the higher bits. + */ + unsigned char high_bit_mask; + int high_bit_shift; + + /* The amount of NORMAL continuation bytes to read. + * This does NOT include continuation bytes that have + * high-bit flags in them. + */ + int to_read; + } start_data[START_BYTE_NUM] { + {0xFE, 0x00, 0, 0x0, 5}, /* 11111110 */ + {0xFC, 0x00, 0, 0x1, 4}, /* 1111110x */, + {0xF8, 0x00, 0, 0x3, 3}, /* 111110xx */, + {0xF0, 0x00, 0, 0x7, 2}, /* 11110xxx */, + {0xE0, 0x00, 0, 0xF, 2}, /* 1110xxxx */, + {0xC0, 0x01, 1, 0xF, 1}, /* 110xxxxx */, + /* The single byte sequence has no high bits. */ + {0x00, 0x7F, 0, 0x0, 0} /* 0xxxxxxx */, + }; + + int i; + + for (i = 0; i < START_BYTE_NUM; i++) { + if (c >> i == start_data[i].mask >> i) { + w->len = START_BYTE_NUM - i; + w->word = c & start_data[i].word_mask; + w->high_bits = (c >> start_data[i].high_bit_shift) + & start_data[i].high_bit_mask; + return start_data[i].to_read; + } + } + + return -1; +} + +/* This parses the first continuation byte if it is special. */ +#define SPECIAL_CONTINUE_BYTE_NUM (START_BYTE_NUM - 3) +static void parse_special_byte(unsigned char c, struct word *w) +{ + /* The index denotes the amount of high bits that were in + * the start byte. This is the amount that the stored value + * must be shifted. + * + * The amount of bits that must be shifted out in the continue + * byte increase with the index. The amount shifted is (i + 2). + * + * Each value stored in the array is the mask applied after + * shifting the continue byte bits. + */ + static const unsigned char mask[SPECIAL_CONTINUE_BYTE_NUM] = { + 0xF, /* 1111110 10HHHHxx */ + 0x7, /* 111110H 10HHHxxx */ + 0x3, /* 11110HH 10HHxxxx */ + 0x1 /* 1110HHH 10Hxxxxx */ + }; + int i = w->len - START_BYTE_NUM; + w->high_bits = (w->high_bits << i) | ((c >> (2 + i)) & mask[i]); +} + +/* Parse an entire Pseudo-UTF8 sequence. */ +static int decode_seq(struct creole_reader *r, struct word *w) +{ + int r_ret; + unsigned char c; + int to_read; + + r_ret = read(r); + if (r_ret < 0) + return 0; + + to_read = parse_start_byte((unsigned char)(r_ret & 0xFF), w); + if (to_read < 0) + return 0; + + /* If to_read is not one less than w->len, that means there are + * high bits in the first continuation byte. + */ + if (w->len - to_read > 1) { + r_ret = read(r); + if (r_ret < 0) + return 0; + parse_special_byte((unsigned char)(r_ret & 0xFF), w); + } + + return read_continue(r, decoded_word, to_read); +} + +/************************************************************************* + * Parsing instructions + * + * This parses an entire instruction, which is + * a single byte sequence, + * zero or more multibyte sequences, + * one single byte of all zeros. + *************************************************************************/ + +int creole_parse_line(struct creole_ins *ins, struct creole_reader *r) +{ + struct word w = {0}; + unsigned arg = 0; + + if (!decode_seq(r, &w)) + return 0; + + ins->opcode = w.word; + if (w.word < CREOLE_ARG_TYPE_LEN || w.len != 1) + return 0; + + for (arg = 0; arg < arglen; arg++) { + if (!decode_seq(r, &w)) + return 0; + if (w.len == 1) + return 0; + ins->w[arg] = w.word; + ins->w_flags[arg] = w.high_bits; + } + + if (!decode_seq(r, &w)) + return 0; + if (w.word != 0 || w.len != 1) + return 0; + return 1; +} + +/************************************************************************** + * High level compiling interface + *************************************************************************/ + +static void clear_instruction(struct creole_env *env, + struct creole_ins *ins) +{ + memset(ins, 0, sizeof(ins)); + env->prgptr--; +} + +static int typecheck(enum creole_word_flag fl, enum creole_arg_type typ) +{ + switch (typ) { + case TYPE_NONE: return 0; + case TYPE_IMM: return fl == CREOLE_IMMEDIATE; + case TYPE_REG: return fl == CREOLE_REGISTER; + case TYPE_VAL: return fl == CREOLE_IMMEDIATE + | fl == CREOLE_REGISTER; + case TYPE_LAB: return fl == CREOLE_IMMEDIATE; + default: return 0; + } +} + +static enum creole_compiler_ret typecheck_ins(struct creole_env *env, + struct creole_ins *ins) +{ + unsigned i; + + for (i = 0; i < opcode_info[env->opcode].arglen; i++) { + if (!typecheck(ins->w[i], + opcode_info[env->opcode].argtype[i])) + return CREOLE_TYPE_ERROR; + } + return CREOLE_COMPILE_OK; +} + +static enum creole_compiler_ret +handle_compiletime_immediate(struct creole_env *env, + struct creole_ins *ins) +{ + switch (ins->opcode) { + case CREOLE_CLB: + if (ins->w[0] >= ins->lablen) + return CREOLE_LABEL_OVERFLOW; + ins->lab[ins->w[0]] = env->prgptr; + /* Delete instruction because it is a compile time + * instruction. Place next instruction in its place. */ + clear_instruction(env, ins); + return CREOLE_COMPILE_OK; + case CREOLE_NOOP: + clear_instruction(env, ins); + return CREOLE_COMPILE_OK; + default: + return typecheck_ins(env, ins); + } +} + +int creole_compile(struct creole_env *env, struct creole_reader *r) +{ + struct creole_ins *cur_ins = env->prg; + int rcode; + + while (env->prgptr < env->prglen) { + if (!creole_parse_line(cur_ins, r)) + return CREOLE_PARSE_ERROR; + /* Increase prgptr here. If the instruction is a compile + * time instruction, then this will be decremented since + * the instruction will not be executed. + */ + env->prgptr++; + + rcode = handle_compiletime_immediate(env, cur_ins); + if (rcode != CREOLE_COMPILE_OK) + return rcode; + + if (read_eof(r)) + break; + cur_ins += 1; + } + + if (env->prgptr == env->prglen && *line) + return CREOLE_PROGRAM_OVERFLOW; + env->prgend = env->prgptr; + env->prgptr = 0; + return CREOLE_COMPILE_OK; +} + +static creole_word read_word(struct creole_ins *ins, int i) +{ + if (env->w_flags[i] == CREOLE_REGISTER) + return env->reg[env->w[i]]; + else + return env->w[i]; +} + +int creole_step(struct creole_env *env) +{ + struct creole_ins *ins = env->prg + env->prgptr; + creole_word a1, a2; + + if (env->prgptr == env->prgend) + return CREOLE_STEP_STOP; + env->prgptr++; + + switch (ins->opcode) { + case CREOLE_PUSH: + if (env->stkptr == env->stklen) + return CREOLE_STACK_OVERFLOW; + env->stk[env->stkptr++] = env->reg[env->w[0]]; + break; + case CREOLE_POP: + if (env->stkptr == 0) + return CREOLE_STACK_OVERFLOW; + env->reg[env->w[0]] = env->stk[--env->stkptr]; + break; + case CREOLE_ADD: + a1 = read_word(ins, 1); + a2 = read_word(ins, 2); + env->reg[env->w[0]] = a1 + a2; + break; + case CREOLE_MUL: + a1 = read_word(ins, 1); + a2 = read_word(ins, 2); + env->reg[env->w[0]] = a1 * a2; + break; + case CREOLE_DIV: + a1 = read_word(ins, 1); + a2 = read_word(ins, 2); + env->reg[env->w[0]] = a1 / a2; + break; + case CREOLE_JL: + a1 = read_word(ins, 1); + a2 = read_word(ins, 2); + if (a1 < a2) + env->prgptr = env->lab[env->w[0]]; + break; + case SYS: + a1 = read_word(ins, 1); + /* do syscall */ + break; + } +} +#endif |
