/* Creole: pseudo-UTF-8 bytecode decoding, compilation and execution. */
#include "creole.h"
|
|
|
|
/*************************************************************************
 * Static information
 ************************************************************************/
|
|
|
|
/* Kinds of argument an opcode slot can accept:
 *   TYPE_NONE  the slot is unused (do not give an argument)
 *   TYPE_IMM   immediate values only (as of now, no opcode uses this)
 *   TYPE_REG   register values only (e.g. the first argument of pop, add)
 *   TYPE_VAL   either an immediate or a register (math operands)
 *   TYPE_LAB   labels (jumps)
 * CREOLE_ARG_TYPE_LEN counts the kinds above; it is not a kind itself.
 */
enum creole_arg_type {
	TYPE_NONE = 0,
	TYPE_IMM = 1,
	TYPE_REG = 2,
	TYPE_VAL = 3,
	TYPE_LAB = 4,
	CREOLE_ARG_TYPE_LEN = 5
};
|
|
|
|
/* C99+ allows for designating the array index when initializing arrays:
|
|
[i] = v,
|
|
* in C89 indicies are implicit from 0 to the maximum filled-in value.
|
|
*/
|
|
#define defop(s, n, a1, a2, a3) {n, {a1, a2, a3}}
|
|
static const struct {
|
|
unsigned arglen;
|
|
enum creole_arg_type argtype[CREOLE_MAX_ARG];
|
|
} opcode_info[CREOLE_OPCODE_LEN] = {
|
|
defop(NOOP, 0, TYPE_NONE, TYPE_NONE, TYPE_NONE),
|
|
defop(PUSH, 1, TYPE_VAL, TYPE_NONE, TYPE_NONE),
|
|
defop(POP, 1, TYPE_REG, TYPE_NONE, TYPE_NONE),
|
|
defop(ADD, 3, TYPE_REG, TYPE_VAL, TYPE_VAL),
|
|
defop(MUL, 3, TYPE_REG, TYPE_VAL, TYPE_VAL),
|
|
defop(DIV, 3, TYPE_REG, TYPE_VAL, TYPE_VAL),
|
|
defop(JL, 3, TYPE_LAB, TYPE_VAL, TYPE_VAL),
|
|
defop(CLB, 1, TYPE_LAB, TYPE_NONE, TYPE_NONE),
|
|
defop(SYS, 1, TYPE_VAL, TYPE_NONE, TYPE_NONE)
|
|
};
|
|
|
|
/*************************************************************************
 * Reading from the buffer
 ************************************************************************/
|
|
|
|
static int read(struct creole_reader *r)
|
|
{
|
|
if (r->left == 0)
|
|
return -1;
|
|
r->left--;
|
|
return *r->p++;
|
|
}
|
|
|
|
static int read_eof(struct creole_reader *r)
|
|
{
|
|
return r->left == 0;
|
|
}
|
|
|
|
/*************************************************************************
 * Pseudo-UTF-8 lexing
 *
 * Pseudo-UTF-8 is based off of UTF-8 but adds more
 * bytes and allows (requires!) overlong encodings.
 *
 * Possible values (H is a high-bit flag, x is a bit of the word):
 * 0xxxxxxx (7 bits)
 * 110HHHHx 10xxxxxx (11 bits)
 * 1110HHHH 10xxxxxx 10xxxxxx (16 bits)
 * 11110HHH 10Hxxxxx 10xxxxxx 10xxxxxx (21 bits)
 * 111110HH 10HHxxxx 10xxxxxx 10xxxxxx 10xxxxxx (26 bits)
 * 1111110H 10HHHxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx (31 bits)
 * 11111110 10HHHHxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx (36 bits)
 * 10xxxxxx (continuation byte; never valid as a start byte)
 ************************************************************************/
|
|
|
|
/* A Psuedo-UTF-8 sequence can be either
|
|
*
|
|
* * A 1 byte sequence, where the lower 7 bits are the encoded
|
|
* * word (no high bits).
|
|
|
|
* * A multi-byte sequence where the 4 MSB are flags, and the
|
|
* * lower bits are the encoded word.
|
|
*/
|
|
#define MAX_HIGH_BITS 15
|
|
struct word {
|
|
int len;
|
|
int high_bits;
|
|
creole_word word;
|
|
};
|
|
|
|
/* Decode a set of continuation bytes directly into the word. This assumes
|
|
* that each continuation byte contains no high words.
|
|
*/
|
|
static int read_continue(struct creole_reader *r, struct word *w,
|
|
int to_read)
|
|
{
|
|
int i;
|
|
int r_ret;
|
|
unsigned char c;
|
|
|
|
for (i = 0; i < to_read; i++) {
|
|
r_ret = read(r);
|
|
if (r_ret < 0) {
|
|
return 0;
|
|
}
|
|
/* Characters might not be 8 bits! */
|
|
c = (unsigned char)(r_ret & 0xFF);
|
|
if (c >> 6 != 0x2) {
|
|
return 0;
|
|
}
|
|
w->word = (w->word << 6) | (c & 0x3F);
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
/* Start bytes must be treated differently. Depending on the scenario,
|
|
* start bytes will contain parts of the encoded word and high-bit flags.
|
|
* In some cases, not all of the high-bit flags are part of the start
|
|
* byte.
|
|
*/
|
|
#define START_BYTE_NUM 7
|
|
static int parse_start_byte(unsigned char c, struct word *w)
|
|
{
|
|
static const struct {
|
|
/* The algorithm compares the mask to the start byte
|
|
* by shifting both to the right by the amount of 'x's
|
|
* (blank spaces). The array is arranged in reverse
|
|
* order so that the index indicates the amount of
|
|
* bits to shift.
|
|
*/
|
|
unsigned char mask;
|
|
|
|
/* The word bits, if they exist, always start from the
|
|
* LSB, so there is no need to shift the bits away. The
|
|
* word_mask gets the low bits. If there are no bits, set
|
|
* to 0.
|
|
*/
|
|
unsigned char word_mask;
|
|
|
|
/* The high bits may not start from the LSB. There needs
|
|
* to be a shift to get the bits to the LSB, and a mask
|
|
* to discard the higher bits.
|
|
*/
|
|
unsigned char high_bit_mask;
|
|
int high_bit_shift;
|
|
|
|
/* The amount of NORMAL continuation bytes to read.
|
|
* This does NOT include continuation bytes that have
|
|
* high-bit flags in them.
|
|
*/
|
|
int to_read;
|
|
} start_data[START_BYTE_NUM-1] = {
|
|
{0xFE, 0x00, 0x0, 0, 5}, /* 11111110 */
|
|
{0xFC, 0x00, 0x1, 0, 4}, /* 1111110x */
|
|
{0xF8, 0x00, 0x3, 0, 3}, /* 111110xx */
|
|
{0xF0, 0x00, 0x7, 0, 2}, /* 11110xxx */
|
|
{0xE0, 0x00, 0xF, 0, 2}, /* 1110xxxx */
|
|
{0xC0, 0x01, 0xF, 1, 1} /* 110xxxxx */
|
|
};
|
|
|
|
int i;
|
|
|
|
for (i = 0; i < START_BYTE_NUM-1; i++) {
|
|
if (c >> i == start_data[i].mask >> i) {
|
|
w->len = START_BYTE_NUM - i;
|
|
w->word = c & start_data[i].word_mask;
|
|
w->high_bits = (c >> start_data[i].high_bit_shift)
|
|
& start_data[i].high_bit_mask;
|
|
return start_data[i].to_read;
|
|
}
|
|
}
|
|
/* i == 7 */
|
|
if (c >> 7 == 0) {
|
|
w->len = 1;
|
|
w->word = c;
|
|
w->high_bits = 0;
|
|
return 0;
|
|
}
|
|
|
|
return -1;
|
|
}
|
|
|
|
/* This parses the first continuation byte if it is special. */
|
|
#define SPECIAL_CONTINUE_BYTE_NUM (START_BYTE_NUM - 3)
|
|
static int parse_special_byte(unsigned char c, struct word *w)
|
|
{
|
|
/* The index denotes the amount of high bits that were in
|
|
* the start byte. This is the amount that the stored value
|
|
* must be shifted.
|
|
*
|
|
* The amount of bits that must be shifted out in the continue
|
|
* byte increase with the index. The amount shifted is (i + 2).
|
|
*
|
|
* Each value stored in the array is the mask applied after
|
|
* shifting the continue byte bits.
|
|
*/
|
|
static const unsigned char mask[SPECIAL_CONTINUE_BYTE_NUM] = {
|
|
0x1, /* 11110HHH 10Hxxxxx */
|
|
0x3, /* 111110HH 10HHxxxx */
|
|
0x7, /* 1111110H 10HHHxxx */
|
|
0xF /* 11111110 10HHHHxx */
|
|
};
|
|
static const unsigned char wordmask[SPECIAL_CONTINUE_BYTE_NUM] = {
|
|
0x1F, 0xF, 0x7, 0x3
|
|
};
|
|
|
|
int i = w->len - 4;
|
|
if (i >= SPECIAL_CONTINUE_BYTE_NUM)
|
|
return 0;
|
|
|
|
w->high_bits = (w->high_bits << (i + 1)) | ((c >> (5 - i)) & mask[i]);
|
|
w->word = c & wordmask[i];
|
|
return 1;
|
|
}
|
|
|
|
/* Parse an entire Pseudo-UTF8 sequence. */
|
|
static int decode_seq(struct creole_reader *r, struct word *w)
|
|
{
|
|
int r_ret;
|
|
int to_read;
|
|
w->high_bits = 0;
|
|
|
|
r_ret = read(r);
|
|
if (r_ret < 0)
|
|
return 0;
|
|
|
|
to_read = parse_start_byte((unsigned char)(r_ret & 0xFF), w);
|
|
if (to_read < 0)
|
|
return 0;
|
|
|
|
/* If to_read is not one less than w->len, that means there are
|
|
* high bits in the first continuation byte.
|
|
*/
|
|
if (w->len - to_read > 1) {
|
|
r_ret = read(r);
|
|
if (r_ret < 0)
|
|
return 0;
|
|
if (!parse_special_byte((unsigned char)(r_ret & 0xFF), w))
|
|
return 0;
|
|
}
|
|
|
|
return read_continue(r, w, to_read);
|
|
}
|
|
|
|
/* Encode the word i as a Pseudo-UTF-8 sequence of exactly encode_to bytes.
 *
 *   i          the word to encode
 *   encode_to  length of the sequence to produce, from 1 to 7
 *   high_bits  the high-bit flags to store, from 0 to MAX_HIGH_BITS;
 *              ignored when encode_to is 1, because a single byte has
 *              no room for flags
 *   buf        receives the sequence; the first encode_to bytes are
 *              written
 *
 * Returns 1 on success. Returns 0 without writing to buf when encode_to
 * is out of range, when i does not fit in a sequence of that length, or
 * when high_bits does not fit in the four flag bits.
 */
int creole_encode(creole_word i, unsigned encode_to, unsigned high_bits,
                  unsigned char buf[7])
{
	/* One row per sequence length, from 2 to 7 bytes. */
	static const struct {
		/* Largest word that fits in a sequence of this length. */
		creole_word max;

		/* Length marker bits of the first byte. */
		unsigned char b1_mask;
		/* The flags stored in the first byte are
		 * (high_bits >> high_bit_shift_b1)
		 *            << high_bit_shift_to_right_b1.
		 */
		int high_bit_shift_b1;
		int high_bit_shift_to_right_b1;
		/* The word bits stored in the first byte are
		 * i >> data_shift_b1.
		 */
		int data_shift_b1;

		/* The flags stored in the second byte are
		 * (high_bits & high_bit_mask_b2) << high_bit_shift_b2.
		 */
		int high_bit_mask_b2;
		int high_bit_shift_b2;
		/* Selects the word bits stored in the second byte. */
		unsigned char b2_data_mask;
	} d[] = {
		{0x7F,       0xC0, 0, 1, 6,  0x0, 0, 0x3F}, /* 2 */
		{0xFFF,      0xE0, 0, 0, 12, 0x0, 0, 0x3F}, /* 3 */
		{0x1FFFF,    0xF0, 1, 0, 17, 0x1, 5, 0x1F}, /* 4 */
		{0x3FFFFF,   0xF8, 2, 0, 22, 0x3, 4, 0x0F}, /* 5 */
		{0x7FFFFFF,  0xFC, 3, 0, 27, 0x7, 3, 0x07}, /* 6 */
		{0xFFFFFFFF, 0xFE, 4, 0, 32, 0xF, 2, 0x03}  /* 7 */
	};
	unsigned lb;
	unsigned j;

	/* buf holds 7 bytes and d describes lengths 2 to 7, so anything
	 * outside 1..7 would index past the end of one or both of them.
	 */
	if (encode_to < 1 || encode_to > 7)
		return 0;

	if (encode_to == 1) {
		if (i >= 0x80)
			return 0;
		buf[0] = (unsigned char)i;
		return 1;
	}

	/* Flags wider than four bits would spill into the length marker. */
	if (high_bits > MAX_HIGH_BITS)
		return 0;

	lb = encode_to - 2;
	if (i > d[lb].max)
		return 0;

	buf[0] = (unsigned char)(d[lb].b1_mask
	         | ((high_bits >> d[lb].high_bit_shift_b1)
	            << d[lb].high_bit_shift_to_right_b1));
	/* Shifts greater than or equal to the bit size of a type are
	 * undefined. Data in the first byte is always aligned with the LSB.
	 */
	if ((unsigned)d[lb].data_shift_b1 < sizeof(i) * CHAR_BIT)
		buf[0] |= (unsigned char)(i >> d[lb].data_shift_b1);

	buf[1] = (unsigned char)(0x80
	         | ((high_bits & d[lb].high_bit_mask_b2)
	            << d[lb].high_bit_shift_b2)
	         | ((i >> ((encode_to - 2) * 6))
	            & d[lb].b2_data_mask));

	/* Every remaining byte is a normal continuation byte holding the
	 * next six bits of the word, most significant first.
	 */
	for (j = 2; j < encode_to; j++)
		buf[j] = (unsigned char)(0x80
		         | ((i >> ((encode_to - j - 1) * 6)) & 0x3F));

	return 1;
}
|
|
|
|
/*************************************************************************
 * Parsing instructions
 *
 * This parses an entire instruction, which is
 *   a single byte sequence (the opcode),
 *   zero or more multibyte sequences (the arguments),
 *   one single byte of all zeros.
 *************************************************************************/
|
|
|
|
enum creole_compiler_ret
|
|
creole_parse_line(struct creole_ins *ins, struct creole_reader *r)
|
|
{
|
|
struct word w = {0};
|
|
unsigned arg = 0;
|
|
|
|
if (!decode_seq(r, &w))
|
|
return CREOLE_OPCODE_READ_ERROR;
|
|
|
|
ins->opcode = w.word;
|
|
if (w.word >= CREOLE_ARG_TYPE_LEN || w.len != 1)
|
|
return CREOLE_OPCODE_MALFORMED;
|
|
|
|
for (arg = 0; arg < opcode_info[ins->opcode].arglen; arg++) {
|
|
if (!decode_seq(r, &w))
|
|
return CREOLE_ARG_READ_ERROR;
|
|
if (w.len == 1)
|
|
return CREOLE_ARG_MALFORMED;
|
|
ins->w[arg] = w.word;
|
|
ins->w_flags[arg] = w.high_bits;
|
|
}
|
|
|
|
if (!decode_seq(r, &w))
|
|
return CREOLE_LAST_READ_ERROR;
|
|
if (w.word != 0 || w.len != 1)
|
|
return CREOLE_LAST_MALFORMED;
|
|
return CREOLE_COMPILE_OK;
|
|
}
|
|
|
|
/**************************************************************************
 * High level compiling interface
 *************************************************************************/
|
|
|
|
static int valid_register(struct creole_env *env, int reg)
|
|
{
|
|
return reg < env->reglen;
|
|
}
|
|
|
|
static int valid_label(struct creole_env *env, int reg)
|
|
{
|
|
return reg < env->lablen;
|
|
}
|
|
|
|
static int typecheck(struct creole_env *env, int val,
|
|
enum creole_word_flag fl, enum creole_arg_type typ)
|
|
{
|
|
switch (typ) {
|
|
case TYPE_IMM: return fl == CREOLE_IMMEDIATE;
|
|
case TYPE_REG: return fl == CREOLE_REGISTER
|
|
&& valid_register(env, val);
|
|
case TYPE_VAL: return fl == CREOLE_IMMEDIATE
|
|
|| fl == CREOLE_REGISTER;
|
|
case TYPE_LAB: return fl == CREOLE_IMMEDIATE
|
|
&& valid_label(env, val);
|
|
default: return 0;
|
|
}
|
|
}
|
|
|
|
static enum creole_compiler_ret typecheck_ins(struct creole_env *env,
|
|
struct creole_ins *ins)
|
|
{
|
|
unsigned i;
|
|
|
|
for (i = 0; i < opcode_info[ins->opcode].arglen; i++) {
|
|
if (!typecheck(env, ins->w[i], ins->w_flags[i],
|
|
opcode_info[ins->opcode].argtype[i]))
|
|
return CREOLE_TYPE_ERROR;
|
|
}
|
|
return CREOLE_COMPILE_OK;
|
|
}
|
|
|
|
static void clear_ins(struct creole_ins *i)
|
|
{
|
|
const struct creole_ins blank = {0};
|
|
*i = blank;
|
|
}
|
|
|
|
/****
|
|
* Get rid of instructions that can be written out at compile time.
|
|
***/
|
|
static enum creole_compiler_ret
|
|
handle_compiletime_immediate(struct creole_env *env,
|
|
struct creole_ins *cur_ins)
|
|
{
|
|
switch (cur_ins->opcode) {
|
|
case CREOLE_CLB:
|
|
if (cur_ins->w[0] >= env->lablen)
|
|
return CREOLE_LABEL_OVERFLOW;
|
|
env->lab[cur_ins->w[0]] = env->prgptr;
|
|
/* Delete instruction because it is a compile time
|
|
* instruction. Place next instruction in its place. */
|
|
clear_ins(cur_ins);
|
|
return CREOLE_COMPILE_CLEARED_INSTRUCTION;
|
|
case CREOLE_NOOP:
|
|
clear_ins(cur_ins);
|
|
return CREOLE_COMPILE_CLEARED_INSTRUCTION;
|
|
default:
|
|
return typecheck_ins(env, cur_ins);
|
|
}
|
|
}
|
|
|
|
enum creole_compiler_ret
|
|
creole_compile(struct creole_env *env, struct creole_reader *r)
|
|
{
|
|
struct creole_ins *cur_ins = env->prg;
|
|
int rcode;
|
|
|
|
while (env->prgptr < env->prglen) {
|
|
rcode = creole_parse_line(cur_ins, r);
|
|
if (rcode != CREOLE_COMPILE_OK)
|
|
return rcode;
|
|
|
|
rcode = handle_compiletime_immediate(env, cur_ins);
|
|
switch (rcode) {
|
|
case CREOLE_COMPILE_CLEARED_INSTRUCTION:
|
|
break;
|
|
case CREOLE_COMPILE_OK:
|
|
cur_ins++;
|
|
env->prgptr++;
|
|
break;
|
|
default:
|
|
return rcode;
|
|
}
|
|
|
|
if (read_eof(r))
|
|
break;
|
|
}
|
|
|
|
if (env->prgptr == env->prglen && !read_eof(r))
|
|
return CREOLE_PROGRAM_OVERFLOW;
|
|
env->prgend = env->prgptr;
|
|
env->prgptr = 0;
|
|
return CREOLE_COMPILE_OK;
|
|
}
|
|
|
|
enum creole_run_ret creole_reg_write(struct creole_env *env,
|
|
creole_word w, unsigned reg)
|
|
{
|
|
if (!valid_register(env, reg))
|
|
return CREOLE_REGISTER_OVERFLOW;
|
|
env->reg[reg] = w;
|
|
return CREOLE_STEP_CONTINUE;
|
|
}
|
|
|
|
/* Copy the contents of register number reg into *w.
 * Returns CREOLE_STEP_CONTINUE on success, CREOLE_REGISTER_OVERFLOW if
 * reg is not a valid register (*w is then left untouched).
 */
enum creole_run_ret creole_reg_read(struct creole_env *env, creole_word *w,
                                    unsigned reg)
{
	if (valid_register(env, reg)) {
		*w = env->reg[reg];
		return CREOLE_STEP_CONTINUE;
	}
	return CREOLE_REGISTER_OVERFLOW;
}
|
|
|
|
static enum creole_run_ret read_val(struct creole_env *env,
|
|
struct creole_ins *ins,
|
|
unsigned arg,
|
|
creole_word *w)
|
|
{
|
|
if (ins->w_flags[arg] == CREOLE_REGISTER) {
|
|
return creole_reg_read(env, w, ins->w[arg]);
|
|
} else {
|
|
*w = ins->w[arg];
|
|
}
|
|
|
|
return CREOLE_STEP_CONTINUE;
|
|
}
|
|
|
|
/* Push w onto the stack of env.
 * Returns CREOLE_STEP_CONTINUE on success, CREOLE_STACK_OVERFLOW if the
 * stack is full.
 */
enum creole_run_ret creole_push(struct creole_env *env, creole_word w)
{
	if (env->stkptr != env->stklen) {
		env->stk[env->stkptr] = w;
		env->stkptr++;
		return CREOLE_STEP_CONTINUE;
	}
	return CREOLE_STACK_OVERFLOW;
}
|
|
|
|
/* Pop the top of the stack of env into *w.
 * Returns CREOLE_STEP_CONTINUE on success, CREOLE_STACK_UNDERFLOW if the
 * stack is empty (*w is then left untouched).
 */
enum creole_run_ret creole_pop(struct creole_env *env, creole_word *w)
{
	if (env->stkptr != 0) {
		env->stkptr--;
		*w = env->stk[env->stkptr];
		return CREOLE_STEP_CONTINUE;
	}
	return CREOLE_STACK_UNDERFLOW;
}
|
|
|
|
static enum creole_run_ret
|
|
check_label(struct creole_env *env, creole_word label)
|
|
{
|
|
return label < env->lablen
|
|
? CREOLE_STEP_CONTINUE
|
|
: CREOLE_RUN_LABEL_OVERFLOW;
|
|
}
|
|
|
|
/* Evaluate fun, store its result in the local variable rcode and return
 * that result from the enclosing function unless it is
 * CREOLE_STEP_CONTINUE.
 */
#define check(fun) do {                            \
	rcode = (fun);                             \
	if (rcode != CREOLE_STEP_CONTINUE)         \
		return rcode;                      \
} while (0)
|
|
|
|
/* Execute the instruction at env->prgptr.
 *
 *   env  the environment to run in
 *   sc   receives the value of the argument of a SYS instruction
 *
 * Returns
 *   CREOLE_STEP_STOP            the end of the program has been reached
 *   CREOLE_STEP_CONTINUE        the instruction ran
 *   CREOLE_STEP_SYSCALL         the instruction was SYS; *sc is set
 *   CREOLE_STEP_UNKNOWN_OPCODE  the opcode is not handled here
 * or the error reported by a helper (register, stack or label out of
 * range). After an error env->prgptr is left on the failing instruction;
 * in every other case it moves to the next instruction to run.
 */
enum creole_run_ret creole_step(struct creole_env *env, creole_word *sc)
{
	struct creole_ins *ins = env->prg + env->prgptr;
	creole_word a1, a2;
	int rcode = CREOLE_STEP_CONTINUE;

	if (env->prgptr == env->prgend)
		return CREOLE_STEP_STOP;

	switch (ins->opcode) {
	case CREOLE_PUSH:
		check(read_val(env, ins, 0, &a1));
		check(creole_push(env, a1));
		break;
	case CREOLE_POP:
		check(creole_pop(env, &a1));
		check(creole_reg_write(env, a1, ins->w[0]));
		break;
	/* For the math opcodes argument 0 is the destination register and
	 * arguments 1 and 2 are the operands. creole_reg_write() takes the
	 * value first and the register second.
	 */
	case CREOLE_ADD:
		check(read_val(env, ins, 1, &a1));
		check(read_val(env, ins, 2, &a2));
		check(creole_reg_write(env, a1 + a2, ins->w[0]));
		break;
	case CREOLE_MUL:
		check(read_val(env, ins, 1, &a1));
		check(read_val(env, ins, 2, &a2));
		check(creole_reg_write(env, a1 * a2, ins->w[0]));
		break;
	case CREOLE_DIV:
		check(read_val(env, ins, 1, &a1));
		check(read_val(env, ins, 2, &a2));
		/* NOTE(review): a2 == 0 is undefined behavior. No return
		 * code for division by zero is visible from here, so it
		 * is not handled -- add one to enum creole_run_ret.
		 */
		check(creole_reg_write(env, a1 / a2, ins->w[0]));
		break;
	case CREOLE_JL:
		check(read_val(env, ins, 1, &a1));
		check(read_val(env, ins, 2, &a2));
		check(check_label(env, ins->w[0]));
		if (a1 < a2) {
			/* The label already holds the position of the
			 * instruction to run next, so return before the
			 * increment below would step past it.
			 */
			env->prgptr = env->lab[ins->w[0]];
			return CREOLE_STEP_CONTINUE;
		}
		break;
	case CREOLE_SYS:
		check(read_val(env, ins, 0, sc));
		rcode = CREOLE_STEP_SYSCALL;
		break;
	default:
		rcode = CREOLE_STEP_UNKNOWN_OPCODE;
		break;
	}

	env->prgptr++;
	return rcode;
}

#undef check
|