From 7b3eaf9b4f9aaec02ce63be638de373397fa9460 Mon Sep 17 00:00:00 2001 From: Peter McGoron Date: Sun, 12 Feb 2023 18:06:08 +0000 Subject: [PATCH] add pseudoinstructions that compile to regular instructions --- asm/creole.py | 128 ++++++++++++++++++++++++++++++-------------------- asm/ffi.py | 3 +- asm/test.py | 4 +- creole.c | 86 +++++++++++++++++++-------------- creole.h | 28 +++++------ 5 files changed, 146 insertions(+), 103 deletions(-) diff --git a/asm/creole.py b/asm/creole.py index 4cb4b8c..b1481dd 100644 --- a/asm/creole.py +++ b/asm/creole.py @@ -17,6 +17,16 @@ def from_2c(w): return w return -word_2c(w) +class Argument: + def __init__(self, argtype, val, sign=False): + self.at = argtype + self.sign = sign + self.val = val + def __str__(self): + return f'({self.at}, {self.sign}, {self.val})' + def high_bits(self): + return int(self.sign) << 1 | (self.at == ArgType.REG) + class ArgType(Enum): """ Class denoting the type of an argument to an instruction. """ @@ -39,13 +49,13 @@ class ArgType(Enum): the argument. """ if s.isnumeric(): - return (ArgType.IMM, int(s)) + return Argument(ArgType.IMM, int(s)) elif s[0] == "-" and s[1:].isnumeric(): - return (ArgType.IMM, word_2c(int(s[1:]))) + return Argument(ArgType.IMM, word_2c(int(s[1:]))) elif s[0] == 'r' and s[1:].isnumeric(): - return (ArgType.REG, int(s[1:])) + return Argument(ArgType.REG, int(s[1:])) elif s[0] == 'l' and s[1:].isnumeric(): - return (ArgType.LAB, int(s[1:])) + return Argument(ArgType.LAB, int(s[1:])) else: raise MalformedArgument(s) @@ -54,11 +64,11 @@ class ArgType(Enum): the type of the enum value. """ t = ArgType.gettype(s) if self == ArgType.VAL: - if t[0] == ArgType.REG or t[0] == ArgType.IMM: + if t.at == ArgType.REG or t.at == ArgType.IMM: return t else: return None - elif t[0] == self: + elif t.at == self: return t else: return None @@ -84,29 +94,38 @@ class TypecheckException(Exception): class Instruction(Enum): """ Class of microcode instructions. The first number is the opcode and the suceeding values are the types of each of the - arguments. """ - NOP = 0 - PUSH = 1, ArgType.VAL - POP = 2, ArgType.REG - ADD = 3, ArgType.REG, ArgType.VAL, ArgType.VAL - MUL = 4, ArgType.REG, ArgType.VAL, ArgType.VAL - DIV = 5, ArgType.REG, ArgType.VAL, ArgType.VAL - SDIV = 6, ArgType.REG, ArgType.VAL, ArgType.VAL - SYS = 7, ArgType.VAL - CLB = 8, ArgType.LAB - JL = 9, ArgType.LAB, ArgType.VAL, ArgType.VAL - JLE = 10, ArgType.LAB, ArgType.VAL, ArgType.VAL - JG = 11, ArgType.LAB, ArgType.VAL, ArgType.VAL - JGE = 12, ArgType.LAB, ArgType.VAL, ArgType.VAL - JE = 13, ArgType.LAB, ArgType.VAL, ArgType.VAL - JNE = 13, ArgType.LAB, ArgType.VAL, ArgType.VAL + arguments. The first argument is the opcode and the second + argument is if the argument is a signed variant of another + opcode. """ + NOP = 0, False + PUSH = 1, False, ArgType.VAL + POP = 2, False, ArgType.REG + ADD = 3, False, ArgType.REG, ArgType.VAL, ArgType.VAL + MUL = 4, False, ArgType.REG, ArgType.VAL, ArgType.VAL + DIV = 5, False, ArgType.REG, ArgType.VAL, ArgType.VAL + SDIV = "DIV", True, ArgType.REG, ArgType.VAL, ArgType.VAL + SYS = 6, False, ArgType.VAL + CLB = 7, False, ArgType.LAB + JL = 8, False, ArgType.LAB, ArgType.VAL, ArgType.VAL + JLS = "JL", True, ArgType.LAB, ArgType.VAL, ArgType.VAL + JLE = 9, False, ArgType.LAB, ArgType.VAL, ArgType.VAL + JLES = "JLE", True, ArgType.LAB, ArgType.VAL, ArgType.VAL + JE = 10, False, ArgType.LAB, ArgType.VAL, ArgType.VAL + JNE = 11, False, ArgType.LAB, ArgType.VAL, ArgType.VAL - def __init__(self, opcode, *args): - if opcode > 0x7F or opcode < 0: + def __int__(self): + return self.opcode + + def __init__(self, opcode, signed_instruction, *args): + if type(opcode) is int and (opcode > 0x7F or opcode < 0): raise OpcodeException(opcode) self.opcode = opcode self.argtypes = args + if signed_instruction: + self.render = self._render_change_args + else: + self.render = self._default_render def typecheck(self, sargs): """ Pass arguments to the instruction and check if the @@ -124,15 +143,31 @@ class Instruction(Enum): rargs.append(t) return rargs + # The following will be called using OPCODE.render() instead of being + # called directly. + + def _render_change_args(self, args): + for i in range(0,len(args)): + if args[i].at != ArgType.LAB: + args[i].sign = True + return Instruction[self.opcode].render(args) + + def _default_render(self, args): + b = bytes([self.opcode]) + for a in args: + l = 2 if a.val < 0x80 else None + bex = encode_pseudo_utf8(a.val, a.high_bits(), l) + b = b + bex + return b + bytes([0]) + encoding_types = { -# start mask A B +# start mask B 2: (0x7F, 0xC0, 7), 3: (0xFFF, 0xE0, 12), - 4: (0x1FFFF, 0xF0, 16), - 5: (0x3FFFFF, 0xF8, 21), - 6: (0x7FFFFFF, 0xFC, 26), - 7: (0xFFFFFFFF, 0xFE, 36), -# A : number of bits in start byte + 4: (0x1FFFF, 0xF0, 17), + 5: (0x3FFFFF, 0xF8, 22), + 6: (0x7FFFFFF, 0xFC, 27), + 7: (0xFFFFFFFF, 0xFE, 32), # B : Total number of bits excluding high bits } @@ -172,32 +207,25 @@ def encode_pseudo_utf8(n, high_bits, to): class RangeCheckException(Exception): pass class Line: - def __init__(self, opcode, args): - self.opcode = opcode + def __init__(self, ins, args): + self.ins = ins self.args = args def check_line(self, lablen, reglen): for a in self.args: - if a[0] == ArgType.REG: - if a[1] < 0 or a[1] >= reglen: - raise RangeCheckException(a[0], - a[1], + if a.at == ArgType.REG: + if a.val < 0 or a.val >= reglen: + raise RangeCheckException(a.at, + a.val, reglen) - elif a[0] == ArgType.LAB: - if a[1] < 0 or a[1] >= lablen: - raise RangeCheckException(a[0], - a[1], + elif a.at == ArgType.LAB: + if a.val < 0 or a.val >= lablen: + raise RangeCheckException(a.at, + a.val, reglen) def __call__(self): - b = bytes([self.opcode]) - for a in self.args: - l = 2 if a[1] < 0x80 else None - if a[0] == ArgType.REG: - b = b + encode_pseudo_utf8(a[1],1,l) - else: - b = b + encode_pseudo_utf8(a[1],0,l) - return b + bytes([0]) + return self.ins.render(self.args) class InstructionNotFoundException(Exception): pass @@ -212,12 +240,12 @@ class Program: line[0] = line[0].casefold() try: # TODO: is there no better way to do this in Python? - ins = getattr(Instruction, line[0].upper()) + ins = Instruction[line[0].upper()] except Exception as e: raise InstructionNotFoundException(line[0]) args_w_type = ins.typecheck(line[1:]) - self.asm_push_line(ins.opcode, args_w_type) + self.asm_push_line(ins, args_w_type) def parse_lines(self, lines): for l in lines: diff --git a/asm/ffi.py b/asm/ffi.py index ba327bf..ccbe40d 100644 --- a/asm/ffi.py +++ b/asm/ffi.py @@ -22,10 +22,11 @@ class RunRet(Enum): STOP = 2 STACK_OVERFLOW = 3 STACK_UNDERFLOW = 4 - RUN_LABEL_OVERFLOW = 5 + LABEL_OVERFLOW = 5 REGISTER_OVERFLOW = 6 UNKNOWN_OPCODE = 7 DIVIDE_BY_ZERO = 8 + HIGH_BIT_MALFORMED = 9 def is_halt(self): return not (self == RunRet.CONTINUE or self == RunRet.SYSCALL) diff --git a/asm/test.py b/asm/test.py index 00e9e5e..4008e3d 100644 --- a/asm/test.py +++ b/asm/test.py @@ -50,7 +50,7 @@ class PushTest(unittest.TestCase): p = Program(reglen=0x8000000) p.parse_asm_line("PUSH r134217727") b = p() - self.assertEqual(b, b'\x01\xFC\x87\xbf\xbf\xbf\xbf\x00') + self.assertEqual(b, b'\x01\xFC\x8f\xbf\xbf\xbf\xbf\x00') def test_compile_push(self): p = Program() @@ -318,7 +318,7 @@ class LabelTest(unittest.TestCase): "CLB l0", "add r0 r0 -1", "add r1 r1 1", - "jg l0 r0 0" + "jl l0 0 r0" ]) ex = ffi.Environment(p()) self.assertEqual(ex(), ffi.RunRet.STOP) diff --git a/creole.c b/creole.c index 6a1daa8..fbde87e 100644 --- a/creole.c +++ b/creole.c @@ -35,13 +35,10 @@ static const struct { defop(ADD, 3, TYPE_REG, TYPE_VAL, TYPE_VAL), defop(MUL, 3, TYPE_REG, TYPE_VAL, TYPE_VAL), defop(DIV, 3, TYPE_REG, TYPE_VAL, TYPE_VAL), - defop(SDIV, 3, TYPE_REG, TYPE_VAL, TYPE_VAL), defop(SYS, 1, TYPE_VAL, TYPE_NONE, TYPE_NONE), defop(CLB, 1, TYPE_LAB, TYPE_NONE, TYPE_NONE), defop(JL, 3, TYPE_LAB, TYPE_VAL, TYPE_VAL), defop(JLE, 3, TYPE_LAB, TYPE_VAL, TYPE_VAL), - defop(JG, 3, TYPE_LAB, TYPE_VAL, TYPE_VAL), - defop(JGE, 3, TYPE_LAB, TYPE_VAL, TYPE_VAL), defop(JE, 3, TYPE_LAB, TYPE_VAL, TYPE_VAL), defop(JNE, 3, TYPE_LAB, TYPE_VAL, TYPE_VAL) }; @@ -378,13 +375,23 @@ static int typecheck(struct creole_env *env, int val, } } +static enum creole_word_flag get_type_from_high_bit(unsigned high_bits) +{ + if (high_bits & 1) { + return CREOLE_REGISTER; + } else { + return CREOLE_IMMEDIATE; + } +} + static enum creole_compiler_ret typecheck_ins(struct creole_env *env, struct creole_ins *ins) { unsigned i; for (i = 0; i < opcode_info[ins->opcode].arglen; i++) { - if (!typecheck(env, ins->w[i], ins->w_flags[i], + if (!typecheck(env, ins->w[i], + get_type_from_high_bit(ins->w_flags[i]), opcode_info[ins->opcode].argtype[i])) return CREOLE_TYPE_ERROR; } @@ -479,7 +486,7 @@ static enum creole_run_ret read_val(struct creole_env *env, unsigned arg, creole_word *w) { - if (ins->w_flags[arg] == CREOLE_REGISTER) { + if (get_type_from_high_bit(ins->w_flags[arg]) == CREOLE_REGISTER) { return creole_reg_read(env, ins->w[arg], w); } else { *w = ins->w[arg]; @@ -512,12 +519,43 @@ check_label(struct creole_env *env, creole_word label) : CREOLE_RUN_LABEL_OVERFLOW; } +enum argument_signed { + ALL_UNSIGNED = 0, // 0b00 + FIRST_SIGNED = 2, // 0b10 + SECOND_SIGNED = 1, // 0b01 + ALL_SIGNED = 3 // 0b11 +}; + +static enum argument_signed check_sign_bits(unsigned flags1, unsigned flags2) +{ + return (flags1 & 0x2) | ((flags2 & 0x2) >> 1); +} + #define check(fun) do { \ rcode = fun; \ if (rcode != CREOLE_STEP_CONTINUE) \ return rcode; \ } while(0) +#define chk_sign_op(OPER) do { \ + switch (check_sign_bits(ins->w_flags[1], ins->w_flags[2])) { \ + case ALL_UNSIGNED: \ + a1 = a1 OPER a2; \ + break; \ + case FIRST_SIGNED: \ + a1 = (creole_signed)a1 OPER a2; \ + break; \ + case SECOND_SIGNED: \ + a1 = a1 OPER (creole_signed)a2; \ + break; \ + case ALL_SIGNED: \ + a1 = (creole_signed) a1 OPER (creole_signed) a2; \ + break; \ + default: \ + return CREOLE_STEP_HIGH_BIT_MALFORMED; \ + } \ +} while(0) + enum creole_run_ret creole_step(struct creole_env *env, creole_word *sc) { struct creole_ins *ins = env->prg + env->prgptr; @@ -550,17 +588,11 @@ enum creole_run_ret creole_step(struct creole_env *env, creole_word *sc) case CREOLE_DIV: check(read_val(env, ins, 1, &a1)); check(read_val(env, ins, 2, &a2)); - if (a2 == 0) + if (a2 == 0) { return CREOLE_DIV_BY_ZERO; - check(creole_reg_write(env, ins->w[0], a1 / a2)); - break; - case CREOLE_SDIV: - check(read_val(env, ins, 1, &a1)); - check(read_val(env, ins, 2, &a2)); - if (a2 == 0) - return CREOLE_DIV_BY_ZERO; - check(creole_reg_write(env, ins->w[0], - (creole_signed)a1 / (creole_signed)a2)); + } + chk_sign_op(/); + check(creole_reg_write(env, ins->w[0], a1)); break; case CREOLE_SYS: check(read_val(env, ins, 0, sc)); @@ -570,7 +602,8 @@ enum creole_run_ret creole_step(struct creole_env *env, creole_word *sc) check(read_val(env, ins, 1, &a1)); check(read_val(env, ins, 2, &a2)); check(check_label(env, ins->w[0])); - if (a1 < a2) { + chk_sign_op(<); + if (a1) { env->prgptr = env->lab[ins->w[0]]; increase_pointer = 0; } @@ -579,25 +612,8 @@ enum creole_run_ret creole_step(struct creole_env *env, creole_word *sc) check(read_val(env, ins, 1, &a1)); check(read_val(env, ins, 2, &a2)); check(check_label(env, ins->w[0])); - if (a1 <= a2) { - env->prgptr = env->lab[ins->w[0]]; - increase_pointer = 0; - } - break; - case CREOLE_JG: - check(read_val(env, ins, 1, &a1)); - check(read_val(env, ins, 2, &a2)); - check(check_label(env, ins->w[0])); - if (a1 > a2) { - env->prgptr = env->lab[ins->w[0]]; - increase_pointer = 0; - } - break; - case CREOLE_JGE: - check(read_val(env, ins, 1, &a1)); - check(read_val(env, ins, 2, &a2)); - check(check_label(env, ins->w[0])); - if (a1 >= a2) { + chk_sign_op(<=); + if (a1) { env->prgptr = env->lab[ins->w[0]]; increase_pointer = 0; } diff --git a/creole.h b/creole.h index f5b118f..49a411f 100644 --- a/creole.h +++ b/creole.h @@ -17,21 +17,18 @@ typedef CREOLE_WORD creole_word; typedef CREOLE_SIGNED_WORD creole_signed; enum creole_opcode { - CREOLE_NOOP = 0, - CREOLE_PUSH = 1, - CREOLE_POP = 2, - CREOLE_ADD = 3, - CREOLE_MUL = 4, - CREOLE_DIV = 5, - CREOLE_SDIV = 6, - CREOLE_SYS = 7, - CREOLE_CLB = 8, - CREOLE_JL = 9, - CREOLE_JLE = 10, - CREOLE_JG = 11, - CREOLE_JGE = 12, - CREOLE_JE = 13, - CREOLE_JNE = 14, + CREOLE_NOOP, + CREOLE_PUSH, + CREOLE_POP, + CREOLE_ADD, + CREOLE_MUL, + CREOLE_DIV, + CREOLE_SYS, + CREOLE_CLB, + CREOLE_JL, + CREOLE_JLE, + CREOLE_JE, + CREOLE_JNE, CREOLE_OPCODE_LEN }; @@ -66,6 +63,7 @@ enum creole_run_ret { CREOLE_REGISTER_OVERFLOW, CREOLE_STEP_UNKNOWN_OPCODE, CREOLE_DIV_BY_ZERO, + CREOLE_STEP_HIGH_BIT_MALFORMED, CREOLE_RUN_RET_LEN };