From 48827c5b747eca6d56555f69cae37291feb5fcda Mon Sep 17 00:00:00 2001
From: Peter McGoron
Date: Sat, 18 Feb 2023 16:05:09 +0000
Subject: [PATCH] adjust python assembler to new API

---
Review notes (ignored by git-am, not part of the commit message):

* Line breaks and indentation were reconstructed from a copy of this
  patch whose whitespace had been collapsed. Hunk line counts were checked
  against the hunk headers and the diffstat, but the indentation itself is
  a best guess: verify against the tree before applying.
* StringArgument and LabelArgument no longer pass `self` a second time to
  Argument.__init__ (it shifted every constructor argument by one).
* StringArgument now overrides __call__ (was __bytes__), because
  Instruction._render_default encodes every argument by calling a().
* LabelArgument.load_label looks up self.val (was the undefined name
  `val`).
* ArgType.gettype returns a LabelArgument for ".name" arguments so that
  Line.load_label can call load_label() on them.
* Environment.load passes only the environment to creole_compile(), which
  matches the one-parameter C signature; the reader is carried in
  env->r_current.
* isinstance() replaces exact type() comparisons.
* Known limitation (not changed here): Program.__call__ records a label
  only when it reaches it, so a jump to a label that is defined later in
  the program raises KeyError.

 Makefile      |  2 +-
 asm/creole.py | 78 +++++++++++++++++++++++++++++++++++++--------------
 asm/ffi.py    | 77 +++++++++++++++++---------------------------------
 asm/test.py   | 27 +++++++-----------
 creole.c      |  3 +-
 creole.h      |  2 +-
 6 files changed, 97 insertions(+), 92 deletions(-)

diff --git a/Makefile b/Makefile
index 7fe4cc9..bac519e 100644
--- a/Makefile
+++ b/Makefile
@@ -4,7 +4,7 @@ asm/libcreole.so: creole.c creole.h
 	$(CC) -shared -o asm/libcreole.so c_test/creole.o
 
 test_asm: asm/libcreole.so
-	cd asm && python3 test.py
+	cd asm && python3 test.py -f
 c_test/encode_decode: c_test/encode_decode.c creole.c creole.h
 	$(CC) c_test/encode_decode.c -Wall -pedantic -std=c89 -g -fopenmp -o c_test/encode_decode
 # c_test/encode_decode
diff --git a/asm/creole.py b/asm/creole.py
index 8bf609a..559d663 100644
--- a/asm/creole.py
+++ b/asm/creole.py
@@ -65,6 +65,24 @@ class Argument:
         """ Returns the high bits that the argument would have
             in the opcode. """
         return int(self.sign) << 1 | (self.at == ArgType.REG)
+    def __call__(self):
+        l = 2 if self.val < 0x80 else None
+        return encode_pseudo_utf8(self.val, self.high_bits(), l)
+
+class StringArgument(Argument):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+    def __call__(self):
+        b = bytes()
+        for v in self.val:
+            b = b + Argument(ArgType.IMM, v)()
+        return b
+
+class LabelArgument(Argument):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+    def load_label(self, labels):
+        self.val = labels[self.val]
 
 class ArgType(Enum):
     """ Class denoting the type of an argument to an instruction. """
@@ -78,8 +96,17 @@ class ArgType(Enum):
     VAL = 3
     """ Type that denotes either immediate values or registers. """
 
-    LAB = 4
-    """ Type of labels. """
+    DAT = 4
+    """ Type of data label. """
+
+    STR = 5
+    """ Type of a string of 32 bit integers. """
+
+    LAB = 6
+    """ Type of a label (.name). """
+
+    def is_number(t):
+        return t == ArgType.IMM or t == ArgType.REG
 
     def gettype(s):
         """ Parses the type of the argument represented as a string
@@ -97,14 +124,18 @@ class ArgType(Enum):
         :return: The Argument object representing the argument.
         :raises MalformedArgument:
         """
-        if s.isnumeric():
+        if isinstance(s, list):
+            return StringArgument(ArgType.STR, s)
+        elif s.isnumeric():
             return Argument(ArgType.IMM, int(s))
         elif s[0] == "-" and s[1:].isnumeric():
             return Argument(ArgType.IMM, word_2c(int(s[1:])), True)
         elif s[0] == 'r' and s[1:].isnumeric():
             return Argument(ArgType.REG, int(s[1:]))
-        elif s[0] == 'l' and s[1:].isnumeric():
-            return Argument(ArgType.LAB, int(s[1:]))
+        elif s[0] == 'd' and s[1:].isnumeric():
+            return Argument(ArgType.DAT, int(s[1:]))
+        elif s[0] == '.':
+            return LabelArgument(ArgType.LAB, s[1:])
         else:
             raise MalformedArgument(s)
 
@@ -168,14 +199,14 @@ class Instruction(Enum):
     DIV = 5, "_render_default", ArgType.REG, ArgType.VAL, ArgType.VAL
     SDIV = "DIV", "_render_change_args", ArgType.REG, ArgType.VAL, ArgType.VAL
     SYS = 6, "_render_default", ArgType.VAL
-    CLB = 7, "_render_default", ArgType.LAB
-    JL = 8, "_render_default", ArgType.LAB, ArgType.VAL, ArgType.VAL
+    JL = 7, "_render_default", ArgType.LAB, ArgType.VAL, ArgType.VAL
     JLS = "JL", "_render_change_args", ArgType.LAB, ArgType.VAL, ArgType.VAL
-    JLE = 9, "_render_default", ArgType.LAB, ArgType.VAL, ArgType.VAL
+    JLE = 8, "_render_default", ArgType.LAB, ArgType.VAL, ArgType.VAL
     JLES = "JLE", "_render_change_args", ArgType.LAB, ArgType.VAL, ArgType.VAL
-    JE = 10, "_render_default", ArgType.LAB, ArgType.VAL, ArgType.VAL
+    JE = 9, "_render_default", ArgType.LAB, ArgType.VAL, ArgType.VAL
     J = "JE", "_render_j", ArgType.LAB
-    JNE = 11, "_render_default", ArgType.LAB, ArgType.VAL, ArgType.VAL
+    JNE = 10, "_render_default", ArgType.LAB, ArgType.VAL, ArgType.VAL
+    DB = 11, "_render_default", ArgType.DAT, ArgType.STR
 
     def __int__(self):
         """ Returns the opcode associated with the Instruction.
@@ -244,16 +275,14 @@ class Instruction(Enum):
 
     def _render_change_args(self, args):
         for i in range(0,len(args)):
-            if args[i].at != ArgType.LAB:
+            if ArgType.is_number(args[i].at):
                 args[i].sign = True
         return Instruction[self.opcode].render(args)
 
     def _render_default(self, args):
         b = bytes([self.opcode])
         for a in args:
-            l = 2 if a.val < 0x80 else None
-            bex = encode_pseudo_utf8(a.val, a.high_bits(), l)
-            b = b + bex
+            b = b + a()
         return b + bytes([0])
 
     encoding_types = {
@@ -314,12 +343,11 @@ class Line:
                     raise RangeCheckException(a.at,
                                               a.val,
                                               reglen)
-            elif a.at == ArgType.LAB:
-                if a.val < 0 or a.val >= lablen:
-                    raise RangeCheckException(a.at,
-                                              a.val,
-                                              reglen)
 
+    def load_label(self, labels):
+        for a in self.args:
+            if a.at == ArgType.LAB:
+                a.load_label(labels)
     def __call__(self):
         return self.ins.render(self.args)
 
@@ -332,10 +360,13 @@ class Program:
         self.asm.append(l)
 
     def parse_asm_line(self, line):
-        line = line.split()
+        line = line.strip().split()
         line[0] = line[0].casefold()
+        if line[0][0] == '.':
+            self.asm.append(line[0][1:])
+            return None
+
         try:
-            # TODO: is there no better way to do this in Python?
             ins = Instruction[line[0].upper()]
         except Exception as e:
             raise InstructionNotFoundException(line[0])
@@ -349,7 +380,12 @@ class Program:
 
     def __call__(self):
         b = bytes()
+        labels = {}
         for line in self.asm:
+            if isinstance(line, str):
+                labels[line] = len(b)
+                continue
+            line.load_label(labels)
             b = b + line()
         return b
 
diff --git a/asm/ffi.py b/asm/ffi.py
index 41faba3..13731dd 100644
--- a/asm/ffi.py
+++ b/asm/ffi.py
@@ -24,10 +24,9 @@ class CompileRet(Enum):
     ARG_MALFORMED = 4
     LAST_READ_ERROR = 5
     LAST_MALFORMED = 6
-    LABEL_OVERFLOW = 7
+    DATA_OVERFLOW = 7
     TYPE_ERROR = 8
-    CLEARED_INSTRUCTION = 9
-    PROGRAM_OVERFLOW = 10
+    PROGRAM_OVERFLOW = 9
 
 class RunRet(Enum):
     CONTINUE = 0
@@ -35,44 +34,36 @@ class RunRet(Enum):
     STOP = 2
     STACK_OVERFLOW = 3
     STACK_UNDERFLOW = 4
-    LABEL_OVERFLOW = 5
+    DECODE_ERROR = 5
     REGISTER_OVERFLOW = 6
     UNKNOWN_OPCODE = 7
     DIVIDE_BY_ZERO = 8
     HIGH_BIT_MALFORMED = 9
+    JUMP_OVERFLOW = 10
 
     def is_halt(self):
         return not (self == RunRet.CONTINUE or self == RunRet.SYSCALL)
 
-class CIns(Structure):
-    _fields_ = [("opcode", c_int),
-                ("w_flags", c_ubyte * 3),
-                ("w", c_uint * 3)]
-
 class CReader(Structure):
     _fields_ = [("p", POINTER(c_ubyte)),
                 ("left", c_size_t)]
-def make_uchar_buf(s):
-    buf = (c_ubyte * len(s))()
-    buf[:] = s[:]
-    return buf
-def make_reader(s):
-    buf = make_uchar_buf(s)
-    return CReader(buf, len(s))
+class Reader:
+    def __init__(self, s):
+        self.buf = (c_ubyte * len(s))()
+        self.buf[:] = s[:]
+        self.rd = CReader(self.buf, len(s))
 
 class CEnv(Structure):
     _fields_ = [
+        ("dats", POINTER(POINTER(c_ubyte))),
+        ("datlen", c_size_t),
         ("reg", POINTER(c_uint)),
         ("reglen", c_size_t),
-        ("lab", POINTER(c_size_t)),
-        ("lablen", c_size_t),
         ("stk", POINTER(c_uint)),
         ("stkptr", c_size_t),
         ("stklen", c_size_t),
-        ("prg", POINTER(CIns)),
-        ("prgptr", c_size_t),
-        ("prgend", c_size_t),
-        ("prglen", c_size_t)
+        ("r_current", CReader),
+        ("r_start", CReader)
     ]
 
 class RegisterOverflowError(Exception):
@@ -122,38 +113,34 @@ class Environment:
             stk = stk - 1
         return self.cenv.stk[stk]
 
-    def __init__(self, prog=None, reglen=32, lablen=32, stklen=4096, prglen=4096):
+    def reset(self):
+        self.cenv.r_current = self.cenv.r_start
+
+    def __init__(self, prog=None, reglen=32, datlen=32, stklen=4096, prglen=4096):
         cenv = CEnv()
+        cenv.dats = (POINTER(c_ubyte) * datlen)()
+        cenv.datlen = datlen
+
         cenv.reglen = reglen
         cenv.reg = (c_uint * reglen)()
 
-        cenv.lablen = lablen
-        cenv.lab = (c_size_t * lablen)()
-
         cenv.stklen = stklen
         cenv.stk = (c_uint * stklen)()
         cenv.stkptr = 0
 
-        cenv.prglen = prglen
-        cenv.prg = (CIns * prglen)()
-        cenv.prgptr = 0
-        cenv.prgend = 0
-
         self.cenv = cenv
         if prog is not None:
+            if isinstance(prog, creole.Program):
+                prog = prog()
             r = self.load(prog)
             if r is not CompileRet.OK:
                 raise CompileError(r)
 
-    def restart(self):
-        self.cenv.stkptr = 0
-        self.cenv.prgptr = 0
-        self.cenv.prgend = 0
-
     def load(self, prog):
-        rd = make_reader(prog)
-        self.restart()
-        ret = dll.creole_compile(byref(self.cenv), byref(rd))
+        self.reader = Reader(prog)
+        self.cenv.r_current = self.reader.rd
+        self.cenv.r_start = self.cenv.r_current
+        ret = dll.creole_compile(byref(self.cenv))
         return CompileRet(ret)
 
     def syscall(self, sc):
@@ -168,15 +155,3 @@ class Environment:
         if debug:
             print(self.cenv.reg[0])
         return ret
-
-class CParseLineException(Exception):
-    pass
-def parse_line(line):
-    rd = make_reader(line)
-    ins = CIns()
-
-    ret = dll.creole_parse_line(byref(ins), byref(rd))
-
-    if ret != CompileRet.OK.value:
-        raise CParseLineException(CompileRet(ret).name)
-    return (ins.opcode, list(zip(ins.w_flags, ins.w)))
diff --git a/asm/test.py b/asm/test.py
index ed78398..f2c2277 100644
--- a/asm/test.py
+++ b/asm/test.py
@@ -19,20 +19,13 @@ class PushTest(unittest.TestCase):
     def test_parse_push_reg(self):
         p = Program()
         p.parse_asm_line("push r5")
-        b = p()
-        self.assertEqual(b, b'\x01\xC2\x85\x00')
-        ins = ffi.parse_line(b)
-        self.assertEqual(ins[0], Instruction.PUSH.opcode)
-        self.assertEqual(ins[1][0], (1,5))
+        self.assertEqual(p(), b'\x01\xC2\x85\x00')
 
     def test_parse_push_imm(self):
         p = Program()
         p.parse_asm_line("push 5")
         b = p()
         self.assertEqual(b, b'\x01\xC0\x85\x00')
-        ins = ffi.parse_line(b)
-        self.assertEqual(ins[0], Instruction.PUSH.opcode)
-        self.assertEqual(ins[1][0], (0,5))
 
     def test_parse_push_catch_typecheck_push_lab(self):
         p = Program()
@@ -175,7 +168,7 @@ class AddTest(unittest.TestCase):
     def test_exec_add(self):
         p = Program()
         p.parse_asm_line("add r0 1 1")
-        ex = ffi.Environment(p())
+        ex = ffi.Environment(p)
         self.assertEqual(ex(), ffi.RunRet.STOP)
         self.assertEqual(ex.cenv.reg[0], 2)
 
@@ -201,18 +194,18 @@ class AddTest(unittest.TestCase):
     def test_exec_add_throw_lab_1(self):
         p = Program()
         with self.assertRaises(TypecheckException) as cm:
-            p.parse_asm_line("add r0 l6 7")
+            p.parse_asm_line("add r0 .label 7")
         self.assertEqual(cm.exception.argtype, ArgType.VAL)
-        self.assertEqual(cm.exception.sarg, 'l6')
+        self.assertEqual(cm.exception.sarg, '.label')
         self.assertEqual(cm.exception.i, 1)
         self.assertEqual(cm.exception.opcode, 3)
 
     def test_exec_add_throw_lab_2(self):
         p = Program()
         with self.assertRaises(TypecheckException) as cm:
-            p.parse_asm_line("add r0 12 l24")
+            p.parse_asm_line("add r0 12 .ab")
         self.assertEqual(cm.exception.argtype, ArgType.VAL)
-        self.assertEqual(cm.exception.sarg, 'l24')
+        self.assertEqual(cm.exception.sarg, '.ab')
         self.assertEqual(cm.exception.i, 2)
         self.assertEqual(cm.exception.opcode, 3)
 
@@ -307,18 +300,18 @@ class DivTest(unittest.TestCase):
     def test_exec_div_throw_lab_1(self):
         p = Program()
         with self.assertRaises(TypecheckException) as cm:
-            p.parse_asm_line("div r0 l123 456")
+            p.parse_asm_line("div r0 .qqweq 456")
         self.assertEqual(cm.exception.argtype, ArgType.VAL)
-        self.assertEqual(cm.exception.sarg, 'l123')
+        self.assertEqual(cm.exception.sarg, '.qqweq')
         self.assertEqual(cm.exception.i, 1)
         self.assertEqual(cm.exception.opcode, 5)
 
     def test_exec_div_throw_lab_2(self):
         p = Program()
         with self.assertRaises(TypecheckException) as cm:
-            p.parse_asm_line("div r5 1919 l24")
+            p.parse_asm_line("div r5 1919 .24")
         self.assertEqual(cm.exception.argtype, ArgType.VAL)
-        self.assertEqual(cm.exception.sarg, 'l24')
+        self.assertEqual(cm.exception.sarg, '.24')
         self.assertEqual(cm.exception.i, 2)
         self.assertEqual(cm.exception.opcode, 5)
 
diff --git a/creole.c b/creole.c
index 5b2d16d..26b0347 100644
--- a/creole.c
+++ b/creole.c
@@ -380,7 +380,7 @@ parse_line(struct creole_env *env, struct ins *ins, struct creole_reader *r)
 		return CREOLE_OPCODE_MALFORMED;
 	}
 
-	if (opcode_info[ins->opcode].arglen >= CREOLE_MAX_ARG)
+	if (opcode_info[ins->opcode].arglen > CREOLE_MAX_ARG)
 		return CREOLE_OPCODE_MALFORMED;
 	for (i = 0; i < opcode_info[ins->opcode].arglen; i++) {
 		if (!decode_seq(r, &w))
@@ -446,6 +446,7 @@ creole_compile(struct creole_env *env)
 		add_to_env(env, &ins);
 	}
 
+	env->r_current = env->r_start;
 	return CREOLE_COMPILE_OK;
 }
 
diff --git a/creole.h b/creole.h
index 53e6ab7..b98dfa5 100644
--- a/creole.h
+++ b/creole.h
@@ -65,7 +65,6 @@ enum creole_compiler_ret {
 	CREOLE_LAST_MALFORMED,
 	CREOLE_DATA_OVERFLOW,
 	CREOLE_TYPE_ERROR,
-	CREOLE_COMPILE_CLEARED_INSTRUCTION,
 	CREOLE_PROGRAM_OVERFLOW,
 	CREOLE_COMPILE_RET_LEN
 };
@@ -114,6 +113,7 @@ enum creole_run_ret creole_reg_read(struct creole_env *env, unsigned reg,
                                     creole_word *w);
 enum creole_run_ret creole_push(struct creole_env *env, creole_word w);
 enum creole_run_ret creole_pop(struct creole_env *env, creole_word *w);
+int creole_jump(struct creole_env *env, creole_word off);
 enum creole_run_ret creole_step(struct creole_env *env, creole_word *sc);
 
 #endif /* CREOLE_H */