From 3dd29b100483cb9eb34d96f6ed21827e86cd77a2 Mon Sep 17 00:00:00 2001 From: Peter McGoron Date: Mon, 20 Feb 2023 18:45:21 +0000 Subject: [PATCH] add label parser that can deal with forward jumps; all tests pass --- asm/creole.py | 152 +++++++++++++++++++++++++++++++++----------------- asm/test.py | 98 ++++++++++++-------------------- 2 files changed, 138 insertions(+), 112 deletions(-) diff --git a/asm/creole.py b/asm/creole.py index 559d663..a614701 100644 --- a/asm/creole.py +++ b/asm/creole.py @@ -71,7 +71,7 @@ class Argument: class StringArgument(Argument): def __init__(self, *args, **kwargs): - super().__init__(self, *args, **kwargs) + super().__init__(*args, **kwargs) def __bytes__(self): b = bytes() for v in self.val: @@ -80,9 +80,23 @@ class StringArgument(Argument): class LabelArgument(Argument): def __init__(self, *args, **kwargs): - super().__init__(self, *args, **kwargs) - def load_label(self, labels): - self.val = labels[val] + super().__init__(*args, **kwargs) + def __call__(self): + return self.val + +class TypecheckException(Exception): + """ Exception thrown when an argument to an instruction are of the + incorrect type. """ + def __init__(self, got, argtype, sarg, opcode, i): + self.argtype = argtype + self.sarg = sarg + self.opcode = opcode + self.got = got + self.i = i + self.message = f'''\ +opcode {self.opcode.name} has invalid value {self.sarg} ({self.got}) +at {self.i} (expected {self.argtype})\ +''' class ArgType(Enum): """ Class denoting the type of an argument to an instruction. """ @@ -135,27 +149,29 @@ class ArgType(Enum): elif s[0] == 'd' and s[1:].isnumeric(): return Argument(ArgType.DAT, int(s[1:])) elif s[0] == '.': - return Argument(ArgType.LAB, s[1:]) + return LabelArgument(ArgType.LAB, s[:]) else: raise MalformedArgument(s) - def typecheck(self, s): + def typecheck(self, s, opcode, i): """ Parses the type of the string and returns it if it fits the type of the enum value. :param s: String argument representing an argument. - :return: The Argument class containing the object, or None - if the string does not fit the type of self. """ + :param opcode: Opcode of the argument. Used for debugging. + :param i: Argument number. Used for debugging. + :return: The Argument class containing the object. + :raises TypecheckException: """ t = ArgType.gettype(s) if self == ArgType.VAL: if t.at == ArgType.REG or t.at == ArgType.IMM: return t else: - return None + raise TypecheckException(t.at, self, s, opcode, i) elif t.at == self: return t else: - return None + raise TypecheckException(t.at, self, s, opcode, i) class OpcodeException(Exception): pass @@ -168,20 +184,7 @@ class TypecheckLenException(Exception): self.argtypelen = argtypelen def __str__(self): return f'''\ -arguments {self.insargs} to opcode {self.opcode} not of length {self.argtypelen}\ -''' -class TypecheckException(Exception): - """ Exception thrown when an argument to an instruction are of the - incorrect type. """ - def __init__(self, argtype, sarg, i, opcode): - self.argtype = argtype - self.sarg = sarg - self.i = i - self.opcode = opcode - def __str__(self): - return f'''\ -opcode {self.opcode} has invalid value {self.sarg} -(expected {self.argtype} in position {self.i})\ +arguments {self.insargs} to opcode {self.opcode.name} not of length {self.argtypelen}\ ''' class Instruction(Enum): @@ -248,19 +251,14 @@ class Instruction(Enum): :param sargs: List of arguments to the instruction as strings. :return: List of arguments (as Argument objects). - :raises TypeCheckException: :raises TypecheckLenException: """ rargs = [] if len(sargs) != len(self.argtypes): - raise TypecheckLenException(self.opcode, sargs, + raise TypecheckLenException(self, sargs, len(self.argtypes)) for i in range(0, len(sargs)): - t = self.argtypes[i].typecheck(sargs[i]) - if t is None: - raise TypecheckException(self.argtypes[i], - sargs[i], - i, self.opcode) + t = self.argtypes[i].typecheck(sargs[i], self, i) rargs.append(t) return rargs @@ -280,10 +278,11 @@ class Instruction(Enum): return Instruction[self.opcode].render(args) def _render_default(self, args): - b = bytes([self.opcode]) + comps = [bytes([self.opcode])] for a in args: - b = b + a() - return b + bytes([0]) + comps.append(a()) + comps.append(b'\x00') + return comps encoding_types = { # start mask B @@ -296,6 +295,12 @@ encoding_types = { # B : Total number of bits excluding high bits } +def pseudo_utf8_len(n): + for k in sorted(encoding_types): + if n <= encoding_types[k][0]: + return k + return None + class InvalidNumberException(Exception): pass class InvalidLengthException(Exception): @@ -304,10 +309,7 @@ def encode_pseudo_utf8(n, high_bits, to): if n < 0: raise InvalidNumberException(n) if to is None or to < 0: - for k in sorted(encoding_types): - if n <= encoding_types[k][0]: - to = k - break + to = pseudo_utf8_len(n) if to is None: raise InvalidNumberException(n) if to > 8 or to < 0: @@ -343,27 +345,26 @@ class Line: raise RangeCheckException(a.at, a.val, reglen) - - def load_label(self, labels): - for a in self.args: - if a.at == ArgType.LAB: - a.load_label(labels) def __call__(self): return self.ins.render(self.args) class InstructionNotFoundException(Exception): pass class Program: - def asm_push_line(self, ins, args): + def _asm_push_line(self, ins, args): l = Line(ins, args) l.check_line(self.lablen, self.reglen) self.asm.append(l) def parse_asm_line(self, line): + """ Parse and add a single assembly line to the program. + :param line: String containing the line. + :raises InstructionNotFoundException: + """ line = line.strip().split() line[0] = line[0].casefold() if line[0][0] == '.': - self.asm.append(line[0][1:]) + self.asm.append(line[0]) return None try: @@ -372,21 +373,72 @@ class Program: raise InstructionNotFoundException(line[0]) args_w_type = ins.typecheck(line[1:]) - self.asm_push_line(ins, args_w_type) + self._asm_push_line(ins, args_w_type) def parse_lines(self, lines): + """ Parse a list of lines. See parse_asm_line. + :param lines: List of assembly lines. + """ for l in lines: self.parse_asm_line(l) def __call__(self): - b = bytes() + """ Generate bytecode. """ + + # Labels may jump forward in the program, which means + # multiple passes are required to properly calculate + # jump locations. + # This algorithm makes every jump destination the same + # width in each operation, and calculates the smallest + # width that will allow all labels to jump to any location + # in the program. + # The algorithm calculates the length of the program + # with all jump arguments given a length of 0. Each label + # is noted with its offset in the program (with all jump + # arguments given zero length) and the amount of jump arguments + # that occur prior to the label. + # When the code is emitted, the label length is properly + # calculated with the length of each label. + # This method is not optimal, but will work well for small + # programs. + + ins = [] + curlen = 0 + + # This dictonary contains a tuple (len, refs) + # that denotes that a label points to len + lablen*refs + # where lablen is a to-be-determined number. labels = {} + labelrefs = 0 for line in self.asm: if type(line) is str: - labels[line] = len(b) + labels[line] = (curlen, labelrefs) continue - line.load_label(labels) - b = b + line() + + next_ins = line() + for v in next_ins: + if type(v) is str: + labelrefs += 1 + else: + curlen += len(v) + ins.append(next_ins) + + # Calculate a label length, such that the entire program + # can be contained in this length. + for i in encoding_types: + if curlen + labelrefs*i < encoding_types[i][0]: + lablen = i + break + + # Emit bytecode. + b = bytes() + for line in ins: + for arg in line: + if type(arg) is str: + off = labels[arg][0] + labels[arg][1]*lablen + arg = encode_pseudo_utf8(off, 0, lablen) + b = b + arg + assert len(b) < encoding_types[lablen][0] return b def __init__(self, lablen=16, reglen=16): diff --git a/asm/test.py b/asm/test.py index f2c2277..cae3d36 100644 --- a/asm/test.py +++ b/asm/test.py @@ -30,17 +30,17 @@ class PushTest(unittest.TestCase): def test_parse_push_catch_typecheck_push_lab(self): p = Program() with self.assertRaises(TypecheckException) as cm: - p.parse_asm_line("push l0") + p.parse_asm_line("push .l0") self.assertEqual(cm.exception.argtype, ArgType.VAL) - self.assertEqual(cm.exception.sarg, 'l0') + self.assertEqual(cm.exception.sarg, '.l0') self.assertEqual(cm.exception.i, 0) - self.assertEqual(cm.exception.opcode, 1) + self.assertEqual(cm.exception.opcode, Instruction.PUSH) def test_parse_push_catch_typecheck_argument_overflow(self): p = Program() with self.assertRaises(TypecheckLenException) as cm: p.parse_asm_line("push r1 r2") - self.assertEqual(cm.exception.opcode, 1) + self.assertEqual(cm.exception.opcode, Instruction.PUSH) self.assertEqual(cm.exception.insargs, ["r1", "r2"]) self.assertEqual(cm.exception.argtypelen, 1) @@ -48,7 +48,7 @@ class PushTest(unittest.TestCase): p = Program() with self.assertRaises(TypecheckLenException) as cm: p.parse_asm_line("push") - self.assertEqual(cm.exception.opcode, 1) + self.assertEqual(cm.exception.opcode, Instruction.PUSH) self.assertEqual(cm.exception.insargs, []) self.assertEqual(cm.exception.argtypelen, 1) @@ -64,23 +64,6 @@ class PushTest(unittest.TestCase): p.parse_asm_line("PUSH 6") ex = ffi.Environment(p()) - self.assertEqual(ex.cenv.prgend, 2) - - self.assertEqual(ex.cenv.prg[0].opcode, 1) - self.assertEqual(ex.cenv.prg[0].w_flags[0], 1) - self.assertEqual(ex.cenv.prg[0].w_flags[1], 0) - self.assertEqual(ex.cenv.prg[0].w_flags[2], 0) - self.assertEqual(ex.cenv.prg[0].w[0], 0) - self.assertEqual(ex.cenv.prg[0].w[1], 0) - self.assertEqual(ex.cenv.prg[0].w[2], 0) - - self.assertEqual(ex.cenv.prg[1].opcode, 1) - self.assertEqual(ex.cenv.prg[1].w_flags[0], 0) - self.assertEqual(ex.cenv.prg[1].w_flags[1], 0) - self.assertEqual(ex.cenv.prg[1].w_flags[2], 0) - self.assertEqual(ex.cenv.prg[1].w[0], 6) - self.assertEqual(ex.cenv.prg[1].w[1], 0) - self.assertEqual(ex.cenv.prg[1].w[2], 0) def test_push_many(self): p = Program() @@ -107,15 +90,6 @@ class PopTest(unittest.TestCase): b = p() self.assertEqual(b, b'\x02\xC2\x89\x00') ex = ffi.Environment(b) - self.assertEqual(ex.cenv.prgend, 1) - - self.assertEqual(ex.cenv.prg[0].opcode, 2) - self.assertEqual(ex.cenv.prg[0].w_flags[0], 1) - self.assertEqual(ex.cenv.prg[0].w_flags[1], 0) - self.assertEqual(ex.cenv.prg[0].w_flags[2], 0) - self.assertEqual(ex.cenv.prg[0].w[0], 9) - self.assertEqual(ex.cenv.prg[0].w[1], 0) - self.assertEqual(ex.cenv.prg[0].w[2], 0) def test_compile_throw_pop_literal(self): p = Program() @@ -124,22 +98,22 @@ class PopTest(unittest.TestCase): self.assertEqual(cm.exception.argtype, ArgType.REG) self.assertEqual(cm.exception.sarg, '6') self.assertEqual(cm.exception.i, 0) - self.assertEqual(cm.exception.opcode, 2) + self.assertEqual(cm.exception.opcode, Instruction.POP) def test_compile_throw_pop_label(self): p = Program() with self.assertRaises(TypecheckException) as cm: - p.parse_asm_line("pop l9") + p.parse_asm_line("pop .l9") self.assertEqual(cm.exception.argtype, ArgType.REG) - self.assertEqual(cm.exception.sarg, 'l9') + self.assertEqual(cm.exception.sarg, '.l9') self.assertEqual(cm.exception.i, 0) - self.assertEqual(cm.exception.opcode, 2) + self.assertEqual(cm.exception.opcode, Instruction.POP) def test_compile_throw_argument_overflow(self): p = Program() with self.assertRaises(TypecheckLenException) as cm: p.parse_asm_line("pop r1 r2") - self.assertEqual(cm.exception.opcode, 2) + self.assertEqual(cm.exception.opcode, Instruction.POP) self.assertEqual(cm.exception.insargs, ["r1", "r2"]) self.assertEqual(cm.exception.argtypelen, 1) @@ -147,7 +121,7 @@ class PopTest(unittest.TestCase): p = Program() with self.assertRaises(TypecheckLenException) as cm: p.parse_asm_line("pop") - self.assertEqual(cm.exception.opcode, 2) + self.assertEqual(cm.exception.opcode, Instruction.POP) self.assertEqual(cm.exception.insargs, []) def test_pop_underflow(self): @@ -189,7 +163,7 @@ class AddTest(unittest.TestCase): self.assertEqual(cm.exception.argtype, ArgType.REG) self.assertEqual(cm.exception.sarg, '5') self.assertEqual(cm.exception.i, 0) - self.assertEqual(cm.exception.opcode, 3) + self.assertEqual(cm.exception.opcode, Instruction.ADD) def test_exec_add_throw_lab_1(self): p = Program() @@ -198,7 +172,7 @@ class AddTest(unittest.TestCase): self.assertEqual(cm.exception.argtype, ArgType.VAL) self.assertEqual(cm.exception.sarg, '.label') self.assertEqual(cm.exception.i, 1) - self.assertEqual(cm.exception.opcode, 3) + self.assertEqual(cm.exception.opcode, Instruction.ADD) def test_exec_add_throw_lab_2(self): p = Program() @@ -207,7 +181,7 @@ class AddTest(unittest.TestCase): self.assertEqual(cm.exception.argtype, ArgType.VAL) self.assertEqual(cm.exception.sarg, '.ab') self.assertEqual(cm.exception.i, 2) - self.assertEqual(cm.exception.opcode, 3) + self.assertEqual(cm.exception.opcode, Instruction.ADD) class MulTest(unittest.TestCase): def test_exec_mul_imm_imm(self): @@ -233,25 +207,25 @@ class MulTest(unittest.TestCase): self.assertEqual(cm.exception.argtype, ArgType.REG) self.assertEqual(cm.exception.sarg, '942') self.assertEqual(cm.exception.i, 0) - self.assertEqual(cm.exception.opcode, 4) + self.assertEqual(cm.exception.opcode, Instruction.MUL) def test_exec_mul_throw_lab_1(self): p = Program() with self.assertRaises(TypecheckException) as cm: - p.parse_asm_line("mul r9 l2 1991") + p.parse_asm_line("mul r9 .l2 1991") self.assertEqual(cm.exception.argtype, ArgType.VAL) - self.assertEqual(cm.exception.sarg, 'l2') + self.assertEqual(cm.exception.sarg, '.l2') self.assertEqual(cm.exception.i, 1) - self.assertEqual(cm.exception.opcode, 4) + self.assertEqual(cm.exception.opcode, Instruction.MUL) def test_exec_mul_throw_lab_2(self): p = Program() with self.assertRaises(TypecheckException) as cm: - p.parse_asm_line("mul r0 -11 l48") + p.parse_asm_line("mul r0 -11 .l48") self.assertEqual(cm.exception.argtype, ArgType.VAL) - self.assertEqual(cm.exception.sarg, 'l48') + self.assertEqual(cm.exception.sarg, '.l48') self.assertEqual(cm.exception.i, 2) - self.assertEqual(cm.exception.opcode, 4) + self.assertEqual(cm.exception.opcode, Instruction.MUL) class DivTest(unittest.TestCase): def test_div(self): @@ -295,7 +269,7 @@ class DivTest(unittest.TestCase): self.assertEqual(cm.exception.argtype, ArgType.REG) self.assertEqual(cm.exception.sarg, '5') self.assertEqual(cm.exception.i, 0) - self.assertEqual(cm.exception.opcode, 5) + self.assertEqual(cm.exception.opcode, Instruction.DIV) def test_exec_div_throw_lab_1(self): p = Program() @@ -304,7 +278,7 @@ class DivTest(unittest.TestCase): self.assertEqual(cm.exception.argtype, ArgType.VAL) self.assertEqual(cm.exception.sarg, '.qqweq') self.assertEqual(cm.exception.i, 1) - self.assertEqual(cm.exception.opcode, 5) + self.assertEqual(cm.exception.opcode, Instruction.DIV) def test_exec_div_throw_lab_2(self): p = Program() @@ -313,7 +287,7 @@ class DivTest(unittest.TestCase): self.assertEqual(cm.exception.argtype, ArgType.VAL) self.assertEqual(cm.exception.sarg, '.24') self.assertEqual(cm.exception.i, 2) - self.assertEqual(cm.exception.opcode, 5) + self.assertEqual(cm.exception.opcode, Instruction.DIV) class LabelTest(unittest.TestCase): def test_unconditional_jump(self): @@ -321,9 +295,9 @@ class LabelTest(unittest.TestCase): p.parse_lines([ "mov r0 5", "mov r0 6", - "j l0", + "j .l0", "mov r0 7", - "CLB l0", + ".l0", ]) ex = ffi.Environment(p()) self.assertEqual(ex(), ffi.RunRet.STOP) @@ -334,10 +308,10 @@ class LabelTest(unittest.TestCase): p.parse_lines([ "add r0 10 0", "add r1 20 0", - "CLB l0", + ".loop_head", "add r0 r0 -1", "add r1 r1 1", - "jl l0 0 r0" + "jl .loop_head 0 r0" ]) ex = ffi.Environment(p()) self.assertEqual(ex(), ffi.RunRet.STOP) @@ -349,10 +323,10 @@ class LabelTest(unittest.TestCase): p.parse_lines([ "mov r0 30", "mov r1 0", - "CLB l0", + ".l0", "add r0 r0 -1", "add r1 r1 1", - "jls l0 -30 r0" + "jls .l0 -30 r0" ]) ex = ffi.Environment(p()) self.assertEqual(ex(), ffi.RunRet.STOP) @@ -364,11 +338,11 @@ class LabelTest(unittest.TestCase): p.parse_lines([ "mov r0 50", "mov r1 0", - "CLB l0", + ".l0", "add r1 r1 1", "mul r2 r0 -1", "add r2 r2 r1", - "jne l0 r2 0" + "jne .l0 r2 0" ]) ex = ffi.Environment(p()) self.assertEqual(ex(), ffi.RunRet.STOP) @@ -381,14 +355,14 @@ class LabelTest(unittest.TestCase): p.parse_lines([ "mov r0 0", # outer loop counter "mov r2 0", # total iteration counter - "CLB l0", + ".outer_loop", "mov r1 0", # inner loop counter - "CLB l1", + ".inner_loop", "add r1 r1 1", "add r2 r2 1", - "jl l1 r1 50", + "jl .inner_loop r1 50", "add r0 r0 1", - "jl l0 r0 50" + "jl .outer_loop r0 50" ]) ex = ffi.Environment(p()) self.assertEqual(ex(), ffi.RunRet.STOP)