diff options
| author | 2023-02-20 18:45:21 +0000 | |
|---|---|---|
| committer | 2023-02-20 18:45:21 +0000 | |
| commit | 3dd29b100483cb9eb34d96f6ed21827e86cd77a2 (patch) | |
| tree | b07bf066bad51f43c0225da682631ddde9f4e5fb /asm/creole.py | |
| parent | adjust python assembler to new API (diff) | |
add label parser that can deal with forward jumps; all tests pass
Diffstat (limited to 'asm/creole.py')
| -rw-r--r-- | asm/creole.py | 152 |
1 file changed, 102 insertions, 50 deletions
diff --git a/asm/creole.py b/asm/creole.py index 559d663..a614701 100644 --- a/asm/creole.py +++ b/asm/creole.py @@ -71,7 +71,7 @@ class Argument: class StringArgument(Argument): def __init__(self, *args, **kwargs): - super().__init__(self, *args, **kwargs) + super().__init__(*args, **kwargs) def __bytes__(self): b = bytes() for v in self.val: @@ -80,9 +80,23 @@ class StringArgument(Argument): class LabelArgument(Argument): def __init__(self, *args, **kwargs): - super().__init__(self, *args, **kwargs) - def load_label(self, labels): - self.val = labels[val] + super().__init__(*args, **kwargs) + def __call__(self): + return self.val + +class TypecheckException(Exception): + """ Exception thrown when an argument to an instruction are of the + incorrect type. """ + def __init__(self, got, argtype, sarg, opcode, i): + self.argtype = argtype + self.sarg = sarg + self.opcode = opcode + self.got = got + self.i = i + self.message = f'''\ +opcode {self.opcode.name} has invalid value {self.sarg} ({self.got}) +at {self.i} (expected {self.argtype})\ +''' class ArgType(Enum): """ Class denoting the type of an argument to an instruction. """ @@ -135,27 +149,29 @@ class ArgType(Enum): elif s[0] == 'd' and s[1:].isnumeric(): return Argument(ArgType.DAT, int(s[1:])) elif s[0] == '.': - return Argument(ArgType.LAB, s[1:]) + return LabelArgument(ArgType.LAB, s[:]) else: raise MalformedArgument(s) - def typecheck(self, s): + def typecheck(self, s, opcode, i): """ Parses the type of the string and returns it if it fits the type of the enum value. :param s: String argument representing an argument. - :return: The Argument class containing the object, or None - if the string does not fit the type of self. """ + :param opcode: Opcode of the argument. Used for debugging. + :param i: Argument number. Used for debugging. + :return: The Argument class containing the object. 
+ :raises TypecheckException: """ t = ArgType.gettype(s) if self == ArgType.VAL: if t.at == ArgType.REG or t.at == ArgType.IMM: return t else: - return None + raise TypecheckException(t.at, self, s, opcode, i) elif t.at == self: return t else: - return None + raise TypecheckException(t.at, self, s, opcode, i) class OpcodeException(Exception): pass @@ -168,20 +184,7 @@ class TypecheckLenException(Exception): self.argtypelen = argtypelen def __str__(self): return f'''\ -arguments {self.insargs} to opcode {self.opcode} not of length {self.argtypelen}\ -''' -class TypecheckException(Exception): - """ Exception thrown when an argument to an instruction are of the - incorrect type. """ - def __init__(self, argtype, sarg, i, opcode): - self.argtype = argtype - self.sarg = sarg - self.i = i - self.opcode = opcode - def __str__(self): - return f'''\ -opcode {self.opcode} has invalid value {self.sarg} -(expected {self.argtype} in position {self.i})\ +arguments {self.insargs} to opcode {self.opcode.name} not of length {self.argtypelen}\ ''' class Instruction(Enum): @@ -248,19 +251,14 @@ class Instruction(Enum): :param sargs: List of arguments to the instruction as strings. :return: List of arguments (as Argument objects). 
- :raises TypeCheckException: :raises TypecheckLenException: """ rargs = [] if len(sargs) != len(self.argtypes): - raise TypecheckLenException(self.opcode, sargs, + raise TypecheckLenException(self, sargs, len(self.argtypes)) for i in range(0, len(sargs)): - t = self.argtypes[i].typecheck(sargs[i]) - if t is None: - raise TypecheckException(self.argtypes[i], - sargs[i], - i, self.opcode) + t = self.argtypes[i].typecheck(sargs[i], self, i) rargs.append(t) return rargs @@ -280,10 +278,11 @@ class Instruction(Enum): return Instruction[self.opcode].render(args) def _render_default(self, args): - b = bytes([self.opcode]) + comps = [bytes([self.opcode])] for a in args: - b = b + a() - return b + bytes([0]) + comps.append(a()) + comps.append(b'\x00') + return comps encoding_types = { # start mask B @@ -296,6 +295,12 @@ encoding_types = { # B : Total number of bits excluding high bits } +def pseudo_utf8_len(n): + for k in sorted(encoding_types): + if n <= encoding_types[k][0]: + return k + return None + class InvalidNumberException(Exception): pass class InvalidLengthException(Exception): @@ -304,10 +309,7 @@ def encode_pseudo_utf8(n, high_bits, to): if n < 0: raise InvalidNumberException(n) if to is None or to < 0: - for k in sorted(encoding_types): - if n <= encoding_types[k][0]: - to = k - break + to = pseudo_utf8_len(n) if to is None: raise InvalidNumberException(n) if to > 8 or to < 0: @@ -343,27 +345,26 @@ class Line: raise RangeCheckException(a.at, a.val, reglen) - - def load_label(self, labels): - for a in self.args: - if a.at == ArgType.LAB: - a.load_label(labels) def __call__(self): return self.ins.render(self.args) class InstructionNotFoundException(Exception): pass class Program: - def asm_push_line(self, ins, args): + def _asm_push_line(self, ins, args): l = Line(ins, args) l.check_line(self.lablen, self.reglen) self.asm.append(l) def parse_asm_line(self, line): + """ Parse and add a single assembly line to the program. 
+ :param line: String containing the line. + :raises InstructionNotFoundException: + """ line = line.strip().split() line[0] = line[0].casefold() if line[0][0] == '.': - self.asm.append(line[0][1:]) + self.asm.append(line[0]) return None try: @@ -372,21 +373,72 @@ class Program: raise InstructionNotFoundException(line[0]) args_w_type = ins.typecheck(line[1:]) - self.asm_push_line(ins, args_w_type) + self._asm_push_line(ins, args_w_type) def parse_lines(self, lines): + """ Parse a list of lines. See parse_asm_line. + :param lines: List of assembly lines. + """ for l in lines: self.parse_asm_line(l) def __call__(self): - b = bytes() + """ Generate bytecode. """ + + # Labels may jump forward in the program, which means + # multiple passes are required to properly calculate + # jump locations. + # This algorithm makes every jump destination the same + # width in each operation, and calculates the smallest + # width that will allow all labels to jump to any location + # in the program. + # The algorithm calculates the length of the program + # with all jump arguments given a length of 0. Each label + # is noted with its offset in the program (with all jump + # arguments given zero length) and the amount of jump arguments + # that occur prior to the label. + # When the code is emitted, the label length is properly + # calculated with the length of each label. + # This method is not optimal, but will work well for small + # programs. + + ins = [] + curlen = 0 + + # This dictonary contains a tuple (len, refs) + # that denotes that a label points to len + lablen*refs + # where lablen is a to-be-determined number. 
labels = {} + labelrefs = 0 for line in self.asm: if type(line) is str: - labels[line] = len(b) + labels[line] = (curlen, labelrefs) continue - line.load_label(labels) - b = b + line() + + next_ins = line() + for v in next_ins: + if type(v) is str: + labelrefs += 1 + else: + curlen += len(v) + ins.append(next_ins) + + # Calculate a label length, such that the entire program + # can be contained in this length. + for i in encoding_types: + if curlen + labelrefs*i < encoding_types[i][0]: + lablen = i + break + + # Emit bytecode. + b = bytes() + for line in ins: + for arg in line: + if type(arg) is str: + off = labels[arg][0] + labels[arg][1]*lablen + arg = encode_pseudo_utf8(off, 0, lablen) + b = b + arg + assert len(b) < encoding_types[lablen][0] return b def __init__(self, lablen=16, reglen=16): |
