Unified diff: adds asm_test.py and updates creole_asm.py (enum members
renamed, None returns replaced by dedicated exceptions, register/label
range checks in Line, case-folded mnemonics, configurable labnum/regnum).

Review notes:
* The label branch of Line.__init__ reports labnum (the limit it actually
  tests) in RangeCheckException; it previously passed regnum.
* Line breaks were rebuilt from the hunk line counts. Tab indentation, the
  position of blank context lines, and the whitespace of the old
  Instruction.__init__ line are assumptions - confirm against the target
  file (or apply with whitespace tolerance).
* The index blob ids were not recomputed after the one-token change.

diff --git a/asm_test.py b/asm_test.py
new file mode 100644
index 0000000..481875b
--- /dev/null
+++ b/asm_test.py
@@ -0,0 +1,17 @@
+from creole_asm import *
+import unittest
+
+class ProgramTest(unittest.TestCase):
+	def test_oneline(self):
+		p = Program()
+		p.parse_asm_line("PUSH r0")
+		b = p()
+		self.assertEqual(b, b'\x01\xC2\x80\x00')
+	def test_large_reg(self):
+		p = Program(regnum=0x8000000)
+		p.parse_asm_line("PUSH r134217727")
+		b = p()
+		self.assertEqual(b, b'\x01\xFC\x87\xbf\xbf\xbf\xbf\x00')
+
+if __name__ == "__main__":
+	unittest.main()
diff --git a/creole_asm.py b/creole_asm.py
index 06e470f..8f7ab22 100644
--- a/creole_asm.py
+++ b/creole_asm.py
@@ -2,56 +2,66 @@
 
 from enum import Enum
 
+class MalformedArgument(Exception):
+	pass
+
 class ArgType(Enum):
-	TYPE_IMM = 1
-	TYPE_REG = 2
-	TYPE_VAL = 3
-	TYPE_LAB = 4
+	IMM = 1
+	REG = 2
+	VAL = 3
+	LAB = 4
 
 	def gettype(s):
 		if s.isnumeric():
-			return (TYPE_IMM, int(s))
+			return (ArgType.IMM, int(s))
 		elif s[0] == 'r' and s[1:].isnumeric():
-			return (TYPE_REG, int(s[1:]))
+			return (ArgType.REG, int(s[1:]))
 		elif s[0] == 'l' and s[1:].isnumeric():
-			return (TYPE_LAB, int(s[1:]))
+			return (ArgType.LAB, int(s[1:]))
 		else:
-			return None
+			raise MalformedArgument(s)
 
 	def typecheck(self, s):
 		t = ArgType.gettype(s)
-		if t is None:
-			return None
-		if self == TYPE_VAL:
-			return t[0] == TYPE_REG or t[0] == TYPE_IMM
+		if self == ArgType.VAL:
+			return t[0] == ArgType.REG or t[0] == ArgType.IMM
 		else:
 			return t[0] == self
 
+class OpcodeException(Exception):
+	pass
+class TypecheckLenException(Exception):
+	pass
+class TypecheckException(Exception):
+	pass
 class Instruction:
-        def __init__(self, opcode, argtypes):
+	def __init__(self, opcode, argtypes):
+		if opcode > 0x7F or opcode < 0:
+			raise OpcodeException(opcode)
+
 		self.opcode = opcode
-		assert self.opcode < 0x80 and self.opcode >= 0
 		self.argtypes = argtypes
 	def typecheck(self, sargs):
 		rargs = []
 		if len(sargs) != len(self.argtypes):
-			return None
+			raise TypecheckLenException(sargs, self.argtypes)
 		for i in range(0, len(sargs)):
 			if not self.argtypes[i].typecheck(sargs[i]):
-				return None
+				raise TypecheckException(self.argtypes[i],
+						sargs[i])
 			rargs.append(ArgType.gettype(sargs[i]))
 		return rargs
 
 instructions = {
-"NOP" : Instruction(0, []),
-"PUSH" : Instruction(1, [ArgType.TYPE_REG]),
-"POP" : Instruction(2, [ArgType.TYPE_REG]),
-"ADD" : Instruction(3, [ArgType.TYPE_VAL, ArgType.TYPE_VAL, ArgType.TYPE_VAL]),
-"MUL" : Instruction(4, [ArgType.TYPE_VAL, ArgType.TYPE_VAL, ArgType.TYPE_VAL]),
-"DIV" : Instruction(5, [ArgType.TYPE_VAL, ArgType.TYPE_VAL, ArgType.TYPE_VAL]),
-"JL" : Instruction(6, [ArgType.TYPE_LAB, ArgType.TYPE_VAL, ArgType.TYPE_VAL]),
-"CLB" : Instruction(7, [ArgType.TYPE_LAB]),
-"SYS" : Instruction(8, [ArgType.TYPE_VAL])
+"nop" : Instruction(0, []),
+"push" : Instruction(1, [ArgType.REG]),
+"pop" : Instruction(2, [ArgType.REG]),
+"add" : Instruction(3, [ArgType.VAL, ArgType.VAL, ArgType.VAL]),
+"mul" : Instruction(4, [ArgType.VAL, ArgType.VAL, ArgType.VAL]),
+"div" : Instruction(5, [ArgType.VAL, ArgType.VAL, ArgType.VAL]),
+"jl" : Instruction(6, [ArgType.LAB, ArgType.VAL, ArgType.VAL]),
+"clb" : Instruction(7, [ArgType.LAB]),
+"sys" : Instruction(8, [ArgType.VAL])
 }
 
 encoding_types = {
@@ -66,18 +76,31 @@ encoding_types = {
 # B : Total number of bits excluding high bits
 }
 
+class InvalidNumberException(Exception):
+	pass
+class InvalidLengthException(Exception):
+	pass
 def encode_pseudo_utf8(n, high_bits, to):
+	if n < 0:
+		raise InvalidNumberException(n)
+	if to is None or to < 0:
+		for k in sorted(encoding_types):
+			if n <= encoding_types[k][0]:
+				to = k
+				break
+		if to is None:
+			raise InvalidNumberException(n)
 	if to > 8 or to < 0:
-		return None
+		raise InvalidLengthException(to)
 	elif to == 1:
 		if n < 0x80:
 			return bytes([n])
 		else:
-			return None
+			raise InvalidNumberException(n,to)
 
 	(maxval, start_byte, n_tot) = encoding_types[to]
 	if n > maxval or high_bits > 15:
-		return None
+		raise InvalidNumberException(n, high_bits)
 	n = n | (high_bits << n_tot)
 	all_bytes = []
 	for i in range(0, to - 1):
@@ -86,32 +109,47 @@ def encode_pseudo_utf8(n, high_bits, to):
 	all_bytes.append(start_byte | n)
 	return bytes(reversed(all_bytes))
 
+class RangeCheckException(Exception):
+	pass
 class Line:
-	def __init__(self, opcode, args):
+	def __init__(self, opcode, args, labnum, regnum):
 		self.opcode = opcode
 		self.args = args
+		for a in args:
+			if a[0] == ArgType.REG:
+				if a[1] < 0 or a[1] >= regnum:
+					raise RangeCheckException(a[0],
+							a[1],
+							regnum)
+			elif a[0] == ArgType.LAB:
+				if a[1] < 0 or a[1] >= labnum:
+					raise RangeCheckException(a[0],
+							a[1],
+							labnum)
+
 	def __call__(self):
 		b = bytes([self.opcode])
-		for a in args:
-			if a[0] == TYPE_REG:
+		for a in self.args:
+			if a[0] == ArgType.REG:
 				b = b + encode_pseudo_utf8(a[1],1,None)
 			else:
 				b = b + encode_pseudo_utf8(a[1],0,None)
 		return b + bytes([0])
 
+class InstructionNotFoundException(Exception):
+	pass
 class Program:
 	def asm_push_line(self, ins, args):
-		self.asm.append(Line(ins, args))
+		self.asm.append(Line(ins, args, self.labnum, self.regnum))
 
 	def parse_asm_line(self, line):
 		line = line.split()
+		line[0] = line[0].casefold()
 		if line[0] not in instructions:
-			raise Exception
+			raise InstructionNotFoundException(line[0])
 		else:
 			ins = instructions[line[0]]
 			args_w_type = ins.typecheck(line[1:])
-			if r is None:
-				raise Exception
 			self.asm_push_line(ins.opcode, args_w_type)
 
 	def __call__(self):
@@ -120,5 +158,7 @@ class Program:
 			b = b + line()
 		return b
 
-	def __init__(self):
+	def __init__(self, labnum=16, regnum=16):
 		self.asm = []
+		self.labnum = labnum
+		self.regnum = regnum