aboutsummaryrefslogtreecommitdiffstats
path: root/asm
diff options
context:
space:
mode:
authorGravatar Peter McGoron 2023-02-20 18:45:21 +0000
committerGravatar Peter McGoron 2023-02-20 18:45:21 +0000
commit3dd29b100483cb9eb34d96f6ed21827e86cd77a2 (patch)
treeb07bf066bad51f43c0225da682631ddde9f4e5fb /asm
parentadjust python assembler to new API (diff)
add label parser that can deal with forward jumps; all tests pass
Diffstat (limited to 'asm')
-rw-r--r--asm/creole.py152
-rw-r--r--asm/test.py98
2 files changed, 138 insertions, 112 deletions
diff --git a/asm/creole.py b/asm/creole.py
index 559d663..a614701 100644
--- a/asm/creole.py
+++ b/asm/creole.py
@@ -71,7 +71,7 @@ class Argument:
class StringArgument(Argument):
def __init__(self, *args, **kwargs):
- super().__init__(self, *args, **kwargs)
+ super().__init__(*args, **kwargs)
def __bytes__(self):
b = bytes()
for v in self.val:
@@ -80,9 +80,23 @@ class StringArgument(Argument):
class LabelArgument(Argument):
def __init__(self, *args, **kwargs):
- super().__init__(self, *args, **kwargs)
- def load_label(self, labels):
- self.val = labels[val]
+ super().__init__(*args, **kwargs)
+ def __call__(self):
+ return self.val
+
+class TypecheckException(Exception):
+ """ Exception thrown when an argument to an instruction is of the
+ incorrect type. """
+ def __init__(self, got, argtype, sarg, opcode, i):
+ self.argtype = argtype
+ self.sarg = sarg
+ self.opcode = opcode
+ self.got = got
+ self.i = i
+ self.message = f'''\
+opcode {self.opcode.name} has invalid value {self.sarg} ({self.got})
+at {self.i} (expected {self.argtype})\
+'''
class ArgType(Enum):
""" Class denoting the type of an argument to an instruction. """
@@ -135,27 +149,29 @@ class ArgType(Enum):
elif s[0] == 'd' and s[1:].isnumeric():
return Argument(ArgType.DAT, int(s[1:]))
elif s[0] == '.':
- return Argument(ArgType.LAB, s[1:])
+ return LabelArgument(ArgType.LAB, s[:])
else:
raise MalformedArgument(s)
- def typecheck(self, s):
+ def typecheck(self, s, opcode, i):
""" Parses the type of the string and returns it if it fits
the type of the enum value.
:param s: String argument representing an argument.
- :return: The Argument class containing the object, or None
- if the string does not fit the type of self. """
+ :param opcode: Opcode of the argument. Used for debugging.
+ :param i: Argument number. Used for debugging.
+ :return: The Argument class containing the object.
+ :raises TypecheckException: """
t = ArgType.gettype(s)
if self == ArgType.VAL:
if t.at == ArgType.REG or t.at == ArgType.IMM:
return t
else:
- return None
+ raise TypecheckException(t.at, self, s, opcode, i)
elif t.at == self:
return t
else:
- return None
+ raise TypecheckException(t.at, self, s, opcode, i)
class OpcodeException(Exception):
pass
@@ -168,20 +184,7 @@ class TypecheckLenException(Exception):
self.argtypelen = argtypelen
def __str__(self):
return f'''\
-arguments {self.insargs} to opcode {self.opcode} not of length {self.argtypelen}\
-'''
-class TypecheckException(Exception):
- """ Exception thrown when an argument to an instruction are of the
- incorrect type. """
- def __init__(self, argtype, sarg, i, opcode):
- self.argtype = argtype
- self.sarg = sarg
- self.i = i
- self.opcode = opcode
- def __str__(self):
- return f'''\
-opcode {self.opcode} has invalid value {self.sarg}
-(expected {self.argtype} in position {self.i})\
+arguments {self.insargs} to opcode {self.opcode.name} not of length {self.argtypelen}\
'''
class Instruction(Enum):
@@ -248,19 +251,14 @@ class Instruction(Enum):
:param sargs: List of arguments to the instruction
as strings.
:return: List of arguments (as Argument objects).
- :raises TypeCheckException:
:raises TypecheckLenException:
"""
rargs = []
if len(sargs) != len(self.argtypes):
- raise TypecheckLenException(self.opcode, sargs,
+ raise TypecheckLenException(self, sargs,
len(self.argtypes))
for i in range(0, len(sargs)):
- t = self.argtypes[i].typecheck(sargs[i])
- if t is None:
- raise TypecheckException(self.argtypes[i],
- sargs[i],
- i, self.opcode)
+ t = self.argtypes[i].typecheck(sargs[i], self, i)
rargs.append(t)
return rargs
@@ -280,10 +278,11 @@ class Instruction(Enum):
return Instruction[self.opcode].render(args)
def _render_default(self, args):
- b = bytes([self.opcode])
+ comps = [bytes([self.opcode])]
for a in args:
- b = b + a()
- return b + bytes([0])
+ comps.append(a())
+ comps.append(b'\x00')
+ return comps
encoding_types = {
# start mask B
@@ -296,6 +295,12 @@ encoding_types = {
# B : Total number of bits excluding high bits
}
+def pseudo_utf8_len(n):
+ for k in sorted(encoding_types):
+ if n <= encoding_types[k][0]:
+ return k
+ return None
+
class InvalidNumberException(Exception):
pass
class InvalidLengthException(Exception):
@@ -304,10 +309,7 @@ def encode_pseudo_utf8(n, high_bits, to):
if n < 0:
raise InvalidNumberException(n)
if to is None or to < 0:
- for k in sorted(encoding_types):
- if n <= encoding_types[k][0]:
- to = k
- break
+ to = pseudo_utf8_len(n)
if to is None:
raise InvalidNumberException(n)
if to > 8 or to < 0:
@@ -343,27 +345,26 @@ class Line:
raise RangeCheckException(a.at,
a.val,
reglen)
-
- def load_label(self, labels):
- for a in self.args:
- if a.at == ArgType.LAB:
- a.load_label(labels)
def __call__(self):
return self.ins.render(self.args)
class InstructionNotFoundException(Exception):
pass
class Program:
- def asm_push_line(self, ins, args):
+ def _asm_push_line(self, ins, args):
l = Line(ins, args)
l.check_line(self.lablen, self.reglen)
self.asm.append(l)
def parse_asm_line(self, line):
+ """ Parse and add a single assembly line to the program.
+ :param line: String containing the line.
+ :raises InstructionNotFoundException:
+ """
line = line.strip().split()
line[0] = line[0].casefold()
if line[0][0] == '.':
- self.asm.append(line[0][1:])
+ self.asm.append(line[0])
return None
try:
@@ -372,21 +373,72 @@ class Program:
raise InstructionNotFoundException(line[0])
args_w_type = ins.typecheck(line[1:])
- self.asm_push_line(ins, args_w_type)
+ self._asm_push_line(ins, args_w_type)
def parse_lines(self, lines):
+ """ Parse a list of lines. See parse_asm_line.
+ :param lines: List of assembly lines.
+ """
for l in lines:
self.parse_asm_line(l)
def __call__(self):
- b = bytes()
+ """ Generate bytecode. """
+
+ # Labels may jump forward in the program, which means
+ # multiple passes are required to properly calculate
+ # jump locations.
+ # This algorithm makes every jump destination the same
+ # width in each operation, and calculates the smallest
+ # width that will allow all labels to jump to any location
+ # in the program.
+ # The algorithm calculates the length of the program
+ # with all jump arguments given a length of 0. Each label
+ # is noted with its offset in the program (with all jump
+ # arguments given zero length) and the number of jump arguments
+ # that occur prior to the label.
+ # When the code is emitted, each label's final offset is
+ # calculated from these two values and the chosen label width.
+ # This method is not optimal, but will work well for small
+ # programs.
+
+ ins = []
+ curlen = 0
+
+ # This dictionary maps each label to a tuple (len, refs)
+ # that denotes that the label points to len + lablen*refs
+ # where lablen is a to-be-determined number.
labels = {}
+ labelrefs = 0
for line in self.asm:
if type(line) is str:
- labels[line] = len(b)
+ labels[line] = (curlen, labelrefs)
continue
- line.load_label(labels)
- b = b + line()
+
+ next_ins = line()
+ for v in next_ins:
+ if type(v) is str:
+ labelrefs += 1
+ else:
+ curlen += len(v)
+ ins.append(next_ins)
+
+ # Calculate a label length, such that the entire program
+ # can be contained in this length.
+ for i in encoding_types:
+ if curlen + labelrefs*i < encoding_types[i][0]:
+ lablen = i
+ break
+
+ # Emit bytecode.
+ b = bytes()
+ for line in ins:
+ for arg in line:
+ if type(arg) is str:
+ off = labels[arg][0] + labels[arg][1]*lablen
+ arg = encode_pseudo_utf8(off, 0, lablen)
+ b = b + arg
+ assert len(b) < encoding_types[lablen][0]
return b
def __init__(self, lablen=16, reglen=16):
diff --git a/asm/test.py b/asm/test.py
index f2c2277..cae3d36 100644
--- a/asm/test.py
+++ b/asm/test.py
@@ -30,17 +30,17 @@ class PushTest(unittest.TestCase):
def test_parse_push_catch_typecheck_push_lab(self):
p = Program()
with self.assertRaises(TypecheckException) as cm:
- p.parse_asm_line("push l0")
+ p.parse_asm_line("push .l0")
self.assertEqual(cm.exception.argtype, ArgType.VAL)
- self.assertEqual(cm.exception.sarg, 'l0')
+ self.assertEqual(cm.exception.sarg, '.l0')
self.assertEqual(cm.exception.i, 0)
- self.assertEqual(cm.exception.opcode, 1)
+ self.assertEqual(cm.exception.opcode, Instruction.PUSH)
def test_parse_push_catch_typecheck_argument_overflow(self):
p = Program()
with self.assertRaises(TypecheckLenException) as cm:
p.parse_asm_line("push r1 r2")
- self.assertEqual(cm.exception.opcode, 1)
+ self.assertEqual(cm.exception.opcode, Instruction.PUSH)
self.assertEqual(cm.exception.insargs, ["r1", "r2"])
self.assertEqual(cm.exception.argtypelen, 1)
@@ -48,7 +48,7 @@ class PushTest(unittest.TestCase):
p = Program()
with self.assertRaises(TypecheckLenException) as cm:
p.parse_asm_line("push")
- self.assertEqual(cm.exception.opcode, 1)
+ self.assertEqual(cm.exception.opcode, Instruction.PUSH)
self.assertEqual(cm.exception.insargs, [])
self.assertEqual(cm.exception.argtypelen, 1)
@@ -64,23 +64,6 @@ class PushTest(unittest.TestCase):
p.parse_asm_line("PUSH 6")
ex = ffi.Environment(p())
- self.assertEqual(ex.cenv.prgend, 2)
-
- self.assertEqual(ex.cenv.prg[0].opcode, 1)
- self.assertEqual(ex.cenv.prg[0].w_flags[0], 1)
- self.assertEqual(ex.cenv.prg[0].w_flags[1], 0)
- self.assertEqual(ex.cenv.prg[0].w_flags[2], 0)
- self.assertEqual(ex.cenv.prg[0].w[0], 0)
- self.assertEqual(ex.cenv.prg[0].w[1], 0)
- self.assertEqual(ex.cenv.prg[0].w[2], 0)
-
- self.assertEqual(ex.cenv.prg[1].opcode, 1)
- self.assertEqual(ex.cenv.prg[1].w_flags[0], 0)
- self.assertEqual(ex.cenv.prg[1].w_flags[1], 0)
- self.assertEqual(ex.cenv.prg[1].w_flags[2], 0)
- self.assertEqual(ex.cenv.prg[1].w[0], 6)
- self.assertEqual(ex.cenv.prg[1].w[1], 0)
- self.assertEqual(ex.cenv.prg[1].w[2], 0)
def test_push_many(self):
p = Program()
@@ -107,15 +90,6 @@ class PopTest(unittest.TestCase):
b = p()
self.assertEqual(b, b'\x02\xC2\x89\x00')
ex = ffi.Environment(b)
- self.assertEqual(ex.cenv.prgend, 1)
-
- self.assertEqual(ex.cenv.prg[0].opcode, 2)
- self.assertEqual(ex.cenv.prg[0].w_flags[0], 1)
- self.assertEqual(ex.cenv.prg[0].w_flags[1], 0)
- self.assertEqual(ex.cenv.prg[0].w_flags[2], 0)
- self.assertEqual(ex.cenv.prg[0].w[0], 9)
- self.assertEqual(ex.cenv.prg[0].w[1], 0)
- self.assertEqual(ex.cenv.prg[0].w[2], 0)
def test_compile_throw_pop_literal(self):
p = Program()
@@ -124,22 +98,22 @@ class PopTest(unittest.TestCase):
self.assertEqual(cm.exception.argtype, ArgType.REG)
self.assertEqual(cm.exception.sarg, '6')
self.assertEqual(cm.exception.i, 0)
- self.assertEqual(cm.exception.opcode, 2)
+ self.assertEqual(cm.exception.opcode, Instruction.POP)
def test_compile_throw_pop_label(self):
p = Program()
with self.assertRaises(TypecheckException) as cm:
- p.parse_asm_line("pop l9")
+ p.parse_asm_line("pop .l9")
self.assertEqual(cm.exception.argtype, ArgType.REG)
- self.assertEqual(cm.exception.sarg, 'l9')
+ self.assertEqual(cm.exception.sarg, '.l9')
self.assertEqual(cm.exception.i, 0)
- self.assertEqual(cm.exception.opcode, 2)
+ self.assertEqual(cm.exception.opcode, Instruction.POP)
def test_compile_throw_argument_overflow(self):
p = Program()
with self.assertRaises(TypecheckLenException) as cm:
p.parse_asm_line("pop r1 r2")
- self.assertEqual(cm.exception.opcode, 2)
+ self.assertEqual(cm.exception.opcode, Instruction.POP)
self.assertEqual(cm.exception.insargs, ["r1", "r2"])
self.assertEqual(cm.exception.argtypelen, 1)
@@ -147,7 +121,7 @@ class PopTest(unittest.TestCase):
p = Program()
with self.assertRaises(TypecheckLenException) as cm:
p.parse_asm_line("pop")
- self.assertEqual(cm.exception.opcode, 2)
+ self.assertEqual(cm.exception.opcode, Instruction.POP)
self.assertEqual(cm.exception.insargs, [])
def test_pop_underflow(self):
@@ -189,7 +163,7 @@ class AddTest(unittest.TestCase):
self.assertEqual(cm.exception.argtype, ArgType.REG)
self.assertEqual(cm.exception.sarg, '5')
self.assertEqual(cm.exception.i, 0)
- self.assertEqual(cm.exception.opcode, 3)
+ self.assertEqual(cm.exception.opcode, Instruction.ADD)
def test_exec_add_throw_lab_1(self):
p = Program()
@@ -198,7 +172,7 @@ class AddTest(unittest.TestCase):
self.assertEqual(cm.exception.argtype, ArgType.VAL)
self.assertEqual(cm.exception.sarg, '.label')
self.assertEqual(cm.exception.i, 1)
- self.assertEqual(cm.exception.opcode, 3)
+ self.assertEqual(cm.exception.opcode, Instruction.ADD)
def test_exec_add_throw_lab_2(self):
p = Program()
@@ -207,7 +181,7 @@ class AddTest(unittest.TestCase):
self.assertEqual(cm.exception.argtype, ArgType.VAL)
self.assertEqual(cm.exception.sarg, '.ab')
self.assertEqual(cm.exception.i, 2)
- self.assertEqual(cm.exception.opcode, 3)
+ self.assertEqual(cm.exception.opcode, Instruction.ADD)
class MulTest(unittest.TestCase):
def test_exec_mul_imm_imm(self):
@@ -233,25 +207,25 @@ class MulTest(unittest.TestCase):
self.assertEqual(cm.exception.argtype, ArgType.REG)
self.assertEqual(cm.exception.sarg, '942')
self.assertEqual(cm.exception.i, 0)
- self.assertEqual(cm.exception.opcode, 4)
+ self.assertEqual(cm.exception.opcode, Instruction.MUL)
def test_exec_mul_throw_lab_1(self):
p = Program()
with self.assertRaises(TypecheckException) as cm:
- p.parse_asm_line("mul r9 l2 1991")
+ p.parse_asm_line("mul r9 .l2 1991")
self.assertEqual(cm.exception.argtype, ArgType.VAL)
- self.assertEqual(cm.exception.sarg, 'l2')
+ self.assertEqual(cm.exception.sarg, '.l2')
self.assertEqual(cm.exception.i, 1)
- self.assertEqual(cm.exception.opcode, 4)
+ self.assertEqual(cm.exception.opcode, Instruction.MUL)
def test_exec_mul_throw_lab_2(self):
p = Program()
with self.assertRaises(TypecheckException) as cm:
- p.parse_asm_line("mul r0 -11 l48")
+ p.parse_asm_line("mul r0 -11 .l48")
self.assertEqual(cm.exception.argtype, ArgType.VAL)
- self.assertEqual(cm.exception.sarg, 'l48')
+ self.assertEqual(cm.exception.sarg, '.l48')
self.assertEqual(cm.exception.i, 2)
- self.assertEqual(cm.exception.opcode, 4)
+ self.assertEqual(cm.exception.opcode, Instruction.MUL)
class DivTest(unittest.TestCase):
def test_div(self):
@@ -295,7 +269,7 @@ class DivTest(unittest.TestCase):
self.assertEqual(cm.exception.argtype, ArgType.REG)
self.assertEqual(cm.exception.sarg, '5')
self.assertEqual(cm.exception.i, 0)
- self.assertEqual(cm.exception.opcode, 5)
+ self.assertEqual(cm.exception.opcode, Instruction.DIV)
def test_exec_div_throw_lab_1(self):
p = Program()
@@ -304,7 +278,7 @@ class DivTest(unittest.TestCase):
self.assertEqual(cm.exception.argtype, ArgType.VAL)
self.assertEqual(cm.exception.sarg, '.qqweq')
self.assertEqual(cm.exception.i, 1)
- self.assertEqual(cm.exception.opcode, 5)
+ self.assertEqual(cm.exception.opcode, Instruction.DIV)
def test_exec_div_throw_lab_2(self):
p = Program()
@@ -313,7 +287,7 @@ class DivTest(unittest.TestCase):
self.assertEqual(cm.exception.argtype, ArgType.VAL)
self.assertEqual(cm.exception.sarg, '.24')
self.assertEqual(cm.exception.i, 2)
- self.assertEqual(cm.exception.opcode, 5)
+ self.assertEqual(cm.exception.opcode, Instruction.DIV)
class LabelTest(unittest.TestCase):
def test_unconditional_jump(self):
@@ -321,9 +295,9 @@ class LabelTest(unittest.TestCase):
p.parse_lines([
"mov r0 5",
"mov r0 6",
- "j l0",
+ "j .l0",
"mov r0 7",
- "CLB l0",
+ ".l0",
])
ex = ffi.Environment(p())
self.assertEqual(ex(), ffi.RunRet.STOP)
@@ -334,10 +308,10 @@ class LabelTest(unittest.TestCase):
p.parse_lines([
"add r0 10 0",
"add r1 20 0",
- "CLB l0",
+ ".loop_head",
"add r0 r0 -1",
"add r1 r1 1",
- "jl l0 0 r0"
+ "jl .loop_head 0 r0"
])
ex = ffi.Environment(p())
self.assertEqual(ex(), ffi.RunRet.STOP)
@@ -349,10 +323,10 @@ class LabelTest(unittest.TestCase):
p.parse_lines([
"mov r0 30",
"mov r1 0",
- "CLB l0",
+ ".l0",
"add r0 r0 -1",
"add r1 r1 1",
- "jls l0 -30 r0"
+ "jls .l0 -30 r0"
])
ex = ffi.Environment(p())
self.assertEqual(ex(), ffi.RunRet.STOP)
@@ -364,11 +338,11 @@ class LabelTest(unittest.TestCase):
p.parse_lines([
"mov r0 50",
"mov r1 0",
- "CLB l0",
+ ".l0",
"add r1 r1 1",
"mul r2 r0 -1",
"add r2 r2 r1",
- "jne l0 r2 0"
+ "jne .l0 r2 0"
])
ex = ffi.Environment(p())
self.assertEqual(ex(), ffi.RunRet.STOP)
@@ -381,14 +355,14 @@ class LabelTest(unittest.TestCase):
p.parse_lines([
"mov r0 0", # outer loop counter
"mov r2 0", # total iteration counter
- "CLB l0",
+ ".outer_loop",
"mov r1 0", # inner loop counter
- "CLB l1",
+ ".inner_loop",
"add r1 r1 1",
"add r2 r2 1",
- "jl l1 r1 50",
+ "jl .inner_loop r1 50",
"add r0 r0 1",
- "jl l0 r0 50"
+ "jl .outer_loop r0 50"
])
ex = ffi.Environment(p())
self.assertEqual(ex(), ffi.RunRet.STOP)