467 lines
14 KiB
Python
467 lines
14 KiB
Python
# Copyright (c) 2023 Peter McGoron <code@mcgoron.com>
|
|
#
|
|
# Permission to use, copy, modify, and/or distribute this software for any
|
|
# purpose with or without fee is hereby granted, provided that the above
|
|
# copyright notice and this permission notice appear in all copies.
|
|
#
|
|
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
|
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
|
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
|
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
|
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
|
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
|
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
|
|
from enum import Enum
|
|
|
|
class MalformedArgument(Exception):
    """ Raised when an argument string cannot be parsed into any
    known argument form. """
|
|
|
|
def word_2c(w):
    """ Negate an integer using 32 bit two's complement arithmetic.

    :param w: An integer in two's complement. A non-negative Python
      integer will work.
    :return: The negation of the integer, truncated to 32 bits and
      stored as a two's complement integer.
    """
    word_mask = 0xFFFFFFFF
    inverted = ~w
    # Two's complement negation: flip every bit, add one, keep 32 bits.
    return (inverted + 1) & word_mask
|
|
def ti(w):
    """ Explicitly transform an integer into 32 bit two's complement
    representation.

    :param w: A Python integer.
    :return: ``w`` unchanged when it is non-negative, otherwise the
      32 bit two's complement word representing ``w``.
    """
    if w >= 0:
        return w
    # Masking a negative Python integer to 32 bits yields its two's
    # complement word; this is the same value as negating -w with
    # (~(-w) + 1) & 0xFFFFFFFF.
    return w & 0xFFFFFFFF
|
|
def from_2c(w):
    """ Turn a 32 bit two's complement word into a Python integer.

    :param w: An integer in 32 bit two's complement.
    :return: The value as a signed Python integer.
    """
    sign_bit = (w >> 31) & 1
    if sign_bit:
        # Bit 31 set: negate within 32 bits to get the magnitude,
        # then return it with a minus sign.
        magnitude = (~w + 1) & 0xFFFFFFFF
        return -magnitude
    return w
|
|
|
|
class Argument:
    """ Class of arguments. Not used directly: it stores intermediate
    information during the assembly process. """

    def __init__(self, argtype, val, sign=False):
        """ Initialize an argument.

        :param argtype: Type of the argument (instance of ArgType).
        :param val: Python integer value of the argument.
        :param sign: If the argument should be treated as signed.
          Otherwise, the integer will be interpreted in execution
          as an unsigned integer.
        """
        self.at, self.sign, self.val = argtype, sign, val

    def __str__(self):
        """ Return ``(type, sign, value)`` as a string. """
        return '({}, {}, {})'.format(self.at, self.sign, self.val)

    def high_bits(self):
        """ Return the high bits that the argument would have in the
        opcode: bit 1 carries the sign flag and bit 0 is set when the
        argument is a register. """
        sign_bit = int(self.sign) << 1
        register_bit = self.at == ArgType.REG
        return sign_bit | register_bit

    def __call__(self):
        """ Return the encoded bytes of the value and its high bits. """
        # Values below 0x80 are encoded with a fixed length of two
        # bytes; for larger values the encoder picks the length.
        if self.val < 0x80:
            length = 2
        else:
            length = None
        return encode_pseudo_utf8(self.val, self.high_bits(), length)
|
|
|
|
class StringArgument(Argument):
    """ Argument whose value is a list of strings, each one parsed
    as a base 16 integer when the argument is encoded. """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def __call__(self):
        """ Return the concatenated encodings of every item, each
        one encoded as an immediate argument. """
        return b''.join(
            Argument(ArgType.IMM, int(item, base=16))()
            for item in self.val
        )
|
|
|
|
class LabelArgument(Argument):
    """ Argument whose value is a label name. """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def __call__(self):
        """ Return the label name itself rather than encoded bytes. """
        label = self.val
        return label
|
|
|
|
class TypecheckException(Exception):
    """ Exception thrown when an argument to an instruction is of the
    incorrect type. """

    def __init__(self, got, argtype, sarg, opcode, i):
        """ Record the failed check and build its message.

        :param got: Type that the argument actually has.
        :param argtype: Type that was expected.
        :param sarg: The argument as written.
        :param opcode: Instruction being checked; its ``name`` is
          used in the message.
        :param i: Argument number.
        """
        self.argtype = argtype
        self.sarg = sarg
        self.opcode = opcode
        self.got = got
        self.i = i
        self.message = (
            f'opcode {self.opcode.name} has invalid value'
            f' {self.sarg} ({self.got})\n'
            f'at {self.i} (expected {self.argtype})'
        )

    def __str__(self):
        # Without this the message was built but never displayed:
        # str() fell back to the raw constructor arguments.
        return self.message
|
|
|
|
class ArgType(Enum):
    """ Class denoting the type of an argument to an instruction. """

    # Immediate values are ones that must be numbers (positive or negative).
    IMM = 1
    # Type of registers.
    REG = 2
    # Type that denotes either immediate values or registers.
    VAL = 3
    # Type of data label.
    DAT = 4
    # Type of a string of 32 bit integers.
    STR = 5
    # Type of a label (.name).
    LAB = 6

    # NOTE: is_number and gettype take no ``self``; they are called
    # through the class, e.g. ``ArgType.gettype(s)``.
    def is_number(t):
        """ Return True when ``t`` is ArgType.IMM or ArgType.REG. """
        return t == ArgType.IMM or t == ArgType.REG

    def gettype(s):
        """ Parse an argument and return the Argument object that
        represents it.

        Valid parameters are:

        * a list (string of data values)
        * `r` followed by a nonnegative integer (register)
        * `d` followed by a nonnegative integer (data label)
        * `.` followed by a name (label)
        * any integer, optionally preceded by `-` (immediate value)

        :param s: String representing the argument, or a list of
          strings for a data string.
        :return: The Argument object representing the argument.
        :raises MalformedArgument: When ``s`` is empty or matches
          none of the forms above.
        """
        if isinstance(s, list):
            return StringArgument(ArgType.STR, s)
        if not s:
            # An empty string has no prefix character to inspect.
            raise MalformedArgument(s)

        # isdecimal() accepts exactly the digit strings int() can
        # parse; isnumeric() also accepts characters such as
        # fractions that make int() raise ValueError.
        if s.isdecimal():
            return Argument(ArgType.IMM, int(s))

        prefix, rest = s[0], s[1:]
        if rest.isdecimal():
            if prefix == '-':
                # Negative immediates are stored as a 32 bit two's
                # complement word and flagged as signed.
                return Argument(ArgType.IMM, word_2c(int(rest)), True)
            if prefix == 'r':
                return Argument(ArgType.REG, int(rest))
            if prefix == 'd':
                return Argument(ArgType.DAT, int(rest))
        if prefix == '.':
            return LabelArgument(ArgType.LAB, s)
        raise MalformedArgument(s)

    def typecheck(self, s, opcode, i):
        """ Parse the argument and return it if it fits the type of
        the enum value.

        :param s: String argument representing an argument.
        :param opcode: Opcode of the argument. Used for debugging.
        :param i: Argument number. Used for debugging.
        :return: The Argument class containing the object.
        :raises TypecheckException: When the parsed type does not
          fit this type.
        """
        t = ArgType.gettype(s)
        if self == ArgType.VAL:
            # VAL accepts either a register or an immediate.
            accepted = ArgType.is_number(t.at)
        else:
            accepted = t.at == self
        if not accepted:
            raise TypecheckException(t.at, self, s, opcode, i)
        return t
|
|
|
|
class OpcodeException(Exception):
    """ Raised when an instruction is declared with an integer opcode
    outside the range 0 to 0x7F. """
|
|
class TypecheckLenException(Exception):
    """ Exception thrown when arguments to an instruction are of the
    incorrect length. """

    def __init__(self, opcode, insargs, argtypelen):
        """ Record the instruction, the arguments it was given and
        the number of arguments it expects. """
        self.opcode, self.insargs, self.argtypelen = (
            opcode, insargs, argtypelen)

    def __str__(self):
        return (
            f'arguments {self.insargs} to opcode {self.opcode.name}'
            f' not of length {self.argtypelen}'
        )
|
|
|
|
class Instruction(Enum):
    """ Class of microcode instructions. Each value is a tuple: the
    first element is the opcode (or, for an instruction that is a
    version of another one, the name of that instruction), the
    second is the name of the function used to compile the
    instruction, and the succeeding elements are the types of each
    of the arguments. """
    NOP = 0, "_render_default"
    PUSH = 1, "_render_default", ArgType.VAL
    POP = 2, "_render_default", ArgType.REG
    ADD = 3, "_render_default", ArgType.REG, ArgType.VAL, ArgType.VAL
    MOV = "ADD", "_render_mov", ArgType.REG, ArgType.VAL
    MUL = 4, "_render_default", ArgType.REG, ArgType.VAL, ArgType.VAL
    DIV = 5, "_render_default", ArgType.REG, ArgType.VAL, ArgType.VAL
    SDIV = "DIV", "_render_change_args", ArgType.REG, ArgType.VAL, ArgType.VAL
    SYS = 6, "_render_default", ArgType.VAL
    JL = 7, "_render_default", ArgType.LAB, ArgType.VAL, ArgType.VAL
    JLS = "JL", "_render_change_args", ArgType.LAB, ArgType.VAL, ArgType.VAL
    JLE = 8, "_render_default", ArgType.LAB, ArgType.VAL, ArgType.VAL
    JLES = "JLE", "_render_change_args", ArgType.LAB, ArgType.VAL, ArgType.VAL
    JE = 9, "_render_default", ArgType.LAB, ArgType.VAL, ArgType.VAL
    J = "JE", "_render_j", ArgType.LAB
    JNE = 10, "_render_default", ArgType.LAB, ArgType.VAL, ArgType.VAL
    DB = 11, "_render_default", ArgType.DAT, ArgType.STR

    def __int__(self):
        """ Return the opcode associated with the Instruction.
        For a virtual instruction, the stored name is resolved to
        the instruction it refers to and that opcode is returned. """
        if type(self.opcode) is not int:
            return int(Instruction[self.opcode])
        return self.opcode

    def __init__(self, opcode, renderfun, *args):
        """ Initialize an Instruction. Do not call this function: it
        is used to make enum values. To add a new instruction, modify
        the Instruction enum.
        Some parameters are strings because the values they name may
        not be loaded until later.

        :param opcode: Opcode of the instruction, or a string
          containing the case-sensitive name of the instruction
          from which this instruction derives.
        :param renderfun: A string with the name of a function in
          the class that renders the instruction.
        :param *args: Type of each argument to the instruction.
          The number of types is the number of arguments the
          instruction takes.
        :raises OpcodeException: When an integer opcode is outside
          the range 0 to 0x7F.
        """
        is_bad_opcode = type(opcode) is int and not (0 <= opcode <= 0x7F)
        if is_bad_opcode:
            raise OpcodeException(opcode)

        self.opcode = opcode
        self.argtypes = args
        self.render = getattr(self, renderfun)

    def typecheck(self, sargs):
        """ Pass arguments to the instruction and check if the
        arguments are correct.

        :param sargs: List of arguments to the instruction
          as strings.
        :return: List of arguments (as Argument objects).
        :raises TypecheckLenException: When the number of arguments
          differs from the number the instruction takes.
        """
        expected = len(self.argtypes)
        if len(sargs) != expected:
            raise TypecheckLenException(self, sargs, expected)
        return [
            argtype.typecheck(sarg, self, pos)
            for pos, (argtype, sarg) in enumerate(zip(self.argtypes, sargs))
        ]

    def _render_mov(self, args):
        """ Render through the base instruction with an immediate 0
        appended as third argument. """
        dest, src = args[0], args[1]
        padded = [dest, src, Argument(ArgType.IMM, 0)]
        return Instruction[self.opcode].render(padded)

    def _render_j(self, args):
        """ Render through the base instruction with two immediate 0
        arguments appended after the label. """
        target = args[0]
        padded = [target, Argument(ArgType.IMM, 0), Argument(ArgType.IMM, 0)]
        return Instruction[self.opcode].render(padded)

    def _render_change_args(self, args):
        """ Mark every register or immediate argument as signed (in
        place), then render through the base instruction. """
        for arg in args:
            if ArgType.is_number(arg.at):
                arg.sign = True
        return Instruction[self.opcode].render(args)

    def _render_default(self, args):
        """ Return a list holding the opcode byte, the rendering of
        each argument in order, and a terminating zero byte. """
        comps = [bytes([self.opcode])]
        comps.extend(a() for a in args)
        comps.append(b'\x00')
        return comps
|
|
|
|
# Table of encodings, keyed by encoded length in bytes. Each entry is
# (largest value that fits, start byte, B), where
# B : Total number of bits excluding high bits
encoding_types = {
    length: ((1 << bits) - 1, start, bits)
    for length, start, bits in (
        # length  start   B
        (2,       0xC0,   7),
        (3,       0xE0,   12),
        (4,       0xF0,   17),
        (5,       0xF8,   22),
        (6,       0xFC,   27),
        (7,       0xFE,   32),
    )
}
|
|
|
|
def pseudo_utf8_len(n):
    """ Return the smallest length in encoding_types whose largest
    value is at least ``n``, or None when no length fits. """
    fitting = (
        length for length in sorted(encoding_types)
        if n <= encoding_types[length][0]
    )
    return next(fitting, None)
|
|
|
|
class InvalidNumberException(Exception):
    """ Raised when a number cannot be encoded: it is negative, or
    it (or its high bits) does not fit the encoding length. """
|
|
class InvalidLengthException(Exception):
    """ Raised when an encoding length is not a usable length. """
|
|
def encode_pseudo_utf8(n, high_bits, to):
    """ Encode a non-negative integer and its high bits into bytes.

    :param n: Non-negative integer to encode.
    :param high_bits: Integer from 0 to 15 stored above the value
      bits. Ignored when the length is 1.
    :param to: Length of the encoding in bytes. When None or
      negative, the smallest length that fits ``n`` is used.
    :return: The encoded bytes.
    :raises InvalidNumberException: When ``n`` is negative, does not
      fit the length, or ``high_bits`` is outside 0 to 15.
    :raises InvalidLengthException: When ``to`` is neither 1 nor a
      length listed in encoding_types.
    """
    if n < 0:
        raise InvalidNumberException(n)
    if to is None or to < 0:
        to = pseudo_utf8_len(n)
        if to is None:
            raise InvalidNumberException(n)

    if to == 1:
        # A single byte holds the value alone; there is no room for
        # high bits.
        if n >= 0x80:
            raise InvalidNumberException(n, to)
        return bytes([n])
    if to not in encoding_types:
        # Covers 0 and 8, which previously slipped past the bound
        # check and failed with a bare KeyError.
        raise InvalidLengthException(to)

    (maxval, start_byte, n_tot) = encoding_types[to]
    if n > maxval or not 0 <= high_bits <= 15:
        raise InvalidNumberException(n, high_bits)

    # Place the high bits directly above the value bits.
    n = n | (high_bits << n_tot)
    all_bytes = []
    # Emit continuation bytes from the least significant end, six
    # bits at a time; whatever remains goes into the start byte.
    for _ in range(to - 1):
        all_bytes.append(0x80 | (n & 0x3F))
        n >>= 6
    all_bytes.append(start_byte | n)
    return bytes(reversed(all_bytes))
|
|
|
|
class RangeCheckException(Exception):
    """ Raised when a register or data label number is outside the
    range available to the program. """
|
|
class Line:
    """ A single instruction together with its typechecked
    arguments. """

    def __init__(self, ins, args):
        """
        :param ins: The instruction; its ``render`` is used to emit
          the line.
        :param args: List of the arguments to the instruction.
        """
        self.ins = ins
        self.args = args

    def check_line(self, reglen, datlen):
        """ Check that every register and data label argument is in
        range.

        :param reglen: Number of registers; valid registers are
          0 to ``reglen - 1``.
        :param datlen: Number of data labels; valid data labels are
          0 to ``datlen - 1``.
        :raises RangeCheckException: With the argument type, its
          value and the limit it violated.
        """
        for a in self.args:
            if a.at == ArgType.REG:
                limit = reglen
            elif a.at == ArgType.DAT:
                # Data labels are bounded by datlen; the error used
                # to report reglen here.
                limit = datlen
            else:
                continue
            if a.val < 0 or a.val >= limit:
                raise RangeCheckException(a.at, a.val, limit)

    def __call__(self):
        """ Render the line by passing the arguments to the
        instruction's render function. """
        return self.ins.render(self.args)
|
|
|
|
class InstructionNotFoundException(Exception):
    """ Raised when an assembly line names an instruction that does
    not exist. """
|
|
|
|
def _term_sep(s):
|
|
""" Split up the arguments of an instruction.
|
|
OP arg1 arg2 [data,data,data,...]
|
|
"""
|
|
|
|
s = s.strip()
|
|
s_data = s.split('[')
|
|
if len(s_data) == 2:
|
|
return s_data[0].split() + [s_data[1].rstrip('] \t\n\r\v').split(',')]
|
|
else:
|
|
return s.split()
|
|
|
|
class Program:
    """ An assembly program: a list of parsed lines and labels that
    can be turned into bytecode by calling the object. """

    def __init__(self, reglen=16, datlen=16):
        """
        :param reglen: Number of registers available to the program.
        :param datlen: Number of data labels available to the program.
        """
        self.asm = []
        self.reglen = reglen
        self.datlen = datlen

    def _asm_push_line(self, ins, args):
        """ Range-check an instruction line and append it. """
        l = Line(ins, args)
        l.check_line(self.reglen, self.datlen)
        self.asm.append(l)

    def parse_asm_line(self, line):
        """ Parse and add a single assembly line to the program.
        Blank lines are ignored.

        :param line: String containing the line.
        :raises InstructionNotFoundException: When the line does not
          start with a known instruction name.
        """
        if not line.strip():
            # Nothing to parse; indexing the first term would fail.
            return None

        terms = _term_sep(line)
        if not isinstance(terms[0], str):
            # The line starts with a data list, so there is no
            # instruction name at all.
            raise InstructionNotFoundException(terms[0])

        head = terms[0].casefold()
        if head[0] == '.':
            # Label definitions are stored as plain (casefolded)
            # strings in between the instruction lines.
            self.asm.append(head)
            return None

        try:
            ins = Instruction[head.upper()]
        except KeyError:
            raise InstructionNotFoundException(head) from None

        args_w_type = ins.typecheck(terms[1:])
        self._asm_push_line(ins, args_w_type)

    def parse_lines(self, lines):
        """ Parse a list of lines. See parse_asm_line.

        :param lines: List of assembly lines.
        """
        for l in lines:
            self.parse_asm_line(l)

    def __call__(self):
        """ Generate bytecode.

        :return: The bytecode of the whole program.
        :raises InvalidLengthException: When no label width can
          address the whole program.
        """

        # Labels may jump forward in the program, which means
        # multiple passes are required to properly calculate
        # jump locations.
        # This algorithm makes every jump destination the same
        # width in each operation, and calculates the smallest
        # width that will allow all labels to jump to any location
        # in the program.
        # The algorithm calculates the length of the program
        # with all jump arguments given a length of 0. Each label
        # is noted with its offset in the program (with all jump
        # arguments given zero length) and the amount of jump
        # arguments that occur prior to the label.
        # When the code is emitted, the label length is properly
        # calculated with the length of each label.
        # This method is not optimal, but will work well for small
        # programs.

        ins = []
        curlen = 0

        # This dictionary contains a tuple (len, refs)
        # that denotes that a label points to len + lablen*refs
        # where lablen is a to-be-determined number.
        labels = {}
        labelrefs = 0
        for line in self.asm:
            if isinstance(line, str):
                labels[line] = (curlen, labelrefs)
                continue

            next_ins = line()
            for v in next_ins:
                if isinstance(v, str):
                    labelrefs += 1
                else:
                    curlen += len(v)
            ins.append(next_ins)

        # Calculate a label length, such that the entire program
        # can be contained in this length.
        for i in encoding_types:
            if curlen + labelrefs*i < encoding_types[i][0]:
                lablen = i
                break
        else:
            # Without this, lablen would be unbound further down.
            raise InvalidLengthException(curlen, labelrefs)

        # Emit bytecode into a mutable buffer; repeated bytes
        # concatenation copies the whole program on every step.
        b = bytearray()
        for line in ins:
            for arg in line:
                if isinstance(arg, str):
                    # Definitions were casefolded when parsed, so fold
                    # the reference the same way before the lookup.
                    base, refs = labels[arg.casefold()]
                    arg = encode_pseudo_utf8(base + refs*lablen, 0, lablen)
                b += arg
        assert len(b) < encoding_types[lablen][0]
        return bytes(b)
|
|
|