add pseudoinstructions that compile to regular instructions

This commit is contained in:
Peter McGoron 2023-02-12 18:06:08 +00:00
parent b425e26ea3
commit 7b3eaf9b4f
5 changed files with 146 additions and 103 deletions

View File

@ -17,6 +17,16 @@ def from_2c(w):
return w return w
return -word_2c(w) return -word_2c(w)
class Argument:
    """A single typed argument of an assembly instruction.

    Attributes:
        at:   the argument type given as ``argtype``.
        val:  the argument's value.
        sign: flag that ``high_bits()`` places in bit 1.
    """

    def __init__(self, argtype, val, sign=False):
        self.at, self.val, self.sign = argtype, val, sign

    def __str__(self):
        return '({}, {}, {})'.format(self.at, self.sign, self.val)

    def high_bits(self):
        """Return the flag bits: bit 1 from ``sign``, bit 0 set for ArgType.REG."""
        sign_bit = int(self.sign) << 1
        is_register = self.at == ArgType.REG
        return sign_bit | is_register
class ArgType(Enum): class ArgType(Enum):
""" Class denoting the type of an argument to an instruction. """ """ Class denoting the type of an argument to an instruction. """
@ -39,13 +49,13 @@ class ArgType(Enum):
the argument. the argument.
""" """
if s.isnumeric(): if s.isnumeric():
return (ArgType.IMM, int(s)) return Argument(ArgType.IMM, int(s))
elif s[0] == "-" and s[1:].isnumeric(): elif s[0] == "-" and s[1:].isnumeric():
return (ArgType.IMM, word_2c(int(s[1:]))) return Argument(ArgType.IMM, word_2c(int(s[1:])))
elif s[0] == 'r' and s[1:].isnumeric(): elif s[0] == 'r' and s[1:].isnumeric():
return (ArgType.REG, int(s[1:])) return Argument(ArgType.REG, int(s[1:]))
elif s[0] == 'l' and s[1:].isnumeric(): elif s[0] == 'l' and s[1:].isnumeric():
return (ArgType.LAB, int(s[1:])) return Argument(ArgType.LAB, int(s[1:]))
else: else:
raise MalformedArgument(s) raise MalformedArgument(s)
@ -54,11 +64,11 @@ class ArgType(Enum):
the type of the enum value. """ the type of the enum value. """
t = ArgType.gettype(s) t = ArgType.gettype(s)
if self == ArgType.VAL: if self == ArgType.VAL:
if t[0] == ArgType.REG or t[0] == ArgType.IMM: if t.at == ArgType.REG or t.at == ArgType.IMM:
return t return t
else: else:
return None return None
elif t[0] == self: elif t.at == self:
return t return t
else: else:
return None return None
@ -84,29 +94,38 @@ class TypecheckException(Exception):
class Instruction(Enum): class Instruction(Enum):
""" Class of microcode instructions. The first number is the opcode """ Class of microcode instructions. The first number is the opcode
and the suceeding values are the types of each of the and the suceeding values are the types of each of the
arguments. """ arguments. The first argument is the opcode and the second
NOP = 0 argument is if the argument is a signed variant of another
PUSH = 1, ArgType.VAL opcode. """
POP = 2, ArgType.REG NOP = 0, False
ADD = 3, ArgType.REG, ArgType.VAL, ArgType.VAL PUSH = 1, False, ArgType.VAL
MUL = 4, ArgType.REG, ArgType.VAL, ArgType.VAL POP = 2, False, ArgType.REG
DIV = 5, ArgType.REG, ArgType.VAL, ArgType.VAL ADD = 3, False, ArgType.REG, ArgType.VAL, ArgType.VAL
SDIV = 6, ArgType.REG, ArgType.VAL, ArgType.VAL MUL = 4, False, ArgType.REG, ArgType.VAL, ArgType.VAL
SYS = 7, ArgType.VAL DIV = 5, False, ArgType.REG, ArgType.VAL, ArgType.VAL
CLB = 8, ArgType.LAB SDIV = "DIV", True, ArgType.REG, ArgType.VAL, ArgType.VAL
JL = 9, ArgType.LAB, ArgType.VAL, ArgType.VAL SYS = 6, False, ArgType.VAL
JLE = 10, ArgType.LAB, ArgType.VAL, ArgType.VAL CLB = 7, False, ArgType.LAB
JG = 11, ArgType.LAB, ArgType.VAL, ArgType.VAL JL = 8, False, ArgType.LAB, ArgType.VAL, ArgType.VAL
JGE = 12, ArgType.LAB, ArgType.VAL, ArgType.VAL JLS = "JL", True, ArgType.LAB, ArgType.VAL, ArgType.VAL
JE = 13, ArgType.LAB, ArgType.VAL, ArgType.VAL JLE = 9, False, ArgType.LAB, ArgType.VAL, ArgType.VAL
JNE = 13, ArgType.LAB, ArgType.VAL, ArgType.VAL JLES = "JLE", True, ArgType.LAB, ArgType.VAL, ArgType.VAL
JE = 10, False, ArgType.LAB, ArgType.VAL, ArgType.VAL
JNE = 11, False, ArgType.LAB, ArgType.VAL, ArgType.VAL
def __init__(self, opcode, *args): def __int__(self):
if opcode > 0x7F or opcode < 0: return self.opcode
def __init__(self, opcode, signed_instruction, *args):
if type(opcode) is int and (opcode > 0x7F or opcode < 0):
raise OpcodeException(opcode) raise OpcodeException(opcode)
self.opcode = opcode self.opcode = opcode
self.argtypes = args self.argtypes = args
if signed_instruction:
self.render = self._render_change_args
else:
self.render = self._default_render
def typecheck(self, sargs): def typecheck(self, sargs):
""" Pass arguments to the instruction and check if the """ Pass arguments to the instruction and check if the
@ -124,15 +143,31 @@ class Instruction(Enum):
rargs.append(t) rargs.append(t)
return rargs return rargs
# The following will be called using OPCODE.render() instead of being
# called directly.
def _render_change_args(self, args):
    """Render a signed pseudoinstruction through its underlying instruction.

    Every argument that is not a label is emitted with its sign flag set,
    and rendering is delegated to the instruction named by ``self.opcode``.

    The caller's ``args`` are not modified: flagged copies are built
    instead, so rendering a line has no side effect on the parsed
    arguments.
    """
    signed_args = [
        a if a.at == ArgType.LAB else Argument(a.at, a.val, True)
        for a in args
    ]
    return Instruction[self.opcode].render(signed_args)
def _default_render(self, args):
    """Serialize the opcode byte, each encoded argument, and a 0 byte."""
    chunks = [bytes([self.opcode])]
    for arg in args:
        # Values below 0x80 are encoded with an explicit length argument
        # of 2; for anything else None is passed to the encoder.
        if arg.val < 0x80:
            width = 2
        else:
            width = None
        chunks.append(encode_pseudo_utf8(arg.val, arg.high_bits(), width))
    chunks.append(bytes([0]))
    return b"".join(chunks)
encoding_types = { encoding_types = {
# start mask A B # start mask B
2: (0x7F, 0xC0, 7), 2: (0x7F, 0xC0, 7),
3: (0xFFF, 0xE0, 12), 3: (0xFFF, 0xE0, 12),
4: (0x1FFFF, 0xF0, 16), 4: (0x1FFFF, 0xF0, 17),
5: (0x3FFFFF, 0xF8, 21), 5: (0x3FFFFF, 0xF8, 22),
6: (0x7FFFFFF, 0xFC, 26), 6: (0x7FFFFFF, 0xFC, 27),
7: (0xFFFFFFFF, 0xFE, 36), 7: (0xFFFFFFFF, 0xFE, 32),
# A : number of bits in start byte
# B : Total number of bits excluding high bits # B : Total number of bits excluding high bits
} }
@ -172,32 +207,25 @@ def encode_pseudo_utf8(n, high_bits, to):
class RangeCheckException(Exception): class RangeCheckException(Exception):
pass pass
class Line: class Line:
def __init__(self, opcode, args): def __init__(self, ins, args):
self.opcode = opcode self.ins = ins
self.args = args self.args = args
def check_line(self, lablen, reglen): def check_line(self, lablen, reglen):
for a in self.args: for a in self.args:
if a[0] == ArgType.REG: if a.at == ArgType.REG:
if a[1] < 0 or a[1] >= reglen: if a.val < 0 or a.val >= reglen:
raise RangeCheckException(a[0], raise RangeCheckException(a.at,
a[1], a.val,
reglen) reglen)
elif a[0] == ArgType.LAB: elif a.at == ArgType.LAB:
if a[1] < 0 or a[1] >= lablen: if a.val < 0 or a.val >= lablen:
raise RangeCheckException(a[0], raise RangeCheckException(a.at,
a[1], a.val,
reglen) reglen)
def __call__(self): def __call__(self):
b = bytes([self.opcode]) return self.ins.render(self.args)
for a in self.args:
l = 2 if a[1] < 0x80 else None
if a[0] == ArgType.REG:
b = b + encode_pseudo_utf8(a[1],1,l)
else:
b = b + encode_pseudo_utf8(a[1],0,l)
return b + bytes([0])
class InstructionNotFoundException(Exception): class InstructionNotFoundException(Exception):
pass pass
@ -212,12 +240,12 @@ class Program:
line[0] = line[0].casefold() line[0] = line[0].casefold()
try: try:
# TODO: is there no better way to do this in Python? # TODO: is there no better way to do this in Python?
ins = getattr(Instruction, line[0].upper()) ins = Instruction[line[0].upper()]
except Exception as e: except Exception as e:
raise InstructionNotFoundException(line[0]) raise InstructionNotFoundException(line[0])
args_w_type = ins.typecheck(line[1:]) args_w_type = ins.typecheck(line[1:])
self.asm_push_line(ins.opcode, args_w_type) self.asm_push_line(ins, args_w_type)
def parse_lines(self, lines): def parse_lines(self, lines):
for l in lines: for l in lines:

View File

@ -22,10 +22,11 @@ class RunRet(Enum):
STOP = 2 STOP = 2
STACK_OVERFLOW = 3 STACK_OVERFLOW = 3
STACK_UNDERFLOW = 4 STACK_UNDERFLOW = 4
RUN_LABEL_OVERFLOW = 5 LABEL_OVERFLOW = 5
REGISTER_OVERFLOW = 6 REGISTER_OVERFLOW = 6
UNKNOWN_OPCODE = 7 UNKNOWN_OPCODE = 7
DIVIDE_BY_ZERO = 8 DIVIDE_BY_ZERO = 8
HIGH_BIT_MALFORMED = 9
def is_halt(self): def is_halt(self):
return not (self == RunRet.CONTINUE or self == RunRet.SYSCALL) return not (self == RunRet.CONTINUE or self == RunRet.SYSCALL)

View File

@ -50,7 +50,7 @@ class PushTest(unittest.TestCase):
p = Program(reglen=0x8000000) p = Program(reglen=0x8000000)
p.parse_asm_line("PUSH r134217727") p.parse_asm_line("PUSH r134217727")
b = p() b = p()
self.assertEqual(b, b'\x01\xFC\x87\xbf\xbf\xbf\xbf\x00') self.assertEqual(b, b'\x01\xFC\x8f\xbf\xbf\xbf\xbf\x00')
def test_compile_push(self): def test_compile_push(self):
p = Program() p = Program()
@ -318,7 +318,7 @@ class LabelTest(unittest.TestCase):
"CLB l0", "CLB l0",
"add r0 r0 -1", "add r0 r0 -1",
"add r1 r1 1", "add r1 r1 1",
"jg l0 r0 0" "jl l0 0 r0"
]) ])
ex = ffi.Environment(p()) ex = ffi.Environment(p())
self.assertEqual(ex(), ffi.RunRet.STOP) self.assertEqual(ex(), ffi.RunRet.STOP)

View File

@ -35,13 +35,10 @@ static const struct {
defop(ADD, 3, TYPE_REG, TYPE_VAL, TYPE_VAL), defop(ADD, 3, TYPE_REG, TYPE_VAL, TYPE_VAL),
defop(MUL, 3, TYPE_REG, TYPE_VAL, TYPE_VAL), defop(MUL, 3, TYPE_REG, TYPE_VAL, TYPE_VAL),
defop(DIV, 3, TYPE_REG, TYPE_VAL, TYPE_VAL), defop(DIV, 3, TYPE_REG, TYPE_VAL, TYPE_VAL),
defop(SDIV, 3, TYPE_REG, TYPE_VAL, TYPE_VAL),
defop(SYS, 1, TYPE_VAL, TYPE_NONE, TYPE_NONE), defop(SYS, 1, TYPE_VAL, TYPE_NONE, TYPE_NONE),
defop(CLB, 1, TYPE_LAB, TYPE_NONE, TYPE_NONE), defop(CLB, 1, TYPE_LAB, TYPE_NONE, TYPE_NONE),
defop(JL, 3, TYPE_LAB, TYPE_VAL, TYPE_VAL), defop(JL, 3, TYPE_LAB, TYPE_VAL, TYPE_VAL),
defop(JLE, 3, TYPE_LAB, TYPE_VAL, TYPE_VAL), defop(JLE, 3, TYPE_LAB, TYPE_VAL, TYPE_VAL),
defop(JG, 3, TYPE_LAB, TYPE_VAL, TYPE_VAL),
defop(JGE, 3, TYPE_LAB, TYPE_VAL, TYPE_VAL),
defop(JE, 3, TYPE_LAB, TYPE_VAL, TYPE_VAL), defop(JE, 3, TYPE_LAB, TYPE_VAL, TYPE_VAL),
defop(JNE, 3, TYPE_LAB, TYPE_VAL, TYPE_VAL) defop(JNE, 3, TYPE_LAB, TYPE_VAL, TYPE_VAL)
}; };
@ -378,13 +375,23 @@ static int typecheck(struct creole_env *env, int val,
} }
} }
/* Map the lowest bit of an argument's high bits to its word type:
 * set selects a register, clear selects an immediate. */
static enum creole_word_flag get_type_from_high_bit(unsigned high_bits)
{
	const int is_register = (high_bits & 1) != 0;

	return is_register ? CREOLE_REGISTER : CREOLE_IMMEDIATE;
}
static enum creole_compiler_ret typecheck_ins(struct creole_env *env, static enum creole_compiler_ret typecheck_ins(struct creole_env *env,
struct creole_ins *ins) struct creole_ins *ins)
{ {
unsigned i; unsigned i;
for (i = 0; i < opcode_info[ins->opcode].arglen; i++) { for (i = 0; i < opcode_info[ins->opcode].arglen; i++) {
if (!typecheck(env, ins->w[i], ins->w_flags[i], if (!typecheck(env, ins->w[i],
get_type_from_high_bit(ins->w_flags[i]),
opcode_info[ins->opcode].argtype[i])) opcode_info[ins->opcode].argtype[i]))
return CREOLE_TYPE_ERROR; return CREOLE_TYPE_ERROR;
} }
@ -479,7 +486,7 @@ static enum creole_run_ret read_val(struct creole_env *env,
unsigned arg, unsigned arg,
creole_word *w) creole_word *w)
{ {
if (ins->w_flags[arg] == CREOLE_REGISTER) { if (get_type_from_high_bit(ins->w_flags[arg]) == CREOLE_REGISTER) {
return creole_reg_read(env, ins->w[arg], w); return creole_reg_read(env, ins->w[arg], w);
} else { } else {
*w = ins->w[arg]; *w = ins->w[arg];
@ -512,12 +519,43 @@ check_label(struct creole_env *env, creole_word label)
: CREOLE_RUN_LABEL_OVERFLOW; : CREOLE_RUN_LABEL_OVERFLOW;
} }
/* Signedness of an instruction's two value operands, as returned by
 * check_sign_bits(): bit 1 comes from the first operand's flags and
 * bit 0 from the second operand's flags. */
enum argument_signed {
ALL_UNSIGNED = 0, // 0b00
FIRST_SIGNED = 2, // 0b10
SECOND_SIGNED = 1, // 0b01
ALL_SIGNED = 3 // 0b11
};
/* Combine bit 1 of each operand's flags into one value: the bit from
 * flags1 stays in bit 1, the bit from flags2 is moved down to bit 0. */
static enum argument_signed check_sign_bits(unsigned flags1, unsigned flags2)
{
	const unsigned first = flags1 & 0x2;
	const unsigned second = (flags2 & 0x2) >> 1;

	return first | second;
}
#define check(fun) do { \ #define check(fun) do { \
rcode = fun; \ rcode = fun; \
if (rcode != CREOLE_STEP_CONTINUE) \ if (rcode != CREOLE_STEP_CONTINUE) \
return rcode; \ return rcode; \
} while(0) } while(0)
/*
 * chk_sign_op(OPER): compute `a1 = a1 OPER a2`, casting each operand to
 * creole_signed when bit 1 of its flag word is set, as reported by
 * check_sign_bits(ins->w_flags[1], ins->w_flags[2]).
 *
 * The expansion site must have `ins`, `a1` and `a2` in scope, and the
 * macro may return CREOLE_STEP_HIGH_BIT_MALFORMED from the enclosing
 * function.
 *
 * NOTE(review): in the FIRST_SIGNED and SECOND_SIGNED cases only one
 * operand is cast. If creole_word is unsigned and has the same rank as
 * creole_signed, the usual arithmetic conversions turn the cast operand
 * back into unsigned, so these cases would behave like ALL_UNSIGNED.
 * Confirm the intended mixed-sign semantics.
 *
 * NOTE(review): check_sign_bits() only yields the values 0..3, so the
 * default branch looks unreachable - confirm.
 */
#define chk_sign_op(OPER) do { \
switch (check_sign_bits(ins->w_flags[1], ins->w_flags[2])) { \
case ALL_UNSIGNED: \
a1 = a1 OPER a2; \
break; \
case FIRST_SIGNED: \
a1 = (creole_signed)a1 OPER a2; \
break; \
case SECOND_SIGNED: \
a1 = a1 OPER (creole_signed)a2; \
break; \
case ALL_SIGNED: \
a1 = (creole_signed) a1 OPER (creole_signed) a2; \
break; \
default: \
return CREOLE_STEP_HIGH_BIT_MALFORMED; \
} \
} while(0)
enum creole_run_ret creole_step(struct creole_env *env, creole_word *sc) enum creole_run_ret creole_step(struct creole_env *env, creole_word *sc)
{ {
struct creole_ins *ins = env->prg + env->prgptr; struct creole_ins *ins = env->prg + env->prgptr;
@ -550,17 +588,11 @@ enum creole_run_ret creole_step(struct creole_env *env, creole_word *sc)
case CREOLE_DIV: case CREOLE_DIV:
check(read_val(env, ins, 1, &a1)); check(read_val(env, ins, 1, &a1));
check(read_val(env, ins, 2, &a2)); check(read_val(env, ins, 2, &a2));
if (a2 == 0) if (a2 == 0) {
return CREOLE_DIV_BY_ZERO; return CREOLE_DIV_BY_ZERO;
check(creole_reg_write(env, ins->w[0], a1 / a2)); }
break; chk_sign_op(/);
case CREOLE_SDIV: check(creole_reg_write(env, ins->w[0], a1));
check(read_val(env, ins, 1, &a1));
check(read_val(env, ins, 2, &a2));
if (a2 == 0)
return CREOLE_DIV_BY_ZERO;
check(creole_reg_write(env, ins->w[0],
(creole_signed)a1 / (creole_signed)a2));
break; break;
case CREOLE_SYS: case CREOLE_SYS:
check(read_val(env, ins, 0, sc)); check(read_val(env, ins, 0, sc));
@ -570,7 +602,8 @@ enum creole_run_ret creole_step(struct creole_env *env, creole_word *sc)
check(read_val(env, ins, 1, &a1)); check(read_val(env, ins, 1, &a1));
check(read_val(env, ins, 2, &a2)); check(read_val(env, ins, 2, &a2));
check(check_label(env, ins->w[0])); check(check_label(env, ins->w[0]));
if (a1 < a2) { chk_sign_op(<);
if (a1) {
env->prgptr = env->lab[ins->w[0]]; env->prgptr = env->lab[ins->w[0]];
increase_pointer = 0; increase_pointer = 0;
} }
@ -579,25 +612,8 @@ enum creole_run_ret creole_step(struct creole_env *env, creole_word *sc)
check(read_val(env, ins, 1, &a1)); check(read_val(env, ins, 1, &a1));
check(read_val(env, ins, 2, &a2)); check(read_val(env, ins, 2, &a2));
check(check_label(env, ins->w[0])); check(check_label(env, ins->w[0]));
if (a1 <= a2) { chk_sign_op(<=);
env->prgptr = env->lab[ins->w[0]]; if (a1) {
increase_pointer = 0;
}
break;
case CREOLE_JG:
check(read_val(env, ins, 1, &a1));
check(read_val(env, ins, 2, &a2));
check(check_label(env, ins->w[0]));
if (a1 > a2) {
env->prgptr = env->lab[ins->w[0]];
increase_pointer = 0;
}
break;
case CREOLE_JGE:
check(read_val(env, ins, 1, &a1));
check(read_val(env, ins, 2, &a2));
check(check_label(env, ins->w[0]));
if (a1 >= a2) {
env->prgptr = env->lab[ins->w[0]]; env->prgptr = env->lab[ins->w[0]];
increase_pointer = 0; increase_pointer = 0;
} }

View File

@ -17,21 +17,18 @@ typedef CREOLE_WORD creole_word;
typedef CREOLE_SIGNED_WORD creole_signed; typedef CREOLE_SIGNED_WORD creole_signed;
enum creole_opcode { enum creole_opcode {
CREOLE_NOOP = 0, CREOLE_NOOP,
CREOLE_PUSH = 1, CREOLE_PUSH,
CREOLE_POP = 2, CREOLE_POP,
CREOLE_ADD = 3, CREOLE_ADD,
CREOLE_MUL = 4, CREOLE_MUL,
CREOLE_DIV = 5, CREOLE_DIV,
CREOLE_SDIV = 6, CREOLE_SYS,
CREOLE_SYS = 7, CREOLE_CLB,
CREOLE_CLB = 8, CREOLE_JL,
CREOLE_JL = 9, CREOLE_JLE,
CREOLE_JLE = 10, CREOLE_JE,
CREOLE_JG = 11, CREOLE_JNE,
CREOLE_JGE = 12,
CREOLE_JE = 13,
CREOLE_JNE = 14,
CREOLE_OPCODE_LEN CREOLE_OPCODE_LEN
}; };
@ -66,6 +63,7 @@ enum creole_run_ret {
CREOLE_REGISTER_OVERFLOW, CREOLE_REGISTER_OVERFLOW,
CREOLE_STEP_UNKNOWN_OPCODE, CREOLE_STEP_UNKNOWN_OPCODE,
CREOLE_DIV_BY_ZERO, CREOLE_DIV_BY_ZERO,
CREOLE_STEP_HIGH_BIT_MALFORMED,
CREOLE_RUN_RET_LEN CREOLE_RUN_RET_LEN
}; };