export creole_decode; add db test

This commit is contained in:
Peter McGoron 2023-02-25 21:01:03 +00:00
parent b7eed7d44e
commit 09d636c02c
7 changed files with 75 additions and 200 deletions

View File

@ -6,7 +6,7 @@ asm/libcreole.so: creole.c creole.h
test_asm: asm/libcreole.so test_asm: asm/libcreole.so
cd asm && python3 test.py -f cd asm && python3 test.py -f
c_test/encode_decode: c_test/encode_decode.c creole.c creole.h c_test/encode_decode: c_test/encode_decode.c creole.c creole.h
$(CC) c_test/encode_decode.c -Wall -pedantic -std=c89 -g -fopenmp -o c_test/encode_decode $(CC) creole.c c_test/encode_decode.c -Wall -pedantic -std=c89 -g -fopenmp -o c_test/encode_decode
# c_test/encode_decode # c_test/encode_decode
c_test/creole: c_test/creole.c creole.c creole.h c_test/greatest.h c_test/creole: c_test/creole.c creole.c creole.h c_test/greatest.h
$(CC) -g c_test/creole.c -Wall -pedantic -std=c89 -o c_test/creole $(CC) -g c_test/creole.c -Wall -pedantic -std=c89 -o c_test/creole

View File

@ -1,9 +1,15 @@
Creole is a bytecode designed for simple implementations. Creole is a bytecode designed for microcontrollers. Its C source file
is less than 1000 lines long and does not depend on the C standard
library.
## Bytecode Format ## Bytecode Format
Each creole line consists of pseudo-UTF-8 characters. The first byte The syntax of creole instructions is
is an unsigned number between 0 and 127 (the high bit is clear). Each
[1 byte opcode][2 or more byte instruction]*[1 byte all zero]
Each creole instruction consists of pseudo-UTF-8 characters. The first
byte is an unsigned number between 0 and 127 (the high bit is clear). Each
succeeding pseudo-UTF-8 character is encoded as follows: succeeding pseudo-UTF-8 character is encoded as follows:
* `110HHHHx 10xxxxxx` * `110HHHHx 10xxxxxx`
@ -13,19 +19,20 @@ suceeding pseudo-UTF-8 character is encoded as follows:
* `1111110H 10HHHxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx` * `1111110H 10HHHxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx`
* `11111110 10HHHHxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx` * `11111110 10HHHHxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx`
The first four bytes determine the type. The LSB high bit determines The first four bits determine the type. The LSB high bit determines
if the encoded value is a register (`0001`) or immediate (`0010`). if the encoded value is a register (`0001`) or immediate (`00X0`).
The second bit from LSB determines if the value should be treated The second bit from LSB determines if the value should be treated
as a signed 32 bit two's complement number (`001X`) or should be as a signed 32 bit two's complement number (`001X`) or should be
treated as an unsigned 32 bit number (`000X`). treated as an unsigned 32 bit number (`000X`). All other values for
the high bits are reserved.
All other values are reserved. Overlong values are allowed, and for some The rest of the bits encode a number that is up to 32 bits long.
argument values they are necessary. All lines are terminated by a byte Overlong encodings are accepted and sometimes used.
of all zeros.
## Assembler ## Assembler
The macro assembler is Python (see the asm directory). The macro assembler is Python (see the asm directory). The macro
assembler supports virtual instructions and jumps with named labels.
## Design Philosophy ## Design Philosophy

View File

@ -44,6 +44,11 @@ class RunRet(Enum):
def is_halt(self): def is_halt(self):
return not (self == RunRet.CONTINUE or self == RunRet.SYSCALL) return not (self == RunRet.CONTINUE or self == RunRet.SYSCALL)
class CWord(Structure):
    """ctypes mirror of the C ``struct creole_word`` (len, high_bits, word).

    Instances are passed by reference to ``creole_decode``, so the field
    order and types here must stay in step with the C declaration.
    """

    _fields_ = [
        ("len", c_int),
        ("high_bits", c_int),
        ("word", c_uint),
    ]
class CReader(Structure): class CReader(Structure):
_fields_ = [("p", POINTER(c_ubyte)), _fields_ = [("p", POINTER(c_ubyte)),
("left", c_size_t)] ("left", c_size_t)]
@ -78,6 +83,8 @@ class InvalidSyscallError(Exception):
class CompileError(Exception): class CompileError(Exception):
def __init__(self, r): def __init__(self, r):
self.r = r self.r = r
class DataOverflowError(Exception):
    """Raised when a requested data-entry index is out of range."""
class Environment: class Environment:
def getreg(self, reg, signed=False): def getreg(self, reg, signed=False):
@ -106,6 +113,24 @@ class Environment:
return creole.from_2c(self.cenv.stk[stk]) return creole.from_2c(self.cenv.stk[stk])
else: else:
return self.cenv.stk[stk] return self.cenv.stk[stk]
def getdat(self, n):
    """Decode and return the words stored in data entry ``n``.

    :param n: Index into ``self.cenv.dats``.
    :return: List of decoded word values, in order. Decoding stops at the
        terminating single zero byte (which is not included) or at the
        first sequence ``creole_decode`` fails to decode.
    :raises DataOverflowError: If ``n`` is negative or not below
        ``self.cenv.datlen``.
    """
    if n < 0 or n >= self.cenv.datlen:
        raise DataOverflowError(n)

    rdr = CReader()
    rdr.p = self.cenv.dats[n]

    # ctypes has no pointer subtraction, so work with integer addresses.
    # Both addresses must be those of the pointed-to bytes: calling
    # addressof() on the pointer object itself would yield the location
    # of the pointer field inside the environment structure instead.
    # NOTE(review): assumes r_start describes the whole program buffer
    # and that dats[n] points inside it -- confirm against creole.c.
    dat_addr = addressof(rdr.p.contents)
    start_addr = addressof(self.cenv.r_start.p.contents)
    rdr.left = self.cenv.r_start.left - (dat_addr - start_addr)

    words = []
    w = CWord()
    while dll.creole_decode(byref(rdr), byref(w)) == 1:
        # A one-byte sequence with value zero terminates the data.
        if w.word == 0 and w.len == 1:
            break
        words.append(w.word)
    return words
def pop(self): def pop(self):
if stk == 0: if stk == 0:

View File

@ -438,6 +438,16 @@ class DataTest(unittest.TestCase):
p = Program() p = Program()
p.parse_asm_line("db d0 [4d2,1234,0,5]") p.parse_asm_line("db d0 [4d2,1234,0,5]")
self.assertEqual(p(), b'\x0b\xc0\x80\xe0\x93\x92\xf0\x81\x88\xb4\xc0\x80\xc0\x85\x00') self.assertEqual(p(), b'\x0b\xc0\x80\xe0\x93\x92\xf0\x81\x88\xb4\xc0\x80\xc0\x85\x00')
def test_alloc_multiple(self):
    """Two ``db`` entries in one program can each be read back with ``getdat``."""
    prog = Program()
    prog.parse_lines([
        "db d0 [1,2,3,4]",
        "db d1 [10,11,12,13]"
    ])
    env = ffi.Environment(prog())
    self.assertEqual(env(), ffi.RunRet.STOP)

    # The expected values treat the assembler literals as hexadecimal,
    # so "10".."13" read back as 0x10..0x13.
    expected_entries = ([1, 2, 3, 4], [0x10, 0x11, 0x12, 0x13])
    for index, expected in enumerate(expected_entries):
        self.assertEqual(env.getdat(index), expected)
class SCEnv(ffi.Environment): class SCEnv(ffi.Environment):
def syscall(self, s): def syscall(self, s):

View File

@ -11,188 +11,17 @@ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
#include <string.h> #include <string.h>
#include <assert.h> #include <assert.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include "../creole.c" #include "../creole.h"
#if 0
/* Parameters and expected-output state for one exhaustive encoding
 * test, consumed by encode_byte_seq().
 */
struct seq {
/* Last value to encode; the test loop stops after checking it. */
creole_word max;
/* Passed to creole_encode() as the encoded length; also the number of
 * bytes of minbuf/maxbuf that are compared. */
unsigned encode_to;
/* Passed to creole_encode() as the high bits. */
unsigned high_bits;
/* Expected encoding of the value currently being checked; advanced in
 * place after each value. */
unsigned char minbuf[7];
/* Expected contents of minbuf once the value `max` has been checked. */
unsigned char maxbuf[7];
};
/* Print the eight bits of c to stdout, most significant bit first. */
void bprint(unsigned char c) {
	int shift;

	for (shift = 7; shift >= 0; shift--)
		printf("%u", (c >> shift) & 1);
}
/* Print the first len bytes of b in binary, each followed by a space.
 * Prints nothing when len is zero or negative.
 */
void bprintb(unsigned char *b, int len) {
	int k;

	for (k = 0; k < len; k++) {
		bprint(b[k]);
		printf(" ");
	}
}
/* Checks creole_encode() against an independently maintained expected
 * byte sequence for every value from 0 through s->max.
 *
 * On entry s->minbuf must hold the expected encoding of 0. It is advanced
 * in place like an odometer after each value: continuation bytes count
 * from 0x80 up to 0xBF and then wrap back to 0x80, carrying into the byte
 * to their left; a carry out of byte 1 increments the start byte. After
 * the last value s->minbuf must equal s->maxbuf.
 *
 * Aborts the process on the first encoding failure or mismatch; for a
 * mismatch the value and both byte sequences (in binary) are printed.
 */
static void encode_byte_seq(struct seq *s) {
	creole_word i = 0;
	int j;
	unsigned char buf[7];

	for (;;) {
		/* The call is kept out of assert() so that it is still
		 * executed (and buf still filled) when NDEBUG is defined.
		 */
		if (creole_encode(i, s->encode_to, s->high_bits, buf) != 1) {
			printf("0x%lX: creole_encode failed\n", (unsigned long)i);
			abort();
		}

		if (memcmp(s->minbuf, buf, s->encode_to) != 0) {
			/* Cast so the format matches whatever integer type
			 * creole_word is defined as.
			 */
			printf("0x%lX ", (unsigned long)i);
			bprintb(s->minbuf, s->encode_to);
			printf("|");
			bprintb(buf, s->encode_to);
			printf("\n");
			abort();
		}

		if (i == s->max)
			break;
		i++;

		/* Increment the expected sequence, rightmost byte first. */
		for (j = s->encode_to - 1; j > 0; j--) {
			if (s->minbuf[j] == 0xBF) {
				s->minbuf[j] = 0x80;
			} else {
				s->minbuf[j]++;
				break;
			}
		}
		/* Every continuation byte wrapped: carry into the start byte. */
		if (j == 0)
			s->minbuf[0]++;
	}

	assert(memcmp(s->maxbuf, s->minbuf, s->encode_to) == 0);
}
static void encode_1(void) {
struct seq s;
s.max = 0x7F;
s.encode_to = 1;
s.high_bits = 0;
s.minbuf[0] = 0x00;
s.maxbuf[0] = 0x7F;
encode_byte_seq(&s);
}
/* Exhaustively test two-byte encodings (values 0 through 0x7F) for the
 * given high bits. The start byte carries the high bits shifted left by
 * one, with its lowest bit belonging to the value.
 */
static void encode_2(unsigned high_bits) {
	const unsigned lead = 0xC0 | (high_bits << 1);
	struct seq s;

	s.encode_to = 2;
	s.max = 0x7F;
	s.high_bits = high_bits;

	/* Continuation bytes run from 0x80 (minimum) to 0xBF (maximum). */
	memset(s.minbuf, 0x80, sizeof(s.minbuf));
	memset(s.maxbuf, 0xBF, sizeof(s.maxbuf));

	s.minbuf[0] = lead;
	s.maxbuf[0] = lead | 0x01;

	encode_byte_seq(&s);
}
/* Exhaustively test three-byte encodings (values 0 through 0xFFF) for
 * the given high bits. The start byte holds only the high bits, so it is
 * the same for the smallest and largest value.
 */
static void encode_3(unsigned high_bits) {
	const unsigned lead = 0xE0 | high_bits;
	struct seq s;

	s.encode_to = 3;
	s.max = 0xFFF;
	s.high_bits = high_bits;

	/* Continuation bytes run from 0x80 (minimum) to 0xBF (maximum). */
	memset(s.minbuf, 0x80, sizeof(s.minbuf));
	memset(s.maxbuf, 0xBF, sizeof(s.maxbuf));

	s.minbuf[0] = lead;
	s.maxbuf[0] = lead;

	encode_byte_seq(&s);
}
/* Exhaustively test four-byte encodings (values 0 through 0x1FFFF) for
 * the given high bits. The high bits are split: all but the lowest go in
 * the start byte, and the lowest one spills into bit 5 of byte 1.
 */
static void encode_4(unsigned high_bits) {
	const unsigned lead = 0xF0 | (high_bits >> 1);
	const unsigned spill = (high_bits & 0x1) << 5;
	struct seq s;

	s.encode_to = 4;
	s.max = 0x1FFFF;
	s.high_bits = high_bits;

	/* Continuation bytes run from 0x80 (minimum) to 0xBF (maximum). */
	memset(s.minbuf, 0x80, sizeof(s.minbuf));
	memset(s.maxbuf, 0xBF, sizeof(s.maxbuf));

	s.minbuf[0] = lead;
	s.maxbuf[0] = lead;
	s.minbuf[1] = 0x80 | spill;
	s.maxbuf[1] = 0x9F | spill;

	encode_byte_seq(&s);
}
/* Exhaustively test five-byte encodings (values 0 through 0x3FFFFF) for
 * the given high bits. The upper two high bits go in the start byte and
 * the lower two spill into bits 4-5 of byte 1.
 */
static void encode_5(unsigned high_bits) {
	const unsigned lead = 0xF8 | (high_bits >> 2);
	const unsigned spill = (high_bits & 0x3) << 4;
	struct seq s;

	s.encode_to = 5;
	s.max = 0x3FFFFF;
	s.high_bits = high_bits;

	/* Continuation bytes run from 0x80 (minimum) to 0xBF (maximum). */
	memset(s.minbuf, 0x80, sizeof(s.minbuf));
	memset(s.maxbuf, 0xBF, sizeof(s.maxbuf));

	s.minbuf[0] = lead;
	s.maxbuf[0] = lead;
	s.minbuf[1] = 0x80 | spill;
	s.maxbuf[1] = 0x8F | spill;

	encode_byte_seq(&s);
}
/* Exhaustively test six-byte encodings (values 0 through 0x7FFFFFF) for
 * the given high bits. The top high bit goes in the start byte and the
 * lower three spill into bits 3-5 of byte 1.
 */
static void encode_6(unsigned high_bits) {
	const unsigned lead = 0xFC | (high_bits >> 3);
	const unsigned spill = (high_bits & 0x7) << 3;
	struct seq s;

	s.encode_to = 6;
	s.max = 0x7FFFFFF;
	s.high_bits = high_bits;

	/* Continuation bytes run from 0x80 (minimum) to 0xBF (maximum). */
	memset(s.minbuf, 0x80, sizeof(s.minbuf));
	memset(s.maxbuf, 0xBF, sizeof(s.maxbuf));

	s.minbuf[0] = lead;
	s.maxbuf[0] = lead;
	s.minbuf[1] = 0x80 | spill;
	s.maxbuf[1] = 0x87 | spill;

	encode_byte_seq(&s);
}
/* Exhaustively test seven-byte encodings (values 0 through 0xFFFFFFFF)
 * for the given high bits. The start byte is always 0xFE; all of the
 * high bits sit in bits 2-5 of byte 1.
 */
static void encode_7(unsigned high_bits) {
	const unsigned spill = high_bits << 2;
	struct seq s;

	s.encode_to = 7;
	s.max = 0xFFFFFFFF;
	s.high_bits = high_bits;

	/* Continuation bytes run from 0x80 (minimum) to 0xBF (maximum). */
	memset(s.minbuf, 0x80, sizeof(s.minbuf));
	memset(s.maxbuf, 0xBF, sizeof(s.maxbuf));

	s.minbuf[0] = 0xFE;
	s.maxbuf[0] = 0xFE;
	s.minbuf[1] = 0x80 | spill;
	s.maxbuf[1] = 0x83 | spill;

	encode_byte_seq(&s);
}
/* Run every exhaustive encoding test: the one-byte case once, then each
 * multi-byte length for all 16 possible high-bit values. The multi-byte
 * combinations are spread over OpenMP threads when OpenMP is enabled.
 */
static void test_encode(void) {
	void (*multi_byte_tests[6])(unsigned) = {
		encode_2, encode_3, encode_4,
		encode_5, encode_6, encode_7
	};
	unsigned high_bits;
	unsigned t;

	encode_1();

	/* collapse(2) needs the two loops to stay perfectly nested. */
# pragma omp parallel for collapse(2) num_threads(8)
	for (high_bits = 0; high_bits < 16; high_bits++) {
		for (t = 0; t < 6; t++)
			multi_byte_tests[t](high_bits);
	}
}
#endif
void encode_decode_byte_seq(creole_word max, unsigned encode_to, unsigned high_bits) { void encode_decode_byte_seq(creole_word max, unsigned encode_to, unsigned high_bits) {
unsigned char buf[7]; unsigned char buf[7];
struct creole_reader r = {0}; struct creole_reader r = {0};
struct word w; struct creole_word w;
creole_word i = 0; creole_word i = 0;
for (;;) { for (;;) {
@ -201,7 +30,7 @@ void encode_decode_byte_seq(creole_word max, unsigned encode_to, unsigned high_b
assert(creole_encode(i, encode_to, high_bits, assert(creole_encode(i, encode_to, high_bits,
buf) == 1); buf) == 1);
assert(decode_seq(&r, &w) == 1); assert(creole_decode(&r, &w) == 1);
assert(w.len == encode_to); assert(w.len == encode_to);
if (w.high_bits != high_bits) { if (w.high_bits != high_bits) {
printf("high bits %u != %u\n", w.high_bits, high_bits); printf("high bits %u != %u\n", w.high_bits, high_bits);

View File

@ -99,16 +99,11 @@ static int read_eof(struct creole_reader *r)
* * lower bits are the encoded word. * * lower bits are the encoded word.
*/ */
#define MAX_HIGH_BITS 15 #define MAX_HIGH_BITS 15
struct word {
int len;
int high_bits;
creole_word word;
};
/* Decode a set of continuation bytes directly into the word. This assumes /* Decode a set of continuation bytes directly into the word. This assumes
* that each continuation byte contains no high words. * that each continuation byte contains no high words.
*/ */
static int read_continue(struct creole_reader *r, struct word *w, static int read_continue(struct creole_reader *r, struct creole_word *w,
int to_read) int to_read)
{ {
int i; int i;
@ -137,7 +132,7 @@ static int read_continue(struct creole_reader *r, struct word *w,
* byte. * byte.
*/ */
#define START_BYTE_NUM 7 #define START_BYTE_NUM 7
static int parse_start_byte(unsigned char c, struct word *w) static int parse_start_byte(unsigned char c, struct creole_word *w)
{ {
static const struct { static const struct {
/* The algorithm compares the mask to the start byte /* The algorithm compares the mask to the start byte
@ -200,7 +195,7 @@ static int parse_start_byte(unsigned char c, struct word *w)
/* This parses the first continuation byte if it is special. */ /* This parses the first continuation byte if it is special. */
#define SPECIAL_CONTINUE_BYTE_NUM (START_BYTE_NUM - 3) #define SPECIAL_CONTINUE_BYTE_NUM (START_BYTE_NUM - 3)
static int parse_special_byte(unsigned char c, struct word *w) static int parse_special_byte(unsigned char c, struct creole_word *w)
{ {
/* The index denotes the amount of high bits that were in /* The index denotes the amount of high bits that were in
* the start byte. This is the amount that the stored value * the start byte. This is the amount that the stored value
@ -232,7 +227,7 @@ static int parse_special_byte(unsigned char c, struct word *w)
} }
/* Parse an entire Pseudo-UTF8 sequence. */ /* Parse an entire Pseudo-UTF8 sequence. */
static int decode_seq(struct creole_reader *r, struct word *w) int creole_decode(struct creole_reader *r, struct creole_word *w)
{ {
int r_ret; int r_ret;
int to_read; int to_read;
@ -368,11 +363,11 @@ static enum creole_word_flag arg_get_type(unsigned high_bits)
static enum creole_compiler_ret static enum creole_compiler_ret
parse_line(struct creole_env *env, struct ins *ins, struct creole_reader *r) parse_line(struct creole_env *env, struct ins *ins, struct creole_reader *r)
{ {
struct word w = {0}; struct creole_word w = {0};
int i; int i;
ins->start = r->p; ins->start = r->p;
if (!decode_seq(r, &w)) if (!creole_decode(r, &w))
return CREOLE_OPCODE_READ_ERROR; return CREOLE_OPCODE_READ_ERROR;
ins->opcode = w.word; ins->opcode = w.word;
@ -383,7 +378,7 @@ parse_line(struct creole_env *env, struct ins *ins, struct creole_reader *r)
if (opcode_info[ins->opcode].arglen > CREOLE_MAX_ARG) if (opcode_info[ins->opcode].arglen > CREOLE_MAX_ARG)
return CREOLE_OPCODE_MALFORMED; return CREOLE_OPCODE_MALFORMED;
for (i = 0; i < opcode_info[ins->opcode].arglen; i++) { for (i = 0; i < opcode_info[ins->opcode].arglen; i++) {
if (!decode_seq(r, &w)) if (!creole_decode(r, &w))
return CREOLE_ARG_READ_ERROR; return CREOLE_ARG_READ_ERROR;
if (w.len == 1) if (w.len == 1)
return CREOLE_ARG_MALFORMED; return CREOLE_ARG_MALFORMED;
@ -399,7 +394,7 @@ parse_line(struct creole_env *env, struct ins *ins, struct creole_reader *r)
if (ins->opcode == CREOLE_DB) { if (ins->opcode == CREOLE_DB) {
ins->datapt = r->p; ins->datapt = r->p;
do { do {
if (!decode_seq(r, &w)) if (!creole_decode(r, &w))
return CREOLE_ARG_READ_ERROR; return CREOLE_ARG_READ_ERROR;
} while (w.len != 1); } while (w.len != 1);
if (w.word != 0) if (w.word != 0)
@ -408,7 +403,7 @@ parse_line(struct creole_env *env, struct ins *ins, struct creole_reader *r)
} }
ins->datapt = NULL; ins->datapt = NULL;
if (!decode_seq(r, &w)) if (!creole_decode(r, &w))
return CREOLE_LAST_READ_ERROR; return CREOLE_LAST_READ_ERROR;
if (w.word != 0 || w.len != 1) if (w.word != 0 || w.len != 1)
return CREOLE_LAST_MALFORMED; return CREOLE_LAST_MALFORMED;
@ -563,6 +558,8 @@ enum creole_run_ret creole_step(struct creole_env *env, creole_word *sc)
return CREOLE_RUN_DECODE_ERROR; return CREOLE_RUN_DECODE_ERROR;
switch (ins.opcode) { switch (ins.opcode) {
case CREOLE_DB:
break;
case CREOLE_PUSH: case CREOLE_PUSH:
check(read_val(env, &ins, 0, &a1)); check(read_val(env, &ins, 0, &a1));
check(creole_push(env, a1)); check(creole_push(env, a1));

View File

@ -84,6 +84,12 @@ enum creole_run_ret {
CREOLE_RUN_RET_LEN CREOLE_RUN_RET_LEN
}; };
/* Result of decoding one pseudo-UTF-8 sequence with creole_decode(). */
struct creole_word {
/* Number of bytes in the decoded sequence (1 for a single-byte value). */
int len;
/* High bits carried by the sequence, stored separately from the value. */
int high_bits;
/* The decoded value itself. */
creole_word word;
};
struct creole_reader { struct creole_reader {
unsigned char *p; unsigned char *p;
size_t left; size_t left;
@ -103,6 +109,7 @@ struct creole_env {
struct creole_reader r_start; struct creole_reader r_start;
}; };
int creole_decode(struct creole_reader *r, struct creole_word *w);
int creole_encode(creole_word i, unsigned encode_to, unsigned high_bits, int creole_encode(creole_word i, unsigned encode_to, unsigned high_bits,
unsigned char buf[7]); unsigned char buf[7]);
enum creole_compiler_ret creole_compile(struct creole_env *env); enum creole_compiler_ret creole_compile(struct creole_env *env);