From 09d636c02cdbb13d10f1435d918cc36116715fc4 Mon Sep 17 00:00:00 2001 From: Peter McGoron Date: Sat, 25 Feb 2023 21:01:03 +0000 Subject: [PATCH] export creole_decode; add db test --- Makefile | 2 +- README.md | 27 ++++--- asm/ffi.py | 25 ++++++ asm/test.py | 10 +++ c_test/encode_decode.c | 179 +---------------------------------------- creole.c | 25 +++--- creole.h | 7 ++ 7 files changed, 75 insertions(+), 200 deletions(-) diff --git a/Makefile b/Makefile index bac519e..d65fe98 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ asm/libcreole.so: creole.c creole.h test_asm: asm/libcreole.so cd asm && python3 test.py -f c_test/encode_decode: c_test/encode_decode.c creole.c creole.h - $(CC) c_test/encode_decode.c -Wall -pedantic -std=c89 -g -fopenmp -o c_test/encode_decode + $(CC) creole.c c_test/encode_decode.c -Wall -pedantic -std=c89 -g -fopenmp -o c_test/encode_decode # c_test/encode_decode c_test/creole: c_test/creole.c creole.c creole.h c_test/greatest.h $(CC) -g c_test/creole.c -Wall -pedantic -std=c89 -o c_test/creole diff --git a/README.md b/README.md index 5f34760..6d0b731 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,15 @@ -Creole is a bytecode designed for simple implementations. +Creole is a bytecode designed for microcontrollers. Its C source file +is less than 1000 lines long and does not depend on the C standard +library. ## Bytecode Format -Each creole line consists of pseudo-UTF-8 characters. The first byte -is an unsigned number between 0 and 127 (the high bit is clear). Each +The syntax of creole instructions is + + [1 byte opcode][2 or more byte argument]*[1 byte all zero] + +Each creole instruction consists of pseudo-UTF-8 characters. The first +byte is an unsigned number between 0 and 127 (the high bit is clear). 
Each suceeding pseudo-UTF-8 character is encoded as follows: * `110HHHHx 10xxxxxx` @@ -13,19 +19,20 @@ suceeding pseudo-UTF-8 character is encoded as follows: * `1111110H 10HHHxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx` * `11111110 10HHHHxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx` -The first four bytes determine the type. The LSB high bit determines -if the encoded value is a register (`0001`) or immediate (`0010`). +The first four bits determine the type. The LSB high bit determines +if the encoded value is a register (`0001`) or immediate (`00X0`). The second bit from LSB determines if the value should be treated as a signed 32 bit two's compliment number (`001X`) or should be -treated as an unsigned 32 bit number (`000X`). +treated as an unsigned 32 bit number (`000X`). All other values for +the high bits are reserved. -All other values are reserved. Overlong values are allowed, and for some -argument values they are necessary. All lines are terminated by a byte -of all zeros. +The rest of the bits encode a number that is up to 32 bits long. +Overlong encodings are accepted and sometimes used. ## Assembler -The macro assembler is Python (see the asm directory). +The macro assembler is written in Python (see the asm directory). The macro +assembler supports virtual instructions and jumps with named labels. 
## Design Philsophy diff --git a/asm/ffi.py b/asm/ffi.py index 13731dd..e7903af 100644 --- a/asm/ffi.py +++ b/asm/ffi.py @@ -44,6 +44,11 @@ class RunRet(Enum): def is_halt(self): return not (self == RunRet.CONTINUE or self == RunRet.SYSCALL) +class CWord(Structure): + _fields_ = [("len", c_int), + ("high_bits", c_int), + ("word", c_uint)] + class CReader(Structure): _fields_ = [("p", POINTER(c_ubyte)), ("left", c_size_t)] @@ -78,6 +83,8 @@ class InvalidSyscallError(Exception): class CompileError(Exception): def __init__(self, r): self.r = r +class DataOverflowError(Exception): + pass class Environment: def getreg(self, reg, signed=False): @@ -106,6 +113,24 @@ class Environment: return creole.from_2c(self.cenv.stk[stk]) else: return self.cenv.stk[stk] + def getdat(self, n): + if n >= self.cenv.datlen or n < 0: + raise DataOverflowError(n) + rdr = CReader() + rdr.p = self.cenv.dats[n] + # ctypes has no pointer arithmetic: subtract the addresses both pointers refer to + rdr_p_v = addressof(rdr.p.contents) + r_start_p_v = addressof(self.cenv.r_start.p.contents) + + rdr.left = self.cenv.r_start.left - (rdr_p_v - r_start_p_v) + + l = [] + w = CWord() + while dll.creole_decode(byref(rdr), byref(w)) == 1: + if w.word == 0 and w.len == 1: + break + l.append(w.word) + return l def pop(self): if stk == 0: diff --git a/asm/test.py b/asm/test.py index e49d46d..986a06d 100644 --- a/asm/test.py +++ b/asm/test.py @@ -438,6 +438,16 @@ class DataTest(unittest.TestCase): p = Program() p.parse_asm_line("db d0 [4d2,1234,0,5]") self.assertEqual(p(), b'\x0b\xc0\x80\xe0\x93\x92\xf0\x81\x88\xb4\xc0\x80\xc0\x85\x00') + def test_alloc_multiple(self): + p = Program() + p.parse_lines([ + "db d0 [1,2,3,4]", + "db d1 [10,11,12,13]" + ]) + ex = ffi.Environment(p()) + self.assertEqual(ex(), ffi.RunRet.STOP) + self.assertEqual(ex.getdat(0), [1,2,3,4]) + self.assertEqual(ex.getdat(1), [0x10,0x11,0x12,0x13]) class SCEnv(ffi.Environment): def syscall(self, s): diff --git a/c_test/encode_decode.c b/c_test/encode_decode.c index 
835305c..2366049 100644 --- a/c_test/encode_decode.c +++ b/c_test/encode_decode.c @@ -11,188 +11,17 @@ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ + #include #include #include #include -#include "../creole.c" - -#if 0 -struct seq { - creole_word max; - unsigned encode_to; - unsigned high_bits; - - unsigned char minbuf[7]; - unsigned char maxbuf[7]; -}; - -void bprint(unsigned char c) { - int i; - - for (i = 0; i < 8; i++) { - printf("%u", (c >> (7 - i)) & 1); - } -} - -void bprintb(unsigned char *b, int len) { - while (len-- > 0) { - bprint(*b++); - printf(" "); - } -} - -static void encode_byte_seq(struct seq *s) { - creole_word i = 0; - int j; - unsigned char buf[7]; - - for (;;) { - - assert(creole_encode(i, s->encode_to, s->high_bits, - buf) == 1); - if (memcmp(s->minbuf, buf, s->encode_to) != 0) { - printf("0x%X ", i); - bprintb(s->minbuf, s->encode_to); - printf("|"); - bprintb(buf, s->encode_to); - printf("\n"); - abort(); - } - - if (i == s->max) - break; - i++; - - for (j = s->encode_to - 1; j > 0; j--) { - if (s->minbuf[j] == 0xBF) { - s->minbuf[j] = 0x80; - } else { - s->minbuf[j]++; - break; - } - } - - if (j == 0) - s->minbuf[0]++; - } - assert(memcmp(s->maxbuf, s->minbuf, s->encode_to) == 0); -} - -static void encode_1(void) { - struct seq s; - - s.max = 0x7F; - s.encode_to = 1; - s.high_bits = 0; - s.minbuf[0] = 0x00; - s.maxbuf[0] = 0x7F; - - encode_byte_seq(&s); -} - -static void encode_2(unsigned high_bits) { - struct seq s; - s.high_bits = high_bits; - - memset(s.maxbuf, 0xBF, sizeof(s.maxbuf)); - memset(s.minbuf, 0x80, sizeof(s.minbuf)); - s.max = 0x7F; - s.encode_to = 2; - s.maxbuf[0] = s.minbuf[0] = 0xC0 | (high_bits << 1); - s.maxbuf[0] = 0xC1 | (high_bits << 1); - s.minbuf[1] = 0x80; - encode_byte_seq(&s); -} - 
-static void encode_3(unsigned high_bits) { - struct seq s; - s.high_bits = high_bits; - - memset(s.maxbuf, 0xBF, sizeof(s.maxbuf)); - memset(s.minbuf, 0x80, sizeof(s.minbuf)); - s.max = 0xFFF; - s.encode_to = 3; - s.minbuf[0] = 0xE0 | high_bits; - s.maxbuf[0] = 0xE0 | high_bits; - s.minbuf[1] = 0x80; - encode_byte_seq(&s); -} - -static void encode_4(unsigned high_bits) { - struct seq s; - s.high_bits = high_bits; - - memset(s.maxbuf, 0xBF, sizeof(s.maxbuf)); - memset(s.minbuf, 0x80, sizeof(s.minbuf)); - s.max = 0x1FFFF; - s.encode_to = 4; - s.maxbuf[0] = s.minbuf[0] = 0xF0 | (high_bits >> 1); - s.minbuf[1] = 0x80 | ((high_bits & 0x1) << 5); - s.maxbuf[1] = 0x9F | ((high_bits & 0x1) << 5); - encode_byte_seq(&s); -} - -static void encode_5(unsigned high_bits) { - struct seq s; - s.high_bits = high_bits; - - memset(s.maxbuf, 0xBF, sizeof(s.maxbuf)); - memset(s.minbuf, 0x80, sizeof(s.minbuf)); - s.max = 0x3FFFFF; - s.encode_to = 5; - s.maxbuf[0] = s.minbuf[0] = 0xF8 | (high_bits >> 2); - s.minbuf[1] = 0x80 | ((high_bits & 0x3) << 4); - s.maxbuf[1] = 0x8F | ((high_bits & 0x3) << 4); - encode_byte_seq(&s); -} - -static void encode_6(unsigned high_bits) { - struct seq s; - s.high_bits = high_bits; - - memset(s.maxbuf, 0xBF, sizeof(s.maxbuf)); - memset(s.minbuf, 0x80, sizeof(s.minbuf)); - s.max = 0x7FFFFFF; - s.encode_to = 6; - s.maxbuf[0] = s.minbuf[0] = 0xFC | (high_bits >> 3); - s.minbuf[1] = 0x80 | ((high_bits & 0x7) << 3); - s.maxbuf[1] = 0x87 | ((high_bits & 0x7) << 3); - encode_byte_seq(&s); -} - -static void encode_7(unsigned high_bits) { - struct seq s; - s.high_bits = high_bits; - - memset(s.maxbuf, 0xBF, sizeof(s.maxbuf)); - memset(s.minbuf, 0x80, sizeof(s.minbuf)); - s.max = 0xFFFFFFFF; - s.encode_to = 7; - s.maxbuf[0] = s.minbuf[0] = 0xFE; - s.minbuf[1] = 0x80 | (high_bits << 2); - s.maxbuf[1] = 0x83 | (high_bits << 2); - encode_byte_seq(&s); -} - -static void test_encode(void) { - void (*tests[6])(unsigned) = {encode_2, encode_3, encode_4, encode_5, 
encode_6, encode_7}; - unsigned high_bits; - unsigned test; - encode_1(); - -# pragma omp parallel for collapse(2) num_threads(8) - for (high_bits = 0; high_bits < 16; high_bits++) { - for (test = 0; test < 6; test++) - tests[test](high_bits); - } -} -#endif +#include "../creole.h" void encode_decode_byte_seq(creole_word max, unsigned encode_to, unsigned high_bits) { unsigned char buf[7]; struct creole_reader r = {0}; - struct word w; + struct creole_word w; creole_word i = 0; for (;;) { @@ -201,7 +30,7 @@ void encode_decode_byte_seq(creole_word max, unsigned encode_to, unsigned high_b assert(creole_encode(i, encode_to, high_bits, buf) == 1); - assert(decode_seq(&r, &w) == 1); + assert(creole_decode(&r, &w) == 1); assert(w.len == encode_to); if (w.high_bits != high_bits) { printf("high bits %u != %u\n", w.high_bits, high_bits); diff --git a/creole.c b/creole.c index 26b0347..d3d51e4 100644 --- a/creole.c +++ b/creole.c @@ -99,16 +99,11 @@ static int read_eof(struct creole_reader *r) * * lower bits are the encoded word. */ #define MAX_HIGH_BITS 15 -struct word { - int len; - int high_bits; - creole_word word; -}; /* Decode a set of continuation bytes directly into the word. This assumes * that each continuation byte contains no high words. */ -static int read_continue(struct creole_reader *r, struct word *w, +static int read_continue(struct creole_reader *r, struct creole_word *w, int to_read) { int i; @@ -137,7 +132,7 @@ static int read_continue(struct creole_reader *r, struct word *w, * byte. */ #define START_BYTE_NUM 7 -static int parse_start_byte(unsigned char c, struct word *w) +static int parse_start_byte(unsigned char c, struct creole_word *w) { static const struct { /* The algorithm compares the mask to the start byte @@ -200,7 +195,7 @@ static int parse_start_byte(unsigned char c, struct word *w) /* This parses the first continuation byte if it is special. 
*/ #define SPECIAL_CONTINUE_BYTE_NUM (START_BYTE_NUM - 3) -static int parse_special_byte(unsigned char c, struct word *w) +static int parse_special_byte(unsigned char c, struct creole_word *w) { /* The index denotes the amount of high bits that were in * the start byte. This is the amount that the stored value @@ -232,7 +227,7 @@ static int parse_special_byte(unsigned char c, struct word *w) } /* Parse an entire Pseudo-UTF8 sequence. */ -static int decode_seq(struct creole_reader *r, struct word *w) +int creole_decode(struct creole_reader *r, struct creole_word *w) { int r_ret; int to_read; @@ -368,11 +363,11 @@ static enum creole_word_flag arg_get_type(unsigned high_bits) static enum creole_compiler_ret parse_line(struct creole_env *env, struct ins *ins, struct creole_reader *r) { - struct word w = {0}; + struct creole_word w = {0}; int i; ins->start = r->p; - if (!decode_seq(r, &w)) + if (!creole_decode(r, &w)) return CREOLE_OPCODE_READ_ERROR; ins->opcode = w.word; @@ -383,7 +378,7 @@ parse_line(struct creole_env *env, struct ins *ins, struct creole_reader *r) if (opcode_info[ins->opcode].arglen > CREOLE_MAX_ARG) return CREOLE_OPCODE_MALFORMED; for (i = 0; i < opcode_info[ins->opcode].arglen; i++) { - if (!decode_seq(r, &w)) + if (!creole_decode(r, &w)) return CREOLE_ARG_READ_ERROR; if (w.len == 1) return CREOLE_ARG_MALFORMED; @@ -399,7 +394,7 @@ parse_line(struct creole_env *env, struct ins *ins, struct creole_reader *r) if (ins->opcode == CREOLE_DB) { ins->datapt = r->p; do { - if (!decode_seq(r, &w)) + if (!creole_decode(r, &w)) return CREOLE_ARG_READ_ERROR; } while (w.len != 1); if (w.word != 0) @@ -408,7 +403,7 @@ parse_line(struct creole_env *env, struct ins *ins, struct creole_reader *r) } ins->datapt = NULL; - if (!decode_seq(r, &w)) + if (!creole_decode(r, &w)) return CREOLE_LAST_READ_ERROR; if (w.word != 0 || w.len != 1) return CREOLE_LAST_MALFORMED; @@ -563,6 +558,8 @@ enum creole_run_ret creole_step(struct creole_env *env, creole_word *sc) return 
CREOLE_RUN_DECODE_ERROR; switch (ins.opcode) { + case CREOLE_DB: + break; case CREOLE_PUSH: check(read_val(env, &ins, 0, &a1)); check(creole_push(env, a1)); diff --git a/creole.h b/creole.h index b98dfa5..9ca8247 100644 --- a/creole.h +++ b/creole.h @@ -84,6 +84,12 @@ enum creole_run_ret { CREOLE_RUN_RET_LEN }; +struct creole_word { + int len; /* number of bytes the encoded sequence occupied */ + int high_bits; /* high (type) bits carried by the sequence */ + creole_word word; /* decoded numeric value */ +}; + struct creole_reader { unsigned char *p; size_t left; @@ -103,6 +109,7 @@ struct creole_env { struct creole_reader r_start; }; +int creole_decode(struct creole_reader *r, struct creole_word *w); int creole_encode(creole_word i, unsigned encode_to, unsigned high_bits, unsigned char buf[7]); enum creole_compiler_ret creole_compile(struct creole_env *env);