export creole_decode; add db test
This commit is contained in:
parent
b7eed7d44e
commit
09d636c02c
2
Makefile
2
Makefile
|
@ -6,7 +6,7 @@ asm/libcreole.so: creole.c creole.h
|
|||
test_asm: asm/libcreole.so
|
||||
cd asm && python3 test.py -f
|
||||
c_test/encode_decode: c_test/encode_decode.c creole.c creole.h
|
||||
$(CC) c_test/encode_decode.c -Wall -pedantic -std=c89 -g -fopenmp -o c_test/encode_decode
|
||||
$(CC) creole.c c_test/encode_decode.c -Wall -pedantic -std=c89 -g -fopenmp -o c_test/encode_decode
|
||||
# c_test/encode_decode
|
||||
c_test/creole: c_test/creole.c creole.c creole.h c_test/greatest.h
|
||||
$(CC) -g c_test/creole.c -Wall -pedantic -std=c89 -o c_test/creole
|
||||
|
|
27
README.md
27
README.md
|
@ -1,9 +1,15 @@
|
|||
Creole is a bytecode designed for simple implementations.
|
||||
Creole is a bytecode designed for microcontrollers. Its C source file
|
||||
is less than 1000 lines long and does not depend on the C standard
|
||||
library.
|
||||
|
||||
## Bytecode Format
|
||||
|
||||
Each creole line consists of pseudo-UTF-8 characters. The first byte
|
||||
is an unsigned number between 0 and 127 (the high bit is clear). Each
|
||||
The syntax of a creole instruction is
|
||||
|
||||
[1 byte opcode][2 or more byte instruction]*[1 byte all zero]
|
||||
|
||||
Each creole instruction consists of pseudo-UTF-8 characters. The first
|
||||
byte is an unsigned number between 0 and 127 (the high bit is clear). Each
|
||||
succeeding pseudo-UTF-8 character is encoded as follows:
|
||||
|
||||
* `110HHHHx 10xxxxxx`
|
||||
|
@ -13,19 +19,20 @@ suceeding pseudo-UTF-8 character is encoded as follows:
|
|||
* `1111110H 10HHHxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx`
|
||||
* `11111110 10HHHHxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx`
|
||||
|
||||
The first four bytes determine the type. The LSB high bit determines
|
||||
if the encoded value is a register (`0001`) or immediate (`0010`).
|
||||
The first four bits determine the type. The least significant high bit determines
|
||||
if the encoded value is a register (`0001`) or immediate (`00X0`).
|
||||
The second bit from LSB determines if the value should be treated
|
||||
as a signed 32 bit two's complement number (`001X`) or should be
|
||||
treated as an unsigned 32 bit number (`000X`).
|
||||
treated as an unsigned 32 bit number (`000X`). All other values for
|
||||
the high bits are reserved.
|
||||
|
||||
All other values are reserved. Overlong values are allowed, and for some
|
||||
argument values they are necessary. All lines are terminated by a byte
|
||||
of all zeros.
|
||||
The rest of the bits encode a number that is up to 32 bits long.
|
||||
Overlong encodings are accepted and sometimes used.
|
||||
|
||||
## Assembler
|
||||
|
||||
The macro assembler is Python (see the asm directory).
|
||||
The macro assembler is Python (see the asm directory). The macro
|
||||
assembler supports virtual instructions and jumps with named labels.
|
||||
|
||||
## Design Philosophy
|
||||
|
||||
|
|
25
asm/ffi.py
25
asm/ffi.py
|
@ -44,6 +44,11 @@ class RunRet(Enum):
|
|||
def is_halt(self):
|
||||
return not (self == RunRet.CONTINUE or self == RunRet.SYSCALL)
|
||||
|
||||
class CWord(Structure):
|
||||
_fields_ = [("len", c_int),
|
||||
("high_bits", c_int),
|
||||
("word", c_uint)]
|
||||
|
||||
class CReader(Structure):
|
||||
_fields_ = [("p", POINTER(c_ubyte)),
|
||||
("left", c_size_t)]
|
||||
|
@ -78,6 +83,8 @@ class InvalidSyscallError(Exception):
|
|||
class CompileError(Exception):
|
||||
def __init__(self, r):
|
||||
self.r = r
|
||||
class DataOverflowError(Exception):
|
||||
pass
|
||||
|
||||
class Environment:
|
||||
def getreg(self, reg, signed=False):
|
||||
|
@ -106,6 +113,24 @@ class Environment:
|
|||
return creole.from_2c(self.cenv.stk[stk])
|
||||
else:
|
||||
return self.cenv.stk[stk]
|
||||
def getdat(self, n):
|
||||
if n >= self.cenv.datlen or n < 0:
|
||||
raise DataOverflowError(n)
|
||||
rdr = CReader()
|
||||
rdr.p = self.cenv.dats[n]
|
||||
# Python does not allow for direct pointer arithmetic
|
||||
rdr_p_v = addressof(rdr.p.contents)
|
||||
r_start_p_v = addressof(self.cenv.r_start.p)
|
||||
|
||||
rdr.left = self.cenv.r_start.left - (rdr_p_v - r_start_p_v)
|
||||
|
||||
l = []
|
||||
w = CWord()
|
||||
while dll.creole_decode(byref(rdr), byref(w)) == 1:
|
||||
if w.word == 0 and w.len == 1:
|
||||
break
|
||||
l.append(w.word)
|
||||
return l
|
||||
|
||||
def pop(self):
|
||||
if stk == 0:
|
||||
|
|
10
asm/test.py
10
asm/test.py
|
@ -438,6 +438,16 @@ class DataTest(unittest.TestCase):
|
|||
p = Program()
|
||||
p.parse_asm_line("db d0 [4d2,1234,0,5]")
|
||||
self.assertEqual(p(), b'\x0b\xc0\x80\xe0\x93\x92\xf0\x81\x88\xb4\xc0\x80\xc0\x85\x00')
|
||||
def test_alloc_multiple(self):
|
||||
p = Program()
|
||||
p.parse_lines([
|
||||
"db d0 [1,2,3,4]",
|
||||
"db d1 [10,11,12,13]"
|
||||
])
|
||||
ex = ffi.Environment(p())
|
||||
self.assertEqual(ex(), ffi.RunRet.STOP)
|
||||
self.assertEqual(ex.getdat(0), [1,2,3,4])
|
||||
self.assertEqual(ex.getdat(1), [0x10,0x11,0x12,0x13])
|
||||
|
||||
class SCEnv(ffi.Environment):
|
||||
def syscall(self, s):
|
||||
|
|
|
@ -11,188 +11,17 @@ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
|||
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
|
||||
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "../creole.c"
|
||||
|
||||
#if 0
|
||||
struct seq {
|
||||
creole_word max;
|
||||
unsigned encode_to;
|
||||
unsigned high_bits;
|
||||
|
||||
unsigned char minbuf[7];
|
||||
unsigned char maxbuf[7];
|
||||
};
|
||||
|
||||
void bprint(unsigned char c) {
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 8; i++) {
|
||||
printf("%u", (c >> (7 - i)) & 1);
|
||||
}
|
||||
}
|
||||
|
||||
void bprintb(unsigned char *b, int len) {
|
||||
while (len-- > 0) {
|
||||
bprint(*b++);
|
||||
printf(" ");
|
||||
}
|
||||
}
|
||||
|
||||
static void encode_byte_seq(struct seq *s) {
|
||||
creole_word i = 0;
|
||||
int j;
|
||||
unsigned char buf[7];
|
||||
|
||||
for (;;) {
|
||||
|
||||
assert(creole_encode(i, s->encode_to, s->high_bits,
|
||||
buf) == 1);
|
||||
if (memcmp(s->minbuf, buf, s->encode_to) != 0) {
|
||||
printf("0x%X ", i);
|
||||
bprintb(s->minbuf, s->encode_to);
|
||||
printf("|");
|
||||
bprintb(buf, s->encode_to);
|
||||
printf("\n");
|
||||
abort();
|
||||
}
|
||||
|
||||
if (i == s->max)
|
||||
break;
|
||||
i++;
|
||||
|
||||
for (j = s->encode_to - 1; j > 0; j--) {
|
||||
if (s->minbuf[j] == 0xBF) {
|
||||
s->minbuf[j] = 0x80;
|
||||
} else {
|
||||
s->minbuf[j]++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (j == 0)
|
||||
s->minbuf[0]++;
|
||||
}
|
||||
assert(memcmp(s->maxbuf, s->minbuf, s->encode_to) == 0);
|
||||
}
|
||||
|
||||
static void encode_1(void) {
|
||||
struct seq s;
|
||||
|
||||
s.max = 0x7F;
|
||||
s.encode_to = 1;
|
||||
s.high_bits = 0;
|
||||
s.minbuf[0] = 0x00;
|
||||
s.maxbuf[0] = 0x7F;
|
||||
|
||||
encode_byte_seq(&s);
|
||||
}
|
||||
|
||||
static void encode_2(unsigned high_bits) {
|
||||
struct seq s;
|
||||
s.high_bits = high_bits;
|
||||
|
||||
memset(s.maxbuf, 0xBF, sizeof(s.maxbuf));
|
||||
memset(s.minbuf, 0x80, sizeof(s.minbuf));
|
||||
s.max = 0x7F;
|
||||
s.encode_to = 2;
|
||||
s.maxbuf[0] = s.minbuf[0] = 0xC0 | (high_bits << 1);
|
||||
s.maxbuf[0] = 0xC1 | (high_bits << 1);
|
||||
s.minbuf[1] = 0x80;
|
||||
encode_byte_seq(&s);
|
||||
}
|
||||
|
||||
static void encode_3(unsigned high_bits) {
|
||||
struct seq s;
|
||||
s.high_bits = high_bits;
|
||||
|
||||
memset(s.maxbuf, 0xBF, sizeof(s.maxbuf));
|
||||
memset(s.minbuf, 0x80, sizeof(s.minbuf));
|
||||
s.max = 0xFFF;
|
||||
s.encode_to = 3;
|
||||
s.minbuf[0] = 0xE0 | high_bits;
|
||||
s.maxbuf[0] = 0xE0 | high_bits;
|
||||
s.minbuf[1] = 0x80;
|
||||
encode_byte_seq(&s);
|
||||
}
|
||||
|
||||
static void encode_4(unsigned high_bits) {
|
||||
struct seq s;
|
||||
s.high_bits = high_bits;
|
||||
|
||||
memset(s.maxbuf, 0xBF, sizeof(s.maxbuf));
|
||||
memset(s.minbuf, 0x80, sizeof(s.minbuf));
|
||||
s.max = 0x1FFFF;
|
||||
s.encode_to = 4;
|
||||
s.maxbuf[0] = s.minbuf[0] = 0xF0 | (high_bits >> 1);
|
||||
s.minbuf[1] = 0x80 | ((high_bits & 0x1) << 5);
|
||||
s.maxbuf[1] = 0x9F | ((high_bits & 0x1) << 5);
|
||||
encode_byte_seq(&s);
|
||||
}
|
||||
|
||||
static void encode_5(unsigned high_bits) {
|
||||
struct seq s;
|
||||
s.high_bits = high_bits;
|
||||
|
||||
memset(s.maxbuf, 0xBF, sizeof(s.maxbuf));
|
||||
memset(s.minbuf, 0x80, sizeof(s.minbuf));
|
||||
s.max = 0x3FFFFF;
|
||||
s.encode_to = 5;
|
||||
s.maxbuf[0] = s.minbuf[0] = 0xF8 | (high_bits >> 2);
|
||||
s.minbuf[1] = 0x80 | ((high_bits & 0x3) << 4);
|
||||
s.maxbuf[1] = 0x8F | ((high_bits & 0x3) << 4);
|
||||
encode_byte_seq(&s);
|
||||
}
|
||||
|
||||
static void encode_6(unsigned high_bits) {
|
||||
struct seq s;
|
||||
s.high_bits = high_bits;
|
||||
|
||||
memset(s.maxbuf, 0xBF, sizeof(s.maxbuf));
|
||||
memset(s.minbuf, 0x80, sizeof(s.minbuf));
|
||||
s.max = 0x7FFFFFF;
|
||||
s.encode_to = 6;
|
||||
s.maxbuf[0] = s.minbuf[0] = 0xFC | (high_bits >> 3);
|
||||
s.minbuf[1] = 0x80 | ((high_bits & 0x7) << 3);
|
||||
s.maxbuf[1] = 0x87 | ((high_bits & 0x7) << 3);
|
||||
encode_byte_seq(&s);
|
||||
}
|
||||
|
||||
static void encode_7(unsigned high_bits) {
|
||||
struct seq s;
|
||||
s.high_bits = high_bits;
|
||||
|
||||
memset(s.maxbuf, 0xBF, sizeof(s.maxbuf));
|
||||
memset(s.minbuf, 0x80, sizeof(s.minbuf));
|
||||
s.max = 0xFFFFFFFF;
|
||||
s.encode_to = 7;
|
||||
s.maxbuf[0] = s.minbuf[0] = 0xFE;
|
||||
s.minbuf[1] = 0x80 | (high_bits << 2);
|
||||
s.maxbuf[1] = 0x83 | (high_bits << 2);
|
||||
encode_byte_seq(&s);
|
||||
}
|
||||
|
||||
static void test_encode(void) {
|
||||
void (*tests[6])(unsigned) = {encode_2, encode_3, encode_4, encode_5, encode_6, encode_7};
|
||||
unsigned high_bits;
|
||||
unsigned test;
|
||||
encode_1();
|
||||
|
||||
# pragma omp parallel for collapse(2) num_threads(8)
|
||||
for (high_bits = 0; high_bits < 16; high_bits++) {
|
||||
for (test = 0; test < 6; test++)
|
||||
tests[test](high_bits);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#include "../creole.h"
|
||||
|
||||
void encode_decode_byte_seq(creole_word max, unsigned encode_to, unsigned high_bits) {
|
||||
unsigned char buf[7];
|
||||
struct creole_reader r = {0};
|
||||
struct word w;
|
||||
struct creole_word w;
|
||||
creole_word i = 0;
|
||||
|
||||
for (;;) {
|
||||
|
@ -201,7 +30,7 @@ void encode_decode_byte_seq(creole_word max, unsigned encode_to, unsigned high_b
|
|||
assert(creole_encode(i, encode_to, high_bits,
|
||||
buf) == 1);
|
||||
|
||||
assert(decode_seq(&r, &w) == 1);
|
||||
assert(creole_decode(&r, &w) == 1);
|
||||
assert(w.len == encode_to);
|
||||
if (w.high_bits != high_bits) {
|
||||
printf("high bits %u != %u\n", w.high_bits, high_bits);
|
||||
|
|
25
creole.c
25
creole.c
|
@ -99,16 +99,11 @@ static int read_eof(struct creole_reader *r)
|
|||
* * lower bits are the encoded word.
|
||||
*/
|
||||
#define MAX_HIGH_BITS 15
|
||||
struct word {
|
||||
int len;
|
||||
int high_bits;
|
||||
creole_word word;
|
||||
};
|
||||
|
||||
/* Decode a set of continuation bytes directly into the word. This assumes
|
||||
* that each continuation byte contains no high words.
|
||||
*/
|
||||
static int read_continue(struct creole_reader *r, struct word *w,
|
||||
static int read_continue(struct creole_reader *r, struct creole_word *w,
|
||||
int to_read)
|
||||
{
|
||||
int i;
|
||||
|
@ -137,7 +132,7 @@ static int read_continue(struct creole_reader *r, struct word *w,
|
|||
* byte.
|
||||
*/
|
||||
#define START_BYTE_NUM 7
|
||||
static int parse_start_byte(unsigned char c, struct word *w)
|
||||
static int parse_start_byte(unsigned char c, struct creole_word *w)
|
||||
{
|
||||
static const struct {
|
||||
/* The algorithm compares the mask to the start byte
|
||||
|
@ -200,7 +195,7 @@ static int parse_start_byte(unsigned char c, struct word *w)
|
|||
|
||||
/* This parses the first continuation byte if it is special. */
|
||||
#define SPECIAL_CONTINUE_BYTE_NUM (START_BYTE_NUM - 3)
|
||||
static int parse_special_byte(unsigned char c, struct word *w)
|
||||
static int parse_special_byte(unsigned char c, struct creole_word *w)
|
||||
{
|
||||
/* The index denotes the amount of high bits that were in
|
||||
* the start byte. This is the amount that the stored value
|
||||
|
@ -232,7 +227,7 @@ static int parse_special_byte(unsigned char c, struct word *w)
|
|||
}
|
||||
|
||||
/* Parse an entire Pseudo-UTF8 sequence. */
|
||||
static int decode_seq(struct creole_reader *r, struct word *w)
|
||||
int creole_decode(struct creole_reader *r, struct creole_word *w)
|
||||
{
|
||||
int r_ret;
|
||||
int to_read;
|
||||
|
@ -368,11 +363,11 @@ static enum creole_word_flag arg_get_type(unsigned high_bits)
|
|||
static enum creole_compiler_ret
|
||||
parse_line(struct creole_env *env, struct ins *ins, struct creole_reader *r)
|
||||
{
|
||||
struct word w = {0};
|
||||
struct creole_word w = {0};
|
||||
int i;
|
||||
|
||||
ins->start = r->p;
|
||||
if (!decode_seq(r, &w))
|
||||
if (!creole_decode(r, &w))
|
||||
return CREOLE_OPCODE_READ_ERROR;
|
||||
|
||||
ins->opcode = w.word;
|
||||
|
@ -383,7 +378,7 @@ parse_line(struct creole_env *env, struct ins *ins, struct creole_reader *r)
|
|||
if (opcode_info[ins->opcode].arglen > CREOLE_MAX_ARG)
|
||||
return CREOLE_OPCODE_MALFORMED;
|
||||
for (i = 0; i < opcode_info[ins->opcode].arglen; i++) {
|
||||
if (!decode_seq(r, &w))
|
||||
if (!creole_decode(r, &w))
|
||||
return CREOLE_ARG_READ_ERROR;
|
||||
if (w.len == 1)
|
||||
return CREOLE_ARG_MALFORMED;
|
||||
|
@ -399,7 +394,7 @@ parse_line(struct creole_env *env, struct ins *ins, struct creole_reader *r)
|
|||
if (ins->opcode == CREOLE_DB) {
|
||||
ins->datapt = r->p;
|
||||
do {
|
||||
if (!decode_seq(r, &w))
|
||||
if (!creole_decode(r, &w))
|
||||
return CREOLE_ARG_READ_ERROR;
|
||||
} while (w.len != 1);
|
||||
if (w.word != 0)
|
||||
|
@ -408,7 +403,7 @@ parse_line(struct creole_env *env, struct ins *ins, struct creole_reader *r)
|
|||
}
|
||||
|
||||
ins->datapt = NULL;
|
||||
if (!decode_seq(r, &w))
|
||||
if (!creole_decode(r, &w))
|
||||
return CREOLE_LAST_READ_ERROR;
|
||||
if (w.word != 0 || w.len != 1)
|
||||
return CREOLE_LAST_MALFORMED;
|
||||
|
@ -563,6 +558,8 @@ enum creole_run_ret creole_step(struct creole_env *env, creole_word *sc)
|
|||
return CREOLE_RUN_DECODE_ERROR;
|
||||
|
||||
switch (ins.opcode) {
|
||||
case CREOLE_DB:
|
||||
break;
|
||||
case CREOLE_PUSH:
|
||||
check(read_val(env, &ins, 0, &a1));
|
||||
check(creole_push(env, a1));
|
||||
|
|
7
creole.h
7
creole.h
|
@ -84,6 +84,12 @@ enum creole_run_ret {
|
|||
CREOLE_RUN_RET_LEN
|
||||
};
|
||||
|
||||
struct creole_word {
|
||||
int len;
|
||||
int high_bits;
|
||||
creole_word word;
|
||||
};
|
||||
|
||||
struct creole_reader {
|
||||
unsigned char *p;
|
||||
size_t left;
|
||||
|
@ -103,6 +109,7 @@ struct creole_env {
|
|||
struct creole_reader r_start;
|
||||
};
|
||||
|
||||
int creole_decode(struct creole_reader *r, struct creole_word *w);
|
||||
int creole_encode(creole_word i, unsigned encode_to, unsigned high_bits,
|
||||
unsigned char buf[7]);
|
||||
enum creole_compiler_ret creole_compile(struct creole_env *env);
|
||||
|
|
Loading…
Reference in New Issue