1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
|
#include "creole.h"
/*************************************************************************
* Static information
************************************************************************/
/* Arguments to opcodes can accept the following:
* immediate values only (as of now, no values are like this)
* register values only (push, pop, etc.)
* either values (math operations)
* labels (jumps)
* none (do not give an argument)
*/
enum creole_arg_type {
TYPE_NONE,
TYPE_IMM,
TYPE_REG,
TYPE_VAL,
TYPE_LAB,
CREOLE_ARG_TYPE_LEN
};
/* C99+ allows for designating the array index when initializing arrays:
[i] = v,
* in C89 indicies are implicit from 0 to the maximum filled-in value.
*/
#define defop(s, n, a1, a2, a3) {n, {a1, a2, a3}}
static const struct {
unsigned arglen;
enum creole_arg_type argtype[CREOLE_MAX_ARG];
} opcode_info[CREOLE_OPCODE_LEN] = {
defop(NOOP, 0, TYPE_NONE, TYPE_NONE, TYPE_NONE),
defop(PUSH, 1, TYPE_VAL, TYPE_NONE, TYPE_NONE),
defop(POP, 1, TYPE_REG, TYPE_NONE, TYPE_NONE),
defop(ADD, 3, TYPE_REG, TYPE_VAL, TYPE_VAL),
defop(MUL, 3, TYPE_REG, TYPE_VAL, TYPE_VAL),
defop(DIV, 3, TYPE_REG, TYPE_VAL, TYPE_VAL),
defop(JL, 3, TYPE_LAB, TYPE_VAL, TYPE_VAL),
defop(CLB, 1, TYPE_LAB, TYPE_NONE, TYPE_NONE),
defop(SYS, 1, TYPE_VAL, TYPE_NONE, TYPE_NONE)
};
/*************************************************************************
* Reading from the buffer
************************************************************************/
static int read(struct creole_reader *r)
{
if (r->left == 0)
return -1;
r->left--;
return *r->p++;
}
static int read_eof(struct creole_reader *r)
{
return r->left == 0;
}
/*************************************************************************
* Pseudo-UTF-8 lexing
*
* Pseudo-UTF-8 is based off of UTF-8 but adds more
* bytes and allows (requires!) overlong encodings.
*
* Possible values:
* 0xxxxxxx (7 bits)
* 110HHHHx 10xxxxxx (11 bits)
* 1110HHHH 10xxxxxx 10xxxxxx (16 bits)
* 11110HHH 10Hxxxxx 10xxxxxx 10xxxxxx (21 bits)
* 111110HH 10HHxxxx 10xxxxxx 10xxxxxx 10xxxxxx (26 bits)
* 1111110H 10HHHxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx (31 bits)
* 11111110 10HHHHxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx (36 bits)
* 10xxxxxx
************************************************************************/
/* A Psuedo-UTF-8 sequence can be either
*
* * A 1 byte sequence, where the lower 7 bits are the encoded
* * word (no high bits).
* * A multi-byte sequence where the 4 MSB are flags, and the
* * lower bits are the encoded word.
*/
#define MAX_HIGH_BITS 15
struct word {
int len;
int high_bits;
creole_word word;
};
/* Decode a set of continuation bytes directly into the word. This assumes
* that each continuation byte contains no high words.
*/
static int read_continue(struct creole_reader *r, struct word *w,
int to_read)
{
int i;
int r_ret;
unsigned char c;
for (i = 0; i < to_read; i++) {
r_ret = read(r);
if (r_ret < 0) {
return 0;
}
/* Characters might not be 8 bits! */
c = (unsigned char)(r_ret & 0xFF);
if (c >> 6 != 0x2) {
return 0;
}
w->word = (w->word << 6) | (c & 0x3F);
}
return 1;
}
/* Start bytes must be treated differently. Depending on the scenario,
* start bytes will contain parts of the encoded word and high-bit flags.
* In some cases, not all of the high-bit flags are part of the start
* byte.
*/
#define START_BYTE_NUM 7
static int parse_start_byte(unsigned char c, struct word *w)
{
static const struct {
/* The algorithm compares the mask to the start byte
* by shifting both to the right by the amount of 'x's
* (blank spaces). The array is arranged in reverse
* order so that the index indicates the amount of
* bits to shift.
*/
unsigned char mask;
/* The word bits, if they exist, always start from the
* LSB, so there is no need to shift the bits away. The
* word_mask gets the low bits. If there are no bits, set
* to 0.
*/
unsigned char word_mask;
/* The high bits may not start from the LSB. There needs
* to be a shift to get the bits to the LSB, and a mask
* to discard the higher bits.
*/
unsigned char high_bit_mask;
int high_bit_shift;
/* The amount of NORMAL continuation bytes to read.
* This does NOT include continuation bytes that have
* high-bit flags in them.
*/
int to_read;
} start_data[START_BYTE_NUM-1] = {
{0xFE, 0x00, 0x0, 0, 5}, /* 11111110 */
{0xFC, 0x00, 0x1, 0, 4}, /* 1111110x */
{0xF8, 0x00, 0x3, 0, 3}, /* 111110xx */
{0xF0, 0x00, 0x7, 0, 2}, /* 11110xxx */
{0xE0, 0x00, 0xF, 0, 2}, /* 1110xxxx */
{0xC0, 0x01, 0xF, 1, 1} /* 110xxxxx */
};
int i;
for (i = 0; i < START_BYTE_NUM-1; i++) {
if (c >> i == start_data[i].mask >> i) {
w->len = START_BYTE_NUM - i;
w->word = c & start_data[i].word_mask;
w->high_bits = (c >> start_data[i].high_bit_shift)
& start_data[i].high_bit_mask;
return start_data[i].to_read;
}
}
/* i == 7 */
if (c >> 7 == 0) {
w->len = 1;
w->word = c;
w->high_bits = 0;
return 0;
}
return -1;
}
/* This parses the first continuation byte if it is special. */
#define SPECIAL_CONTINUE_BYTE_NUM (START_BYTE_NUM - 3)
static int parse_special_byte(unsigned char c, struct word *w)
{
/* The index denotes the amount of high bits that were in
* the start byte. This is the amount that the stored value
* must be shifted.
*
* The amount of bits that must be shifted out in the continue
* byte increase with the index. The amount shifted is (i + 2).
*
* Each value stored in the array is the mask applied after
* shifting the continue byte bits.
*/
static const unsigned char mask[SPECIAL_CONTINUE_BYTE_NUM] = {
0x1, /* 11110HHH 10Hxxxxx */
0x3, /* 111110HH 10HHxxxx */
0x7, /* 1111110H 10HHHxxx */
0xF /* 11111110 10HHHHxx */
};
static const unsigned char wordmask[SPECIAL_CONTINUE_BYTE_NUM] = {
0x1F, 0xF, 0x7, 0x3
};
int i = w->len - 4;
if (i >= SPECIAL_CONTINUE_BYTE_NUM)
return 0;
w->high_bits = (w->high_bits << (i + 1)) | ((c >> (5 - i)) & mask[i]);
w->word = c & wordmask[i];
return 1;
}
/* Parse an entire Pseudo-UTF8 sequence. */
static int decode_seq(struct creole_reader *r, struct word *w)
{
int r_ret;
int to_read;
w->high_bits = 0;
r_ret = read(r);
if (r_ret < 0)
return 0;
to_read = parse_start_byte((unsigned char)(r_ret & 0xFF), w);
if (to_read < 0)
return 0;
/* If to_read is not one less than w->len, that means there are
* high bits in the first continuation byte.
*/
if (w->len - to_read > 1) {
r_ret = read(r);
if (r_ret < 0)
return 0;
if (!parse_special_byte((unsigned char)(r_ret & 0xFF), w))
return 0;
}
return read_continue(r, w, to_read);
}
int creole_encode(creole_word i, unsigned encode_to, unsigned high_bits,
unsigned char buf[7])
{
static const struct {
creole_word max;
unsigned char b1_mask;
int high_bit_shift_b1;
int high_bit_shift_to_right_b1;
int data_shift_b1;
int high_bit_mask_b2;
int high_bit_shift_b2;
unsigned char b2_data_mask;
} d[] = {
{0x7F, 0xC0, 0, 1, 6, 0x0, 0, 0x3F}, /* 2 */
{0xFFF, 0xE0, 0, 0, 12, 0x0, 0, 0x3F}, /* 3 */
{0x1FFFF, 0xF0, 1, 0, 17, 0x1, 5, 0x1F}, /* 4 */
{0x3FFFFF, 0xF8, 2, 0, 22, 0x3, 4, 0x0F}, /* 5 */
{0x7FFFFFF, 0xFC, 3, 0, 27, 0x7, 3, 0x07}, /* 6 */
{0xFFFFFFFF, 0xFE, 4, 0, 32, 0xF, 2, 0x03} /* 7 */
};
int lb;
unsigned j;
if (encode_to > 8)
return 0;
if (encode_to == 1) {
if (i < 0x80) {
buf[0] = i;
return 1;
}
return 0;
}
lb = encode_to - 2;
if (i > d[lb].max) {
return 0;
}
buf[0] = (d[lb].b1_mask | (high_bits >> d[lb].high_bit_shift_b1
<< d[lb].high_bit_shift_to_right_b1));
/* shifts greater than or equal to the bit size of a type are
* undefined. Data in the first byte is always aligned with the LSB.
*/
if (d[lb].data_shift_b1 < sizeof(i) * CHAR_BIT)
buf[0] |= i >> d[lb].data_shift_b1;
buf[1] = 0x80 | ((high_bits & d[lb].high_bit_mask_b2)
<< d[lb].high_bit_shift_b2)
| ((i >> ((encode_to - 2) * 6))
& d[lb].b2_data_mask);
for (j = 2; j < encode_to; j++) {
buf[j] = 0x80 | ((i >> ((encode_to - j - 1) * 6)) & 0x3F);
}
return 1;
}
/*************************************************************************
* Parsing instructions
*
* This parses an entire instruction, which is
* a single byte sequence,
* zero or more multibyte sequences,
* one single byte of all zeros.
*************************************************************************/
enum creole_compiler_ret
creole_parse_line(struct creole_ins *ins, struct creole_reader *r)
{
struct word w = {0};
unsigned arg = 0;
if (!decode_seq(r, &w))
return CREOLE_OPCODE_READ_ERROR;
ins->opcode = w.word;
if (w.word >= CREOLE_ARG_TYPE_LEN || w.len != 1)
return CREOLE_OPCODE_MALFORMED;
for (arg = 0; arg < opcode_info[ins->opcode].arglen; arg++) {
if (!decode_seq(r, &w))
return CREOLE_ARG_READ_ERROR;
if (w.len == 1)
return CREOLE_ARG_MALFORMED;
ins->w[arg] = w.word;
ins->w_flags[arg] = w.high_bits;
}
if (!decode_seq(r, &w))
return CREOLE_LAST_READ_ERROR;
if (w.word != 0 || w.len != 1)
return CREOLE_LAST_MALFORMED;
return CREOLE_COMPILE_OK;
}
#if 0
/**************************************************************************
* High level compiling interface
*************************************************************************/
static void clear_instruction(struct creole_env *env,
struct creole_ins *ins)
{
memset(ins, 0, sizeof(ins));
env->prgptr--;
}
static int typecheck(enum creole_word_flag fl, enum creole_arg_type typ)
{
switch (typ) {
case TYPE_NONE: return 0;
case TYPE_IMM: return fl == CREOLE_IMMEDIATE;
case TYPE_REG: return fl == CREOLE_REGISTER;
case TYPE_VAL: return fl == CREOLE_IMMEDIATE
| fl == CREOLE_REGISTER;
case TYPE_LAB: return fl == CREOLE_IMMEDIATE;
default: return 0;
}
}
static enum creole_compiler_ret typecheck_ins(struct creole_env *env,
struct creole_ins *ins)
{
unsigned i;
for (i = 0; i < opcode_info[env->opcode].arglen; i++) {
if (!typecheck(ins->w[i],
opcode_info[env->opcode].argtype[i]))
return CREOLE_TYPE_ERROR;
}
return CREOLE_COMPILE_OK;
}
static enum creole_compiler_ret
handle_compiletime_immediate(struct creole_env *env,
struct creole_ins *ins)
{
switch (ins->opcode) {
case CREOLE_CLB:
if (ins->w[0] >= ins->lablen)
return CREOLE_LABEL_OVERFLOW;
ins->lab[ins->w[0]] = env->prgptr;
/* Delete instruction because it is a compile time
* instruction. Place next instruction in its place. */
clear_instruction(env, ins);
return CREOLE_COMPILE_OK;
case CREOLE_NOOP:
clear_instruction(env, ins);
return CREOLE_COMPILE_OK;
default:
return typecheck_ins(env, ins);
}
}
int creole_compile(struct creole_env *env, struct creole_reader *r)
{
struct creole_ins *cur_ins = env->prg;
int rcode;
while (env->prgptr < env->prglen) {
if (!creole_parse_line(cur_ins, r))
return CREOLE_PARSE_ERROR;
/* Increase prgptr here. If the instruction is a compile
* time instruction, then this will be decremented since
* the instruction will not be executed.
*/
env->prgptr++;
rcode = handle_compiletime_immediate(env, cur_ins);
if (rcode != CREOLE_COMPILE_OK)
return rcode;
if (read_eof(r))
break;
cur_ins += 1;
}
if (env->prgptr == env->prglen && *line)
return CREOLE_PROGRAM_OVERFLOW;
env->prgend = env->prgptr;
env->prgptr = 0;
return CREOLE_COMPILE_OK;
}
static creole_word read_word(struct creole_ins *ins, int i)
{
if (env->w_flags[i] == CREOLE_REGISTER)
return env->reg[env->w[i]];
else
return env->w[i];
}
int creole_step(struct creole_env *env)
{
struct creole_ins *ins = env->prg + env->prgptr;
creole_word a1, a2;
if (env->prgptr == env->prgend)
return CREOLE_STEP_STOP;
env->prgptr++;
switch (ins->opcode) {
case CREOLE_PUSH:
if (env->stkptr == env->stklen)
return CREOLE_STACK_OVERFLOW;
env->stk[env->stkptr++] = env->reg[env->w[0]];
break;
case CREOLE_POP:
if (env->stkptr == 0)
return CREOLE_STACK_OVERFLOW;
env->reg[env->w[0]] = env->stk[--env->stkptr];
break;
case CREOLE_ADD:
a1 = read_word(ins, 1);
a2 = read_word(ins, 2);
env->reg[env->w[0]] = a1 + a2;
break;
case CREOLE_MUL:
a1 = read_word(ins, 1);
a2 = read_word(ins, 2);
env->reg[env->w[0]] = a1 * a2;
break;
case CREOLE_DIV:
a1 = read_word(ins, 1);
a2 = read_word(ins, 2);
env->reg[env->w[0]] = a1 / a2;
break;
case CREOLE_JL:
a1 = read_word(ins, 1);
a2 = read_word(ins, 2);
if (a1 < a2)
env->prgptr = env->lab[env->w[0]];
break;
case SYS:
a1 = read_word(ins, 1);
/* do syscall */
break;
}
}
#endif
|