From 72395822512cb3b7ef590e772dbe0c046c999249 Mon Sep 17 00:00:00 2001 From: Peter McGoron Date: Tue, 7 Feb 2023 06:19:40 +0000 Subject: [PATCH] test encode and decode --- Makefile | 2 +- creole.c | 52 ++++++++++++++++++++++++++++---------------- test_encode_decode.c | 50 ++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 82 insertions(+), 22 deletions(-) diff --git a/Makefile b/Makefile index 8d544a2..49c74b9 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ test_encode_decode: test_encode_decode.c creole.c creole.h - $(CC) test_encode_decode.c -Wall -pedantic -std=c89 -O2 -fopenmp -o test_encode_decode + $(CC) test_encode_decode.c -Wall -pedantic -std=c89 -g -fopenmp -o test_encode_decode test_creole: test_creole.c creole.c creole.h greatest.h $(CC) -g test_creole.c -Wall -pedantic -std=c89 -o test_creole diff --git a/creole.c b/creole.c index 58b6c83..3da1e3e 100644 --- a/creole.c +++ b/creole.c @@ -101,13 +101,15 @@ static int read_continue(struct creole_reader *r, struct word *w, for (i = 0; i < to_read; i++) { r_ret = read(r); - if (r_ret < 0) + if (r_ret < 0) { return 0; + } /* Characters might not be 8 bits! */ c = (unsigned char)(r_ret & 0xFF); - if (c >> 6 != 0x2) + if (c >> 6 != 0x2) { return 0; - w->word = w->word << 6 | (c & 0x3F); + } + w->word = (w->word << 6) | (c & 0x3F); } return 1; @@ -150,12 +152,12 @@ static int parse_start_byte(unsigned char c, struct word *w) */ int to_read; } start_data[START_BYTE_NUM-1] = { - {0xFE, 0x00, 0, 0x0, 5}, /* 11111110 */ - {0xFC, 0x00, 0, 0x1, 4}, /* 1111110x */ - {0xF8, 0x00, 0, 0x3, 3}, /* 111110xx */ - {0xF0, 0x00, 0, 0x7, 2}, /* 11110xxx */ - {0xE0, 0x00, 0, 0xF, 2}, /* 1110xxxx */ - {0xC0, 0x01, 1, 0xF, 1} /* 110xxxxx */ + {0xFE, 0x00, 0x0, 0, 5}, /* 11111110 */ + {0xFC, 0x00, 0x1, 0, 4}, /* 1111110x */ + {0xF8, 0x00, 0x3, 0, 3}, /* 111110xx */ + {0xF0, 0x00, 0x7, 0, 2}, /* 11110xxx */ + {0xE0, 0x00, 0xF, 0, 2}, /* 1110xxxx */ + {0xC0, 0x01, 0xF, 1, 1} /* 110xxxxx */ }; int i; @@ -182,7 +184,7 @@ static int parse_start_byte(unsigned char c, struct word *w) /* This parses the first continuation byte if it is special. */ #define SPECIAL_CONTINUE_BYTE_NUM (START_BYTE_NUM - 3) -static void parse_special_byte(unsigned char c, struct word *w) +static int parse_special_byte(unsigned char c, struct word *w) { /* The index denotes the amount of high bits that were in * the start byte. This is the amount that the stored value @@ -195,13 +197,22 @@ static void parse_special_byte(unsigned char c, struct word *w) * shifting the continue byte bits. */ static const unsigned char mask[SPECIAL_CONTINUE_BYTE_NUM] = { - 0xF, /* 1111110 10HHHHxx */ - 0x7, /* 111110H 10HHHxxx */ - 0x3, /* 11110HH 10HHxxxx */ - 0x1 /* 1110HHH 10Hxxxxx */ + 0x1, /* 11110HHH 10Hxxxxx */ + 0x3, /* 111110HH 10HHxxxx */ + 0x7, /* 1111110H 10HHHxxx */ + 0xF /* 11111110 10HHHHxx */ }; - int i = w->len - START_BYTE_NUM; - w->high_bits = (w->high_bits << i) | ((c >> (2 + i)) & mask[i]); + static const unsigned char wordmask[SPECIAL_CONTINUE_BYTE_NUM] = { + 0x1F, 0xF, 0x7, 0x3 + }; + + int i = w->len - 4; + if (i >= SPECIAL_CONTINUE_BYTE_NUM) + return 0; + + w->high_bits = (w->high_bits << i) | ((c >> (5 - i)) & mask[i]); + w->word = c & wordmask[i]; + return 1; } /* Parse an entire Pseudo-UTF8 sequence. */ @@ -209,6 +220,7 @@ static int decode_seq(struct creole_reader *r, struct word *w) { int r_ret; int to_read; + w->high_bits = 0; r_ret = read(r); if (r_ret < 0) @@ -225,7 +237,8 @@ static int decode_seq(struct creole_reader *r, struct word *w) r_ret = read(r); if (r_ret < 0) return 0; - parse_special_byte((unsigned char)(r_ret & 0xFF), w); + if (!parse_special_byte((unsigned char)(r_ret & 0xFF), w)) + return 0; } return read_continue(r, w, to_read); @@ -252,7 +265,7 @@ int creole_encode(creole_word i, unsigned encode_to, unsigned high_bits, {0x7FFFFFF, 0xFC, 3, 0, 27, 0x7, 3, 0x07}, /* 6 */ {0xFFFFFFFF, 0xFE, 4, 0, 32, 0xF, 2, 0x03} /* 7 */ }; - unsigned lb; + int lb; unsigned j; if (encode_to > 8) @@ -267,8 +280,9 @@ int creole_encode(creole_word i, unsigned encode_to, unsigned high_bits, } lb = encode_to - 2; - if (i > d[lb].max) + if (i > d[lb].max) { return 0; + } buf[0] = (d[lb].b1_mask | (high_bits >> d[lb].high_bit_shift_b1 << d[lb].high_bit_shift_to_right_b1)); diff --git a/test_encode_decode.c b/test_encode_decode.c index 3b117cf..e165e1e 100644 --- a/test_encode_decode.c +++ b/test_encode_decode.c @@ -1,9 +1,10 @@ -#include "creole.c" #include #include #include #include +#include "creole.c" +#if 0 struct seq { creole_word max; unsigned encode_to; @@ -173,10 +174,55 @@ static void test_encode(void) { tests[test](high_bits); } } +#endif + +void encode_decode_byte_seq(creole_word max, unsigned encode_to, unsigned high_bits) { + unsigned char buf[7]; + struct creole_reader r = {0}; + struct word w; + creole_word i = 0; + + for (;;) { + r.p = buf; + r.left = encode_to; + assert(creole_encode(i, encode_to, high_bits, + buf) == 1); + + assert(decode_seq(&r, &w) == 1); + assert(w.len == encode_to); + if (w.high_bits != high_bits) { + printf("high bits %u != %u\n", w.high_bits, high_bits); + abort(); + } + + if (w.word != i) { + printf("word %X != %X\n", w.word, i); + abort(); + } + + if (i == max) + break; + i++; + } +} + +static void test_encode_decode(void) { + unsigned high_bits = 0; + int encode_len; + creole_word maxima[6] = {0x7F, 0xFFF, 0x1FFFF, 0x3FFFFF, 0x7FFFFFF, 0xFFFFFFFF}; + + encode_decode_byte_seq(0x7F, 1, 0); + +#pragma omp parallel for collapse(2) num_threads(8) + for (high_bits = 0; high_bits < 16; high_bits++) { + for (encode_len = 2; encode_len < 8; encode_len++) + encode_decode_byte_seq(maxima[encode_len - 2], encode_len, high_bits); + } +} int main(void) { printf("test encode\n"); - test_encode(); + test_encode_decode(); printf("finished\n"); return 0;