test encode and decode

This commit is contained in:
Peter McGoron 2023-02-07 06:19:40 +00:00
parent c5cff40bbf
commit 7239582251
3 changed files with 82 additions and 22 deletions

View File

@ -1,4 +1,4 @@
test_encode_decode: test_encode_decode.c creole.c creole.h
$(CC) test_encode_decode.c -Wall -pedantic -std=c89 -O2 -fopenmp -o test_encode_decode
$(CC) test_encode_decode.c -Wall -pedantic -std=c89 -g -fopenmp -o test_encode_decode
test_creole: test_creole.c creole.c creole.h greatest.h
$(CC) -g test_creole.c -Wall -pedantic -std=c89 -o test_creole

View File

@ -101,13 +101,15 @@ static int read_continue(struct creole_reader *r, struct word *w,
for (i = 0; i < to_read; i++) {
r_ret = read(r);
if (r_ret < 0)
if (r_ret < 0) {
return 0;
}
/* Characters might not be 8 bits! */
c = (unsigned char)(r_ret & 0xFF);
if (c >> 6 != 0x2)
if (c >> 6 != 0x2) {
return 0;
w->word = w->word << 6 | (c & 0x3F);
}
w->word = (w->word << 6) | (c & 0x3F);
}
return 1;
@ -150,12 +152,12 @@ static int parse_start_byte(unsigned char c, struct word *w)
*/
int to_read;
} start_data[START_BYTE_NUM-1] = {
{0xFE, 0x00, 0, 0x0, 5}, /* 11111110 */
{0xFC, 0x00, 0, 0x1, 4}, /* 1111110x */
{0xF8, 0x00, 0, 0x3, 3}, /* 111110xx */
{0xF0, 0x00, 0, 0x7, 2}, /* 11110xxx */
{0xE0, 0x00, 0, 0xF, 2}, /* 1110xxxx */
{0xC0, 0x01, 1, 0xF, 1} /* 110xxxxx */
{0xFE, 0x00, 0x0, 0, 5}, /* 11111110 */
{0xFC, 0x00, 0x1, 0, 4}, /* 1111110x */
{0xF8, 0x00, 0x3, 0, 3}, /* 111110xx */
{0xF0, 0x00, 0x7, 0, 2}, /* 11110xxx */
{0xE0, 0x00, 0xF, 0, 2}, /* 1110xxxx */
{0xC0, 0x01, 0xF, 1, 1} /* 110xxxxx */
};
int i;
@ -182,7 +184,7 @@ static int parse_start_byte(unsigned char c, struct word *w)
/* This parses the first continuation byte if it is special. */
#define SPECIAL_CONTINUE_BYTE_NUM (START_BYTE_NUM - 3)
static void parse_special_byte(unsigned char c, struct word *w)
static int parse_special_byte(unsigned char c, struct word *w)
{
/* The index denotes the amount of high bits that were in
* the start byte. This is the amount that the stored value
@ -195,13 +197,22 @@ static void parse_special_byte(unsigned char c, struct word *w)
* shifting the continue byte bits.
*/
static const unsigned char mask[SPECIAL_CONTINUE_BYTE_NUM] = {
0xF, /* 1111110 10HHHHxx */
0x7, /* 111110H 10HHHxxx */
0x3, /* 11110HH 10HHxxxx */
0x1 /* 1110HHH 10Hxxxxx */
0x1, /* 11110HHH 10Hxxxxx */
0x3, /* 111110HH 10HHxxxx */
0x7, /* 1111110H 10HHHxxx */
0xF /* 11111110 10HHHHxx */
};
int i = w->len - START_BYTE_NUM;
w->high_bits = (w->high_bits << i) | ((c >> (2 + i)) & mask[i]);
static const unsigned char wordmask[SPECIAL_CONTINUE_BYTE_NUM] = {
0x1F, 0xF, 0x7, 0x3
};
int i = w->len - 4;
if (i >= SPECIAL_CONTINUE_BYTE_NUM)
return 0;
w->high_bits = (w->high_bits << i) | ((c >> (5 - i)) & mask[i]);
w->word = c & wordmask[i];
return 1;
}
/* Parse an entire Pseudo-UTF8 sequence. */
@ -209,6 +220,7 @@ static int decode_seq(struct creole_reader *r, struct word *w)
{
int r_ret;
int to_read;
w->high_bits = 0;
r_ret = read(r);
if (r_ret < 0)
@ -225,7 +237,8 @@ static int decode_seq(struct creole_reader *r, struct word *w)
r_ret = read(r);
if (r_ret < 0)
return 0;
parse_special_byte((unsigned char)(r_ret & 0xFF), w);
if (!parse_special_byte((unsigned char)(r_ret & 0xFF), w))
return 0;
}
return read_continue(r, w, to_read);
@ -252,7 +265,7 @@ int creole_encode(creole_word i, unsigned encode_to, unsigned high_bits,
{0x7FFFFFF, 0xFC, 3, 0, 27, 0x7, 3, 0x07}, /* 6 */
{0xFFFFFFFF, 0xFE, 4, 0, 32, 0xF, 2, 0x03} /* 7 */
};
unsigned lb;
int lb;
unsigned j;
if (encode_to > 8)
@ -267,8 +280,9 @@ int creole_encode(creole_word i, unsigned encode_to, unsigned high_bits,
}
lb = encode_to - 2;
if (i > d[lb].max)
if (i > d[lb].max) {
return 0;
}
buf[0] = (d[lb].b1_mask | (high_bits >> d[lb].high_bit_shift_b1
<< d[lb].high_bit_shift_to_right_b1));

View File

@ -1,9 +1,10 @@
#include "creole.c"
#include <string.h>
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include "creole.c"
#if 0
struct seq {
creole_word max;
unsigned encode_to;
@ -173,10 +174,55 @@ static void test_encode(void) {
tests[test](high_bits);
}
}
#endif
/*
 * Round-trip check for one sequence length: for every word i from 0
 * through max (inclusive), encode i together with high_bits into an
 * encode_to-byte sequence, decode it again, and abort() on the first
 * failure or mismatch.
 *
 *   max       - largest word value to try (inclusive).
 *   encode_to - sequence length handed to creole_encode() and made
 *               available to the reader; must fit in the local buffer.
 *   high_bits - high-bits value to encode and to expect back.
 *
 * The encode and decode calls are deliberately made outside of assert():
 * when NDEBUG is defined an assert() argument is never evaluated, which
 * would skip both calls and leave `w` uninitialized when it is compared.
 */
void encode_decode_byte_seq(creole_word max, unsigned encode_to, unsigned high_bits) {
	unsigned char buf[7];
	struct creole_reader r = {0};
	struct word w;
	creole_word i = 0;

	/* The reader below is told that encode_to bytes of buf are readable. */
	if (encode_to > sizeof(buf)) {
		printf("encode_to %u exceeds buffer size %u\n",
		       encode_to, (unsigned)sizeof(buf));
		abort();
	}

	for (;;) {
		/* Rewind the reader to the start of the freshly encoded bytes. */
		r.p = buf;
		r.left = encode_to;

		if (creole_encode(i, encode_to, high_bits, buf) != 1) {
			printf("encode failed (encode_to %u, high bits %u)\n",
			       encode_to, high_bits);
			abort();
		}
		if (decode_seq(&r, &w) != 1) {
			printf("decode failed (encode_to %u, high bits %u)\n",
			       encode_to, high_bits);
			abort();
		}
		if (w.len != encode_to) {
			printf("decoded length does not match encode_to %u\n",
			       encode_to);
			abort();
		}
		if (w.high_bits != high_bits) {
			printf("high bits %u != %u\n", w.high_bits, high_bits);
			abort();
		}
		if (w.word != i) {
			printf("word %X != %X\n", w.word, i);
			abort();
		}

		/* Test for the end before incrementing so that i is never
		 * stepped past max. */
		if (i == max)
			break;
		i++;
	}
}
/*
 * Exhaustive encode/decode round trip.
 *
 * One-byte sequences are checked once with high bits 0. Every
 * combination of high-bits value 0..15 and sequence length 2..7 is then
 * checked over that length's full word range; the two loops are
 * collapsed into a single OpenMP iteration space spread over 8 threads.
 */
static void test_encode_decode(void) {
	/* Largest word tried for each multi-byte length: entry k belongs
	 * to sequence length k + 2. */
	creole_word seq_max[6] = {0x7F, 0xFFF, 0x1FFFF, 0x3FFFFF, 0x7FFFFFF, 0xFFFFFFFF};
	unsigned hb;
	int k;

	encode_decode_byte_seq(0x7F, 1, 0);

#pragma omp parallel for collapse(2) num_threads(8)
	for (hb = 0; hb < 16; hb++) {
		for (k = 0; k < 6; k++) {
			encode_decode_byte_seq(seq_max[k], k + 2, hb);
		}
	}
}
int main(void) {
printf("test encode\n");
test_encode();
test_encode_decode();
printf("finished\n");
return 0;