test encode and decode
This commit is contained in:
parent
c5cff40bbf
commit
7239582251
2
Makefile
2
Makefile
|
@ -1,4 +1,4 @@
|
||||||
test_encode_decode: test_encode_decode.c creole.c creole.h
|
test_encode_decode: test_encode_decode.c creole.c creole.h
|
||||||
$(CC) test_encode_decode.c -Wall -pedantic -std=c89 -O2 -fopenmp -o test_encode_decode
|
$(CC) test_encode_decode.c -Wall -pedantic -std=c89 -g -fopenmp -o test_encode_decode
|
||||||
test_creole: test_creole.c creole.c creole.h greatest.h
|
test_creole: test_creole.c creole.c creole.h greatest.h
|
||||||
$(CC) -g test_creole.c -Wall -pedantic -std=c89 -o test_creole
|
$(CC) -g test_creole.c -Wall -pedantic -std=c89 -o test_creole
|
||||||
|
|
52
creole.c
52
creole.c
|
@ -101,13 +101,15 @@ static int read_continue(struct creole_reader *r, struct word *w,
|
||||||
|
|
||||||
for (i = 0; i < to_read; i++) {
|
for (i = 0; i < to_read; i++) {
|
||||||
r_ret = read(r);
|
r_ret = read(r);
|
||||||
if (r_ret < 0)
|
if (r_ret < 0) {
|
||||||
return 0;
|
return 0;
|
||||||
|
}
|
||||||
/* Characters might not be 8 bits! */
|
/* Characters might not be 8 bits! */
|
||||||
c = (unsigned char)(r_ret & 0xFF);
|
c = (unsigned char)(r_ret & 0xFF);
|
||||||
if (c >> 6 != 0x2)
|
if (c >> 6 != 0x2) {
|
||||||
return 0;
|
return 0;
|
||||||
w->word = w->word << 6 | (c & 0x3F);
|
}
|
||||||
|
w->word = (w->word << 6) | (c & 0x3F);
|
||||||
}
|
}
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
|
@ -150,12 +152,12 @@ static int parse_start_byte(unsigned char c, struct word *w)
|
||||||
*/
|
*/
|
||||||
int to_read;
|
int to_read;
|
||||||
} start_data[START_BYTE_NUM-1] = {
|
} start_data[START_BYTE_NUM-1] = {
|
||||||
{0xFE, 0x00, 0, 0x0, 5}, /* 11111110 */
|
{0xFE, 0x00, 0x0, 0, 5}, /* 11111110 */
|
||||||
{0xFC, 0x00, 0, 0x1, 4}, /* 1111110x */
|
{0xFC, 0x00, 0x1, 0, 4}, /* 1111110x */
|
||||||
{0xF8, 0x00, 0, 0x3, 3}, /* 111110xx */
|
{0xF8, 0x00, 0x3, 0, 3}, /* 111110xx */
|
||||||
{0xF0, 0x00, 0, 0x7, 2}, /* 11110xxx */
|
{0xF0, 0x00, 0x7, 0, 2}, /* 11110xxx */
|
||||||
{0xE0, 0x00, 0, 0xF, 2}, /* 1110xxxx */
|
{0xE0, 0x00, 0xF, 0, 2}, /* 1110xxxx */
|
||||||
{0xC0, 0x01, 1, 0xF, 1} /* 110xxxxx */
|
{0xC0, 0x01, 0xF, 1, 1} /* 110xxxxx */
|
||||||
};
|
};
|
||||||
|
|
||||||
int i;
|
int i;
|
||||||
|
@ -182,7 +184,7 @@ static int parse_start_byte(unsigned char c, struct word *w)
|
||||||
|
|
||||||
/* This parses the first continuation byte if it is special. */
|
/* This parses the first continuation byte if it is special. */
|
||||||
#define SPECIAL_CONTINUE_BYTE_NUM (START_BYTE_NUM - 3)
|
#define SPECIAL_CONTINUE_BYTE_NUM (START_BYTE_NUM - 3)
|
||||||
static void parse_special_byte(unsigned char c, struct word *w)
|
static int parse_special_byte(unsigned char c, struct word *w)
|
||||||
{
|
{
|
||||||
/* The index denotes the amount of high bits that were in
|
/* The index denotes the amount of high bits that were in
|
||||||
* the start byte. This is the amount that the stored value
|
* the start byte. This is the amount that the stored value
|
||||||
|
@ -195,13 +197,22 @@ static void parse_special_byte(unsigned char c, struct word *w)
|
||||||
* shifting the continue byte bits.
|
* shifting the continue byte bits.
|
||||||
*/
|
*/
|
||||||
static const unsigned char mask[SPECIAL_CONTINUE_BYTE_NUM] = {
|
static const unsigned char mask[SPECIAL_CONTINUE_BYTE_NUM] = {
|
||||||
0xF, /* 1111110 10HHHHxx */
|
0x1, /* 11110HHH 10Hxxxxx */
|
||||||
0x7, /* 111110H 10HHHxxx */
|
0x3, /* 111110HH 10HHxxxx */
|
||||||
0x3, /* 11110HH 10HHxxxx */
|
0x7, /* 1111110H 10HHHxxx */
|
||||||
0x1 /* 1110HHH 10Hxxxxx */
|
0xF /* 11111110 10HHHHxx */
|
||||||
};
|
};
|
||||||
int i = w->len - START_BYTE_NUM;
|
static const unsigned char wordmask[SPECIAL_CONTINUE_BYTE_NUM] = {
|
||||||
w->high_bits = (w->high_bits << i) | ((c >> (2 + i)) & mask[i]);
|
0x1F, 0xF, 0x7, 0x3
|
||||||
|
};
|
||||||
|
|
||||||
|
int i = w->len - 4;
|
||||||
|
if (i >= SPECIAL_CONTINUE_BYTE_NUM)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
w->high_bits = (w->high_bits << i) | ((c >> (5 - i)) & mask[i]);
|
||||||
|
w->word = c & wordmask[i];
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Parse an entire Pseudo-UTF8 sequence. */
|
/* Parse an entire Pseudo-UTF8 sequence. */
|
||||||
|
@ -209,6 +220,7 @@ static int decode_seq(struct creole_reader *r, struct word *w)
|
||||||
{
|
{
|
||||||
int r_ret;
|
int r_ret;
|
||||||
int to_read;
|
int to_read;
|
||||||
|
w->high_bits = 0;
|
||||||
|
|
||||||
r_ret = read(r);
|
r_ret = read(r);
|
||||||
if (r_ret < 0)
|
if (r_ret < 0)
|
||||||
|
@ -225,7 +237,8 @@ static int decode_seq(struct creole_reader *r, struct word *w)
|
||||||
r_ret = read(r);
|
r_ret = read(r);
|
||||||
if (r_ret < 0)
|
if (r_ret < 0)
|
||||||
return 0;
|
return 0;
|
||||||
parse_special_byte((unsigned char)(r_ret & 0xFF), w);
|
if (!parse_special_byte((unsigned char)(r_ret & 0xFF), w))
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
return read_continue(r, w, to_read);
|
return read_continue(r, w, to_read);
|
||||||
|
@ -252,7 +265,7 @@ int creole_encode(creole_word i, unsigned encode_to, unsigned high_bits,
|
||||||
{0x7FFFFFF, 0xFC, 3, 0, 27, 0x7, 3, 0x07}, /* 6 */
|
{0x7FFFFFF, 0xFC, 3, 0, 27, 0x7, 3, 0x07}, /* 6 */
|
||||||
{0xFFFFFFFF, 0xFE, 4, 0, 32, 0xF, 2, 0x03} /* 7 */
|
{0xFFFFFFFF, 0xFE, 4, 0, 32, 0xF, 2, 0x03} /* 7 */
|
||||||
};
|
};
|
||||||
unsigned lb;
|
int lb;
|
||||||
unsigned j;
|
unsigned j;
|
||||||
|
|
||||||
if (encode_to > 8)
|
if (encode_to > 8)
|
||||||
|
@ -267,8 +280,9 @@ int creole_encode(creole_word i, unsigned encode_to, unsigned high_bits,
|
||||||
}
|
}
|
||||||
|
|
||||||
lb = encode_to - 2;
|
lb = encode_to - 2;
|
||||||
if (i > d[lb].max)
|
if (i > d[lb].max) {
|
||||||
return 0;
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
buf[0] = (d[lb].b1_mask | (high_bits >> d[lb].high_bit_shift_b1
|
buf[0] = (d[lb].b1_mask | (high_bits >> d[lb].high_bit_shift_b1
|
||||||
<< d[lb].high_bit_shift_to_right_b1));
|
<< d[lb].high_bit_shift_to_right_b1));
|
||||||
|
|
|
@ -1,9 +1,10 @@
|
||||||
#include "creole.c"
|
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
#include "creole.c"
|
||||||
|
|
||||||
|
#if 0
|
||||||
struct seq {
|
struct seq {
|
||||||
creole_word max;
|
creole_word max;
|
||||||
unsigned encode_to;
|
unsigned encode_to;
|
||||||
|
@ -173,10 +174,55 @@ static void test_encode(void) {
|
||||||
tests[test](high_bits);
|
tests[test](high_bits);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Round-trip check for one sequence length.
 *
 * For every word in 0..max (inclusive) the word is encoded with
 * creole_encode() as an encode_to-byte sequence carrying high_bits, the
 * bytes are decoded again with decode_seq(), and the decoded length,
 * high bits and word are compared with what was encoded.
 *
 * max       - last word value to test (inclusive).
 * encode_to - sequence length handed to creole_encode(); must be
 *             between 1 and the size of the local buffer (7).
 * high_bits - high-bit value handed to creole_encode().
 *
 * Returns nothing. On the first failure a diagnostic is written to
 * stderr and the process is terminated with abort().
 */
void encode_decode_byte_seq(creole_word max, unsigned encode_to, unsigned high_bits) {
	unsigned char buf[7];
	struct creole_reader r = {0};
	struct word w;
	creole_word i = 0;

	/* The reader below is told that encode_to bytes of buf are
	 * available, so the length must not exceed the buffer.
	 */
	if (encode_to < 1 || encode_to > sizeof(buf)) {
		fprintf(stderr, "bad sequence length %u\n", encode_to);
		abort();
	}

	for (;;) {
		r.p = buf;
		r.left = encode_to;

		/* The calls are made outside assert(): with NDEBUG defined
		 * the macro drops its argument, and the calls with it.
		 */
		if (creole_encode(i, encode_to, high_bits, buf) != 1) {
			fprintf(stderr, "encode of word %lX failed\n",
			        (unsigned long)i);
			abort();
		}

		if (decode_seq(&r, &w) != 1) {
			fprintf(stderr, "decode of word %lX failed\n",
			        (unsigned long)i);
			abort();
		}

		if (w.len != encode_to) {
			fprintf(stderr, "length %u != %u\n",
			        (unsigned)w.len, encode_to);
			abort();
		}

		if (w.high_bits != high_bits) {
			fprintf(stderr, "high bits %u != %u\n",
			        (unsigned)w.high_bits, high_bits);
			abort();
		}

		/* creole_word's exact type is not visible here; widening to
		 * unsigned long keeps the format specifier valid either way.
		 */
		if (w.word != i) {
			fprintf(stderr, "word %lX != %lX\n",
			        (unsigned long)w.word, (unsigned long)i);
			abort();
		}

		/* Test for the end before incrementing: an `i <= max` loop
		 * would never terminate if max is the largest value a
		 * creole_word can hold.
		 */
		if (i == max)
			break;
		i++;
	}
}
|
||||||
|
|
||||||
|
static void test_encode_decode(void) {
|
||||||
|
unsigned high_bits = 0;
|
||||||
|
int encode_len;
|
||||||
|
creole_word maxima[6] = {0x7F, 0xFFF, 0x1FFFF, 0x3FFFFF, 0x7FFFFFF, 0xFFFFFFFF};
|
||||||
|
|
||||||
|
encode_decode_byte_seq(0x7F, 1, 0);
|
||||||
|
|
||||||
|
#pragma omp parallel for collapse(2) num_threads(8)
|
||||||
|
for (high_bits = 0; high_bits < 16; high_bits++) {
|
||||||
|
for (encode_len = 2; encode_len < 8; encode_len++)
|
||||||
|
encode_decode_byte_seq(maxima[encode_len - 2], encode_len, high_bits);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
int main(void) {
|
int main(void) {
|
||||||
printf("test encode\n");
|
printf("test encode\n");
|
||||||
test_encode();
|
test_encode_decode();
|
||||||
|
|
||||||
printf("finished\n");
|
printf("finished\n");
|
||||||
return 0;
|
return 0;
|
||||||
|
|
Loading…
Reference in New Issue