diff options
| author | 2023-02-07 06:19:40 +0000 | |
|---|---|---|
| committer | 2023-02-07 06:19:40 +0000 | |
| commit | 72395822512cb3b7ef590e772dbe0c046c999249 (patch) | |
| tree | a7611e16491af407fb51081178740cb545a451a0 /creole.c | |
| parent | test_encode_decode: add messages (diff) | |
test encode and decode
Diffstat (limited to 'creole.c')
| -rw-r--r-- | creole.c | 52 |
1 files changed, 33 insertions, 19 deletions
@@ -101,13 +101,15 @@ static int read_continue(struct creole_reader *r, struct word *w, for (i = 0; i < to_read; i++) { r_ret = read(r); - if (r_ret < 0) + if (r_ret < 0) { return 0; + } /* Characters might not be 8 bits! */ c = (unsigned char)(r_ret & 0xFF); - if (c >> 6 != 0x2) + if (c >> 6 != 0x2) { return 0; - w->word = w->word << 6 | (c & 0x3F); + } + w->word = (w->word << 6) | (c & 0x3F); } return 1; @@ -150,12 +152,12 @@ static int parse_start_byte(unsigned char c, struct word *w) */ int to_read; } start_data[START_BYTE_NUM-1] = { - {0xFE, 0x00, 0, 0x0, 5}, /* 11111110 */ - {0xFC, 0x00, 0, 0x1, 4}, /* 1111110x */ - {0xF8, 0x00, 0, 0x3, 3}, /* 111110xx */ - {0xF0, 0x00, 0, 0x7, 2}, /* 11110xxx */ - {0xE0, 0x00, 0, 0xF, 2}, /* 1110xxxx */ - {0xC0, 0x01, 1, 0xF, 1} /* 110xxxxx */ + {0xFE, 0x00, 0x0, 0, 5}, /* 11111110 */ + {0xFC, 0x00, 0x1, 0, 4}, /* 1111110x */ + {0xF8, 0x00, 0x3, 0, 3}, /* 111110xx */ + {0xF0, 0x00, 0x7, 0, 2}, /* 11110xxx */ + {0xE0, 0x00, 0xF, 0, 2}, /* 1110xxxx */ + {0xC0, 0x01, 0xF, 1, 1} /* 110xxxxx */ }; int i; @@ -182,7 +184,7 @@ static int parse_start_byte(unsigned char c, struct word *w) /* This parses the first continuation byte if it is special. */ #define SPECIAL_CONTINUE_BYTE_NUM (START_BYTE_NUM - 3) -static void parse_special_byte(unsigned char c, struct word *w) +static int parse_special_byte(unsigned char c, struct word *w) { /* The index denotes the amount of high bits that were in * the start byte. This is the amount that the stored value @@ -195,13 +197,22 @@ static void parse_special_byte(unsigned char c, struct word *w) * shifting the continue byte bits. */ static const unsigned char mask[SPECIAL_CONTINUE_BYTE_NUM] = { - 0xF, /* 1111110 10HHHHxx */ - 0x7, /* 111110H 10HHHxxx */ - 0x3, /* 11110HH 10HHxxxx */ - 0x1 /* 1110HHH 10Hxxxxx */ + 0x1, /* 11110HHH 10Hxxxxx */ + 0x3, /* 111110HH 10HHxxxx */ + 0x7, /* 1111110H 10HHHxxx */ + 0xF /* 11111110 10HHHHxx */ }; - int i = w->len - START_BYTE_NUM; - w->high_bits = (w->high_bits << i) | ((c >> (2 + i)) & mask[i]); + static const unsigned char wordmask[SPECIAL_CONTINUE_BYTE_NUM] = { + 0x1F, 0xF, 0x7, 0x3 + }; + + int i = w->len - 4; + if (i >= SPECIAL_CONTINUE_BYTE_NUM) + return 0; + + w->high_bits = (w->high_bits << i) | ((c >> (5 - i)) & mask[i]); + w->word = c & wordmask[i]; + return 1; } /* Parse an entire Pseudo-UTF8 sequence. */ @@ -209,6 +220,7 @@ static int decode_seq(struct creole_reader *r, struct word *w) { int r_ret; int to_read; + w->high_bits = 0; r_ret = read(r); if (r_ret < 0) @@ -225,7 +237,8 @@ static int decode_seq(struct creole_reader *r, struct word *w) r_ret = read(r); if (r_ret < 0) return 0; - parse_special_byte((unsigned char)(r_ret & 0xFF), w); + if (!parse_special_byte((unsigned char)(r_ret & 0xFF), w)) + return 0; } return read_continue(r, w, to_read); @@ -252,7 +265,7 @@ int creole_encode(creole_word i, unsigned encode_to, unsigned high_bits, {0x7FFFFFF, 0xFC, 3, 0, 27, 0x7, 3, 0x07}, /* 6 */ {0xFFFFFFFF, 0xFE, 4, 0, 32, 0xF, 2, 0x03} /* 7 */ }; - unsigned lb; + int lb; unsigned j; if (encode_to > 8) @@ -267,8 +280,9 @@ int creole_encode(creole_word i, unsigned encode_to, unsigned high_bits, } lb = encode_to - 2; - if (i > d[lb].max) + if (i > d[lb].max) { return 0; + } buf[0] = (d[lb].b1_mask | (high_bits >> d[lb].high_bit_shift_b1 << d[lb].high_bit_shift_to_right_b1)); |
