[ Thread Index |
Date Index
| More lists.liballeg.org/allegro-developers Archives
]
Could I request that you add some comments? Bit manipulation is hard
to follow.
Chris wrote:
> This patch simplifies the UTF-8 encoding functions. Makes it a bit faster and
> easier to read.
>
> ------------------------------------------------------------------------
>
> Index: src/unicode.c
> ===================================================================
> --- src/unicode.c (revision 5814)
> +++ src/unicode.c (working copy)
> @@ -376,37 +376,29 @@
> */
> static int utf8_setc(char *s, int c)
> {
> - int size, bits, b, i;
> -
> - if (c < 128) {
> - *s = c;
> + if (c<=0x7F) {
> + s[0] = c;
> return 1;
> }
> -
> - bits = 7;
> - while (c >= (1<<bits))
> - bits++;
> -
> - size = 2;
> - b = 11;
> -
> - while (b < bits) {
> - size++;
> - b += 5;
> + if (c<=0x7FF) {
> + s[0] = 0xC0 | (c>>6);
> + s[1] = 0x80 | (c&0x3F);
> + return 2;
> }
> -
> - b -= (7-size);
> - s[0] = c>>b;
> -
> - for (i=0; i<size; i++)
> - s[0] |= (0x80>>i);
> -
> - for (i=1; i<size; i++) {
> - b -= 6;
> - s[i] = 0x80 | ((c>>b)&0x3F);
> + if (c<=0xFFFF) {
> + s[0] = 0xE0 | (c>>12);
> + s[1] = 0x80 | ((c>>6)&0x3F);
> + s[2] = 0x80 | (c&0x3F);
> + return 3;
> }
> -
> - return size;
> + if (c<=0x10FFFF) {
> + s[0] = 0xF0 | (c>>18);
> + s[1] = 0x80 | ((c>>12)&0x3F);
> + s[2] = 0x80 | ((c>>6)&0x3F);
> + s[3] = 0x80 | (c&0x3F);
> + return 4;
> + }
> + return 0;
> }
>
>
> @@ -434,24 +426,15 @@
> */
> static int utf8_cwidth(int c)
> {
> - int size, bits, b;
> -
> - if (c < 128)
> + if (c<=0x7F)
> return 1;
> -
> - bits = 7;
> - while (c >= (1<<bits))
> - bits++;
> -
> - size = 2;
> - b = 11;
> -
> - while (b < bits) {
> - size++;
> - b += 5;
> - }
> -
> - return size;
> + if (c<=0x7FF)
> + return 2;
> + if (c<=0xFFFF)
> + return 3;
> + if (c<=0x10FFFF)
> + return 4;
> + return 0;
> }
>
>
>