Re: [AD] UTF-8 patch

[ Thread Index | Date Index | More lists.liballeg.org/allegro-developers Archives ]


Could I request that you add some comments? Bit manipulation is hard
to follow.

Chris wrote:
> This patch simplifies the UTF-8 encoding functions. Makes it a bit faster and 
> easier to read.
>   
> ------------------------------------------------------------------------
>
> Index: src/unicode.c
> ===================================================================
> --- src/unicode.c	(revision 5814)
> +++ src/unicode.c	(working copy)
> @@ -376,37 +376,29 @@
>   */
>  static int utf8_setc(char *s, int c)
>  {
> -   int size, bits, b, i;
> -
> -   if (c < 128) {
> -      *s = c;
> +   if (c<=0x7F) {
> +      s[0] = c;
>        return 1;
>     }
> -
> -   bits = 7;
> -   while (c >= (1<<bits))
> -      bits++;
> -
> -   size = 2;
> -   b = 11;
> -
> -   while (b < bits) {
> -      size++;
> -      b += 5;
> +   if (c<=0x7FF) {
> +      s[0] = 0xC0 | (c>>6);
> +      s[1] = 0x80 | (c&0x3F);
> +      return 2;
>     }
> -
> -   b -= (7-size);
> -   s[0] = c>>b;
> -
> -   for (i=0; i<size; i++)
> -      s[0] |= (0x80>>i);
> -
> -   for (i=1; i<size; i++) {
> -      b -= 6;
> -      s[i] = 0x80 | ((c>>b)&0x3F);
> +   if (c<=0xFFFF) {
> +      s[0] = 0xE0 | (c>>12);
> +      s[1] = 0x80 | ((c>>6)&0x3F);
> +      s[2] = 0x80 | (c&0x3F);
> +      return 3;
>     }
> -
> -   return size;
> +   if (c<=0x10FFFF) {
> +      s[0] = 0xF0 | (c>>18);
> +      s[1] = 0x80 | ((c>>12)&0x3F);
> +      s[2] = 0x80 | ((c>>6)&0x3F);
> +      s[3] = 0x80 | (c&0x3F);
> +      return 4;
> +   }
> +   return 0;
>  }
>  
>  
> @@ -434,24 +426,15 @@
>   */
>  static int utf8_cwidth(int c)
>  {
> -   int size, bits, b;
> -
> -   if (c < 128)
> +   if (c<=0x7F)
>        return 1;
> -
> -   bits = 7;
> -   while (c >= (1<<bits))
> -      bits++;
> -
> -   size = 2;
> -   b = 11;
> -
> -   while (b < bits) {
> -      size++;
> -      b += 5;
> -   }
> -
> -   return size;
> +   if (c<=0x7FF)
> +      return 2;
> +   if (c<=0xFFFF)
> +      return 3;
> +   if (c<=0x10FFFF)
> +      return 4;
> +   return 0;
>  }
>  
>  
>   




Mail converted by MHonArc 2.6.19+ http://listengine.tuxfamily.org/