[AD] UTF-8 patch

[ Thread Index | Date Index | More lists.liballeg.org/allegro-developers Archives ]


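This patch simplifies the UTF-8 encoding functions, making them a bit faster and
easier to read. Note one behavioural change: code points above 0x10FFFF, which
the old code encoded as sequences of 5 or more bytes, now make utf8_setc() and
utf8_cwidth() return 0.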
Index: src/unicode.c
===================================================================
--- src/unicode.c	(revision 5814)
+++ src/unicode.c	(working copy)
@@ -376,37 +376,29 @@
  */
 static int utf8_setc(char *s, int c)
 {
-   int size, bits, b, i;
-
-   if (c < 128) {
-      *s = c;
+   if (c<=0x7F) {
+      s[0] = c;
       return 1;
    }
-
-   bits = 7;
-   while (c >= (1<<bits))
-      bits++;
-
-   size = 2;
-   b = 11;
-
-   while (b < bits) {
-      size++;
-      b += 5;
+   if (c<=0x7FF) {
+      s[0] = 0xC0 | (c>>6);
+      s[1] = 0x80 | (c&0x3F);
+      return 2;
    }
-
-   b -= (7-size);
-   s[0] = c>>b;
-
-   for (i=0; i<size; i++)
-      s[0] |= (0x80>>i);
-
-   for (i=1; i<size; i++) {
-      b -= 6;
-      s[i] = 0x80 | ((c>>b)&0x3F);
+   if (c<=0xFFFF) {
+      s[0] = 0xE0 | (c>>12);
+      s[1] = 0x80 | ((c>>6)&0x3F);
+      s[2] = 0x80 | (c&0x3F);
+      return 3;
    }
-
-   return size;
+   if (c<=0x10FFFF) {
+      s[0] = 0xF0 | (c>>18);
+      s[1] = 0x80 | ((c>>12)&0x3F);
+      s[2] = 0x80 | ((c>>6)&0x3F);
+      s[3] = 0x80 | (c&0x3F);
+      return 4;
+   }
+   return 0;
 }
 
 
@@ -434,24 +426,15 @@
  */
 static int utf8_cwidth(int c)
 {
-   int size, bits, b;
-
-   if (c < 128)
+   if (c<=0x7F)
       return 1;
-
-   bits = 7;
-   while (c >= (1<<bits))
-      bits++;
-
-   size = 2;
-   b = 11;
-
-   while (b < bits) {
-      size++;
-      b += 5;
-   }
-
-   return size;
+   if (c<=0x7FF)
+      return 2;
+   if (c<=0xFFFF)
+      return 3;
+   if (c<=0x10FFFF)
+      return 4;
+   return 0;
 }
 
 


Mail converted by MHonArc 2.6.19+ http://listengine.tuxfamily.org/