[AD] utf16, file functions |
[ Thread Index |
Date Index
| More lists.liballeg.org/allegro-developers Archives
]
I made a patch to add utf16 conversion as requested on the bug tracker.
Is the API like you expected? Basically two functions:
ALLEGRO_USTR *al_ustr_new_from_utf16(const char *s, size_t size)
to go along with
ALLEGRO_USTR *al_ustr_new_from_buffer(const char *s, size_t size)
and
char *al_utf16_dup(const ALLEGRO_USTR *us)
to go along with
char *al_cstr_dup(const ALLEGRO_USTR *us)
There is no explicit support for UTF-16BE or UTF-16LE; both functions use the
native byte order. I assume filenames will always be returned in native order
already - and that is probably the only use for these functions. Otherwise it
would be trivial to add 4 more functions with explicit endianness...
I also wanted to replace the remaining uses of uconvert with this, but
found that I'm confused about the state of our files API. Specifically:
- What are src/win/wfile.c and src/unix/ufile.c? Are they superseded by
src/fshook_stdio.c and can be removed?
- What is the addons/packfile directory for? At first glance it looks
like it was accidentally created during the fshook merge and contains a copy
of the old A4 API, so should it be removed?
--
Elias Pschernig <elias@xxxxxxxxxx>
Index: include/allegro5/utf8.h
===================================================================
--- include/allegro5/utf8.h (revision 11879)
+++ include/allegro5/utf8.h (working copy)
@@ -129,6 +129,10 @@
AL_FUNC(size_t, al_utf8_width, (int32_t c));
AL_FUNC(size_t, al_utf8_encode, (char s[], int32_t c));
+/* UTF-16 (code units are read and written in native byte order) */
+AL_FUNC(ALLEGRO_USTR *, al_ustr_new_from_utf16, (const char *s, size_t size)); /* size is in bytes; an odd trailing byte is ignored */
+AL_FUNC(char *, al_utf16_dup, (const ALLEGRO_USTR *us)); /* returns a newly allocated buffer owned by the caller */
+
#ifdef __cplusplus
}
#endif
Index: src/utf8.c
===================================================================
--- src/utf8.c (revision 11879)
+++ src/utf8.c (working copy)
@@ -1005,4 +1005,105 @@
return 0;
}
+
+/* Function: al_utf16_width
+ *
+ * Number of bytes the UTF-16 encoding of code point `c` occupies: 2 for
+ * values up to 0xffff, 4 for values up to 0x10ffff, and 0 for anything
+ * else (negative values included).
+ */
+size_t al_utf16_width(int c)
+{
+   /* Reinterpreting the value as unsigned pushes negative inputs above
+    * 0x10ffff, so a single range test rejects them and the interface does
+    * not need an unsigned parameter.
+    */
+   const uint32_t code = c;
+
+   /* Outside the Unicode code space: illegal. */
+   if (code > 0x10ffff)
+      return 0;
+
+   /* Invalid code points inside the range are not checked for. */
+   return (code > 0xffff) ? 4 : 2;
+}
+
+
+/* Function: al_utf16_encode
+ *
+ * Encode code point `c` as UTF-16 into the buffer `s`, which must have room
+ * for 4 bytes.  Code units are written in native byte order.
+ *
+ * Returns the number of bytes written: 2 for values up to 0xffff, 4 for
+ * values up to 0x10ffff (a surrogate pair), or 0 if `c` is outside that
+ * range (negative values included), in which case `s` is not modified.
+ * Surrogate code points (0xd800..0xdfff) are not rejected.
+ */
+size_t al_utf16_encode(char s[], int32_t c)
+{
+   uint32_t uc = c;
+   uint16_t units[2];
+   const char *bytes = (const char *)units;
+   size_t nbytes;
+   size_t i;
+
+   if (uc <= 0xffff) {
+      units[0] = (uint16_t)uc;
+      nbytes = 2;
+   }
+   else if (uc <= 0x10ffff) {
+      uint32_t u_ = uc - 0x10000;
+      units[0] = (uint16_t)(0xd800 | (u_ >> 10));
+      units[1] = (uint16_t)(0xdc00 | (u_ & 0x3ff));
+      nbytes = 4;
+   }
+   else {
+      /* Otherwise is illegal. */
+      return 0;
+   }
+
+   /* Note: We always assume the native endianness here.  The units are
+    * copied out byte by byte because `s` is a char buffer with no alignment
+    * guarantee; storing through a uint16_t pointer cast from it could be a
+    * misaligned access.
+    */
+   for (i = 0; i < nbytes; i++)
+      s[i] = bytes[i];
+
+   return nbytes;
+}
+
+
+/* Decode one code point from the start of `s`, which holds `n` UTF-16 code
+ * units in native byte order, and store it in `*c`.
+ *
+ * Returns the number of code units consumed (1 or 2).  Returns 0, leaving
+ * `*c` untouched, when nothing can be decoded: `n` is too small, the first
+ * unit is a stray low surrogate, or a high surrogate is not followed by a
+ * low surrogate.
+ */
+static size_t _al_utf16_get(uint16_t const *s, int n, int *c)
+{
+   if (n < 1)
+      return 0;
+
+   /* Anything outside the surrogate range is a complete code point. */
+   if (s[0] < 0xd800 || s[0] > 0xdfff) {
+      *c = s[0];
+      return 1;
+   }
+
+   /* A pair is a high surrogate (0xd800..0xdbff) followed by a low
+    * surrogate (0xdc00..0xdfff); anything else is ill-formed.
+    */
+   if (s[0] > 0xdbff)
+      return 0;
+   if (n < 2)
+      return 0;
+   if (s[1] < 0xdc00 || s[1] > 0xdfff)
+      return 0;
+
+   /* The 20 payload bits are an offset from 0x10000.  It has to be added,
+    * not OR-ed: the offset itself has bit 16 set for every code point from
+    * 0x20000 upwards, and OR-ing would lose that bit.
+    */
+   *c = 0x10000 + (((s[0] & 0x3ff) << 10) | (s[1] & 0x3ff));
+   return 2;
+}
+
+
+/* Function: al_ustr_new_from_utf16
+ *
+ * Create a new string from `size` bytes of UTF-16 data at `s`, read as
+ * 16-bit code units in native byte order.  Conversion stops at the first
+ * position where _al_utf16_get() reports that no code point can be decoded
+ * (for example a surrogate pair cut off by the end of the buffer); the code
+ * points decoded up to there are kept.
+ *
+ * The caller owns the returned string.
+ */
+ALLEGRO_USTR *al_ustr_new_from_utf16(const char *s, size_t size)
+{
+   ALLEGRO_USTR *ustr = al_ustr_new("");
+   /* If size is odd, this rounds down and we ignore the last byte. */
+   size_t size2 = size / 2;
+   size_t i = 0;
+
+   while (i < size2) {
+      /* `s` is a char buffer with no alignment guarantee, so the (at most
+       * two) code units needed for one code point are first copied byte by
+       * byte into an aligned local before being read as uint16_t.
+       */
+      uint16_t units[2] = {0, 0};
+      char *dst = (char *)units;
+      size_t avail = (size2 - i < 2) ? size2 - i : 2;
+      size_t k;
+      size_t n;
+      int c;
+
+      for (k = 0; k < avail * 2; k++)
+         dst[k] = s[i * 2 + k];
+
+      n = _al_utf16_get(units, (int)avail, &c);
+      if (!n) break;
+      al_ustr_append_chr(ustr, c);
+      i += n;
+   }
+   return ustr;
+}
+
+
+/* Function: al_utf16_dup
+ *
+ * Return a newly allocated UTF-16 copy of `us`, with code units in native
+ * byte order (see al_utf16_encode) and terminated by a 16-bit zero code
+ * unit.  Conversion stops at the first position where al_ustr_get_next()
+ * returns a negative value.
+ *
+ * The buffer comes from al_cstr_dup() and is owned by the caller.
+ */
+char *al_utf16_dup(const ALLEGRO_USTR *us)
+{
+   /* One UTF-16 code unit with value 0. */
+   static const char terminator[2] = {0, 0};
+   int pos = 0;
+   char *ret;
+   ALLEGRO_USTR_INFO info;
+   // TODO: Can ALLEGRO_USTR be used to hold UTF-16?
+   ALLEGRO_USTR *utf16 = al_ustr_new("");
+
+   while (1) {
+      char encoded[4];
+      size_t sz;
+      int32_t c = al_ustr_get_next(us, &pos);
+      if (c < 0) break;
+      sz = al_utf16_encode(encoded, c);
+      /* Nothing was written for a code point that cannot be encoded. */
+      if (sz == 0) continue;
+      al_ustr_append(utf16, al_ref_buffer(&info, encoded, sz));
+   }
+
+   /* NOTE(review): assumes al_cstr_dup() keeps embedded zero bytes (the
+    * loop above already depends on that) and adds one terminating byte.
+    * One byte cannot terminate a string of 16-bit units, so a full zero
+    * code unit is appended explicitly before duplicating.
+    */
+   al_ustr_append(utf16, al_ref_buffer(&info, terminator, sizeof terminator));
+
+   ret = al_cstr_dup(utf16);
+   al_ustr_free(utf16);
+   return ret;
+}
+
/* vim: set sts=3 sw=3 et: */
Index: examples/ex_utf8.c
===================================================================
--- examples/ex_utf8.c (revision 11879)
+++ examples/ex_utf8.c (working copy)
@@ -1104,6 +1104,33 @@
al_ustr_free(us);
}
+/* Test UTF-16 conversion: convert reference UTF-16 data to a string and compare it with the UTF-8 text, then convert the UTF-8 text back and compare it with the reference data. */
+void t50(void)
+{
+ ALLEGRO_USTR *us;
+ char utf8[] = "⅛-note: 𝅘𝅥𝅮, domino: 🁡";
+ uint16_t *utf16;
+ /* Only native byte order supported right now, so have to specify
+ * elements as uint16_t and not as char. The data holds two surrogate
+ * pairs: 0xd834 0xdd60 and 0xd83c 0xdc61. */
+ uint16_t utf16_ref[] = {
+ 0x215b, 0x002d, 0x006e, 0x006f, 0x0074,
+ 0x0065, 0x003a, 0x0020, 0xd834, 0xdd60,
+ 0x002c, 0x0020, 0x0064, 0x006f, 0x006d,
+ 0x0069, 0x006e, 0x006f, 0x003a, 0x0020,
+ 0xd83c, 0xdc61};
+
+ us = al_ustr_new_from_utf16((char *)utf16_ref, 44); /* 44 bytes = 22 uint16_t elements */
+ CHECK(0 == strcmp(utf8, al_cstr(us)));
+ al_ustr_free(us);
+
+ us = al_ustr_new(utf8);
+ utf16 = (uint16_t *)al_utf16_dup(us);
+ CHECK(0 == memcmp(utf16, utf16_ref, 44)); /* compares the 44 data bytes only, nothing after them */
+ al_ustr_free(us);
+ free(utf16); // FIXME: al_free
+}
+
/*---------------------------------------------------------------------------*/
const test_t all_tests[] =
@@ -1112,7 +1139,8 @@
t10, t11, t12, t13, t14, t15, t16, t17, t18, t19,
t20, t21, t22, t23, t24, t25, t26, t27, t28, t29,
t30, t31, t32, t33, t34, t35, t36, t37, t38, t39,
- t40, t41, t42, t43, t44, t45, t46, t47, t48, t49
+ t40, t41, t42, t43, t44, t45, t46, t47, t48, t49,
+ t50
};
#define NUM_TESTS (int)(sizeof(all_tests) / sizeof(all_tests[0]))