/* utf8.c - Operations on UTF-8 strings.
 *
 * Copyright (C) 2002 Timo Sirainen
 *
 * Based on GLib code by
 *
 * Copyright (C) 1999 Tom Tromey
 * Copyright (C) 2000 Red Hat, Inc.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */
/*
 * UTF8_COMPUTE - classify the lead byte of a UTF-8 sequence.
 *
 * Char: the lead byte.  It is evaluated several times, so pass an
 *       expression without side effects.
 * Mask: lvalue that receives the bit mask selecting the payload bits of
 *       the lead byte.
 * Len:  lvalue that receives the total sequence length in bytes (1-6), or
 *       -1 when Char cannot start a sequence (a continuation byte, 0xfe
 *       or 0xff).  Mask is not written in that case.
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly
 * one statement and stays correct inside an unbraced if/else.
 *
 * NOTE(review): the Len/Mask assignments and the final else branch were
 * restored from the standard UTF-8 lead-byte layout -- confirm against a
 * pristine copy of this file.
 */
#define UTF8_COMPUTE(Char, Mask, Len)					      \
  do {									      \
    if ((Char) < 128)							      \
      {									      \
	(Len) = 1;							      \
	(Mask) = 0x7f;							      \
      }									      \
    else if (((Char) & 0xe0) == 0xc0)					      \
      {									      \
	(Len) = 2;							      \
	(Mask) = 0x1f;							      \
      }									      \
    else if (((Char) & 0xf0) == 0xe0)					      \
      {									      \
	(Len) = 3;							      \
	(Mask) = 0x0f;							      \
      }									      \
    else if (((Char) & 0xf8) == 0xf0)					      \
      {									      \
	(Len) = 4;							      \
	(Mask) = 0x07;							      \
      }									      \
    else if (((Char) & 0xfc) == 0xf8)					      \
      {									      \
	(Len) = 5;							      \
	(Mask) = 0x03;							      \
      }									      \
    else if (((Char) & 0xfe) == 0xfc)					      \
      {									      \
	(Len) = 6;							      \
	(Mask) = 0x01;							      \
      }									      \
    else								      \
      (Len) = -1;							      \
  } while (0)
/*
 * UTF8_GET - decode one UTF-8 sequence whose length is already known.
 *
 * Result: int lvalue that receives the decoded value, or -1 if any byte
 *         after the first is not a continuation byte (10xxxxxx).
 * Chars:  pointer to the first byte of the sequence.
 * Count:  int lvalue used as the loop index.
 * Mask:   payload mask for the lead byte.
 * Len:    total length of the sequence in bytes.
 *
 * Each continuation byte contributes its low six bits.  The body is wrapped
 * in do { } while (0) so the macro expands to exactly one statement; the
 * `break' only leaves the inner for loop.
 *
 * NOTE(review): the error assignment, the `break' and the six-bit shift
 * were restored around the visible lines -- confirm against a pristine
 * copy of this file.
 */
#define UTF8_GET(Result, Chars, Count, Mask, Len)			      \
  do {									      \
    (Result) = (Chars)[0] & (Mask);					      \
    for ((Count) = 1; (Count) < (Len); ++(Count))			      \
      {									      \
	if (((Chars)[(Count)] & 0xc0) != 0x80)				      \
	  {								      \
	    (Result) = -1;						      \
	    break;							      \
	  }								      \
	(Result) <<= 6;							      \
	(Result) |= ((Chars)[(Count)] & 0x3f);				      \
      }									      \
  } while (0)
75 unichar get_utf8_char(const unsigned char **ptr, int len)
77 int i, result, mask, chrlen;
80 UTF8_COMPUTE(**ptr, mask, chrlen);
87 UTF8_GET(result, *ptr, i, mask, chrlen);
/*
 * Count the characters (not bytes) in the NUL-terminated UTF-8 string str.
 *
 * Counting stops at the terminating NUL or as soon as get_utf8_char()
 * returns a value that is not greater than zero.
 *
 * NOTE(review): the counter and the loop body were restored around the
 * visible loop condition -- confirm against a pristine copy of this file.
 */
int strlen_utf8(const char *str)
{
	const unsigned char *p = (const unsigned char *) str;
	int len = 0;

	/* 6 is the longest sequence the decoder accepts */
	while (*p != '\0' && get_utf8_char(&p, 6) > 0) {
		len++;
		/* step past the byte get_utf8_char() stopped on */
		p++;
	}
	return len;
}
108 int utf16_char_to_utf8(unichar c, char *outbuf)
116 } else if (c < 0x800) {
119 } else if (c < 0x10000) {
122 } else if (c < 0x200000) {
125 } else if (c < 0x4000000) {
134 for (i = len - 1; i > 0; --i) {
135 outbuf[i] = (c & 0x3f) | 0x80;
138 outbuf[0] = c | first;
144 void utf8_to_utf16(const char *str, unichar *out)
146 const unsigned char *p = (const unsigned char *) str;
147 int i, result, mask, len;
151 UTF8_COMPUTE(*p, mask, len);
155 UTF8_GET(result, p, i, mask, len);
166 void utf16_to_utf8(const unichar *str, char *out)
170 while (*str != '\0') {
171 len = utf16_char_to_utf8(*str, out);