updates.
[crypto.git] / apps / irssi / src / fe-text / utf8.c
1 /* utf8.c - Operations on UTF-8 strings.
2  *
3  * Copyright (C) 2002 Timo Sirainen
4  *
5  * Based on GLib code by
6  *
7  * Copyright (C) 1999 Tom Tromey
8  * Copyright (C) 2000 Red Hat, Inc.
9  *
10  * This library is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2 of the License, or (at your option) any later version.
14  *
15  * This library is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with this library; if not, write to the
22  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23  * Boston, MA 02111-1307, USA.
24  */
25
26 #include "module.h"
27
/*
 * UTF8_COMPUTE(Char, Mask, Len)
 *
 * Classify Char as the lead byte of a UTF-8 sequence.
 *
 *   Len  - set to the total length of the sequence in bytes (1..6),
 *          or to -1 when Char cannot start a sequence.
 *   Mask - set to the bit mask selecting the payload bits of the lead
 *          byte; left untouched when Len is set to -1.
 *
 * Char is evaluated several times, so it must not have side effects.
 * Mask and Len must be modifiable lvalues.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can be used safely in an unbraced if/else; Char is
 * parenthesized so that expression arguments keep their meaning.
 */
#define UTF8_COMPUTE(Char, Mask, Len)                                         \
  do                                                                          \
    {                                                                         \
      if ((Char) < 128)                                                       \
        {                                                                     \
          (Len) = 1;                                                          \
          (Mask) = 0x7f;                                                      \
        }                                                                     \
      else if (((Char) & 0xe0) == 0xc0)                                       \
        {                                                                     \
          (Len) = 2;                                                          \
          (Mask) = 0x1f;                                                      \
        }                                                                     \
      else if (((Char) & 0xf0) == 0xe0)                                       \
        {                                                                     \
          (Len) = 3;                                                          \
          (Mask) = 0x0f;                                                      \
        }                                                                     \
      else if (((Char) & 0xf8) == 0xf0)                                       \
        {                                                                     \
          (Len) = 4;                                                          \
          (Mask) = 0x07;                                                      \
        }                                                                     \
      else if (((Char) & 0xfc) == 0xf8)                                       \
        {                                                                     \
          (Len) = 5;                                                          \
          (Mask) = 0x03;                                                      \
        }                                                                     \
      else if (((Char) & 0xfe) == 0xfc)                                       \
        {                                                                     \
          (Len) = 6;                                                          \
          (Mask) = 0x01;                                                      \
        }                                                                     \
      else                                                                    \
        {                                                                     \
          (Len) = -1;                                                         \
        }                                                                     \
    }                                                                         \
  while (0)
61
/*
 * UTF8_GET(Result, Chars, Count, Mask, Len)
 *
 * Decode the Len-byte UTF-8 sequence starting at Chars[0].
 *
 *   Result - receives the decoded value, or -1 if one of the bytes
 *            Chars[1] .. Chars[Len-1] is not a continuation byte
 *            (10xxxxxx).
 *   Count  - scratch loop index; after the macro it holds Len on success
 *            or the index of the offending byte on failure.
 *   Mask   - payload mask for the lead byte (as produced by UTF8_COMPUTE).
 *
 * The macro reads up to Len bytes from Chars; the caller must make sure
 * they are readable.  It is wrapped in do { } while (0) so that both the
 * initial assignment and the loop are guarded together when the macro is
 * used as the body of an unbraced if/else.
 */
#define UTF8_GET(Result, Chars, Count, Mask, Len)                             \
  do                                                                          \
    {                                                                         \
      (Result) = (Chars)[0] & (Mask);                                         \
      for ((Count) = 1; (Count) < (Len); ++(Count))                           \
        {                                                                     \
          if (((Chars)[(Count)] & 0xc0) != 0x80)                              \
            {                                                                 \
              (Result) = -1;                                                  \
              break;                                                          \
            }                                                                 \
          (Result) <<= 6;                                                     \
          (Result) |= ((Chars)[(Count)] & 0x3f);                              \
        }                                                                     \
    }                                                                         \
  while (0)
74
75 unichar get_utf8_char(const unsigned char **ptr, int len)
76 {
77         int i, result, mask, chrlen;
78
79         mask = 0;
80         UTF8_COMPUTE(**ptr, mask, chrlen);
81         if (chrlen == -1)
82                 return (unichar) -2;
83
84         if (chrlen > len)
85                 return (unichar) -1;
86
87         UTF8_GET(result, *ptr, i, mask, chrlen);
88         if (result == -1)
89                 return (unichar) -2;
90
91         *ptr += chrlen-1;
92         return result;
93 }
94
/*
 * Count the characters in the NUL-terminated UTF-8 string `str`.
 *
 * Walks the string with get_utf8_char(), adding one per call, and stops at
 * the terminating NUL or as soon as get_utf8_char() returns a value that
 * is not greater than zero.
 *
 * NOTE(review): if unichar is an unsigned type, the (unichar) -1 / -2
 * error codes compare greater than zero, so an undecodable byte would be
 * counted as one character rather than ending the scan - confirm against
 * the unichar typedef.
 */
int strlen_utf8(const char *str)
{
        const unsigned char *cursor;
        int count = 0;

        for (cursor = (const unsigned char *) str; *cursor != '\0'; cursor++) {
                /* get_utf8_char() leaves cursor on the last byte of the
                   sequence; the loop increment steps past it. */
                if (!(get_utf8_char(&cursor, 6) > 0)) {
                        break;
                }
                count++;
        }

        return count;
}
107
108 int utf16_char_to_utf8(unichar c, char *outbuf)
109 {
110         int len, i, first;
111
112         len = 0;
113         if (c < 0x80) {
114                 first = 0;
115                 len = 1;
116         } else if (c < 0x800) {
117                 first = 0xc0;
118                 len = 2;
119         } else if (c < 0x10000) {
120                 first = 0xe0;
121                 len = 3;
122         } else if (c < 0x200000) {
123                 first = 0xf0;
124                 len = 4;
125         } else if (c < 0x4000000) {
126                 first = 0xf8;
127                 len = 5;
128         } else {
129                 first = 0xfc;
130                 len = 6;
131         }
132
133         if (outbuf) {
134                 for (i = len - 1; i > 0; --i) {
135                         outbuf[i] = (c & 0x3f) | 0x80;
136                         c >>= 6;
137                 }
138                 outbuf[0] = c | first;
139         }
140
141         return len;
142 }
143
144 void utf8_to_utf16(const char *str, unichar *out)
145 {
146         const unsigned char *p = (const unsigned char *) str;
147         int i, result, mask, len;
148
149         while (*p != '\0') {
150                 mask = 0;
151                 UTF8_COMPUTE(*p, mask, len);
152                 if (len == -1)
153                         break;
154
155                 UTF8_GET(result, p, i, mask, len);
156                 if (result == -1)
157                         break;
158
159                 p += len;
160                 *out++ = result;
161         }
162
163         *out = '\0';
164 }
165
166 void utf16_to_utf8(const unichar *str, char *out)
167 {
168         int len;
169
170         while (*str != '\0') {
171                 len = utf16_char_to_utf8(*str, out);
172                 out += len;
173
174                 str++;
175         }
176         *out = '\0';
177 }