-/* Encodes the string `bin' of length `bin_len', whose encoding is
-   `bin_encoding', into UTF-8 and stores the result into the buffer
-   `utf8' of size `utf8_size'.  Returns the length of the UTF-8 encoded
-   string, or zero (0) on error: NULL or empty input, truncated input
-   character, unsupported encoding, character value above 0x7fffffff,
-   or too small output buffer.  If `utf8' is NULL nothing is stored and
-   only the length the encoding would take is returned.
-
-   Input that already is valid UTF-8 and fits into `utf8' is copied as
-   is, regardless of `bin_encoding'.  This shortcut is not taken when
-   `utf8' is NULL, so the length returned for such input may be larger
-   than what the copy would produce.
-
-   With SILC_STRING_LANGUAGE the input is converted from the character
-   set of the current locale with iconv, when that is compiled in and
-   an output buffer was given; otherwise the input is handled as
-   SILC_STRING_ASCII.  The output is not NUL-terminated. */
-
-SilcUInt32 silc_utf8_encode(const unsigned char *bin, SilcUInt32 bin_len,
-                            SilcStringEncoding bin_encoding,
-                            unsigned char *utf8, SilcUInt32 utf8_size)
-{
-  /* Marker bits of the first byte, indexed by UTF-8 sequence length */
-  static const unsigned char lead_mark[7] =
-    { 0x00, 0x00, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc };
-  SilcUInt32 enclen = 0, i, k, clen, shift, charval = 0;
-
-  if (!bin || !bin_len)
-    return 0;
-
-  /* Already UTF-8 and fits: copy verbatim.  The `utf8' check keeps a
-     pure length query from copying into a NULL buffer. */
-  if (utf8 && bin_len <= utf8_size && silc_utf8_valid(bin, bin_len)) {
-    memcpy(utf8, bin, bin_len);
-    return bin_len;
-  }
-
-  if (bin_encoding == SILC_STRING_LANGUAGE) {
-#if defined(HAVE_ICONV) && defined(HAVE_NL_LANGINFO) && defined(CODESET)
-    char *fromconv, *icp, *ocp;
-    iconv_t icd;
-    size_t inlen, outlen;
-
-    /* Note: this switches the process LC_CTYPE locale to the
-       environment's setting. */
-    setlocale(LC_CTYPE, "");
-    fromconv = nl_langinfo(CODESET);
-    if (fromconv && strlen(fromconv)) {
-      icd = iconv_open("UTF-8", fromconv);
-      icp = (char *)bin;        /* iconv takes a non-const input pointer */
-      ocp = (char *)utf8;
-      inlen = bin_len;
-      outlen = utf8_size;
-      if (icp && ocp && icd != (iconv_t)-1) {
-        if (iconv(icd, &icp, &inlen, &ocp, &outlen) != (size_t)-1) {
-          /* `outlen' is the space left unused in the output buffer */
-          utf8_size -= outlen;
-          iconv_close(icd);
-          return utf8_size;
-        }
-      }
-      if (icd != (iconv_t)-1)
-        iconv_close(icd);
-    }
-#endif
-
-    /* Fallback to 8-bit ASCII */
-    bin_encoding = SILC_STRING_ASCII;
-  }
-
-  for (i = 0; i < bin_len; i++) {
-    /* Fetch the next character value; multibyte input encodings also
-       advance `i' past the extra bytes they consume. */
-    switch (bin_encoding) {
-    case SILC_STRING_ASCII:
-      charval = bin[i];
-      break;
-    case SILC_STRING_ASCII_ESC:
-      SILC_NOT_IMPLEMENTED("SILC_STRING_ASCII_ESC");
-      return 0;
-    case SILC_STRING_BMP:
-      if (i + 1 >= bin_len)
-        return 0;
-      SILC_GET16_MSB(charval, bin + i);
-      i += 1;
-      break;
-    case SILC_STRING_BMP_LSB:
-      if (i + 1 >= bin_len)
-        return 0;
-      SILC_GET16_LSB(charval, bin + i);
-      i += 1;
-      break;
-    case SILC_STRING_UNIVERSAL:
-      if (i + 3 >= bin_len)
-        return 0;
-      SILC_GET32_MSB(charval, bin + i);
-      i += 3;
-      break;
-    case SILC_STRING_UNIVERSAL_LSB:
-      if (i + 3 >= bin_len)
-        return 0;
-      SILC_GET32_LSB(charval, bin + i);
-      i += 3;
-      break;
-    default:
-      return 0;
-    }
-
-    /* Number of UTF-8 bytes this character takes */
-    if (charval < 0x80)
-      clen = 1;
-    else if (charval < 0x800)
-      clen = 2;
-    else if (charval < 0x10000)
-      clen = 3;
-    else if (charval < 0x200000)
-      clen = 4;
-    else if (charval < 0x4000000)
-      clen = 5;
-    else if (charval <= 0x7fffffff)
-      clen = 6;
-    else
-      return 0;                 /* Does not fit into six bytes */
-
-    if (utf8) {
-      /* `enclen' never exceeds `utf8_size' here, so the subtraction
-         cannot wrap. */
-      if (clen > utf8_size - enclen)
-        return 0;
-
-      /* First byte: length marker plus the topmost payload bits.  The
-         range checks above guarantee those bits fit below the marker. */
-      shift = 6 * (clen - 1);
-      utf8[enclen] = (unsigned char)(lead_mark[clen] | (charval >> shift));
-
-      /* Remaining bytes: 10xxxxxx, six payload bits each */
-      for (k = 1; k < clen; k++) {
-        shift -= 6;
-        utf8[enclen + k] =
-          (unsigned char)(((charval >> shift) & 0x3f) | 0x80);
-      }
-    }
-    enclen += clen;
-  }
-
-  return enclen;
-}
-
-/* Decodes the UTF-8 encoded string `utf8' of length `utf8_len' into
-   the encoding `bin_encoding' and stores the result into the buffer
-   `bin' of size `bin_size'.  Returns the length of the decoded data in
-   bytes, or zero (0) on error: NULL or empty input, malformed,
-   truncated or overlong UTF-8 sequence, unsupported encoding, or too
-   small output buffer.  If `bin' is NULL nothing is stored and only
-   the length the decoded data would take is returned.
-
-   With SILC_STRING_LANGUAGE the input is converted to the character
-   set of the current locale with iconv, when that is compiled in and
-   an output buffer was given; otherwise the output is produced as
-   SILC_STRING_ASCII.
-
-   For SILC_STRING_ASCII each character is stored as one byte, so
-   values above 0xff are truncated.  The output is not
-   NUL-terminated. */
-
-SilcUInt32 silc_utf8_decode(const unsigned char *utf8, SilcUInt32 utf8_len,
-                            SilcStringEncoding bin_encoding,
-                            unsigned char *bin, SilcUInt32 bin_size)
-{
-  SilcUInt32 enclen = 0, i, k, seqlen, minval, charval;
-  unsigned char lead;
-
-  if (!utf8 || !utf8_len)
-    return 0;
-
-  if (bin_encoding == SILC_STRING_LANGUAGE) {
-#if defined(HAVE_ICONV) && defined(HAVE_NL_LANGINFO) && defined(CODESET)
-    char *toconv, *icp, *ocp;
-    iconv_t icd;
-    size_t inlen, outlen;
-
-    /* Note: this switches the process LC_CTYPE locale to the
-       environment's setting. */
-    setlocale(LC_CTYPE, "");
-    toconv = nl_langinfo(CODESET);
-    if (toconv && strlen(toconv)) {
-      icd = iconv_open(toconv, "UTF-8");
-      icp = (char *)utf8;       /* iconv takes a non-const input pointer */
-      ocp = (char *)bin;
-      inlen = utf8_len;
-      outlen = bin_size;
-      if (icp && ocp && icd != (iconv_t)-1) {
-        if (iconv(icd, &icp, &inlen, &ocp, &outlen) != (size_t)-1) {
-          /* `outlen' is the space left unused in the output buffer */
-          bin_size -= outlen;
-          iconv_close(icd);
-          return bin_size;
-        }
-      }
-      if (icd != (iconv_t)-1)
-        iconv_close(icd);
-    }
-#endif
-
-    /* Fallback to 8-bit ASCII */
-    bin_encoding = SILC_STRING_ASCII;
-  }
-
-  for (i = 0; i < utf8_len; i++) {
-    /* The first byte tells the sequence length and carries the topmost
-       payload bits.  `minval' is the smallest value that legitimately
-       needs that many bytes; anything below it is an overlong form. */
-    lead = utf8[i];
-    if ((lead & 0x80) == 0x00) {
-      seqlen = 1;
-      charval = lead & 0x7f;
-      minval = 0;
-    } else if ((lead & 0xe0) == 0xc0) {
-      seqlen = 2;
-      charval = lead & 0x1f;
-      minval = 0x80;
-    } else if ((lead & 0xf0) == 0xe0) {
-      seqlen = 3;
-      charval = lead & 0x0f;
-      minval = 0x800;
-    } else if ((lead & 0xf8) == 0xf0) {
-      seqlen = 4;
-      charval = lead & 0x07;
-      minval = 0x10000;
-    } else if ((lead & 0xfc) == 0xf8) {
-      seqlen = 5;
-      charval = lead & 0x03;
-      minval = 0x200000;
-    } else if ((lead & 0xfe) == 0xfc) {
-      seqlen = 6;
-      charval = lead & 0x01;
-      minval = 0x4000000;
-    } else {
-      /* Stray continuation byte or invalid first byte */
-      return 0;
-    }
-
-    /* The whole sequence must be present in the input */
-    if (seqlen > utf8_len - i)
-      return 0;
-
-    /* Every following byte must be 10xxxxxx and adds six bits */
-    for (k = 1; k < seqlen; k++) {
-      if ((utf8[i + k] & 0xc0) != 0x80)
-        return 0;
-      charval = (charval << 6) | (utf8[i + k] & 0x3f);
-    }
-
-    if (charval < minval)
-      return 0;
-
-    /* Skip the continuation bytes; the loop increment skips the first */
-    i += seqlen - 1;
-
-    /* Store the character in the requested encoding, checking that it
-       still fits whenever an output buffer was given. */
-    switch (bin_encoding) {
-    case SILC_STRING_ASCII:
-      if (bin) {
-        if (enclen + 1 > bin_size)
-          return 0;
-
-        bin[enclen] = (unsigned char)charval;
-      }
-      enclen++;
-      break;
-    case SILC_STRING_ASCII_ESC:
-      SILC_NOT_IMPLEMENTED("SILC_STRING_ASCII_ESC");
-      return 0;
-    case SILC_STRING_BMP:
-      if (bin) {
-        if (enclen + 2 > bin_size)
-          return 0;
-
-        SILC_PUT16_MSB(charval, bin + enclen);
-      }
-      enclen += 2;
-      break;
-    case SILC_STRING_BMP_LSB:
-      if (bin) {
-        if (enclen + 2 > bin_size)
-          return 0;
-
-        SILC_PUT16_LSB(charval, bin + enclen);
-      }
-      enclen += 2;
-      break;
-    case SILC_STRING_UNIVERSAL:
-      if (bin) {
-        if (enclen + 4 > bin_size)
-          return 0;
-
-        SILC_PUT32_MSB(charval, bin + enclen);
-      }
-      enclen += 4;
-      break;
-    case SILC_STRING_UNIVERSAL_LSB:
-      if (bin) {
-        if (enclen + 4 > bin_size)
-          return 0;
-
-        SILC_PUT32_LSB(charval, bin + enclen);
-      }
-      enclen += 4;
-      break;
-    default:
-      return 0;
-    }
-  }
-
-  return enclen;
-}
-
-/* Returns the number of bytes silc_utf8_encode reports for the string
-   `bin' of length `bin_len' and encoding `bin_encoding' when it is
-   called without an output buffer.  Returns zero (0) if
-   silc_utf8_encode fails. */
-
-SilcUInt32 silc_utf8_encoded_len(const unsigned char *bin, SilcUInt32 bin_len,
-                                 SilcStringEncoding bin_encoding)
-{
-  SilcUInt32 needed;
-
-  /* No output buffer: the encoder only counts the bytes */
-  needed = silc_utf8_encode(bin, bin_len, bin_encoding, NULL, 0);
-
-  return needed;
-}
-
-/* Returns the number of bytes silc_utf8_decode reports for decoding
-   the UTF-8 string `bin' of length `bin_len' into the encoding
-   `bin_encoding' when it is called without an output buffer.  Note
-   that here `bin' is the UTF-8 encoded input.  Returns zero (0) if
-   silc_utf8_decode fails. */
-
-SilcUInt32 silc_utf8_decoded_len(const unsigned char *bin, SilcUInt32 bin_len,
-                                 SilcStringEncoding bin_encoding)
-{
-  SilcUInt32 needed;
-
-  /* No output buffer: the decoder only counts the bytes */
-  needed = silc_utf8_decode(bin, bin_len, bin_encoding, NULL, 0);
-
-  return needed;
-}
-
-/* Checks whether the string `utf8' of length `utf8_len' is a valid
-   UTF-8 encoded string.  The check is a trial run of silc_utf8_decode
-   without an output buffer: a non-zero result means the string was
-   accepted.  Since silc_utf8_decode returns zero for NULL or empty
-   input, such input is reported as not valid. */
-
-bool silc_utf8_valid(const unsigned char *utf8, SilcUInt32 utf8_len)
-{
-  SilcUInt32 decoded;
-
-  /* Encoding is given as literal 0; presumably that is
-     SILC_STRING_ASCII, verify against the SilcStringEncoding
-     definition. */
-  decoded = silc_utf8_decode(utf8, utf8_len, 0, NULL, 0);
-
-  return decoded != 0;
-}
-