Imported new UTF-8 routines from my 1.1 tree.
author Pekka Riikonen <priikone@silcnet.org>
Sun, 27 Mar 2005 16:12:09 +0000 (16:12 +0000)
committer Pekka Riikonen <priikone@silcnet.org>
Sun, 27 Mar 2005 16:12:09 +0000 (16:12 +0000)
Split UTF-8 routines into their own files.
Implemented stringprep API.

19 files changed:
CHANGES
TODO
apps/irssi/src/silc/core/client_ops.c
apps/irssi/src/silc/core/silc-channels.c
apps/irssi/src/silc/core/silc-cmdqueue.c
apps/irssi/src/silc/core/silc-servers.c
apps/silcd/command.c
apps/silcd/serverconfig.c
config.guess
config.sub
lib/silcutil/Makefile.am
lib/silcutil/silclog.c
lib/silcutil/silclog.h
lib/silcutil/silcstringprep.c [new file with mode: 0644]
lib/silcutil/silcstringprep.h [new file with mode: 0644]
lib/silcutil/silcstrutil.c
lib/silcutil/silcstrutil.h
lib/silcutil/silcutf8.c [new file with mode: 0644]
lib/silcutil/silcutf8.h [new file with mode: 0644]

diff --git a/CHANGES b/CHANGES
index 871e47f7338925dac581cca697c083ec25f6b738..ecfaf6728465a6ed26f48db656fe0c64b4048818 100644 (file)
--- a/CHANGES
+++ b/CHANGES
@@ -1,3 +1,18 @@
+Sun Mar 27 19:02:48 EEST 2005  Pekka Riikonen <priikone@silcnet.org>
+
+       * Imported new silc_utf8_[encode|decode] routines from
+         my internal SILC 1.1 source tree.  New encodings added:
+         SILC_STRING_LOCALE (SILC_STRING_LANGUAGE is deprecated),
+         SILC_STRING_UTF8, SILC_STRING_PRINTABLE, SILC_STRING_VISIBLE,
+         SILC_STRING_TELETEX, SILC_STRING_NUMERICAL and
+         SILC_STRING_LDAP_DN.
+
+       * Splitted UTF-8 routines into lib/silcutil/silcutf8.[ch].
+
+       * Implemented stringprep (RFC 3454) API.  Internally we use
+         GNU Libidn's stringprep, for now, so it is required now to
+         compile SILC sources.  Added lib/silcutil/silcstringprep.[ch].
+
 Wed Mar 23 11:20:33 CET 2005  Jochen Eisinger <jochen@penguin-breeder.org>
 
        * If the passphrases entered do not match while generating a new key,
diff --git a/TODO b/TODO
index 47c30b150677768493802c5b3c94f4df6174b7f0..8a86a6fe814e29d627eff6cd92076dbd3167d079 100644 (file)
--- a/TODO
+++ b/TODO
@@ -25,7 +25,10 @@ TODO for SILC Server 1.0
 
  o Check for valid ban strings.  Check ban string notifying.
 
- o stringprep (RFC3454) for UTF-8 strings + all other UTF-8 string things.
+ o Basic UTF-8 stringprep profile that makes sure UTF-8 strings are
+   as defined in spec-08 section 3.13.
+
+ o Start using the stringprep for identifier strings.
 
  o Check that founder key is distributed ok during backup resuming.
 
@@ -35,8 +38,15 @@ TODO for SILC Server 1.0
 TODO/bugs In SILC Libraries
 ===========================
 
- o Add the compilation flags for Toolkit apps to check, ala:
-   _SILC_TOOLKIT_ENABLE_DEBUG, etc.
+ o Add following defines in silcincludes.h and silcclient.h for 
+   third-party software:
+
+       __SILC_LIBSILC_HAVE_PTHREAD
+       __SILC_LIBSILC_HAVE_SIM
+        __SILC_LIBSILC_ENABLE_DEBUG
+       __SILC_LIBSILCCLIENT_HAVE_PTHREAD
+       __SILC_LIBSILCCLIENT_HAVE_SIM
+        __SILC_LIBSILCCLIENT_ENABLE_DEBUG
 
  o Test cases for all payload encoding and decoding routins in lib/silccore/
 
index 42bd842608770eb3a538171ca00846da9e8d9ec2..927b706508ce61d24806dfe48d2327037d64af6a 100644 (file)
@@ -4,7 +4,7 @@
 
   Author: Pekka Riikonen <priikone@poseidon.pspt.fi>
 
-  Copyright (C) 2001 - 2003 Pekka Riikonen
+  Copyright (C) 2001 - 2004 Pekka Riikonen
 
   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -113,7 +113,7 @@ char * silc_convert_utf8_string(const char *str)
   }
 
   if (!silc_term_utf8() && silc_utf8_valid(str, message_len))
-    silc_utf8_decode(str, message_len, SILC_STRING_LANGUAGE,
+    silc_utf8_decode(str, message_len, SILC_STRING_LOCALE,
                      message, message_len);
   else
     strcpy(message, str);
@@ -459,7 +459,7 @@ void silc_channel_message(SilcClient client, SilcClientConnection conn,
         dm = silc_calloc(message_len + 1, sizeof(*dm));
         cp = dm;
       }
-      silc_utf8_decode(message, message_len, SILC_STRING_LANGUAGE,
+      silc_utf8_decode(message, message_len, SILC_STRING_LOCALE,
                        cp, message_len);
       if (flags & SILC_MESSAGE_FLAG_SIGNED)
         signal_emit("message silc signed_action", 6, server, cp, nick->nick,
@@ -485,7 +485,7 @@ void silc_channel_message(SilcClient client, SilcClientConnection conn,
         dm = silc_calloc(message_len + 1, sizeof(*dm));
         cp = dm;
       }
-      silc_utf8_decode(message, message_len, SILC_STRING_LANGUAGE,
+      silc_utf8_decode(message, message_len, SILC_STRING_LOCALE,
                        cp, message_len);
       if (flags & SILC_MESSAGE_FLAG_SIGNED)
        signal_emit("message silc signed_notice", 6, server, cp, nick->nick,
@@ -513,7 +513,7 @@ void silc_channel_message(SilcClient client, SilcClientConnection conn,
        cp = dm;
       }
 
-      silc_utf8_decode(message, message_len, SILC_STRING_LANGUAGE,
+      silc_utf8_decode(message, message_len, SILC_STRING_LOCALE,
                       cp, message_len);
       if (flags & SILC_MESSAGE_FLAG_SIGNED)
         signal_emit("message signed_public", 6, server, cp,
@@ -596,7 +596,7 @@ void silc_private_message(SilcClient client, SilcClientConnection conn,
         dm = silc_calloc(message_len + 1, sizeof(*dm));
         cp = dm;
       }
-      silc_utf8_decode(message, message_len, SILC_STRING_LANGUAGE,
+      silc_utf8_decode(message, message_len, SILC_STRING_LOCALE,
                        cp, message_len);
       if (flags & SILC_MESSAGE_FLAG_SIGNED)
         signal_emit("message silc signed_private_action", 6, server, cp, 
@@ -628,7 +628,7 @@ void silc_private_message(SilcClient client, SilcClientConnection conn,
         dm = silc_calloc(message_len + 1, sizeof(*dm));
         cp = dm;
       }
-      silc_utf8_decode(message, message_len, SILC_STRING_LANGUAGE,
+      silc_utf8_decode(message, message_len, SILC_STRING_LOCALE,
                        cp, message_len);
       if (flags & SILC_MESSAGE_FLAG_SIGNED)
         signal_emit("message silc signed_private_notice", 6, server, cp, 
@@ -662,7 +662,7 @@ void silc_private_message(SilcClient client, SilcClientConnection conn,
         cp = dm;
       }
 
-      silc_utf8_decode(message, message_len, SILC_STRING_LANGUAGE,
+      silc_utf8_decode(message, message_len, SILC_STRING_LOCALE,
                     cp, message_len);
       if (flags & SILC_MESSAGE_FLAG_SIGNED)
         signal_emit("message signed_private", 5, server, cp,
@@ -1936,7 +1936,7 @@ silc_command_reply(SilcClient client, SilcClientConnection conn,
            cp = dm;
          }
 
-         silc_utf8_decode(topic, strlen(topic), SILC_STRING_LANGUAGE,
+         silc_utf8_decode(topic, strlen(topic), SILC_STRING_LOCALE,
                           cp, strlen(topic));
          topic = cp;
        }
@@ -2043,7 +2043,7 @@ silc_command_reply(SilcClient client, SilcClientConnection conn,
          cp = dm;
        }
 
-       silc_utf8_decode(topic, strlen(topic), SILC_STRING_LANGUAGE,
+       silc_utf8_decode(topic, strlen(topic), SILC_STRING_LOCALE,
                         cp, strlen(topic));
        topic = cp;
       }
@@ -2295,7 +2295,7 @@ silc_command_reply(SilcClient client, SilcClientConnection conn,
          cp = dm;
        }
 
-       silc_utf8_decode(topic, strlen(topic), SILC_STRING_LANGUAGE,
+       silc_utf8_decode(topic, strlen(topic), SILC_STRING_LOCALE,
                         cp, strlen(topic));
        topic = cp;
       }
index c6212d396845e7f00eea51ef29b49a2570a7851a..9c8028b56393a761eeb467bd84688cecc5b3c090 100644 (file)
@@ -1,7 +1,7 @@
 /*
   silc-channels.c : irssi
 
-  Copyright (C) 2000 - 2001 Timo Sirainen
+  Copyright (C) 2000 - 2001, 2004 Timo Sirainen
                             Pekka Riikonen <priikone@poseidon.pspt.fi>
 
   This program is free software; you can redistribute it and/or modify
@@ -249,10 +249,10 @@ static void command_action(const char *data, SILC_SERVER_REC *server,
 
   if (!silc_term_utf8()) {
     int len = silc_utf8_encoded_len(msg, strlen(msg),
-                                   SILC_STRING_LANGUAGE);
+                                   SILC_STRING_LOCALE);
     message = silc_calloc(len + 1, sizeof(*message));
     g_return_if_fail(message != NULL);
-    silc_utf8_encode(msg, strlen(msg), SILC_STRING_LANGUAGE,
+    silc_utf8_encode(msg, strlen(msg), SILC_STRING_LOCALE,
                     message, len);
   }
 
@@ -353,10 +353,10 @@ static void command_notice(const char *data, SILC_SERVER_REC *server,
 
   if (!silc_term_utf8()) {
     int len = silc_utf8_encoded_len(msg, strlen(msg),
-                                   SILC_STRING_LANGUAGE);
+                                   SILC_STRING_LOCALE);
     message = silc_calloc(len + 1, sizeof(*message));
     g_return_if_fail(message != NULL);
-    silc_utf8_encode(msg, strlen(msg), SILC_STRING_LANGUAGE,
+    silc_utf8_encode(msg, strlen(msg), SILC_STRING_LOCALE,
                     message, len);
   }
 
index eab72455746ee197a9f318e6634482e57a313d6f..21fe833b0a6d02186b07179ec116c6e4c55711fd 100644 (file)
@@ -38,7 +38,7 @@ void silc_queue_flush(SilcClientConnection conn)
   if (list != NULL) {
     GSList *tmp;
 
-    for (tmp = g_slist_next(list); tmp != NULL; tmp = g_slist_next(tmp)) 
+    for (tmp = g_slist_next(list); tmp != NULL; tmp = g_slist_next(tmp))
       silc_client_command_call(silc_client, conn, tmp->data);
 
     g_slist_foreach(list, (GFunc) cmd_list_remove_cb, NULL);
@@ -58,7 +58,7 @@ void silc_queue_enable(SilcClientConnection conn)
 void silc_queue_disable(SilcClientConnection conn)
 {
   GSList *list = g_hash_table_lookup(cmd_queues, conn);
+
    if (list != NULL) {
      silc_queue_flush(conn);
      g_slist_free(list);
@@ -101,7 +101,7 @@ bool silc_queue_command_call(SilcClient client,
   va_end(ap);
 
   if (!silc_term_utf8()) {
-    int len = silc_utf8_encoded_len(cmd, strlen(cmd), SILC_STRING_LANGUAGE);
+    int len = silc_utf8_encoded_len(cmd, strlen(cmd), SILC_STRING_LOCALE);
     char *message = silc_calloc(len + 1, sizeof(*cmd));
     if (message == NULL) {
 
@@ -113,8 +113,7 @@ bool silc_queue_command_call(SilcClient client,
 
       return FALSE;
     }
-    silc_utf8_encode(cmd, strlen(cmd), SILC_STRING_LANGUAGE,
-                    message, len);
+    silc_utf8_encode(cmd, strlen(cmd), SILC_STRING_LOCALE, message, len);
 
     if (need_free)
       g_free(cmd);
index d9fdd00495459e7ec0a7062e906fc0eece28feba..2ba621c682a03a6b78aa9619fa7971f0625c8c51 100644 (file)
@@ -1,7 +1,7 @@
 /*
   silc-server.c : irssi
 
-  Copyright (C) 2000 - 2003 Timo Sirainen
+  Copyright (C) 2000 - 2004 Timo Sirainen
                             Pekka Riikonen <priikone@silcnet.org>
 
   This program is free software; you can redistribute it and/or modify
@@ -245,10 +245,10 @@ static void send_message(SILC_SERVER_REC *server, char *target,
   g_return_if_fail(msg != NULL);
 
   if (!silc_term_utf8()) {
-    len = silc_utf8_encoded_len(msg, strlen(msg), SILC_STRING_LANGUAGE);
+    len = silc_utf8_encoded_len(msg, strlen(msg), SILC_STRING_LOCALE);
     message = silc_calloc(len + 1, sizeof(*message));
     g_return_if_fail(message != NULL);
-    silc_utf8_encode(msg, strlen(msg), SILC_STRING_LANGUAGE, message, len);
+    silc_utf8_encode(msg, strlen(msg), SILC_STRING_LOCALE, message, len);
   }
 
   if (target_type == SEND_TARGET_CHANNEL)
@@ -541,10 +541,10 @@ static void command_smsg(const char *data, SILC_SERVER_REC *server,
     int len, result;
 
     if (!silc_term_utf8()) {
-      len = silc_utf8_encoded_len(msg, strlen(msg), SILC_STRING_LANGUAGE);
+      len = silc_utf8_encoded_len(msg, strlen(msg), SILC_STRING_LOCALE);
       message = silc_calloc(len + 1, sizeof(*message));
       g_return_if_fail(message != NULL);
-      silc_utf8_encode(msg, strlen(msg), SILC_STRING_LANGUAGE, message, len);
+      silc_utf8_encode(msg, strlen(msg), SILC_STRING_LOCALE, message, len);
     }
 
     if (target_type == SEND_TARGET_CHANNEL)
index 8e2acc19374d672af0a1489482c4a9007ac6566e..d0f4d3e811565d6feba99ed6b38ea5d623836b87 100644 (file)
@@ -762,6 +762,12 @@ silc_server_command_list_send_reply(SilcServerCommandContext cmd,
       valid_rcount++;
   }
 
+  if (!lch_count && !gch_count) {
+    silc_server_command_send_status_reply(cmd, SILC_COMMAND_LIST,
+                                         SILC_STATUS_OK, 0);
+    return;
+  }
+
   status = SILC_STATUS_OK;
   if ((lch_count + gch_count) > 1)
     status = SILC_STATUS_LIST_START;
index ae67475b57ae78d6376e148c50519563f8e49074..5eeb6229bc6960007cc385e13c26e78776cf8e19 100644 (file)
@@ -4,7 +4,7 @@
 
   Author: Giovanni Giacobbi <giovanni@giacobbi.net>
 
-  Copyright (C) 1997 - 2003 Pekka Riikonen
+  Copyright (C) 1997 - 2004 Pekka Riikonen
 
   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -127,9 +127,9 @@ static bool my_parse_authdata(SilcAuthMethod auth_meth, const char *p,
     if (auth_data && auth_data_len) {
       if (!silc_utf8_valid(p, strlen(p))) {
        *auth_data_len = silc_utf8_encoded_len(p, strlen(p),
-                                              SILC_STRING_LANGUAGE);
+                                              SILC_STRING_LOCALE);
        *auth_data = silc_calloc(*auth_data_len, sizeof(unsigned char));
-       silc_utf8_encode(p, strlen(p), SILC_STRING_LANGUAGE, *auth_data,
+       silc_utf8_encode(p, strlen(p), SILC_STRING_LOCALE, *auth_data,
                         *auth_data_len);
       } else {
        *auth_data = (void *) strdup(p);
index f1657bbc46471af91d3bd85a01b289b939ae851b..6641456623484695a632468c7a9bd0dc700b8656 100755 (executable)
@@ -1,9 +1,9 @@
 #! /bin/sh
 # Attempt to guess a canonical system name.
 #   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
-#   2000, 2001, 2002 Free Software Foundation, Inc.
+#   2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
 
-timestamp='2002-09-03'
+timestamp='2004-07-19'
 
 # This file is free software; you can redistribute it and/or modify it
 # under the terms of the GNU General Public License as published by
@@ -53,7 +53,7 @@ version="\
 GNU config.guess ($timestamp)
 
 Originally written by Per Bothner.
-Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001
+Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004
 Free Software Foundation, Inc.
 
 This is free software; see the source for copying conditions.  There is NO
@@ -98,30 +98,32 @@ trap 'exit 1' 1 2 15
 # Historically, `CC_FOR_BUILD' used to be named `HOST_CC'. We still
 # use `HOST_CC' if defined, but it is deprecated.
 
-# This shell variable is my proudest work .. or something. --bje
+# Portable tmp directory creation inspired by the Autoconf team.
 
-set_cc_for_build='tmpdir=${TMPDIR-/tmp}/config-guess-$$ ;
-(old=`umask` && umask 077 && mkdir $tmpdir && umask $old && unset old)
-   || (echo "$me: cannot create $tmpdir" >&2 && exit 1) ;
-dummy=$tmpdir/dummy ;
-files="$dummy.c $dummy.o $dummy.rel $dummy" ;
-trap '"'"'rm -f $files; rmdir $tmpdir; exit 1'"'"' 1 2 15 ;
+set_cc_for_build='
+trap "exitcode=\$?; (rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null) && exit \$exitcode" 0 ;
+trap "rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null; exit 1" 1 2 13 15 ;
+: ${TMPDIR=/tmp} ;
+ { tmp=`(umask 077 && mktemp -d -q "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } ||
+ { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir $tmp) ; } ||
+ { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir $tmp) && echo "Warning: creating insecure temp directory" >&2 ; } ||
+ { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } ;
+dummy=$tmp/dummy ;
+tmpfiles="$dummy.c $dummy.o $dummy.rel $dummy" ;
 case $CC_FOR_BUILD,$HOST_CC,$CC in
  ,,)    echo "int x;" > $dummy.c ;
        for c in cc gcc c89 c99 ; do
-         if ($c $dummy.c -c -o $dummy.o) >/dev/null 2>&1 ; then
+         if ($c -c -o $dummy.o $dummy.c) >/dev/null 2>&1 ; then
             CC_FOR_BUILD="$c"; break ;
          fi ;
        done ;
-       rm -f $files ;
        if test x"$CC_FOR_BUILD" = x ; then
          CC_FOR_BUILD=no_compiler_found ;
        fi
        ;;
  ,,*)   CC_FOR_BUILD=$CC ;;
  ,*,*)  CC_FOR_BUILD=$HOST_CC ;;
-esac ;
-unset files'
+esac ;'
 
 # This is needed to find uname on a Pyramid OSx when run in the BSD universe.
 # (ghazi@noc.rutgers.edu 1994-08-24)
@@ -178,21 +180,41 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
                ;;
        esac
        # The OS release
-       release=`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'`
+       # Debian GNU/NetBSD machines have a different userland, and
+       # thus, need a distinct triplet. However, they do not need
+       # kernel version information, so it can be replaced with a
+       # suitable tag, in the style of linux-gnu.
+       case "${UNAME_VERSION}" in
+           Debian*)
+               release='-gnu'
+               ;;
+           *)
+               release=`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'`
+               ;;
+       esac
        # Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM:
        # contains redundant information, the shorter form:
        # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used.
        echo "${machine}-${os}${release}"
        exit 0 ;;
+    amd64:OpenBSD:*:*)
+       echo x86_64-unknown-openbsd${UNAME_RELEASE}
+       exit 0 ;;
     amiga:OpenBSD:*:*)
        echo m68k-unknown-openbsd${UNAME_RELEASE}
        exit 0 ;;
     arc:OpenBSD:*:*)
        echo mipsel-unknown-openbsd${UNAME_RELEASE}
        exit 0 ;;
+    cats:OpenBSD:*:*)
+       echo arm-unknown-openbsd${UNAME_RELEASE}
+       exit 0 ;;
     hp300:OpenBSD:*:*)
        echo m68k-unknown-openbsd${UNAME_RELEASE}
        exit 0 ;;
+    luna88k:OpenBSD:*:*)
+       echo m88k-unknown-openbsd${UNAME_RELEASE}
+       exit 0 ;;
     mac68k:OpenBSD:*:*)
        echo m68k-unknown-openbsd${UNAME_RELEASE}
        exit 0 ;;
@@ -223,71 +245,67 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
     *:OpenBSD:*:*)
        echo ${UNAME_MACHINE}-unknown-openbsd${UNAME_RELEASE}
        exit 0 ;;
+    *:ekkoBSD:*:*)
+       echo ${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE}
+       exit 0 ;;
+    macppc:MirBSD:*:*)
+       echo powerppc-unknown-mirbsd${UNAME_RELEASE}
+       exit 0 ;;
+    *:MirBSD:*:*)
+       echo ${UNAME_MACHINE}-unknown-mirbsd${UNAME_RELEASE}
+       exit 0 ;;
     alpha:OSF1:*:*)
-       if test $UNAME_RELEASE = "V4.0"; then
+       case $UNAME_RELEASE in
+       *4.0)
                UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'`
-       fi
+               ;;
+       *5.*)
+               UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'`
+               ;;
+       esac
+       # According to Compaq, /usr/sbin/psrinfo has been available on
+       # OSF/1 and Tru64 systems produced since 1995.  I hope that
+       # covers most systems running today.  This code pipes the CPU
+       # types through head -n 1, so we only detect the type of CPU 0.
+       ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^  The alpha \(.*\) processor.*$/\1/p' | head -n 1`
+       case "$ALPHA_CPU_TYPE" in
+           "EV4 (21064)")
+               UNAME_MACHINE="alpha" ;;
+           "EV4.5 (21064)")
+               UNAME_MACHINE="alpha" ;;
+           "LCA4 (21066/21068)")
+               UNAME_MACHINE="alpha" ;;
+           "EV5 (21164)")
+               UNAME_MACHINE="alphaev5" ;;
+           "EV5.6 (21164A)")
+               UNAME_MACHINE="alphaev56" ;;
+           "EV5.6 (21164PC)")
+               UNAME_MACHINE="alphapca56" ;;
+           "EV5.7 (21164PC)")
+               UNAME_MACHINE="alphapca57" ;;
+           "EV6 (21264)")
+               UNAME_MACHINE="alphaev6" ;;
+           "EV6.7 (21264A)")
+               UNAME_MACHINE="alphaev67" ;;
+           "EV6.8CB (21264C)")
+               UNAME_MACHINE="alphaev68" ;;
+           "EV6.8AL (21264B)")
+               UNAME_MACHINE="alphaev68" ;;
+           "EV6.8CX (21264D)")
+               UNAME_MACHINE="alphaev68" ;;
+           "EV6.9A (21264/EV69A)")
+               UNAME_MACHINE="alphaev69" ;;
+           "EV7 (21364)")
+               UNAME_MACHINE="alphaev7" ;;
+           "EV7.9 (21364A)")
+               UNAME_MACHINE="alphaev79" ;;
+       esac
+       # A Pn.n version is a patched version.
        # A Vn.n version is a released version.
        # A Tn.n version is a released field test version.
        # A Xn.n version is an unreleased experimental baselevel.
        # 1.2 uses "1.2" for uname -r.
-       eval $set_cc_for_build
-       cat <<EOF >$dummy.s
-       .data
-\$Lformat:
-       .byte 37,100,45,37,120,10,0     # "%d-%x\n"
-
-       .text
-       .globl main
-       .align 4
-       .ent main
-main:
-       .frame \$30,16,\$26,0
-       ldgp \$29,0(\$27)
-       .prologue 1
-       .long 0x47e03d80 # implver \$0
-       lda \$2,-1
-       .long 0x47e20c21 # amask \$2,\$1
-       lda \$16,\$Lformat
-       mov \$0,\$17
-       not \$1,\$18
-       jsr \$26,printf
-       ldgp \$29,0(\$26)
-       mov 0,\$16
-       jsr \$26,exit
-       .end main
-EOF
-       $CC_FOR_BUILD $dummy.s -o $dummy 2>/dev/null
-       if test "$?" = 0 ; then
-               case `$dummy` in
-                       0-0)
-                               UNAME_MACHINE="alpha"
-                               ;;
-                       1-0)
-                               UNAME_MACHINE="alphaev5"
-                               ;;
-                       1-1)
-                               UNAME_MACHINE="alphaev56"
-                               ;;
-                       1-101)
-                               UNAME_MACHINE="alphapca56"
-                               ;;
-                       2-303)
-                               UNAME_MACHINE="alphaev6"
-                               ;;
-                       2-307)
-                               UNAME_MACHINE="alphaev67"
-                               ;;
-                       2-1307)
-                               UNAME_MACHINE="alphaev68"
-                               ;;
-                       3-1307)
-                               UNAME_MACHINE="alphaev7"
-                               ;;
-               esac
-       fi
-       rm -f $dummy.s $dummy && rmdir $tmpdir
-       echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[VTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
+       echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
        exit 0 ;;
     Alpha\ *:Windows_NT*:*)
        # How do we know it's Interix rather than the generic POSIX subsystem?
@@ -310,6 +328,9 @@ EOF
     *:OS/390:*:*)
        echo i370-ibm-openedition
        exit 0 ;;
+    *:OS400:*:*)
+        echo powerpc-ibm-os400
+       exit 0 ;;
     arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*)
        echo arm-acorn-riscix${UNAME_RELEASE}
        exit 0;;
@@ -327,6 +348,9 @@ EOF
     NILE*:*:*:dcosx)
        echo pyramid-pyramid-svr4
        exit 0 ;;
+    DRS?6000:unix:4.0:6*)
+       echo sparc-icl-nx6
+       exit 0 ;;
     DRS?6000:UNIX_SV:4.2*:7*)
        case `/usr/bin/uname -p` in
            sparc) echo sparc-icl-nx7 && exit 0 ;;
@@ -399,6 +423,9 @@ EOF
     *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*)
         echo m68k-unknown-mint${UNAME_RELEASE}
         exit 0 ;;
+    m68k:machten:*:*)
+       echo m68k-apple-machten${UNAME_RELEASE}
+       exit 0 ;;
     powerpc:machten:*:*)
        echo powerpc-apple-machten${UNAME_RELEASE}
        exit 0 ;;
@@ -437,10 +464,9 @@ EOF
          exit (-1);
        }
 EOF
-       $CC_FOR_BUILD $dummy.c -o $dummy \
+       $CC_FOR_BUILD -o $dummy $dummy.c \
          && $dummy `echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` \
-         && rm -f $dummy.c $dummy && rmdir $tmpdir && exit 0
-       rm -f $dummy.c $dummy && rmdir $tmpdir
+         && exit 0
        echo mips-mips-riscos${UNAME_RELEASE}
        exit 0 ;;
     Motorola:PowerMAX_OS:*:*)
@@ -449,7 +475,7 @@ EOF
     Motorola:*:4.3:PL8-*)
        echo powerpc-harris-powermax
        exit 0 ;;
-    Night_Hawk:*:*:PowerMAX_OS)
+    Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*)
        echo powerpc-harris-powermax
        exit 0 ;;
     Night_Hawk:Power_UNIX:*:*)
@@ -524,8 +550,7 @@ EOF
                        exit(0);
                        }
 EOF
-               $CC_FOR_BUILD $dummy.c -o $dummy && $dummy && rm -f $dummy.c $dummy && rmdir $tmpdir && exit 0
-               rm -f $dummy.c $dummy && rmdir $tmpdir
+               $CC_FOR_BUILD -o $dummy $dummy.c && $dummy && exit 0
                echo rs6000-ibm-aix3.2.5
        elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then
                echo rs6000-ibm-aix3.2.4
@@ -623,11 +648,21 @@ EOF
                   exit (0);
               }
 EOF
-                   (CCOPTS= $CC_FOR_BUILD $dummy.c -o $dummy 2>/dev/null) && HP_ARCH=`$dummy`
-                   if test -z "$HP_ARCH"; then HP_ARCH=hppa; fi
-                   rm -f $dummy.c $dummy && rmdir $tmpdir
+                   (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy`
+                   test -z "$HP_ARCH" && HP_ARCH=hppa
                fi ;;
        esac
+       if [ ${HP_ARCH} = "hppa2.0w" ]
+       then
+           # avoid double evaluation of $set_cc_for_build
+           test -n "$CC_FOR_BUILD" || eval $set_cc_for_build
+           if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E -) | grep __LP64__ >/dev/null
+           then
+               HP_ARCH="hppa2.0w"
+           else
+               HP_ARCH="hppa64"
+           fi
+       fi
        echo ${HP_ARCH}-hp-hpux${HPUX_REV}
        exit 0 ;;
     ia64:HP-UX:*:*)
@@ -661,8 +696,7 @@ EOF
          exit (0);
        }
 EOF
-       $CC_FOR_BUILD $dummy.c -o $dummy && $dummy && rm -f $dummy.c $dummy && rmdir $tmpdir && exit 0
-       rm -f $dummy.c $dummy && rmdir $tmpdir
+       $CC_FOR_BUILD -o $dummy $dummy.c && $dummy && exit 0
        echo unknown-hitachi-hiuxwe2
        exit 0 ;;
     9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:* )
@@ -720,21 +754,26 @@ EOF
     CRAY*TS:*:*:*)
        echo t90-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
        exit 0 ;;
-    CRAY*T3D:*:*:*)
-       echo alpha-cray-unicosmk${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
-       exit 0 ;;
     CRAY*T3E:*:*:*)
        echo alphaev5-cray-unicosmk${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
        exit 0 ;;
     CRAY*SV1:*:*:*)
        echo sv1-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
        exit 0 ;;
+    *:UNICOS/mp:*:*)
+       echo craynv-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
+       exit 0 ;;
     F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*)
        FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
         FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
         FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'`
         echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
         exit 0 ;;
+    5000:UNIX_System_V:4.*:*)
+        FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
+        FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'`
+        echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
+       exit 0 ;;
     i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*)
        echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE}
        exit 0 ;;
@@ -745,19 +784,7 @@ EOF
        echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE}
        exit 0 ;;
     *:FreeBSD:*:*)
-       # Determine whether the default compiler uses glibc.
-       eval $set_cc_for_build
-       sed 's/^        //' << EOF >$dummy.c
-       #include <features.h>
-       #if __GLIBC__ >= 2
-       LIBC=gnu
-       #else
-       LIBC=
-       #endif
-EOF
-       eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep ^LIBC=`
-       rm -f $dummy.c && rmdir $tmpdir
-       echo ${UNAME_MACHINE}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`${LIBC:+-$LIBC}
+       echo ${UNAME_MACHINE}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`
        exit 0 ;;
     i*:CYGWIN*:*)
        echo ${UNAME_MACHINE}-pc-cygwin
@@ -768,14 +795,17 @@ EOF
     i*:PW*:*)
        echo ${UNAME_MACHINE}-pc-pw32
        exit 0 ;;
-    x86:Interix*:3*)
-       echo i386-pc-interix3
+    x86:Interix*:[34]*)
+       echo i586-pc-interix${UNAME_RELEASE}|sed -e 's/\..*//'
+       exit 0 ;;
+    [345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*)
+       echo i${UNAME_MACHINE}-pc-mks
        exit 0 ;;
     i*:Windows_NT*:* | Pentium*:Windows_NT*:*)
        # How do we know it's Interix rather than the generic POSIX subsystem?
        # It also conflicts with pre-2.0 versions of AT&T UWIN. Should we
        # UNAME_MACHINE based on the output of uname instead of i386?
-       echo i386-pc-interix
+       echo i586-pc-interix
        exit 0 ;;
     i*:UWIN*:*)
        echo ${UNAME_MACHINE}-pc-uwin
@@ -787,17 +817,28 @@ EOF
        echo powerpcle-unknown-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
        exit 0 ;;
     *:GNU:*:*)
+       # the GNU system
        echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-gnu`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'`
        exit 0 ;;
+    *:GNU/*:*:*)
+       # other systems with GNU libc and userland
+       echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-gnu
+       exit 0 ;;
     i*86:Minix:*:*)
        echo ${UNAME_MACHINE}-pc-minix
        exit 0 ;;
     arm*:Linux:*:*)
        echo ${UNAME_MACHINE}-unknown-linux-gnu
        exit 0 ;;
+    cris:Linux:*:*)
+       echo cris-axis-linux-gnu
+       exit 0 ;;
     ia64:Linux:*:*)
        echo ${UNAME_MACHINE}-unknown-linux-gnu
        exit 0 ;;
+    m32r*:Linux:*:*)
+       echo ${UNAME_MACHINE}-unknown-linux-gnu
+       exit 0 ;;
     m68*:Linux:*:*)
        echo ${UNAME_MACHINE}-unknown-linux-gnu
        exit 0 ;;
@@ -818,8 +859,26 @@ EOF
        #endif
 EOF
        eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep ^CPU=`
-       rm -f $dummy.c && rmdir $tmpdir
-       test x"${CPU}" != x && echo "${CPU}-pc-linux-gnu" && exit 0
+       test x"${CPU}" != x && echo "${CPU}-unknown-linux-gnu" && exit 0
+       ;;
+    mips64:Linux:*:*)
+       eval $set_cc_for_build
+       sed 's/^        //' << EOF >$dummy.c
+       #undef CPU
+       #undef mips64
+       #undef mips64el
+       #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
+       CPU=mips64el
+       #else
+       #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB)
+       CPU=mips64
+       #else
+       CPU=
+       #endif
+       #endif
+EOF
+       eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep ^CPU=`
+       test x"${CPU}" != x && echo "${CPU}-unknown-linux-gnu" && exit 0
        ;;
     ppc:Linux:*:*)
        echo powerpc-unknown-linux-gnu
@@ -855,6 +914,9 @@ EOF
     s390:Linux:*:* | s390x:Linux:*:*)
        echo ${UNAME_MACHINE}-ibm-linux
        exit 0 ;;
+    sh64*:Linux:*:*)
+       echo ${UNAME_MACHINE}-unknown-linux-gnu
+       exit 0 ;;
     sh*:Linux:*:*)
        echo ${UNAME_MACHINE}-unknown-linux-gnu
        exit 0 ;;
@@ -912,9 +974,11 @@ EOF
        LIBC=gnuaout
        #endif
        #endif
+       #ifdef __dietlibc__
+       LIBC=dietlibc
+       #endif
 EOF
        eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep ^LIBC=`
-       rm -f $dummy.c && rmdir $tmpdir
        test x"${LIBC}" != x && echo "${UNAME_MACHINE}-pc-linux-${LIBC}" && exit 0
        test x"${TENTATIVE}" != x && echo "${TENTATIVE}" && exit 0
        ;;
@@ -932,6 +996,26 @@ EOF
         # Use sysv4.2uw... so that sysv4* matches it.
        echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION}
        exit 0 ;;
+    i*86:OS/2:*:*)
+       # If we were able to find `uname', then EMX Unix compatibility
+       # is probably installed.
+       echo ${UNAME_MACHINE}-pc-os2-emx
+       exit 0 ;;
+    i*86:XTS-300:*:STOP)
+       echo ${UNAME_MACHINE}-unknown-stop
+       exit 0 ;;
+    i*86:atheos:*:*)
+       echo ${UNAME_MACHINE}-unknown-atheos
+       exit 0 ;;
+       i*86:syllable:*:*)
+       echo ${UNAME_MACHINE}-pc-syllable
+       exit 0 ;;
+    i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.0*:*)
+       echo i386-unknown-lynxos${UNAME_RELEASE}
+       exit 0 ;;
+    i*86:*DOS:*:*)
+       echo ${UNAME_MACHINE}-pc-msdosdjgpp
+       exit 0 ;;
     i*86:*:4.*:* | i*86:SYSTEM_V:4.*:*)
        UNAME_REL=`echo ${UNAME_RELEASE} | sed 's/\/MP$//'`
        if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then
@@ -966,9 +1050,6 @@ EOF
                echo ${UNAME_MACHINE}-pc-sysv32
        fi
        exit 0 ;;
-    i*86:*DOS:*:*)
-       echo ${UNAME_MACHINE}-pc-msdosdjgpp
-       exit 0 ;;
     pc:*:*:*)
        # Left here for compatibility:
         # uname -m prints for DJGPP always 'pc', but it prints nothing about
@@ -995,9 +1076,12 @@ EOF
     mc68k:UNIX:SYSTEM5:3.51m)
        echo m68k-convergent-sysv
        exit 0 ;;
-    M68*:*:R3V[567]*:*)
+    M680?0:D-NIX:5.3:*)
+       echo m68k-diab-dnix
+       exit 0 ;;
+    M68*:*:R3V[5678]*:*)
        test -r /sysV68 && echo 'm68k-motorola-sysv' && exit 0 ;;
-    3[34]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0)
+    3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0)
        OS_REL=''
        test -r /etc/.relid \
        && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid`
@@ -1014,9 +1098,6 @@ EOF
     mc68030:UNIX_System_V:4.*:*)
        echo m68k-atari-sysv4
        exit 0 ;;
-    i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.0*:*)
-       echo i386-unknown-lynxos${UNAME_RELEASE}
-       exit 0 ;;
     TSUNAMI:LynxOS:2.*:*)
        echo sparc-unknown-lynxos${UNAME_RELEASE}
        exit 0 ;;
@@ -1098,7 +1179,11 @@ EOF
        echo ${UNAME_MACHINE}-apple-rhapsody${UNAME_RELEASE}
        exit 0 ;;
     *:Darwin:*:*)
-       echo `uname -p`-apple-darwin${UNAME_RELEASE}
+       case `uname -p` in
+           *86) UNAME_PROCESSOR=i686 ;;
+           powerpc) UNAME_PROCESSOR=powerpc ;;
+       esac
+       echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE}
        exit 0 ;;
     *:procnto*:*:* | *:QNX:[0123456789]*:*)
        UNAME_PROCESSOR=`uname -p`
@@ -1111,7 +1196,7 @@ EOF
     *:QNX:*:4*)
        echo i386-pc-qnx
        exit 0 ;;
-    NSR-[DGKLNPTVW]:NONSTOP_KERNEL:*:*)
+    NSR-?:NONSTOP_KERNEL:*:*)
        echo nsr-tandem-nsk${UNAME_RELEASE}
        exit 0 ;;
     *:NonStop-UX:*:*)
@@ -1134,11 +1219,6 @@ EOF
        fi
        echo ${UNAME_MACHINE}-unknown-plan9
        exit 0 ;;
-    i*86:OS/2:*:*)
-       # If we were able to find `uname', then EMX Unix compatibility
-       # is probably installed.
-       echo ${UNAME_MACHINE}-pc-os2-emx
-       exit 0 ;;
     *:TOPS-10:*:*)
        echo pdp10-unknown-tops10
        exit 0 ;;
@@ -1157,12 +1237,19 @@ EOF
     *:ITS:*:*)
        echo pdp10-unknown-its
        exit 0 ;;
-    i*86:XTS-300:*:STOP)
-       echo ${UNAME_MACHINE}-unknown-stop
+    SEI:*:*:SEIUX)
+        echo mips-sei-seiux${UNAME_RELEASE}
        exit 0 ;;
-    i*86:atheos:*:*)
-       echo ${UNAME_MACHINE}-unknown-atheos
+    *:DragonFly:*:*)
+       echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`
        exit 0 ;;
+    *:*VMS:*:*)
+       UNAME_MACHINE=`(uname -p) 2>/dev/null`
+       case "${UNAME_MACHINE}" in
+           A*) echo alpha-dec-vms && exit 0 ;;
+           I*) echo ia64-dec-vms && exit 0 ;;
+           V*) echo vax-dec-vms && exit 0 ;;
+       esac
 esac
 
 #echo '(No uname command or uname output not recognized.)' 1>&2
@@ -1283,8 +1370,7 @@ main ()
 }
 EOF
 
-$CC_FOR_BUILD $dummy.c -o $dummy 2>/dev/null && $dummy && rm -f $dummy.c $dummy && rmdir $tmpdir && exit 0
-rm -f $dummy.c $dummy && rmdir $tmpdir
+$CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null && $dummy && exit 0
 
 # Apollos put the system type in the environment.
 
index 1dea9b79d5500e4e629a7f841d4a969d7b8412c6..ac6de9869c9a1b0a8145abac6ac35d9854fb11f9 100755 (executable)
@@ -1,9 +1,9 @@
 #! /bin/sh
 # Configuration validation subroutine script.
 #   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
-#   2000, 2001, 2002 Free Software Foundation, Inc.
+#   2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
 
-timestamp='2002-09-05'
+timestamp='2004-06-24'
 
 # This file is (in principle) common to ALL GNU software.
 # The presence of a machine in this file suggests that SOME GNU software
@@ -70,7 +70,7 @@ Report bugs and patches to <config-patches@gnu.org>."
 version="\
 GNU config.sub ($timestamp)
 
-Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001
+Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004
 Free Software Foundation, Inc.
 
 This is free software; see the source for copying conditions.  There is NO
@@ -118,7 +118,8 @@ esac
 # Here we must recognize all the valid KERNEL-OS combinations.
 maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'`
 case $maybe_os in
-  nto-qnx* | linux-gnu* | freebsd*-gnu* | storm-chaos* | os2-emx* | windows32-* | rtmk-nova*)
+  nto-qnx* | linux-gnu* | linux-dietlibc | linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | \
+  kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* | storm-chaos* | os2-emx* | rtmk-nova*)
     os=-$maybe_os
     basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`
     ;;
@@ -144,7 +145,7 @@ case $os in
        -convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\
        -c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \
        -harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \
-       -apple | -axis)
+       -apple | -axis | -knuth | -cray)
                os=
                basic_machine=$1
                ;;
@@ -228,14 +229,15 @@ case $basic_machine in
        | a29k \
        | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \
        | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \
+       | am33_2.0 \
        | arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr \
-       | clipper \
+       | c4x | clipper \
        | d10v | d30v | dlx | dsp16xx \
        | fr30 | frv \
        | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \
        | i370 | i860 | i960 | ia64 \
-       | ip2k \
-       | m32r | m68000 | m68k | m88k | mcore \
+       | ip2k | iq2000 \
+       | m32r | m32rle | m68000 | m68k | m88k | mcore \
        | mips | mipsbe | mipseb | mipsel | mipsle \
        | mips16 \
        | mips64 | mips64el \
@@ -245,21 +247,24 @@ case $basic_machine in
        | mips64vr4300 | mips64vr4300el \
        | mips64vr5000 | mips64vr5000el \
        | mipsisa32 | mipsisa32el \
+       | mipsisa32r2 | mipsisa32r2el \
        | mipsisa64 | mipsisa64el \
+       | mipsisa64r2 | mipsisa64r2el \
        | mipsisa64sb1 | mipsisa64sb1el \
        | mipsisa64sr71k | mipsisa64sr71kel \
        | mipstx39 | mipstx39el \
        | mn10200 | mn10300 \
+       | msp430 \
        | ns16k | ns32k \
        | openrisc | or32 \
        | pdp10 | pdp11 | pj | pjl \
        | powerpc | powerpc64 | powerpc64le | powerpcle | ppcbe \
        | pyramid \
-       | sh | sh[1234] | sh3e | sh[34]eb | shbe | shle | sh[1234]le | sh3ele \
+       | sh | sh[1234] | sh[23]e | sh[34]eb | shbe | shle | sh[1234]le | sh3ele \
        | sh64 | sh64le \
-       | sparc | sparc64 | sparc86x | sparclet | sparclite | sparcv9 | sparcv9b \
+       | sparc | sparc64 | sparc86x | sparclet | sparclite | sparcv8 | sparcv9 | sparcv9b \
        | strongarm \
-       | tahoe | thumb | tic80 | tron \
+       | tahoe | thumb | tic4x | tic80 | tron \
        | v850 | v850e \
        | we32k \
        | x86 | xscale | xstormy16 | xtensa \
@@ -294,16 +299,16 @@ case $basic_machine in
        | arm-*  | armbe-* | armle-* | armeb-* | armv*-* \
        | avr-* \
        | bs2000-* \
-       | c[123]* | c30-* | [cjt]90-* | c4x-* | c54x-* \
-       | clipper-* | cydra-* \
+       | c[123]* | c30-* | [cjt]90-* | c4x-* | c54x-* | c55x-* | c6x-* \
+       | clipper-* | craynv-* | cydra-* \
        | d10v-* | d30v-* | dlx-* \
        | elxsi-* \
        | f30[01]-* | f700-* | fr30-* | frv-* | fx80-* \
        | h8300-* | h8500-* \
        | hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \
        | i*86-* | i860-* | i960-* | ia64-* \
-       | ip2k-* \
-       | m32r-* \
+       | ip2k-* | iq2000-* \
+       | m32r-* | m32rle-* \
        | m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \
        | m88110-* | m88k-* | mcore-* \
        | mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \
@@ -315,21 +320,27 @@ case $basic_machine in
        | mips64vr4300-* | mips64vr4300el-* \
        | mips64vr5000-* | mips64vr5000el-* \
        | mipsisa32-* | mipsisa32el-* \
+       | mipsisa32r2-* | mipsisa32r2el-* \
        | mipsisa64-* | mipsisa64el-* \
+       | mipsisa64r2-* | mipsisa64r2el-* \
        | mipsisa64sb1-* | mipsisa64sb1el-* \
        | mipsisa64sr71k-* | mipsisa64sr71kel-* \
-       | mipstx39 | mipstx39el \
+       | mipstx39-* | mipstx39el-* \
+       | mmix-* \
+       | msp430-* \
        | none-* | np1-* | ns16k-* | ns32k-* \
        | orion-* \
        | pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \
        | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* | ppcbe-* \
        | pyramid-* \
        | romp-* | rs6000-* \
-       | sh-* | sh[1234]-* | sh3e-* | sh[34]eb-* | shbe-* \
+       | sh-* | sh[1234]-* | sh[23]e-* | sh[34]eb-* | shbe-* \
        | shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \
        | sparc-* | sparc64-* | sparc86x-* | sparclet-* | sparclite-* \
-       | sparcv9-* | sparcv9b-* | strongarm-* | sv1-* | sx?-* \
-       | tahoe-* | thumb-* | tic30-* | tic4x-* | tic54x-* | tic80-* | tron-* \
+       | sparcv8-* | sparcv9-* | sparcv9b-* | strongarm-* | sv1-* | sx?-* \
+       | tahoe-* | thumb-* \
+       | tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \
+       | tron-* \
        | v850-* | v850e-* | vax-* \
        | we32k-* \
        | x86-* | x86_64-* | xps100-* | xscale-* | xstormy16-* \
@@ -353,6 +364,9 @@ case $basic_machine in
                basic_machine=a29k-amd
                os=-udi
                ;;
+       abacus)
+               basic_machine=abacus-unknown
+               ;;
        adobe68k)
                basic_machine=m68010-adobe
                os=-scout
@@ -367,6 +381,12 @@ case $basic_machine in
                basic_machine=a29k-none
                os=-bsd
                ;;
+       amd64)
+               basic_machine=x86_64-pc
+               ;;
+       amd64-*)
+               basic_machine=x86_64-`echo $basic_machine | sed 's/^[^-]*-//'`
+               ;;
        amdahl)
                basic_machine=580-amdahl
                os=-sysv
@@ -426,12 +446,24 @@ case $basic_machine in
                basic_machine=j90-cray
                os=-unicos
                ;;
+       craynv)
+               basic_machine=craynv-cray
+               os=-unicosmp
+               ;;
+       cr16c)
+               basic_machine=cr16c-unknown
+               os=-elf
+               ;;
        crds | unos)
                basic_machine=m68k-crds
                ;;
        cris | cris-* | etrax*)
                basic_machine=cris-axis
                ;;
+       crx)
+               basic_machine=crx-unknown
+               os=-elf
+               ;;
        da30 | da30-*)
                basic_machine=m68k-da30
                ;;
@@ -632,10 +664,6 @@ case $basic_machine in
        mips3*)
                basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`-unknown
                ;;
-       mmix*)
-               basic_machine=mmix-knuth
-               os=-mmixware
-               ;;
        monitor)
                basic_machine=m68k-rom68k
                os=-coff
@@ -727,6 +755,10 @@ case $basic_machine in
                basic_machine=or32-unknown
                os=-coff
                ;;
+       os400)
+               basic_machine=powerpc-ibm
+               os=-os400
+               ;;
        OSE68000 | ose68000)
                basic_machine=m68000-ericsson
                os=-ose
@@ -758,18 +790,24 @@ case $basic_machine in
        pentiumpro | p6 | 6x86 | athlon | athlon_*)
                basic_machine=i686-pc
                ;;
-       pentiumii | pentium2)
+       pentiumii | pentium2 | pentiumiii | pentium3)
                basic_machine=i686-pc
                ;;
+       pentium4)
+               basic_machine=i786-pc
+               ;;
        pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*)
                basic_machine=i586-`echo $basic_machine | sed 's/^[^-]*-//'`
                ;;
        pentiumpro-* | p6-* | 6x86-* | athlon-*)
                basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'`
                ;;
-       pentiumii-* | pentium2-*)
+       pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*)
                basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'`
                ;;
+       pentium4-*)
+               basic_machine=i786-`echo $basic_machine | sed 's/^[^-]*-//'`
+               ;;
        pn)
                basic_machine=pn-gould
                ;;
@@ -828,6 +866,10 @@ case $basic_machine in
        sb1el)
                basic_machine=mipsisa64sb1el-unknown
                ;;
+       sei)
+               basic_machine=mips-sei
+               os=-seiux
+               ;;
        sequent)
                basic_machine=i386-sequent
                ;;
@@ -835,6 +877,9 @@ case $basic_machine in
                basic_machine=sh-hitachi
                os=-hms
                ;;
+       sh64)
+               basic_machine=sh64-unknown
+               ;;
        sparclite-wrs | simso-wrs)
                basic_machine=sparclite-wrs
                os=-vxworks
@@ -901,10 +946,6 @@ case $basic_machine in
                basic_machine=i386-sequent
                os=-dynix
                ;;
-       t3d)
-               basic_machine=alpha-cray
-               os=-unicos
-               ;;
        t3e)
                basic_machine=alphaev5-cray
                os=-unicos
@@ -913,14 +954,18 @@ case $basic_machine in
                basic_machine=t90-cray
                os=-unicos
                ;;
-        tic4x | c4x*)
-               basic_machine=tic4x-unknown
-               os=-coff
-               ;;
        tic54x | c54x*)
                basic_machine=tic54x-unknown
                os=-coff
                ;;
+       tic55x | c55x*)
+               basic_machine=tic55x-unknown
+               os=-coff
+               ;;
+       tic6x | c6x*)
+               basic_machine=tic6x-unknown
+               os=-coff
+               ;;
        tx39)
                basic_machine=mipstx39-unknown
                ;;
@@ -934,6 +979,10 @@ case $basic_machine in
        tower | tower-32)
                basic_machine=m68k-ncr
                ;;
+       tpf)
+               basic_machine=s390x-ibm
+               os=-tpf
+               ;;
        udi29k)
                basic_machine=a29k-amd
                os=-udi
@@ -977,10 +1026,6 @@ case $basic_machine in
                basic_machine=hppa1.1-winbond
                os=-proelf
                ;;
-       windows32)
-               basic_machine=i386-pc
-               os=-windows32-msvcrt
-               ;;
        xps | xps100)
                basic_machine=xps100-honeywell
                ;;
@@ -1011,6 +1056,9 @@ case $basic_machine in
        romp)
                basic_machine=romp-ibm
                ;;
+       mmix)
+               basic_machine=mmix-knuth
+               ;;
        rs6000)
                basic_machine=rs6000-ibm
                ;;
@@ -1027,13 +1075,13 @@ case $basic_machine in
        we32k)
                basic_machine=we32k-att
                ;;
-       sh3 | sh4 | sh3eb | sh4eb | sh[1234]le | sh3ele)
+       sh3 | sh4 | sh[34]eb | sh[1234]le | sh[23]ele)
                basic_machine=sh-unknown
                ;;
        sh64)
                basic_machine=sh64-unknown
                ;;
-       sparc | sparcv9 | sparcv9b)
+       sparc | sparcv8 | sparcv9 | sparcv9b)
                basic_machine=sparc-sun
                ;;
        cydra)
@@ -1106,18 +1154,20 @@ case $os in
              | -aos* \
              | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \
              | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \
-             | -hiux* | -386bsd* | -netbsd* | -openbsd* | -freebsd* | -riscix* \
-             | -lynxos* | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \
+             | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* | -openbsd* \
+             | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \
+             | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \
              | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \
              | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \
              | -chorusos* | -chorusrdb* \
              | -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \
-             | -mingw32* | -linux-gnu* | -uxpv* | -beos* | -mpeix* | -udk* \
-             | -interix* | -uwin* | -rhapsody* | -darwin* | -opened* \
+             | -mingw32* | -linux-gnu* | -linux-uclibc* | -uxpv* | -beos* | -mpeix* | -udk* \
+             | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \
              | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \
              | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \
              | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \
-             | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* | -powermax*)
+             | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \
+             | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly*)
        # Remember, each alternative MUST END IN *, to match a version number.
                ;;
        -qnx*)
@@ -1129,8 +1179,10 @@ case $os in
                        ;;
                esac
                ;;
+       -nto-qnx*)
+               ;;
        -nto*)
-               os=-nto-qnx
+               os=`echo $os | sed -e 's|nto|nto-qnx|'`
                ;;
        -sim | -es1800* | -hms* | -xray | -os68k* | -none* | -v88r* \
              | -windows* | -osx | -abug | -netware* | -os9* | -beos* \
@@ -1139,6 +1191,9 @@ case $os in
        -mac*)
                os=`echo $os | sed -e 's|mac|macos|'`
                ;;
+       -linux-dietlibc)
+               os=-linux-dietlibc
+               ;;
        -linux*)
                os=`echo $os | sed -e 's|linux|linux-gnu|'`
                ;;
@@ -1151,6 +1206,9 @@ case $os in
        -opened*)
                os=-openedition
                ;;
+        -os400*)
+               os=-os400
+               ;;
        -wince*)
                os=-wince
                ;;
@@ -1172,6 +1230,9 @@ case $os in
        -atheos*)
                os=-atheos
                ;;
+       -syllable*)
+               os=-syllable
+               ;;
        -386bsd)
                os=-bsd
                ;;
@@ -1194,6 +1255,9 @@ case $os in
        -sinix*)
                os=-sysv4
                ;;
+        -tpf*)
+               os=-tpf
+               ;;
        -triton*)
                os=-sysv3
                ;;
@@ -1224,6 +1288,12 @@ case $os in
        -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*)
                os=-mint
                ;;
+       -aros*)
+               os=-aros
+               ;;
+       -kaos*)
+               os=-kaos
+               ;;
        -none)
                ;;
        *)
@@ -1255,6 +1325,9 @@ case $basic_machine in
        arm*-semi)
                os=-aout
                ;;
+    c4x-* | tic4x-*)
+        os=-coff
+        ;;
        # This must come before the *-dec entry.
        pdp10-*)
                os=-tops20
@@ -1301,6 +1374,9 @@ case $basic_machine in
        *-ibm)
                os=-aix
                ;;
+       *-knuth)
+               os=-mmixware
+               ;;
        *-wec)
                os=-proelf
                ;;
@@ -1433,9 +1509,15 @@ case $basic_machine in
                        -mvs* | -opened*)
                                vendor=ibm
                                ;;
+                       -os400*)
+                               vendor=ibm
+                               ;;
                        -ptx*)
                                vendor=sequent
                                ;;
+                       -tpf*)
+                               vendor=ibm
+                               ;;
                        -vxsim* | -vxworks* | -windiss*)
                                vendor=wrs
                                ;;
index cad301ffabd6ba07ed0bb6c4b16cacc865fd3165..100fcb097d188fb3cc824d96d3c42502ee9725e6 100644 (file)
@@ -3,7 +3,7 @@
 #
 #  Author: Pekka Riikonen <priikone@silcnet.org>
 #
-#  Copyright (C) 2000 - 2002 Pekka Riikonen
+#  Copyright (C) 2000 - 2004 Pekka Riikonen
 #
 #  This program is free software; you can redistribute it and/or modify
 #  it under the terms of the GNU General Public License as published by
@@ -62,7 +62,9 @@ libsilcutil_la_SOURCES = \
        silcsockconn.c  \
        silcprotocol.c  \
        silcvcard.c     \
-       silcapputil.c
+       silcapputil.c   \
+       silcutf8.c      \
+       silcstringprep.c
 
 if SILC_DIST_TOOLKIT
 include_HEADERS =      \
@@ -86,6 +88,8 @@ include_HEADERS =     \
        silcstrutil.h   \
        silcvcard.h     \
        silcapputil.h   \
+       silcutf8.h      \
+       silcstringprep.h        \
        silctypes.h
 endif
 
index ccbb8efc29378c64ce75eef8a3d382868c31fad0..31351ec8a0d4e612b7d337541a16363fa444ad07 100644 (file)
@@ -4,7 +4,7 @@
 
   Author: Giovanni Giacobbi <giovanni@giacobbi.net>
 
-  Copyright (C) 1997 - 2004 Pekka Riikonen
+  Copyright (C) 1997 - 2005 Pekka Riikonen
 
   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -361,7 +361,7 @@ void silc_log_reset_all() {
 
 /* Outputs the debug message to stderr. */
 
-void silc_log_output_debug(char *file, char *function,
+void silc_log_output_debug(char *file, const char *function,
                           int line, char *string)
 {
   if (!silc_debug)
@@ -373,7 +373,7 @@ void silc_log_output_debug(char *file, char *function,
     goto end;
 
   if (silc_log_debug_cb) {
-    if ((*silc_log_debug_cb)(file, function, line, string,
+    if ((*silc_log_debug_cb)(file, (char *)function, line, string,
                             silc_log_debug_context))
       goto end;
   }
@@ -387,7 +387,7 @@ void silc_log_output_debug(char *file, char *function,
 
 /* Hexdumps a message */
 
-void silc_log_output_hexdump(char *file, char *function,
+void silc_log_output_hexdump(char *file, const char *function,
                             int line, void *data_in,
                             SilcUInt32 len, char *string)
 {
@@ -404,7 +404,8 @@ void silc_log_output_hexdump(char *file, char *function,
     goto end;
 
   if (silc_log_hexdump_cb) {
-    if ((*silc_log_hexdump_cb)(file, function, line, data_in, len, string,
+    if ((*silc_log_hexdump_cb)(file, (char *)function, line,
+                              data_in, len, string,
                               silc_log_hexdump_context))
       goto end;
   }
index eae965e049e48b69d97c60bfeb108eb0a81da16f..624120356e8dbd9a35752e7559832ac19d199aa6 100644 (file)
@@ -4,7 +4,7 @@
 
   Author: Giovanni Giacobbi <giovanni@giacobbi.net>
 
-  Copyright (C) 1997 - 2003 Pekka Riikonen
+  Copyright (C) 1997 - 2005 Pekka Riikonen
 
   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -569,7 +569,7 @@ void silc_log_reset_all();
  *
  * SYNOPSIS
  *
- *    void silc_log_output_debug(char *file, char *function,
+ *    void silc_log_output_debug(char *file, const char *function,
  *                               int line, char *string);
  *
  * DESCRIPTION
@@ -581,7 +581,7 @@ void silc_log_reset_all();
  *    dynamic allocated (null-terminated) buffer.
  *
  ***/
-void silc_log_output_debug(char *file, char *function,
+void silc_log_output_debug(char *file, const char *function,
                           int line, char *string);
 
 /****f* silcutil/SilcLogAPI/silc_log_output_hexdump
@@ -602,7 +602,7 @@ void silc_log_output_debug(char *file, char *function,
  *    `string' must be a dynamic allocated (null-terminated) buffer.
  *
  ***/
-void silc_log_output_hexdump(char *file, char *function,
+void silc_log_output_hexdump(char *file, const char *function,
                             int line, void *data_in,
                             SilcUInt32 len, char *string);
 
diff --git a/lib/silcutil/silcstringprep.c b/lib/silcutil/silcstringprep.c
new file mode 100644 (file)
index 0000000..769fb41
--- /dev/null
@@ -0,0 +1,199 @@
+/*
+
+  silcstringprep.c
+
+  Author: Pekka Riikonen <priikone@silcnet.org>
+
+  Copyright (C) 2004 - 2005 Pekka Riikonen
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; version 2 of the License.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+*/
+
+#include "silcincludes.h"
+#include "silcstringprep.h"
+#include <stringprep.h>
+
+/* We use GNU Libidn which has stringprep to do the magic.  Only bad thing
+   is that its interface is idiotic.  We have our own API here in case
+   we'll implement it ourselves later. */
+
+/* Prohibited characters as defined by the protocol in Appendix B.
+
+   The table is handed to Libidn as a STRINGPREP_PROHIBIT_TABLE entry of
+   the SILC identifier profile below.  Each element is written either as a
+   single code point `{cp}' or as a pair `{first, last}'; presumably Libidn
+   treats the pair as an inclusive range and a lone value as a single code
+   point (confirm against Libidn's Stringprep_table_element).  The list is
+   terminated by the `{0}' element. */
+const Stringprep_table_element silc_appendix_b[] =
+{
+  {0x000021}, {0x00002A}, {0x00002C}, {0x00003F}, {0x000040},
+  {0x0000A2, 0x0000A9},
+  {0x0000AC}, {0x0000AE}, {0x0000AF}, {0x0000B0}, {0x0000B1}, {0x0000B4},
+  {0x0000B6}, {0x0000B8}, {0x0000D7}, {0x0000F7},
+  {0x0002C2, 0x0002C5}, {0x0002D2, 0x0002FF},
+  {0x000374}, {0x000375}, {0x000384}, {0x000385}, {0x0003F6}, {0x000482},
+  {0x00060E}, {0x00060F}, {0x0006E9}, {0x0006FD}, {0x0006FE}, {0x0009F2},
+  {0x0009F3}, {0x0009FA}, {0x000AF1}, {0x000B70},
+  {0x000BF3, 0x000BFA}, {0x000E3F},
+  {0x000F01, 0x000F03}, {0x000F13, 0x000F17}, {0x000F1A, 0x000F1F},
+  {0x000F34}, {0x000F36}, {0x000F38}, {0x000FBE}, {0x000FBF},
+  {0x000FC0, 0x000FC5}, {0x000FC7, 0x000FCF}, {0x0017DB}, {0x001940},
+  {0x0019E0, 0x0019FF}, {0x001FBD}, {0x001FBF, 0x001FC1},
+  {0x001FCD, 0x001FCF}, {0x001FDD, 0x001FDF}, {0x001FED, 0x001FEF},
+  {0x001FFD}, {0x001FFE}, {0x002044}, {0x002052}, {0x00207A, 0x00207C},
+  {0x00208A, 0x00208C}, {0x0020A0, 0x0020B1}, {0x002100, 0x00214F},
+  {0x002150, 0x00218F}, {0x002190, 0x0021FF}, {0x002200, 0x0022FF},
+  {0x002300, 0x0023FF}, {0x002400, 0x00243F}, {0x002440, 0x00245F},
+  {0x002460, 0x0024FF}, {0x002500, 0x00257F}, {0x002580, 0x00259F},
+  {0x0025A0, 0x0025FF}, {0x002600, 0x0026FF}, {0x002700, 0x0027BF},
+  {0x0027C0, 0x0027EF}, {0x0027F0, 0x0027FF}, {0x002800, 0x0028FF},
+  {0x002900, 0x00297F}, {0x002980, 0x0029FF}, {0x002A00, 0x002AFF},
+  {0x002B00, 0x002BFF}, {0x002E9A}, {0x002EF4, 0x002EFF},
+  {0x002FF0, 0x002FFF}, {0x00303B, 0x00303D}, {0x003040},
+  {0x003095, 0x003098}, {0x00309F, 0x0030A0}, {0x0030FF, 0x003104},
+  {0x00312D, 0x003130}, {0x00318F}, {0x0031B8, 0x0031FF},
+  {0x00321D, 0x00321F}, {0x003244, 0x00325F}, {0x00327C, 0x00327E},
+  {0x0032B1, 0x0032BF}, {0x0032CC, 0x0032CF}, {0x0032FF},
+  {0x003377, 0x00337A}, {0x0033DE, 0x0033DF}, {0x0033FF},
+  {0x004DB6, 0x004DFF},
+  {0x009FA6, 0x009FFF}, {0x00A48D, 0x00A48F}, {0x00A4A2, 0x00A4A3},
+  {0x00A4B4}, {0x00A4C1}, {0x00A4C5}, {0x00A4C7, 0x00ABFF},
+  {0x00D7A4, 0x00D7FF}, {0x00FA2E, 0x00FAFF}, {0x00FFE0, 0x00FFEE},
+  {0x00FFFC}, {0x010000, 0x01007F}, {0x010080, 0x0100FF},
+  {0x010100, 0x01013F}, {0x01D000, 0x01D0FF}, {0x01D100, 0x01D1FF},
+  {0x01D300, 0x01D35F}, {0x01D400, 0x01D7FF},
+  {0x0E0100, 0x0E01EF},
+  {0}
+};
+
+/* Default SILC Identifier String profile defined by the protocol.
+
+   The steps are listed in the order they are given to Libidn: the two
+   RFC 3454 mapping tables (B.1, B.2), NFKC normalization, the RFC 3454
+   prohibition tables C.1.1 - C.9 followed by the SILC specific prohibited
+   characters (silc_appendix_b), and finally the unassigned code point
+   table A.1.  Each RFC 3454 table is listed exactly once; listing a
+   prohibition table twice only makes the same check run twice.  The
+   list is terminated by the `{0}' element. */
+const Stringprep_profile stringprep_silc_identifier_prep[] =
+{
+  {STRINGPREP_MAP_TABLE, 0, stringprep_rfc3454_B_1},
+  {STRINGPREP_MAP_TABLE, 0, stringprep_rfc3454_B_2},
+  {STRINGPREP_NFKC, 0, 0},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_1_1},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_1_2},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_2_1},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_2_2},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_3},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_4},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_5},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_6},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_7},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_8},
+  {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_9},
+  {STRINGPREP_PROHIBIT_TABLE, 0, silc_appendix_b},
+  /* NOTE(review): the inverted flag presumably ties this step to the
+     STRINGPREP_NO_UNASSIGNED flag passed by the caller -- confirm against
+     the Libidn profile flag semantics. */
+  {STRINGPREP_UNASSIGNED_TABLE, ~STRINGPREP_NO_UNASSIGNED,
+   stringprep_rfc3454_A_1},
+  {0}
+};
+
+/* Prepares the string `bin' of `bin_len' bytes according to the stringprep
+   profile named `profile_name'.
+
+   The input is first converted from `bin_encoding' to UTF-8 and then run
+   through Libidn's stringprep() with the selected profile.  Only the
+   SILC_IDENTIFIER_PREP profile is recognized; any other name returns
+   SILC_STRINGPREP_ERR_UNSUP_PROFILE.  Unless `flags' contains
+   SILC_STRINGPREP_ALLOW_UNASSIGNED, unassigned code points are rejected.
+
+   If both `out' and `out_len' are non-NULL and the preparation succeeds,
+   the prepared string converted to `out_encoding' is returned in a newly
+   allocated buffer in `*out' and its length in `*out_len' (presumably the
+   caller frees it with silc_free -- confirm against the header).  If
+   either is NULL the string is only checked.
+
+   Returns SILC_STRINGPREP_OK on success, SILC_STRINGPREP_ERR if `bin',
+   `bin_len' or `profile_name' is missing, and otherwise the
+   SilcStringprepStatus that describes the failure. */
+
+SilcStringprepStatus
+silc_stringprep(const unsigned char *bin, SilcUInt32 bin_len,
+               SilcStringEncoding bin_encoding,
+               const char *profile_name,
+               SilcStringprepFlags flags,
+               unsigned char **out, SilcUInt32 *out_len,
+               SilcStringEncoding out_encoding)
+{
+  Stringprep_profile_flags f = 0;
+  const Stringprep_profile *profile;
+  unsigned char *utf8s;
+  SilcUInt32 utf8s_len, prep_len;
+  int ret;
+
+  if (!bin || !bin_len || !profile_name)
+    return SILC_STRINGPREP_ERR;
+
+  /* Convert string to UTF-8.  One extra zeroed byte is allocated so that
+     the buffer is always NUL-terminated for stringprep(). */
+  utf8s_len = silc_utf8_encoded_len(bin, bin_len, bin_encoding);
+  if (!utf8s_len)
+    return SILC_STRINGPREP_ERR_ENCODING;
+  utf8s = silc_calloc(utf8s_len + 1, sizeof(*utf8s));
+  if (!utf8s)
+    return SILC_STRINGPREP_ERR_OUT_OF_MEMORY;
+  silc_utf8_encode(bin, bin_len, bin_encoding, utf8s, utf8s_len);
+
+  /* Check profile. */
+  if (!strcmp(profile_name, SILC_IDENTIFIER_PREP)) {
+    profile = stringprep_silc_identifier_prep;
+  } else {
+    /* Do not leak the UTF-8 working buffer on this error path. */
+    silc_free(utf8s);
+    return SILC_STRINGPREP_ERR_UNSUP_PROFILE;
+  }
+
+  /* Translate flags */
+  if (!(flags & SILC_STRINGPREP_ALLOW_UNASSIGNED))
+    f |= STRINGPREP_NO_UNASSIGNED;
+
+  /* Prepare.  stringprep() works in place and takes the total size of
+     the buffer, including the room for the terminating NUL, so pass the
+     allocated size and not the string length. */
+  ret = stringprep((char *)utf8s, utf8s_len + 1, f, profile);
+  switch (ret) {
+  case STRINGPREP_OK:
+    ret = SILC_STRINGPREP_OK;
+    break;
+
+  case STRINGPREP_CONTAINS_UNASSIGNED:
+    ret = SILC_STRINGPREP_ERR_UNASSIGNED;
+    break;
+
+  case STRINGPREP_CONTAINS_PROHIBITED:
+    ret = SILC_STRINGPREP_ERR_PROHIBITED;
+    break;
+
+  case STRINGPREP_BIDI_BOTH_L_AND_RAL:
+    ret = SILC_STRINGPREP_ERR_BIDI_RAL_WITH_L;
+    break;
+
+  case STRINGPREP_BIDI_LEADTRAIL_NOT_RAL:
+    ret = SILC_STRINGPREP_ERR_BIDI_RAL;
+    break;
+
+  case STRINGPREP_BIDI_CONTAINS_PROHIBITED:
+    ret = SILC_STRINGPREP_ERR_BIDI_PROHIBITED;
+    break;
+
+  case STRINGPREP_UNKNOWN_PROFILE:
+    ret = SILC_STRINGPREP_ERR_UNSUP_PROFILE;
+    break;
+
+  case STRINGPREP_MALLOC_ERROR:
+    ret = SILC_STRINGPREP_ERR_OUT_OF_MEMORY;
+    break;
+
+  default:
+    /* Any other Libidn error (e.g. too small buffer) is reported as a
+       generic failure. */
+    ret = SILC_STRINGPREP_ERR;
+    break;
+  }
+
+  /* Convert to desired output character encoding.  The output is produced
+     only when the caller gave both `out' and `out_len'. */
+  if (ret == SILC_STRINGPREP_OK && out && out_len) {
+    prep_len = strlen((char *)utf8s);
+    if (out_encoding != SILC_STRING_UTF8) {
+      *out_len = silc_utf8_decoded_len(utf8s, prep_len, out_encoding);
+      if (*out_len) {
+        *out = silc_calloc(*out_len + 1, sizeof(**out));
+        if (*out)
+          silc_utf8_decode(utf8s, prep_len, out_encoding, *out, *out_len);
+        else
+          ret = SILC_STRINGPREP_ERR_OUT_OF_MEMORY;
+      } else {
+        ret = SILC_STRINGPREP_ERR_ENCODING;
+      }
+    } else {
+      *out_len = prep_len;
+      *out = silc_memdup(utf8s, *out_len);
+      if (!*out)
+        ret = SILC_STRINGPREP_ERR_OUT_OF_MEMORY;
+    }
+  }
+
+  silc_free(utf8s);
+
+  return (SilcStringprepStatus)ret;
+}
diff --git a/lib/silcutil/silcstringprep.h b/lib/silcutil/silcstringprep.h
new file mode 100644 (file)
index 0000000..aa061b5
--- /dev/null
@@ -0,0 +1,132 @@
+/*
+
+  silcstringprep.h
+
+  Author: Pekka Riikonen <priikone@silcnet.org>
+
+  Copyright (C) 2004 - 2005 Pekka Riikonen
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; version 2 of the License.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+*/
+
+/****h* silcutil/SILC Stringprep
+ *
+ * DESCRIPTION
+ *
+ * Interface for the stringprep (RFC 3454) standard, which is used to prepare
+ * strings for internationalization.  The interface can be used to prepare
+ * strings according to various stringprep profiles.  A profile defines
+ * what characters the strings may contain, what characters are prohibited
+ * and how the strings are prepared.
+ *
+ ***/
+
+#ifndef SILCSTRINGPREP_H
+#define SILCSTRINGPREP_H
+
+/****d* silcutil/SilcStringprep/SilcStringprepStatus
+ *
+ * NAME
+ *
+ *    typedef enum { ... } SilcStringprepStatus;
+ *
+ * DESCRIPTION
+ *
+ *    Status and errors returned by silc_stringprep.  SILC_STRINGPREP_OK
+ *    is returned when the preparation succeeded.  Every other value is
+ *    an error and tells why the string could not be prepared.
+ *
+ * SOURCE
+ */
+typedef enum {
+  SILC_STRINGPREP_OK,                    /* Preparation success */
+  SILC_STRINGPREP_ERR_UNASSIGNED,         /* Contains unassigned characters */
+  SILC_STRINGPREP_ERR_PROHIBITED,        /* Contains prohibited characters */
+  SILC_STRINGPREP_ERR_BIDI_PROHIBITED,   /* BIDI contains prohibited chars */
+  SILC_STRINGPREP_ERR_BIDI_RAL_WITH_L,   /* BIDI has both R/AL and L */
+  SILC_STRINGPREP_ERR_BIDI_RAL,                  /* BIDI has R/AL but not as leading
+                                            and/or trailing character. */
+  SILC_STRINGPREP_ERR_OUT_OF_MEMORY,     /* System out of memory */
+  SILC_STRINGPREP_ERR_ENCODING,                  /* Character encoding error */
+  SILC_STRINGPREP_ERR_UNSUP_ENCODING,     /* Unsupported character encoding  */
+  SILC_STRINGPREP_ERR_UNSUP_PROFILE,     /* Unsupported profile */
+  SILC_STRINGPREP_ERR,                   /* Unknown error */
+} SilcStringprepStatus;
+/***/
+
+/****d* silcutil/SilcStringprep/SilcStringprepFlags
+ *
+ * NAME
+ *
+ *    typedef enum { ... } SilcStringprepFlags;
+ *
+ * DESCRIPTION
+ *
+ *    Flags that change how the strings are prepared with silc_stringprep.
+ *    The values are bit flags that are tested with bitwise AND.  Unless
+ *    SILC_STRINGPREP_ALLOW_UNASSIGNED is set, a string that contains
+ *    unassigned characters is rejected with
+ *    SILC_STRINGPREP_ERR_UNASSIGNED.
+ *
+ * SOURCE
+ */
+typedef enum {
+  SILC_STRINGPREP_NONE               = 0x00,  /* No flags */
+  SILC_STRINGPREP_ALLOW_UNASSIGNED   = 0x01,  /* Allow unassigned characters
+                                                without returning error. */
+} SilcStringprepFlags;
+/***/
+
+/* Profiles.  These are the names given as `profile_name' to silc_stringprep. */
+#define SILC_IDENTIFIER_PREP "silc-identifier-prep"
+
+/****f* silcutil/SilcStringprep/silc_stringprep
+ *
+ * SYNOPSIS
+ *
+ *    SilcStringprepStatus
+ *    silc_stringprep(const unsigned char *bin, SilcUInt32 bin_len,
+ *                    SilcStringEncoding bin_encoding,
+ *                    const char *profile_name,
+ *                    SilcStringprepFlags flags,
+ *                    unsigned char **out, SilcUInt32 *out_len,
+ *                    SilcStringEncoding out_encoding);
+ *
+ * DESCRIPTION
+ *
+ *    Prepares the input string 'bin' of length 'bin_len' of encoding
+ *    'bin_encoding' according to the stringprep profile 'profile_name'.
+ *    Returns the prepared and allocated string into 'out'.  The 'out_len'
+ *    indicates the length of the prepared string.  This returns the
+ *    SilcStringprepStatus which indicates the status of the preparation.
+ *    For example, if the input string contains prohibited characters
+ *    (according to the used profile) this function will return an error.
+ *    The 'flags' however can be used to modify the behavior of this
+ *    function.  Caller must free the returned 'out' string.
+ *
+ *    The output string will be encoded into the character encoding
+ *    defined by the 'out_encoding'.  This allows the caller to have, for
+ *    example, the input string as a locale specific string and the output
+ *    string as a UTF-8 encoded string.
+ *
+ *    If the 'out' is NULL this function merely performs the preparation
+ *    process, but does not return anything.  In this case this function
+ *    can be used, for example, to verify that an input string that ought
+ *    to have been prepared correctly really was.  Note that a string is
+ *    returned only when both 'out' and 'out_len' are non-NULL.
+ *
+ *    Available profile names:
+ *
+ *      SILC_IDENTIFIER_PREP
+ *
+ ***/
+SilcStringprepStatus
+silc_stringprep(const unsigned char *bin, SilcUInt32 bin_len,
+               SilcStringEncoding bin_encoding,
+               const char *profile_name,
+               SilcStringprepFlags flags,
+               unsigned char **out, SilcUInt32 *out_len,
+               SilcStringEncoding out_encoding);
+
+#endif /* SILCSTRINGPREP_H */
index f3cfdaad0c64a28ba80c3c7e3bf20a844889cd80..d998ebbedc27b6dc005531d54284c52b1ebacad5 100644 (file)
@@ -170,366 +170,6 @@ unsigned char *silc_pem_decode(unsigned char *pem, SilcUInt32 pem_len,
   return data;
 }
 
-/* Encodes the string `bin' of which encoding is `bin_encoding' to the
-   UTF-8 encoding into the buffer `utf8' which is of size of `utf8_size'.
-   Returns the length of the UTF-8 encoded string, or zero (0) on error.
-   By default `bin_encoding' is ASCII, and the caller needs to know the
-   encoding of the input string if it is anything else. */
-
-SilcUInt32 silc_utf8_encode(const unsigned char *bin, SilcUInt32 bin_len,
-                           SilcStringEncoding bin_encoding,
-                           unsigned char *utf8, SilcUInt32 utf8_size)
-{
-  SilcUInt32 enclen = 0, i, charval = 0;
-
-  if (!bin || !bin_len)
-    return 0;
-
-  if (silc_utf8_valid(bin, bin_len) && bin_len <= utf8_size) {
-    memcpy(utf8, bin, bin_len);
-    return bin_len;
-  }
-
-  if (bin_encoding == SILC_STRING_LANGUAGE) {
-#if defined(HAVE_ICONV) && defined(HAVE_NL_LANGINFO) && defined(CODESET)
-    char *fromconv, *icp, *ocp;
-    iconv_t icd;
-    size_t inlen, outlen;
-
-    setlocale(LC_CTYPE, "");
-    fromconv = nl_langinfo(CODESET);
-    if (fromconv && strlen(fromconv)) {
-      icd = iconv_open("UTF-8", fromconv);
-      icp = (char *)bin;
-      ocp = (char *)utf8;
-      inlen = bin_len;
-      outlen = utf8_size;
-      if (icp && ocp && icd != (iconv_t)-1) {
-       if (iconv(icd, &icp, &inlen, &ocp, &outlen) != -1) {
-         utf8_size -= outlen;
-         iconv_close(icd);
-         return utf8_size;
-       }
-      }
-      if (icd != (iconv_t)-1)
-       iconv_close(icd);
-    }
-#endif
-
-    /* Fallback to 8-bit ASCII */
-    bin_encoding = SILC_STRING_ASCII;
-  }
-
-  for (i = 0; i < bin_len; i++) {
-    switch (bin_encoding) {
-    case SILC_STRING_ASCII:
-      charval = bin[i];
-      break;
-    case SILC_STRING_ASCII_ESC:
-      SILC_NOT_IMPLEMENTED("SILC_STRING_ASCII_ESC");
-      return 0;
-      break;
-    case SILC_STRING_BMP:
-      if (i + 1 >= bin_len)
-       return 0;
-      SILC_GET16_MSB(charval, bin + i);
-      i += 1;
-      break;
-    case SILC_STRING_BMP_LSB:
-      if (i + 1 >= bin_len)
-       return 0;
-      SILC_GET16_LSB(charval, bin + i);
-      i += 1;
-      break;
-    case SILC_STRING_UNIVERSAL:
-      if (i + 3 >= bin_len)
-       return 0;
-      SILC_GET32_MSB(charval, bin + i);
-      i += 3;
-      break;
-    case SILC_STRING_UNIVERSAL_LSB:
-      if (i + 3 >= bin_len)
-       return 0;
-      SILC_GET32_LSB(charval, bin + i);
-      i += 3;
-      break;
-    default:
-      return 0;
-      break;
-    }
-
-    if (charval < 0x80) {
-      if (utf8) {
-       if (enclen > utf8_size)
-         return 0;
-
-       utf8[enclen] = (unsigned char)charval;
-      }
-      enclen++;
-    } else if (charval < 0x800) {
-      if (utf8) {
-       if (enclen + 2 > utf8_size)
-         return 0;
-
-       utf8[enclen    ] = (unsigned char )(((charval >> 6)  & 0x1f) | 0xc0);
-       utf8[enclen + 1] = (unsigned char )((charval & 0x3f) | 0x80);
-      }
-      enclen += 2;
-    } else if (charval < 0x10000) {
-      if (utf8) {
-       if (enclen + 3 > utf8_size)
-         return 0;
-
-       utf8[enclen    ] = (unsigned char )(((charval >> 12) & 0xf)  | 0xe0);
-       utf8[enclen + 1] = (unsigned char )(((charval >> 6)  & 0x3f) | 0x80);
-       utf8[enclen + 2] = (unsigned char )((charval & 0x3f) | 0x80);
-      }
-      enclen += 3;
-    } else if (charval < 0x200000) {
-      if (utf8) {
-       if (enclen + 4 > utf8_size)
-         return 0;
-
-       utf8[enclen    ] = (unsigned char )(((charval >> 18) & 0x7)  | 0xf0);
-       utf8[enclen + 1] = (unsigned char )(((charval >> 12) & 0x3f) | 0x80);
-       utf8[enclen + 2] = (unsigned char )(((charval >> 6)  & 0x3f) | 0x80);
-       utf8[enclen + 3] = (unsigned char )((charval & 0x3f) | 0x80);
-      }
-      enclen += 4;
-    } else if (charval < 0x4000000) {
-      if (utf8) {
-       if (enclen + 5 > utf8_size)
-         return 0;
-
-       utf8[enclen    ] = (unsigned char )(((charval >> 24) & 0x3)  | 0xf8);
-       utf8[enclen + 1] = (unsigned char )(((charval >> 18) & 0x3f) | 0x80);
-       utf8[enclen + 2] = (unsigned char )(((charval >> 12) & 0x3f) | 0x80);
-       utf8[enclen + 3] = (unsigned char )(((charval >> 6)  & 0x3f) | 0x80);
-       utf8[enclen + 4] = (unsigned char )((charval & 0x3f) | 0x80);
-      }
-      enclen += 5;
-    } else {
-      if (utf8) {
-       if (enclen + 6 > utf8_size)
-         return 0;
-
-       utf8[enclen    ] = (unsigned char )(((charval >> 30) & 0x1)  | 0xfc);
-       utf8[enclen + 1] = (unsigned char )(((charval >> 24) & 0x3f) | 0x80);
-       utf8[enclen + 2] = (unsigned char )(((charval >> 18) & 0x3f) | 0x80);
-       utf8[enclen + 3] = (unsigned char )(((charval >> 12) & 0x3f) | 0x80);
-       utf8[enclen + 4] = (unsigned char )(((charval >> 6)  & 0x3f) | 0x80);
-       utf8[enclen + 5] = (unsigned char )((charval & 0x3f) | 0x80);
-      }
-      enclen += 6;
-    }
-  }
-
-  return enclen;
-}
-
-/* Decodes UTF-8 encoded string `utf8' to string of which encoding is
-   to be `bin_encoding', into the `bin' buffer of size of `bin_size'.
-   Returns the length of the decoded buffer, or zero (0) on error.
-   By default `bin_encoding' is ASCII, and the caller needs to know to
-   which encoding the output string is to be encoded if ASCII is not
-   desired. */
-
-SilcUInt32 silc_utf8_decode(const unsigned char *utf8, SilcUInt32 utf8_len,
-                           SilcStringEncoding bin_encoding,
-                           unsigned char *bin, SilcUInt32 bin_size)
-{
-  SilcUInt32 enclen = 0, i, charval;
-
-  if (!utf8 || !utf8_len)
-    return 0;
-
-  if (bin_encoding == SILC_STRING_LANGUAGE) {
-#if defined(HAVE_ICONV) && defined(HAVE_NL_LANGINFO) && defined(CODESET)
-    char *toconv, *icp, *ocp;
-    iconv_t icd;
-    size_t inlen, outlen;
-
-    setlocale(LC_CTYPE, "");
-    toconv = nl_langinfo(CODESET);
-    if (toconv && strlen(toconv)) {
-      icd = iconv_open(toconv, "UTF-8");
-      icp = (char *)utf8;
-      ocp = (char *)bin;
-      inlen = utf8_len;
-      outlen = bin_size;
-      if (icp && ocp && icd != (iconv_t)-1) {
-       if (iconv(icd, &icp, &inlen, &ocp, &outlen) != -1) {
-         bin_size -= outlen;
-         iconv_close(icd);
-         return bin_size;
-       }
-      }
-      if (icd != (iconv_t)-1)
-       iconv_close(icd);
-    }
-#endif
-
-    /* Fallback to 8-bit ASCII */
-    bin_encoding = SILC_STRING_ASCII;
-  }
-
-  for (i = 0; i < utf8_len; i++) {
-    if ((utf8[i] & 0x80) == 0x00) {
-      charval = utf8[i] & 0x7f;
-    } else if ((utf8[i] & 0xe0) == 0xc0) {
-      if (i + 1 >= utf8_len)
-       return 0;
-
-      if ((utf8[i + 1] & 0xc0) != 0x80)
-        return 0;
-
-      charval = (utf8[i++] & 0x1f) << 6;
-      charval |= utf8[i] & 0x3f;
-      if (charval < 0x80)
-        return 0;
-    } else if ((utf8[i] & 0xf0) == 0xe0) {
-      if (i + 2 >= utf8_len)
-       return 0;
-
-      if (((utf8[i + 1] & 0xc0) != 0x80) ||
-         ((utf8[i + 2] & 0xc0) != 0x80))
-        return 0;
-
-      /* Surrogates not allowed (D800-DFFF) */
-      if (utf8[i] == 0xed &&
-         utf8[i + 1] >= 0xa0 && utf8[i + 1] <= 0xbf &&
-         utf8[i + 2] >= 0x80 && utf8[i + 2] <= 0xbf)
-       return 0;
-
-      charval = (utf8[i++]  & 0xf)  << 12;
-      charval |= (utf8[i++] & 0x3f) << 6;
-      charval |= utf8[i] & 0x3f;
-      if (charval < 0x800)
-        return 0;
-    } else if ((utf8[i] & 0xf8) == 0xf0) {
-      if (i + 3 >= utf8_len)
-       return 0;
-
-      if (((utf8[i + 1] & 0xc0) != 0x80) ||
-         ((utf8[i + 2] & 0xc0) != 0x80) ||
-         ((utf8[i + 3] & 0xc0) != 0x80))
-        return 0;
-
-      charval = ((SilcUInt32)(utf8[i++] & 0x7)) << 18;
-      charval |= (utf8[i++] & 0x3f) << 12;
-      charval |= (utf8[i++] & 0x3f) << 6;
-      charval |= utf8[i] & 0x3f;
-      if (charval < 0x10000)
-        return 0;
-    } else if ((utf8[i] & 0xfc) == 0xf8) {
-      if (i + 4 >= utf8_len)
-       return 0;
-
-      if (((utf8[i + 1] & 0xc0) != 0x80) ||
-         ((utf8[i + 2] & 0xc0) != 0x80) ||
-         ((utf8[i + 3] & 0xc0) != 0x80) ||
-         ((utf8[i + 4] & 0xc0) != 0x80))
-        return 0;
-
-      charval = ((SilcUInt32)(utf8[i++]  & 0x3))  << 24;
-      charval |= ((SilcUInt32)(utf8[i++] & 0x3f)) << 18;
-      charval |= ((SilcUInt32)(utf8[i++] & 0x3f)) << 12;
-      charval |= (utf8[i++] & 0x3f) << 6;
-      charval |= utf8[i] & 0x3f;
-      if (charval < 0x200000)
-        return 0;
-    } else if ((utf8[i] & 0xfe) == 0xfc) {
-      if (i + 5 >= utf8_len)
-       return 0;
-
-      if (((utf8[i + 1] & 0xc0) != 0x80) ||
-         ((utf8[i + 2] & 0xc0) != 0x80) ||
-         ((utf8[i + 3] & 0xc0) != 0x80) ||
-         ((utf8[i + 4] & 0xc0) != 0x80) ||
-         ((utf8[i + 5] & 0xc0) != 0x80))
-        return 0;
-
-      charval = ((SilcUInt32)(utf8[i++]  & 0x1))  << 30;
-      charval |= ((SilcUInt32)(utf8[i++] & 0x3f)) << 24;
-      charval |= ((SilcUInt32)(utf8[i++] & 0x3f)) << 18;
-      charval |= ((SilcUInt32)(utf8[i++] & 0x3f)) << 12;
-      charval |= (utf8[i++] & 0x3f) << 6;
-      charval |= utf8[i] & 0x3f;
-      if (charval < 0x4000000)
-        return 0;
-    } else {
-      return 0;
-    }
-
-    switch (bin_encoding) {
-    case SILC_STRING_ASCII:
-      if (bin) {
-        if (enclen + 1 > bin_size)
-          return 0;
-
-        bin[enclen] = (unsigned char)charval;
-      }
-      enclen++;
-      break;
-    case SILC_STRING_ASCII_ESC:
-      SILC_NOT_IMPLEMENTED("SILC_STRING_ASCII_ESC");
-      return 0;
-      break;
-    case SILC_STRING_BMP:
-      if (bin)
-       SILC_PUT16_MSB(charval, bin + enclen);
-      enclen += 2;
-      break;
-    case SILC_STRING_BMP_LSB:
-      if (bin)
-       SILC_PUT16_LSB(charval, bin + enclen);
-      enclen += 2;
-      break;
-    case SILC_STRING_UNIVERSAL:
-      if (bin)
-       SILC_PUT32_MSB(charval, bin + enclen);
-      enclen += 4;
-      break;
-    case SILC_STRING_UNIVERSAL_LSB:
-      if (bin)
-       SILC_PUT32_LSB(charval, bin + enclen);
-      enclen += 4;
-      break;
-    default:
-      return 0;
-      break;
-    }
-  }
-
-  return enclen;
-}
-
-/* Returns the length of UTF-8 encoded string if the `bin' of
-   encoding of `bin_encoding' is encoded with silc_utf8_encode. */
-
-SilcUInt32 silc_utf8_encoded_len(const unsigned char *bin, SilcUInt32 bin_len,
-                                SilcStringEncoding bin_encoding)
-{
-  return silc_utf8_encode(bin, bin_len, bin_encoding, NULL, 0);
-}
-
-/* Returns the length of decoded string if the `bin' of encoding of
-   `bin_encoding' is decoded with silc_utf8_decode. */
-
-SilcUInt32 silc_utf8_decoded_len(const unsigned char *bin, SilcUInt32 bin_len,
-                                SilcStringEncoding bin_encoding)
-{
-  return silc_utf8_decode(bin, bin_len, bin_encoding, NULL, 0);
-}
-
-/* Returns TRUE if the `utf8' string of length of `utf8_len' is valid
-   UTF-8 encoded string, FALSE if it is not UTF-8 encoded string. */
-
-bool silc_utf8_valid(const unsigned char *utf8, SilcUInt32 utf8_len)
-{
-  return silc_utf8_decode(utf8, utf8_len, 0, NULL, 0) != 0;
-}
-
 /* Mime constants and macros */
 #define MIME_VERSION "MIME-Version: "
 #define MIME_VERSION_LEN 14
index 35e9781227bbbf520bceafcf5318ba32ab9d476e..6525df50b0ff3f160a4de5ed952da1195babf67c 100644 (file)
@@ -1,10 +1,10 @@
 /*
 
-  silcstrutil.h 
+  silcstrutil.h
 
   Author: Pekka Riikonen <priikone@silcnet.org>
 
-  Copyright (C) 2002 - 2003 Pekka Riikonen
+  Copyright (C) 2002 - 2004 Pekka Riikonen
 
   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
 #ifndef SILCSTRUTIL_H
 #define SILCSTRUTIL_H
 
-/****f* silcutil/SilcStrUtilAPI/silc_pem_encode
- *
- * SYNOPSIS
- *
- *    char *silc_pem_encode(unsigned char *data, SilcUInt32 len);
- *
- * DESCRIPTION
- *
- *    Encodes data into PEM encoding. Returns NULL terminated PEM encoded
- *    data string. Note: This is originally public domain code and is
- *    still PD.
- *
- ***/
-char *silc_pem_encode(unsigned char *data, SilcUInt32 len);
-
-/****f* silcutil/SilcStrUtilAPI/silc_pem_encode_file
- *
- * SYNOPSIS
- *
- *    char *silc_pem_encode_file(unsigned char *data, SilcUInt32 data_len);
- *
- * DESCRIPTION
- *
- *    Same as silc_pem_encode() but puts newline ('\n') every 72 characters.
- *
- ***/
-char *silc_pem_encode_file(unsigned char *data, SilcUInt32 data_len);
-
-/****f* silcutil/SilcStrUtilAPI/silc_pem_decode
- *
- * SYNOPSIS
- *
- *    unsigned char *silc_pem_decode(unsigned char *pem, SilcUInt32 pem_len,
- *                                   SilcUInt32 *ret_len);
- *
- * DESCRIPTION
- *
- *    Decodes PEM into data. Returns the decoded data. Note: This is
- *    originally public domain code and is still PD.
- *
- ***/
-unsigned char *silc_pem_decode(unsigned char *pem, SilcUInt32 pem_len,
-                              SilcUInt32 *ret_len);
-
 /****d* silcutil/SilcStrUtilAPI/SilcStringEncoding
  *
  * NAME
- * 
+ *
  *    typedef enum { ... } SilcStringEncoding;
  *
  * DESCRIPTION
  *
- *    String encoding definitions used with the UTF-8 encoding and
- *    decoding functions.  By default, systems should use SILC_STRING_LANGUAGE
- *    since it encodes and decodes correctly according to local system
- *    language and character set.
+ *    String encoding definitions used with various string manipulation
+ *    routines.  By default, applications are suggested to use
+ *    SILC_STRING_LOCALE since it encodes and decodes correctly according
+ *    to local system language and character set (locale).
  *
  * SOURCE
  */
@@ -96,103 +52,63 @@ typedef enum {
   SILC_STRING_BMP_LSB       = 3,  /* BMP, least significant byte first */
   SILC_STRING_UNIVERSAL     = 4,  /* 32 bit, UCS-4, Universal, ISO/IEC 10646 */
   SILC_STRING_UNIVERSAL_LSB = 5,  /* Universal, least significant byte first */
-  SILC_STRING_LANGUAGE      = 6,  /* Language and charset specific conversion
-                                    on those platforms that support iconv().
+  SILC_STRING_LOCALE        = 6,  /* A locale specific conversion on
+                                    those platforms that support iconv().
                                     Fallback is SILC_STRING_ASCII. */
+  SILC_STRING_UTF8          = 7,  /* UTF-8 encoding */
+  SILC_STRING_PRINTABLE     = 8,  /* Printable ASCII (no escaping) */
+  SILC_STRING_VISIBLE       = 9,  /* Visible ASCII string */
+  SILC_STRING_TELETEX       = 10, /* Teletex ASCII string */
+  SILC_STRING_NUMERICAL     = 11, /* Numerical ASCII string (digits) */
+  SILC_STRING_LDAP_DN       = 12, /* Strings for LDAP DNs, RFC 2253 */
+
+  SILC_STRING_LANGUAGE      = 6,  /* _Deprecated_, use SILC_STRING_LOCALE. */
 } SilcStringEncoding;
 /***/
 
-/****f* silcutil/SilcStrUtilAPI/silc_utf8_encode
- *
- * SYNOPSIS
- *
- *    SilcUInt32 silc_utf8_encode(const unsigned char *bin, SilcUInt32 bin_len,
- *                                SilcStringEncoding bin_encoding,
- *                                unsigned char *utf8, SilcUInt32 utf8_size);
- *
- * DESCRIPTION
- *
- *    Encodes the string `bin' of which encoding is `bin_encoding' to the
- *    UTF-8 encoding into the buffer `utf8' which is of size of `utf8_size'.
- *    Returns the length of the UTF-8 encoded string, or zero (0) on error.
- *    By default `bin_encoding' is ASCII, and the caller needs to know the
- *    encoding of the input string if it is anything else.
- *
- ***/
-SilcUInt32 silc_utf8_encode(const unsigned char *bin, SilcUInt32 bin_len,
-                           SilcStringEncoding bin_encoding,
-                           unsigned char *utf8, SilcUInt32 utf8_size);
-
-/****f* silcutil/SilcStrUtilAPI/silc_utf8_decode
- *
- * SYNOPSIS
- *
- *    SilcUInt32 silc_utf8_decode(const unsigned char *utf8, 
- *                                SilcUInt32 utf8_len,
- *                                SilcStringEncoding bin_encoding,
- *                                unsigned char *bin, SilcUInt32 bin_size);
- *
- * DESCRIPTION
- *
- *    Decodes UTF-8 encoded string `utf8' to string of which encoding is
- *    to be `bin_encoding', into the `bin' buffer of size of `bin_size'.
- *    Returns the length of the decoded buffer, or zero (0) on error.
- *    By default `bin_encoding' is ASCII, and the caller needs to know to
- *    which encoding the output string is to be encoded if ASCII is not
- *    desired. 
- *
- ***/
-SilcUInt32 silc_utf8_decode(const unsigned char *utf8, SilcUInt32 utf8_len,
-                           SilcStringEncoding bin_encoding,
-                           unsigned char *bin, SilcUInt32 bin_size);
-
-/****f* silcutil/SilcStrUtilAPI/silc_utf8_encoded_len
+/****f* silcutil/SilcStrUtilAPI/silc_pem_encode
  *
  * SYNOPSIS
  *
- *    SilcUInt32 silc_utf8_encoded_len(const unsigned char *bin, 
- *                                     SilcUInt32 bin_len,
- *                                     SilcStringEncoding bin_encoding);
+ *    char *silc_pem_encode(unsigned char *data, SilcUInt32 len);
  *
  * DESCRIPTION
  *
- *    Returns the length of UTF-8 encoded string if the `bin' of
- *    encoding of `bin_encoding' is encoded with silc_utf8_encode.
+ *    Encodes data into PEM encoding. Returns NULL terminated PEM encoded
+ *    data string. Note: This is originally public domain code and is
+ *    still PD.
  *
  ***/
-SilcUInt32 silc_utf8_encoded_len(const unsigned char *bin, SilcUInt32 bin_len,
-                                SilcStringEncoding bin_encoding);
+char *silc_pem_encode(unsigned char *data, SilcUInt32 len);
 
-/****f* silcutil/SilcStrUtilAPI/silc_utf8_decoded_len
+/****f* silcutil/SilcStrUtilAPI/silc_pem_encode_file
  *
  * SYNOPSIS
  *
- *    SilcUInt32 silc_utf8_decoded_len(const unsigned char *bin, 
- *                                     SilcUInt32 bin_len,
- *                                     SilcStringEncoding bin_encoding);
+ *    char *silc_pem_encode_file(unsigned char *data, SilcUInt32 data_len);
  *
  * DESCRIPTION
  *
- *    Returns the length of decoded string if the `bin' of encoding of
- *    `bin_encoding' is decoded with silc_utf8_decode. 
+ *    Same as silc_pem_encode() but puts newline ('\n') every 72 characters.
  *
  ***/
-SilcUInt32 silc_utf8_decoded_len(const unsigned char *bin, SilcUInt32 bin_len,
-                                SilcStringEncoding bin_encoding);
+char *silc_pem_encode_file(unsigned char *data, SilcUInt32 data_len);
 
-/****f* silcutil/SilcStrUtilAPI/silc_utf8_valid
+/****f* silcutil/SilcStrUtilAPI/silc_pem_decode
  *
  * SYNOPSIS
  *
- *    bool silc_utf8_valid(const unsigned char *utf8, SilcUInt32 utf8_len);
+ *    unsigned char *silc_pem_decode(unsigned char *pem, SilcUInt32 pem_len,
+ *                                   SilcUInt32 *ret_len);
  *
  * DESCRIPTION
  *
- *    Returns TRUE if the `utf8' string of length of `utf8_len' is valid
- *    UTF-8 encoded string, FALSE if it is not UTF-8 encoded string.
+ *    Decodes PEM into data. Returns the decoded data. Note: This is
+ *    originally public domain code and is still PD.
  *
  ***/
-bool silc_utf8_valid(const unsigned char *utf8, SilcUInt32 utf8_len);
+unsigned char *silc_pem_decode(unsigned char *pem, SilcUInt32 pem_len,
+                              SilcUInt32 *ret_len);
 
 /****f* silcutil/SilcStrUtilAPI/silc_mime_parse
  *
@@ -218,10 +134,10 @@ bool silc_utf8_valid(const unsigned char *utf8, SilcUInt32 utf8_len);
  *    indicating the encoding of the data and copies it to the
  *    `content_transfer_encoding' if provided.
  *
- *    The pointer to the actual data in the MIME object is saved into 
- *    `mime_data_ptr'.  The pointer is a location in the `mime' and it 
- *    does not allocate or copy anything, ie. the `mime_data_ptr' is a 
- *    pointer to the `mime'.  The `mime_data_len' indicates the length of 
+ *    The pointer to the actual data in the MIME object is saved into
+ *    `mime_data_ptr'.  The pointer is a location in the `mime' and it
+ *    does not allocate or copy anything, ie. the `mime_data_ptr' is a
+ *    pointer to the `mime'.  The `mime_data_len' indicates the length of
  *    the data without the MIME header.  The caller is responsible of
  *    NULL terminating the buffers it provides.
  *
diff --git a/lib/silcutil/silcutf8.c b/lib/silcutil/silcutf8.c
new file mode 100644 (file)
index 0000000..4d37c91
--- /dev/null
@@ -0,0 +1,505 @@
+/*
+
+  silcutf8.c
+
+  Author: Pekka Riikonen <priikone@silcnet.org>
+
+  Copyright (C) 2004 Pekka Riikonen
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; version 2 of the License.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+*/
+
+#include "silcincludes.h"
+#include "silcutf8.h"
+
+/* Encodes the string `bin' of which encoding is `bin_encoding' to the
+   UTF-8 encoding into the buffer `utf8' which is of size of `utf8_size'.
+   Returns the length of the UTF-8 encoded string, or zero (0) on error.
+   By default `bin_encoding' is ASCII, and the caller needs to know the
+   encoding of the input string if it is anything else. */
+
+SilcUInt32 silc_utf8_encode(const unsigned char *bin, SilcUInt32 bin_len,
+                           SilcStringEncoding bin_encoding,
+                           unsigned char *utf8, SilcUInt32 utf8_size)
+{
+  SilcUInt32 enclen = 0, i, charval = 0;
+
+  if (!bin || !bin_len)
+    return 0;
+
+  if (bin_encoding == SILC_STRING_UTF8 ||
+      (silc_utf8_valid(bin, bin_len) && bin_len <= utf8_size)) {
+    memcpy(utf8, bin, bin_len);
+    return bin_len;
+  }
+
+  if (bin_encoding == SILC_STRING_LOCALE) {
+#if defined(HAVE_ICONV) && defined(HAVE_NL_LANGINFO) && defined(CODESET)
+    char *fromconv, *icp, *ocp;
+    iconv_t icd;
+    size_t inlen, outlen;
+
+    setlocale(LC_CTYPE, "");
+    fromconv = nl_langinfo(CODESET);
+    if (fromconv && strlen(fromconv)) {
+      icd = iconv_open("UTF-8", fromconv);
+      icp = (char *)bin;
+      ocp = (char *)utf8;
+      inlen = bin_len;
+      outlen = utf8_size;
+      if (icp && ocp && icd != (iconv_t)-1) {
+       if (iconv(icd, &icp, &inlen, &ocp, &outlen) != -1) {
+         utf8_size -= outlen;
+         iconv_close(icd);
+         return utf8_size;
+       }
+      }
+      if (icd != (iconv_t)-1)
+       iconv_close(icd);
+    }
+#endif
+
+    /* Fallback to 8-bit ASCII */
+    bin_encoding = SILC_STRING_ASCII;
+  }
+
+  for (i = 0; i < bin_len; i++) {
+    switch (bin_encoding) {
+    case SILC_STRING_ASCII:
+    case SILC_STRING_TELETEX:
+      charval = bin[i];
+      break;
+    case SILC_STRING_ASCII_ESC:
+      SILC_NOT_IMPLEMENTED("SILC_STRING_ASCII_ESC");
+      return 0;
+      break;
+    case SILC_STRING_BMP:
+      if (i + 1 >= bin_len)
+       return 0;
+      SILC_GET16_MSB(charval, bin + i);
+      i += 1;
+      break;
+    case SILC_STRING_BMP_LSB:
+      if (i + 1 >= bin_len)
+       return 0;
+      SILC_GET16_LSB(charval, bin + i);
+      i += 1;
+      break;
+    case SILC_STRING_UNIVERSAL:
+      if (i + 3 >= bin_len)
+       return 0;
+      SILC_GET32_MSB(charval, bin + i);
+      i += 3;
+      break;
+    case SILC_STRING_UNIVERSAL_LSB:
+      if (i + 3 >= bin_len)
+       return 0;
+      SILC_GET32_LSB(charval, bin + i);
+      i += 3;
+      break;
+    case SILC_STRING_PRINTABLE:
+    case SILC_STRING_VISIBLE:
+      if (!isprint(bin[i]))
+       return 0;
+      charval = bin[i];
+      break;
+    case SILC_STRING_NUMERICAL:
+      if (bin[i] != 0x20 && !isdigit(bin[i]))
+       return 0;
+      charval = bin[i];
+      break;
+    case SILC_STRING_LDAP_DN:
+      /* Remove any escaping */
+      if (bin[i] == '\\') {
+       unsigned char cv;
+       if (i + 1 >= bin_len)
+         return 0;
+
+       /* If escaped character is any of the following no processing is
+          needed, otherwise it is a hex value and we need to read it. */
+       cv = bin[++i];
+       if (cv != ',' && cv != '+' && cv != '"' && cv != '\\' && cv != '<' &&
+           cv != '>' && cv != ';' && cv != ' ' && cv != '#') {
+         unsigned int hexval;
+         if (i + 1 >= bin_len)
+           return 0;
+         if (sscanf(&bin[++i], "%02X", &hexval) != 1)
+           return 0;
+         cv = (unsigned char)hexval;
+       }
+
+       charval = cv;
+       break;
+      }
+      charval = bin[i];
+      break;
+    default:
+      return 0;
+      break;
+    }
+
+    if (charval < 0x80) {
+      if (utf8) {
+       if (enclen > utf8_size)
+         return 0;
+
+       utf8[enclen] = (unsigned char)charval;
+      }
+      enclen++;
+    } else if (charval < 0x800) {
+      if (utf8) {
+       if (enclen + 2 > utf8_size)
+         return 0;
+
+       utf8[enclen    ] = (unsigned char )(((charval >> 6)  & 0x1f) | 0xc0);
+       utf8[enclen + 1] = (unsigned char )((charval & 0x3f) | 0x80);
+      }
+      enclen += 2;
+    } else if (charval < 0x10000) {
+      if (utf8) {
+       if (enclen + 3 > utf8_size)
+         return 0;
+
+       utf8[enclen    ] = (unsigned char )(((charval >> 12) & 0xf)  | 0xe0);
+       utf8[enclen + 1] = (unsigned char )(((charval >> 6)  & 0x3f) | 0x80);
+       utf8[enclen + 2] = (unsigned char )((charval & 0x3f) | 0x80);
+      }
+      enclen += 3;
+    } else if (charval < 0x200000) {
+      if (utf8) {
+       if (enclen + 4 > utf8_size)
+         return 0;
+
+       utf8[enclen    ] = (unsigned char )(((charval >> 18) & 0x7)  | 0xf0);
+       utf8[enclen + 1] = (unsigned char )(((charval >> 12) & 0x3f) | 0x80);
+       utf8[enclen + 2] = (unsigned char )(((charval >> 6)  & 0x3f) | 0x80);
+       utf8[enclen + 3] = (unsigned char )((charval & 0x3f) | 0x80);
+      }
+      enclen += 4;
+    } else if (charval < 0x4000000) {
+      if (utf8) {
+       if (enclen + 5 > utf8_size)
+         return 0;
+
+       utf8[enclen    ] = (unsigned char )(((charval >> 24) & 0x3)  | 0xf8);
+       utf8[enclen + 1] = (unsigned char )(((charval >> 18) & 0x3f) | 0x80);
+       utf8[enclen + 2] = (unsigned char )(((charval >> 12) & 0x3f) | 0x80);
+       utf8[enclen + 3] = (unsigned char )(((charval >> 6)  & 0x3f) | 0x80);
+       utf8[enclen + 4] = (unsigned char )((charval & 0x3f) | 0x80);
+      }
+      enclen += 5;
+    } else {
+      if (utf8) {
+       if (enclen + 6 > utf8_size)
+         return 0;
+
+       utf8[enclen    ] = (unsigned char )(((charval >> 30) & 0x1)  | 0xfc);
+       utf8[enclen + 1] = (unsigned char )(((charval >> 24) & 0x3f) | 0x80);
+       utf8[enclen + 2] = (unsigned char )(((charval >> 18) & 0x3f) | 0x80);
+       utf8[enclen + 3] = (unsigned char )(((charval >> 12) & 0x3f) | 0x80);
+       utf8[enclen + 4] = (unsigned char )(((charval >> 6)  & 0x3f) | 0x80);
+       utf8[enclen + 5] = (unsigned char )((charval & 0x3f) | 0x80);
+      }
+      enclen += 6;
+    }
+  }
+
+  return enclen;
+}
+
+/* Decodes UTF-8 encoded string `utf8' to string of which encoding is
+   to be `bin_encoding', into the `bin' buffer of size of `bin_size'.
+   Returns the length of the decoded buffer, or zero (0) on error.
+   By default `bin_encoding' is ASCII, and the caller needs to know to
+   which encoding the output string is to be encoded if ASCII is not
+   desired. */
+
+SilcUInt32 silc_utf8_decode(const unsigned char *utf8, SilcUInt32 utf8_len,
+                           SilcStringEncoding bin_encoding,
+                           unsigned char *bin, SilcUInt32 bin_size)
+{
+  SilcUInt32 enclen = 0, i, charval;
+
+  if (!utf8 || !utf8_len)
+    return 0;
+
+  if (bin_encoding == SILC_STRING_UTF8) {
+    if (!silc_utf8_valid(utf8, utf8_len) ||
+       utf8_len > bin_size)
+      return 0;
+    memcpy(bin, utf8, utf8_len);
+    return utf8_len;
+  }
+
+  if (bin_encoding == SILC_STRING_LOCALE) {
+#if defined(HAVE_ICONV) && defined(HAVE_NL_LANGINFO) && defined(CODESET)
+    char *toconv, *icp, *ocp;
+    iconv_t icd;
+    size_t inlen, outlen;
+
+    setlocale(LC_CTYPE, "");
+    toconv = nl_langinfo(CODESET);
+    if (toconv && strlen(toconv)) {
+      icd = iconv_open(toconv, "UTF-8");
+      icp = (char *)utf8;
+      ocp = (char *)bin;
+      inlen = utf8_len;
+      outlen = bin_size;
+      if (icp && ocp && icd != (iconv_t)-1) {
+       if (iconv(icd, &icp, &inlen, &ocp, &outlen) != -1) {
+         bin_size -= outlen;
+         iconv_close(icd);
+         return bin_size;
+       }
+      }
+      if (icd != (iconv_t)-1)
+       iconv_close(icd);
+    }
+#endif
+
+    /* Fallback to 8-bit ASCII */
+    bin_encoding = SILC_STRING_ASCII;
+  }
+
+  for (i = 0; i < utf8_len; i++) {
+    if ((utf8[i] & 0x80) == 0x00) {
+      charval = utf8[i] & 0x7f;
+    } else if ((utf8[i] & 0xe0) == 0xc0) {
+      if (i + 1 >= utf8_len)
+       return 0;
+
+      if ((utf8[i + 1] & 0xc0) != 0x80)
+        return 0;
+
+      charval = (utf8[i++] & 0x1f) << 6;
+      charval |= utf8[i] & 0x3f;
+      if (charval < 0x80)
+        return 0;
+    } else if ((utf8[i] & 0xf0) == 0xe0) {
+      if (i + 2 >= utf8_len)
+       return 0;
+
+      if (((utf8[i + 1] & 0xc0) != 0x80) ||
+         ((utf8[i + 2] & 0xc0) != 0x80))
+        return 0;
+
+      /* Surrogates not allowed (D800-DFFF) */
+      if (utf8[i] == 0xed &&
+         utf8[i + 1] >= 0xa0 && utf8[i + 1] <= 0xbf &&
+         utf8[i + 2] >= 0x80 && utf8[i + 2] <= 0xbf)
+       return 0;
+
+      charval = (utf8[i++]  & 0xf)  << 12;
+      charval |= (utf8[i++] & 0x3f) << 6;
+      charval |= utf8[i] & 0x3f;
+      if (charval < 0x800)
+        return 0;
+    } else if ((utf8[i] & 0xf8) == 0xf0) {
+      if (i + 3 >= utf8_len)
+       return 0;
+
+      if (((utf8[i + 1] & 0xc0) != 0x80) ||
+         ((utf8[i + 2] & 0xc0) != 0x80) ||
+         ((utf8[i + 3] & 0xc0) != 0x80))
+        return 0;
+
+      charval = ((SilcUInt32)(utf8[i++] & 0x7)) << 18;
+      charval |= (utf8[i++] & 0x3f) << 12;
+      charval |= (utf8[i++] & 0x3f) << 6;
+      charval |= utf8[i] & 0x3f;
+      if (charval < 0x10000)
+        return 0;
+    } else if ((utf8[i] & 0xfc) == 0xf8) {
+      if (i + 4 >= utf8_len)
+       return 0;
+
+      if (((utf8[i + 1] & 0xc0) != 0x80) ||
+         ((utf8[i + 2] & 0xc0) != 0x80) ||
+         ((utf8[i + 3] & 0xc0) != 0x80) ||
+         ((utf8[i + 4] & 0xc0) != 0x80))
+        return 0;
+
+      charval = ((SilcUInt32)(utf8[i++]  & 0x3))  << 24;
+      charval |= ((SilcUInt32)(utf8[i++] & 0x3f)) << 18;
+      charval |= ((SilcUInt32)(utf8[i++] & 0x3f)) << 12;
+      charval |= (utf8[i++] & 0x3f) << 6;
+      charval |= utf8[i] & 0x3f;
+      if (charval < 0x200000)
+        return 0;
+    } else if ((utf8[i] & 0xfe) == 0xfc) {
+      if (i + 5 >= utf8_len)
+       return 0;
+
+      if (((utf8[i + 1] & 0xc0) != 0x80) ||
+         ((utf8[i + 2] & 0xc0) != 0x80) ||
+         ((utf8[i + 3] & 0xc0) != 0x80) ||
+         ((utf8[i + 4] & 0xc0) != 0x80) ||
+         ((utf8[i + 5] & 0xc0) != 0x80))
+        return 0;
+
+      charval = ((SilcUInt32)(utf8[i++]  & 0x1))  << 30;
+      charval |= ((SilcUInt32)(utf8[i++] & 0x3f)) << 24;
+      charval |= ((SilcUInt32)(utf8[i++] & 0x3f)) << 18;
+      charval |= ((SilcUInt32)(utf8[i++] & 0x3f)) << 12;
+      charval |= (utf8[i++] & 0x3f) << 6;
+      charval |= utf8[i] & 0x3f;
+      if (charval < 0x4000000)
+        return 0;
+    } else {
+      return 0;
+    }
+
+    switch (bin_encoding) {
+    case SILC_STRING_ASCII:
+    case SILC_STRING_PRINTABLE:
+    case SILC_STRING_VISIBLE:
+    case SILC_STRING_TELETEX:
+    case SILC_STRING_NUMERICAL:
+      if (bin) {
+        if (enclen + 1 > bin_size)
+          return 0;
+
+        bin[enclen] = (unsigned char)charval;
+      }
+      enclen++;
+      break;
+    case SILC_STRING_ASCII_ESC:
+      SILC_NOT_IMPLEMENTED("SILC_STRING_ASCII_ESC");
+      return 0;
+      break;
+    case SILC_STRING_BMP:
+      if (bin) {
+        if (enclen + 2 > bin_size)
+          return 0;
+       SILC_PUT16_MSB(charval, bin + enclen);
+      }
+      enclen += 2;
+      break;
+    case SILC_STRING_BMP_LSB:
+      if (bin) {
+        if (enclen + 2 > bin_size)
+          return 0;
+       SILC_PUT16_LSB(charval, bin + enclen);
+      }
+      enclen += 2;
+      break;
+    case SILC_STRING_UNIVERSAL:
+      if (bin) {
+        if (enclen + 4 > bin_size)
+          return 0;
+       SILC_PUT32_MSB(charval, bin + enclen);
+      }
+      enclen += 4;
+      break;
+    case SILC_STRING_UNIVERSAL_LSB:
+      if (bin) {
+        if (enclen + 4 > bin_size)
+          return 0;
+       SILC_PUT32_LSB(charval, bin + enclen);
+      }
+      enclen += 4;
+      break;
+    case SILC_STRING_LDAP_DN:
+      {
+       /* XXX multibyte handling */
+       unsigned char cv = (unsigned char)charval;
+
+       /* If string starts with space or # escape it */
+       if (!enclen && (cv == '#' || cv == ' ')) {
+         if (bin) {
+           if (enclen + 2 > bin_size)
+             return 0;
+           bin[enclen] = '\\';
+           bin[enclen + 1] = cv;
+         }
+         enclen += 2;
+         break;
+       }
+
+       /* If string ends with space escape it */
+       if (i == utf8_len - 1 && cv == ' ') {
+         if (bin) {
+           if (enclen + 2 > bin_size)
+             return 0;
+           bin[enclen] = '\\';
+           bin[enclen + 1] = cv;
+         }
+         enclen += 2;
+         break;
+       }
+
+       /* If character is any of following then escape */
+       if (cv == ',' || cv == '+' || cv == '"' || cv == '\\' || cv == '<' ||
+           cv == '>' || cv == ';') {
+         if (bin) {
+           if (enclen + 2 > bin_size)
+             return 0;
+           bin[enclen] = '\\';
+           bin[enclen + 1] = cv;
+         }
+         enclen += 2;
+         break;
+       }
+
+       /* If character is not printable escape it with hex character */
+       if (!isprint((int)cv)) {
+         if (bin) {
+           if (enclen + 2 > bin_size)
+             return 0;
+           bin[enclen] = '\\';
+           snprintf(bin + enclen + 1, 3, "%02X", cv);
+         }
+         enclen += 2;
+         break;
+       }
+
+       if (bin) {
+         if (enclen + 1 > bin_size)
+           return 0;
+         bin[enclen] = cv;
+       }
+       enclen++;
+      }
+      break;
+    default:
+      return 0;
+      break;
+    }
+  }
+
+  return enclen;
+}
+
+/* Computes how many bytes silc_utf8_encode would produce for the string
+   `bin' of `bin_len' bytes in encoding `bin_encoding'.  The encoder is
+   run without a destination buffer and its return value is passed on
+   unchanged. */
+
+SilcUInt32 silc_utf8_encoded_len(const unsigned char *bin, SilcUInt32 bin_len,
+                                SilcStringEncoding bin_encoding)
+{
+  SilcUInt32 needed;
+
+  needed = silc_utf8_encode(bin, bin_len, bin_encoding, NULL, 0);
+  return needed;
+}
+
+/* Computes how many bytes silc_utf8_decode would produce when the UTF-8
+   encoded string `bin' of `bin_len' bytes is decoded to the encoding
+   `bin_encoding'.  The decoder is run without a destination buffer and
+   its return value is passed on unchanged. */
+
+SilcUInt32 silc_utf8_decoded_len(const unsigned char *bin, SilcUInt32 bin_len,
+                                SilcStringEncoding bin_encoding)
+{
+  SilcUInt32 needed;
+
+  needed = silc_utf8_decode(bin, bin_len, bin_encoding, NULL, 0);
+  return needed;
+}
+
+/* Checks whether the `utf8' string of `utf8_len' bytes is accepted by
+   silc_utf8_decode.  Returns TRUE if it is, FALSE otherwise.  The check
+   is a decode without an output buffer, so NULL or zero length input is
+   reported as FALSE as well. */
+
+bool silc_utf8_valid(const unsigned char *utf8, SilcUInt32 utf8_len)
+{
+  /* Encoding value 0 is presumably SILC_STRING_ASCII (the enum is not
+     defined in this file) */
+  if (silc_utf8_decode(utf8, utf8_len, 0, NULL, 0) == 0)
+    return 0;
+
+  return 1;
+}
diff --git a/lib/silcutil/silcutf8.h b/lib/silcutil/silcutf8.h
new file mode 100644 (file)
index 0000000..8373cab
--- /dev/null
@@ -0,0 +1,127 @@
+/*
+
+  silcutf8.h
+
+  Author: Pekka Riikonen <priikone@silcnet.org>
+
+  Copyright (C) 2004, 2005 Pekka Riikonen
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; version 2 of the License.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+*/
+
+/****h* silcutil/SILC UTF-8 Interface
+ *
+ * DESCRIPTION
+ *
+ * Interface for the UTF-8 Unicode encoding form.  These routines provide
+ * applications with full UTF-8 and Unicode support: encoding to UTF-8 from,
+ * and decoding from UTF-8 to, a myriad of other character encodings.
+ *
+ ***/
+
+#ifndef SILCUTF8_H
+#define SILCUTF8_H
+
+/****f* silcutil/SilcUTF8API/silc_utf8_encode
+ *
+ * SYNOPSIS
+ *
+ *    SilcUInt32 silc_utf8_encode(const unsigned char *bin, SilcUInt32 bin_len,
+ *                                SilcStringEncoding bin_encoding,
+ *                                unsigned char *utf8, SilcUInt32 utf8_size);
+ *
+ * DESCRIPTION
+ *
+ *    Encodes the string `bin' of which encoding is `bin_encoding' to the
+ *    UTF-8 encoding into the buffer `utf8' which is of size of `utf8_size'.
+ *    Returns the length of the UTF-8 encoded string, or zero (0) on error.
+ *    By default `bin_encoding' is ASCII, and the caller needs to know the
+ *    encoding of the input string if it is anything else.
+ *
+ ***/
+SilcUInt32 silc_utf8_encode(const unsigned char *bin, SilcUInt32 bin_len,
+                           SilcStringEncoding bin_encoding,
+                           unsigned char *utf8, SilcUInt32 utf8_size);
+
+/****f* silcutil/SilcUTF8API/silc_utf8_decode
+ *
+ * SYNOPSIS
+ *
+ *    SilcUInt32 silc_utf8_decode(const unsigned char *utf8,
+ *                                SilcUInt32 utf8_len,
+ *                                SilcStringEncoding bin_encoding,
+ *                                unsigned char *bin, SilcUInt32 bin_size);
+ *
+ * DESCRIPTION
+ *
+ *    Decodes UTF-8 encoded string `utf8' to string of which encoding is
+ *    to be `bin_encoding', into the `bin' buffer of size of `bin_size'.
+ *    Returns the length of the decoded buffer, or zero (0) on error.
+ *    By default `bin_encoding' is ASCII, and the caller needs to know to
+ *    which encoding the output string is to be encoded if ASCII is not
+ *    desired.
+ *
+ ***/
+SilcUInt32 silc_utf8_decode(const unsigned char *utf8, SilcUInt32 utf8_len,
+                           SilcStringEncoding bin_encoding,
+                           unsigned char *bin, SilcUInt32 bin_size);
+
+/****f* silcutil/SilcUTF8API/silc_utf8_encoded_len
+ *
+ * SYNOPSIS
+ *
+ *    SilcUInt32 silc_utf8_encoded_len(const unsigned char *bin,
+ *                                     SilcUInt32 bin_len,
+ *                                     SilcStringEncoding bin_encoding);
+ *
+ * DESCRIPTION
+ *
+ *    Returns the length of UTF-8 encoded string if the `bin' of
+ *    encoding of `bin_encoding' is encoded with silc_utf8_encode.
+ *    Returns zero (0) on error.
+ *
+ ***/
+SilcUInt32 silc_utf8_encoded_len(const unsigned char *bin, SilcUInt32 bin_len,
+                                SilcStringEncoding bin_encoding);
+
+/****f* silcutil/SilcUTF8API/silc_utf8_decoded_len
+ *
+ * SYNOPSIS
+ *
+ *    SilcUInt32 silc_utf8_decoded_len(const unsigned char *bin,
+ *                                     SilcUInt32 bin_len,
+ *                                     SilcStringEncoding bin_encoding);
+ *
+ * DESCRIPTION
+ *
+ *    Returns the length of the decoded string if the UTF-8 encoded string
+ *    `bin' is decoded to the encoding `bin_encoding' with silc_utf8_decode.
+ *    Returns zero (0) on error.
+ *
+ ***/
+SilcUInt32 silc_utf8_decoded_len(const unsigned char *bin, SilcUInt32 bin_len,
+                                SilcStringEncoding bin_encoding);
+
+/****f* silcutil/SilcUTF8API/silc_utf8_valid
+ *
+ * SYNOPSIS
+ *
+ *    bool silc_utf8_valid(const unsigned char *utf8, SilcUInt32 utf8_len);
+ *
+ * DESCRIPTION
+ *
+ *    Returns TRUE if the `utf8' string of length of `utf8_len' is valid
+ *    UTF-8 encoded string, FALSE if it is not UTF-8 encoded string.
+ *
+ ***/
+bool silc_utf8_valid(const unsigned char *utf8, SilcUInt32 utf8_len);
+
+#endif /* SILCUTF8_H */