git.meshlink.io Git - catta/commitdiff
add utf8 validity check API (based on the glib's implementation and hence mostly...
author Lennart Poettering <lennart@poettering.net>
Mon, 24 Apr 2006 21:51:00 +0000 (21:51 +0000)
committer Lennart Poettering <lennart@poettering.net>
Mon, 24 Apr 2006 21:51:00 +0000 (21:51 +0000)
git-svn-id: file:///home/lennart/svn/public/avahi/trunk@1201 941a03a8-eaeb-0310-b9a0-b1bbd8fe43fe

avahi-common/Makefile.am
avahi-common/utf8-test.c [new file with mode: 0644]
avahi-common/utf8.c [new file with mode: 0644]
avahi-common/utf8.h [new file with mode: 0644]

index c7869544713a9b467b94c5a7f9b59f60272d6b9e..cd21c11d78b8db83ea8cfe4d42090338f5b3b1de 100644 (file)
@@ -48,7 +48,8 @@ noinst_PROGRAMS = \
        alternative-test \
        timeval-test \
        watch-test \
-       watch-test-thread
+       watch-test-thread \
+       utf8-test
 endif
 
 lib_LTLIBRARIES = \
@@ -65,7 +66,8 @@ libavahi_common_la_SOURCES = \
        simple-watch.c simple-watch.h \
        thread-watch.c thread-watch.h \
        watch.h gccmacro.h \
-       rlist.h rlist.c
+       rlist.h rlist.c \
+       utf8.c utf8.h
 
 libavahi_common_la_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) 
 libavahi_common_la_LIBADD = $(AM_LDADD) $(PTHREAD_CFLAGS) $(PTHREAD_LIBS)
@@ -82,14 +84,16 @@ alternative_test_SOURCES = \
        malloc.c malloc.h \
        domain.c domain.h \
        address.c address.h \
-       alternative-test.c
+       alternative-test.c \
+       utf8.c utf8.h
 alternative_test_CFLAGS = $(AM_CFLAGS)
 
 domain_test_SOURCES = \
        domain.c domain.h \
        malloc.c malloc.h \
        address.c address.h \
-       domain-test.c
+       domain-test.c \
+       utf8.c utf8.h
 domain_test_CFLAGS = $(AM_CFLAGS)
 
 watch_test_SOURCES = \
@@ -111,6 +115,12 @@ timeval_test_SOURCES = \
 timeval_test_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS)
 timeval_test_LDADD = $(AM_LDADD) $(PTHREAD_LIBS) $(PTHREAD_CFLAGS)
 
+# Unit test for the UTF-8 validity check implemented in utf8.c
+utf8_test_SOURCES = \
+       utf8-test.c \
+       utf8.c utf8.h
+utf8_test_CFLAGS = $(AM_CFLAGS)
+utf8_test_LDADD = $(AM_LDADD)
+
 if HAVE_DBUS
 
 noinst_HEADERS = \
diff --git a/avahi-common/utf8-test.c b/avahi-common/utf8-test.c
new file mode 100644 (file)
index 0000000..ce7a984
--- /dev/null
@@ -0,0 +1,37 @@
+/* $Id$ */
+
+/***
+  This file is part of avahi.
+  avahi is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as
+  published by the Free Software Foundation; either version 2.1 of the
+  License, or (at your option) any later version.
+  avahi is distributed in the hope that it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+  or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General
+  Public License for more details.
+  You should have received a copy of the GNU Lesser General Public
+  License along with avahi; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+  USA.
+***/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <assert.h>
+
+#include <avahi-common/gccmacro.h>
+
+#include "utf8.h"
+
+int main(AVAHI_GCC_UNUSED int argc, AVAHI_GCC_UNUSED char *argv[]) {
+
+    /* Plain ASCII is always valid UTF-8. */
+    assert(avahi_utf8_valid("hallo"));
+
+    /* "uexknuerz" with u-umlaut encoded as ISO-8859-1: the byte 0xFC is
+     * not a legal UTF-8 lead byte, so the string must be rejected. The
+     * bytes are spelled as escapes so that the test does not depend on
+     * the encoding this source file happens to be stored in. */
+    assert(!avahi_utf8_valid("\xfcxkn\xfcrz"));
+
+    /* The same word in UTF-8: u-umlaut is the two-byte sequence
+     * 0xC3 0xBC, so the string must be accepted. */
+    assert(avahi_utf8_valid("\xc3\xbcxkn\xc3\xbcrz"));
+
+    return 0;
+}
diff --git a/avahi-common/utf8.c b/avahi-common/utf8.c
new file mode 100644 (file)
index 0000000..f52ce55
--- /dev/null
@@ -0,0 +1,115 @@
+/* $Id$ */
+
+/* This file is based on the GLIB utf8 validation functions. The
+ * original license text follows. */
+
+/* gutf8.c - Operations on UTF-8 strings.
+ *
+ * Copyright (C) 1999 Tom Tromey
+ * Copyright (C) 2000 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.         See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <stdlib.h>
+
+#include "utf8.h"
+
+/* True if Char is a code point that is accepted inside a string: not
+ * above U+10FFFF, not a UTF-16 surrogate (U+D800..U+DFFF), not in the
+ * range U+FDD0..U+FDEF and not ending in 0xFFFE or 0xFFFF. */
+#define UNICODE_VALID(Char)                   \
+    ((Char) < 0x110000 &&                     \
+     (((Char) & 0xFFFFF800) != 0xD800) &&     \
+     ((Char) < 0xFDD0 || (Char) > 0xFDEF) &&  \
+     ((Char) & 0xFFFE) != 0xFFFE)
+
+/* Check that the byte at p is a continuation byte (10xxxxxx) and shift
+ * its six payload bits into val. Only usable inside avahi_utf8_valid():
+ * it relies on that function's locals p and val and jumps to its error
+ * label on failure. */
+#define CONTINUATION_CHAR                                        \
+ do {                                                            \
+  if ((*(const unsigned char *)p & 0xc0) != 0x80) /* 10xxxxxx */ \
+    goto error;                                                  \
+  val <<= 6;                                                     \
+  val |= (*(const unsigned char *)p) & 0x3f;                     \
+ } while(0)
+
+/* Check whether the NUL-terminated string str is well-formed UTF-8.
+ *
+ * Sequences of one to four bytes are accepted. Overlong encodings,
+ * stray or missing continuation bytes, and code points rejected by
+ * UNICODE_VALID() make the string invalid.
+ *
+ * A terminating NUL inside a multi-byte sequence fails the
+ * continuation-byte test before p is advanced again, so the scan never
+ * reads past the end of the string.
+ *
+ * Returns str itself if the string is valid, NULL otherwise. */
+const char *
+avahi_utf8_valid (const char *str)
+{
+  unsigned val = 0;
+  unsigned min = 0;
+  const char *p;
+
+  for (p = str; *p; p++)
+    {
+      if (*(const unsigned char *)p < 128)
+        continue; /* plain ASCII, nothing to check */
+
+      if ((*(const unsigned char *)p & 0xe0) == 0xc0) /* 110xxxxx */
+        {
+          /* Lead bytes 0xC0 and 0xC1 can only encode values below
+           * 0x80, i.e. overlong forms of ASCII. */
+          if ((*(const unsigned char *)p & 0x1e) == 0)
+            goto error;
+          p++;
+          if ((*(const unsigned char *)p & 0xc0) != 0x80) /* 10xxxxxx */
+            goto error;
+          continue;
+        }
+
+      if ((*(const unsigned char *)p & 0xf0) == 0xe0) /* 1110xxxx */
+        {
+          /* min is the smallest value that needs three bytes */
+          min = (1 << 11);
+          val = *(const unsigned char *)p & 0x0f;
+          goto TWO_REMAINING;
+        }
+      else if ((*(const unsigned char *)p & 0xf8) == 0xf0) /* 11110xxx */
+        {
+          /* min is the smallest value that needs four bytes */
+          min = (1 << 16);
+          val = *(const unsigned char *)p & 0x07;
+        }
+      else
+        goto error;
+
+      p++;
+      CONTINUATION_CHAR;
+    TWO_REMAINING:
+      p++;
+      CONTINUATION_CHAR;
+      p++;
+      CONTINUATION_CHAR;
+
+      /* Reject overlong encodings */
+      if (val < min)
+        goto error;
+
+      if (!UNICODE_VALID(val))
+        goto error;
+    }
+
+  return str;
+
+error:
+  return NULL;
+}
diff --git a/avahi-common/utf8.h b/avahi-common/utf8.h
new file mode 100644 (file)
index 0000000..93c9b59
--- /dev/null
@@ -0,0 +1,35 @@
+#ifndef fooutf8hfoo
+#define fooutf8hfoo
+
+/* $Id$ */
+
+/***
+  This file is part of avahi.
+  avahi is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as
+  published by the Free Software Foundation; either version 2.1 of the
+  License, or (at your option) any later version.
+  avahi is distributed in the hope that it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+  or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General
+  Public License for more details.
+  You should have received a copy of the GNU Lesser General Public
+  License along with avahi; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+  USA.
+***/
+
+#include <inttypes.h>
+
+#include <avahi-common/cdecl.h>
+
+AVAHI_C_DECL_BEGIN
+
+/** Check whether the NUL-terminated string str is valid UTF-8.
+ * Returns str itself if it is valid, NULL otherwise. */
+const char *avahi_utf8_valid(const char *str);
+
+AVAHI_C_DECL_END
+
+#endif