diff options
Diffstat (limited to '3rdParty/LibIDN/src/nfkc.c')
-rw-r--r-- | 3rdParty/LibIDN/src/nfkc.c | 432 |
1 files changed, 244 insertions, 188 deletions
diff --git a/3rdParty/LibIDN/src/nfkc.c b/3rdParty/LibIDN/src/nfkc.c index 621f749..4992074 100644 --- a/3rdParty/LibIDN/src/nfkc.c +++ b/3rdParty/LibIDN/src/nfkc.c | |||
@@ -1,42 +1,43 @@ | |||
1 | /* nfkc.c --- Unicode normalization utilities. | 1 | /* nfkc.c --- Unicode normalization utilities. |
2 | * Copyright (C) 2002, 2003, 2004, 2006, 2007 Simon Josefsson | 2 | Copyright (C) 2002-2015 Simon Josefsson |
3 | * | 3 | |
4 | * This file is part of GNU Libidn. | 4 | This file is part of GNU Libidn. |
5 | * | 5 | |
6 | * GNU Libidn is free software; you can redistribute it and/or | 6 | GNU Libidn is free software: you can redistribute it and/or |
7 | * modify it under the terms of the GNU Lesser General Public | 7 | modify it under the terms of either: |
8 | * License as published by the Free Software Foundation; either | 8 | |
9 | * version 2.1 of the License, or (at your option) any later version. | 9 | * the GNU Lesser General Public License as published by the Free |
10 | * | 10 | Software Foundation; either version 3 of the License, or (at |
11 | * GNU Libidn is distributed in the hope that it will be useful, | 11 | your option) any later version. |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 12 | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | 13 | or |
14 | * Lesser General Public License for more details. | 14 | |
15 | * | 15 | * the GNU General Public License as published by the Free |
16 | * You should have received a copy of the GNU Lesser General Public | 16 | Software Foundation; either version 2 of the License, or (at |
17 | * License along with GNU Libidn; if not, write to the Free Software | 17 | your option) any later version. |
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA | 18 | |
19 | * | 19 | or both in parallel, as here. |
20 | */ | 20 | |
21 | GNU Libidn is distributed in the hope that it will be useful, | ||
22 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
23 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
24 | General Public License for more details. | ||
25 | |||
26 | You should have received copies of the GNU General Public License and | ||
27 | the GNU Lesser General Public License along with this program. If | ||
28 | not, see <http://www.gnu.org/licenses/>. */ | ||
21 | 29 | ||
22 | #ifdef HAVE_CONFIG_H | 30 | #ifdef HAVE_CONFIG_H |
23 | # include "config.h" | 31 | #include "config.h" |
24 | #endif | 32 | #endif |
25 | 33 | ||
26 | #include <stdlib.h> | 34 | #include <stdlib.h> |
27 | #include <string.h> | 35 | #include <string.h> |
28 | 36 | ||
29 | #include "stringprep.h" | 37 | #include "stringprep.h" |
30 | 38 | ||
31 | /* This file contains functions from GLIB, including gutf8.c and | ||
32 | * gunidecomp.c, all licensed under LGPL and copyright hold by: | ||
33 | * | ||
34 | * Copyright (C) 1999, 2000 Tom Tromey | ||
35 | * Copyright 2000 Red Hat, Inc. | ||
36 | */ | ||
37 | |||
38 | /* Hacks to make syncing with GLIB code easier. */ | 39 | /* Hacks to make syncing with GLIB code easier. */ |
39 | #define gboolean int | 40 | #define gboolean int |
40 | #define gchar char | 41 | #define gchar char |
41 | #define guchar unsigned char | 42 | #define guchar unsigned char |
42 | #define glong long | 43 | #define glong long |
@@ -48,33 +49,71 @@ | |||
48 | #define gunichar uint32_t | 49 | #define gunichar uint32_t |
49 | #define gsize size_t | 50 | #define gsize size_t |
50 | #define gssize ssize_t | 51 | #define gssize ssize_t |
51 | #define g_malloc malloc | 52 | #define g_malloc malloc |
52 | #define g_free free | 53 | #define g_free free |
53 | #define GError void | 54 | #define g_return_val_if_fail(expr,val) { \ |
54 | #define g_set_error(a,b,c,d) ((void) 0) | 55 | if (!(expr)) \ |
55 | #define g_new(struct_type, n_structs) \ | 56 | return (val); \ |
56 | ((struct_type *) g_malloc (((gsize) sizeof (struct_type)) * ((gsize) (n_structs)))) | 57 | } |
57 | # if defined (__GNUC__) && !defined (__STRICT_ANSI__) && !defined (__cplusplus) | 58 | |
58 | # define G_STMT_START (void)( | 59 | /* Code from GLIB gmacros.h starts here. */ |
59 | # define G_STMT_END ) | 60 | |
60 | # else | 61 | /* GLIB - Library of useful routines for C programming |
61 | # if (defined (sun) || defined (__sun__)) | 62 | * Copyright (C) 1995-1997 Peter Mattis, Spencer Kimball and Josh MacDonald |
62 | # define G_STMT_START if (1) | 63 | * |
63 | # define G_STMT_END else (void)0 | 64 | * This library is free software; you can redistribute it and/or |
64 | # else | 65 | * modify it under the terms of the GNU Lesser General Public |
65 | # define G_STMT_START do | 66 | * License as published by the Free Software Foundation; either |
66 | # define G_STMT_END while (0) | 67 | * version 2 of the License, or (at your option) any later version. |
67 | # endif | 68 | * |
68 | # endif | 69 | * This library is distributed in the hope that it will be useful, |
69 | #define g_return_val_if_fail(expr,val) G_STMT_START{ (void)0; }G_STMT_END | 70 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
71 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
72 | * Lesser General Public License for more details. | ||
73 | * | ||
74 | * You should have received a copy of the GNU Lesser General Public | ||
75 | * License along with this library; if not, write to the | ||
76 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
77 | * Boston, MA 02111-1307, USA. | ||
78 | */ | ||
79 | |||
80 | #ifndef FALSE | ||
81 | #define FALSE (0) | ||
82 | #endif | ||
83 | |||
84 | #ifndef TRUE | ||
85 | #define TRUE (!FALSE) | ||
86 | #endif | ||
87 | |||
70 | #define G_N_ELEMENTS(arr) (sizeof (arr) / sizeof ((arr)[0])) | 88 | #define G_N_ELEMENTS(arr) (sizeof (arr) / sizeof ((arr)[0])) |
71 | #define TRUE 1 | 89 | |
72 | #define FALSE 0 | 90 | #define G_UNLIKELY(expr) (expr) |
73 | 91 | ||
74 | /* Code from GLIB gunicode.h starts here. */ | 92 | /* Code from GLIB gunicode.h starts here. */ |
75 | 93 | ||
94 | /* gunicode.h - Unicode manipulation functions | ||
95 | * | ||
96 | * Copyright (C) 1999, 2000 Tom Tromey | ||
97 | * Copyright 2000, 2005 Red Hat, Inc. | ||
98 | * | ||
99 | * The Gnome Library is free software; you can redistribute it and/or | ||
100 | * modify it under the terms of the GNU Lesser General Public License as | ||
101 | * published by the Free Software Foundation; either version 2 of the | ||
102 | * License, or (at your option) any later version. | ||
103 | * | ||
104 | * The Gnome Library is distributed in the hope that it will be useful, | ||
105 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
106 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
107 | * Lesser General Public License for more details. | ||
108 | * | ||
109 | * You should have received a copy of the GNU Lesser General Public | ||
110 | * License along with the Gnome Library; see the file COPYING.LIB. If not, | ||
111 | * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
112 | * Boston, MA 02111-1307, USA. | ||
113 | */ | ||
114 | |||
76 | typedef enum | 115 | typedef enum |
77 | { | 116 | { |
78 | G_NORMALIZE_DEFAULT, | 117 | G_NORMALIZE_DEFAULT, |
79 | G_NORMALIZE_NFD = G_NORMALIZE_DEFAULT, | 118 | G_NORMALIZE_NFD = G_NORMALIZE_DEFAULT, |
80 | G_NORMALIZE_DEFAULT_COMPOSE, | 119 | G_NORMALIZE_DEFAULT_COMPOSE, |
@@ -84,12 +123,35 @@ typedef enum | |||
84 | G_NORMALIZE_ALL_COMPOSE, | 123 | G_NORMALIZE_ALL_COMPOSE, |
85 | G_NORMALIZE_NFKC = G_NORMALIZE_ALL_COMPOSE | 124 | G_NORMALIZE_NFKC = G_NORMALIZE_ALL_COMPOSE |
86 | } | 125 | } |
87 | GNormalizeMode; | 126 | GNormalizeMode; |
88 | 127 | ||
128 | #define g_utf8_next_char(p) ((p) + g_utf8_skip[*(const guchar *)(p)]) | ||
129 | |||
89 | /* Code from GLIB gutf8.c starts here. */ | 130 | /* Code from GLIB gutf8.c starts here. */ |
90 | 131 | ||
132 | /* gutf8.c - Operations on UTF-8 strings. | ||
133 | * | ||
134 | * Copyright (C) 1999 Tom Tromey | ||
135 | * Copyright (C) 2000 Red Hat, Inc. | ||
136 | * | ||
137 | * This library is free software; you can redistribute it and/or | ||
138 | * modify it under the terms of the GNU Lesser General Public | ||
139 | * License as published by the Free Software Foundation; either | ||
140 | * version 2 of the License, or (at your option) any later version. | ||
141 | * | ||
142 | * This library is distributed in the hope that it will be useful, | ||
143 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
144 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
145 | * Lesser General Public License for more details. | ||
146 | * | ||
147 | * You should have received a copy of the GNU Lesser General Public | ||
148 | * License along with this library; if not, write to the | ||
149 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
150 | * Boston, MA 02111-1307, USA. | ||
151 | */ | ||
152 | |||
91 | #define UTF8_COMPUTE(Char, Mask, Len) \ | 153 | #define UTF8_COMPUTE(Char, Mask, Len) \ |
92 | if (Char < 128) \ | 154 | if (Char < 128) \ |
93 | { \ | 155 | { \ |
94 | Len = 1; \ | 156 | Len = 1; \ |
95 | Mask = 0x7f; \ | 157 | Mask = 0x7f; \ |
@@ -127,31 +189,23 @@ GNormalizeMode; | |||
127 | ((Char) < 0x800 ? 2 : \ | 189 | ((Char) < 0x800 ? 2 : \ |
128 | ((Char) < 0x10000 ? 3 : \ | 190 | ((Char) < 0x10000 ? 3 : \ |
129 | ((Char) < 0x200000 ? 4 : \ | 191 | ((Char) < 0x200000 ? 4 : \ |
130 | ((Char) < 0x4000000 ? 5 : 6))))) | 192 | ((Char) < 0x4000000 ? 5 : 6))))) |
131 | 193 | ||
132 | 194 | #define UTF8_GET(Result, Chars, Count, Mask, Len) \ | |
133 | #define UTF8_GET(Result, Chars, Count, Mask, Len) \ | 195 | (Result) = (Chars)[0] & (Mask); \ |
134 | (Result) = (Chars)[0] & (Mask); \ | 196 | for ((Count) = 1; (Count) < (Len); ++(Count)) \ |
135 | for ((Count) = 1; (Count) < (Len); ++(Count)) \ | 197 | { \ |
136 | { \ | 198 | if (((Chars)[(Count)] & 0xc0) != 0x80) \ |
137 | if (((Chars)[(Count)] & 0xc0) != 0x80) \ | 199 | { \ |
138 | { \ | 200 | (Result) = -1; \ |
139 | (Result) = -1; \ | 201 | break; \ |
140 | break; \ | 202 | } \ |
141 | } \ | 203 | (Result) <<= 6; \ |
142 | (Result) <<= 6; \ | 204 | (Result) |= ((Chars)[(Count)] & 0x3f); \ |
143 | (Result) |= ((Chars)[(Count)] & 0x3f); \ | ||
144 | } | 205 | } |
145 | 206 | ||
146 | #define UNICODE_VALID(Char) \ | ||
147 | ((Char) < 0x110000 && \ | ||
148 | (((Char) & 0xFFFFF800) != 0xD800) && \ | ||
149 | ((Char) < 0xFDD0 || (Char) > 0xFDEF) && \ | ||
150 | ((Char) & 0xFFFE) != 0xFFFE) | ||
151 | |||
152 | |||
153 | static const gchar utf8_skip_data[256] = { | 207 | static const gchar utf8_skip_data[256] = { |
154 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | 208 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
155 | 1, 1, 1, 1, 1, 1, 1, | 209 | 1, 1, 1, 1, 1, 1, 1, |
156 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | 210 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
157 | 1, 1, 1, 1, 1, 1, 1, | 211 | 1, 1, 1, 1, 1, 1, 1, |
@@ -169,21 +223,20 @@ static const gchar utf8_skip_data[256] = { | |||
169 | 5, 5, 5, 6, 6, 1, 1 | 223 | 5, 5, 5, 6, 6, 1, 1 |
170 | }; | 224 | }; |
171 | 225 | ||
172 | static const gchar *const g_utf8_skip = utf8_skip_data; | 226 | static const gchar *const g_utf8_skip = utf8_skip_data; |
173 | 227 | ||
174 | #define g_utf8_next_char(p) (char *)((p) + g_utf8_skip[*(guchar *)(p)]) | ||
175 | |||
176 | /* | 228 | /* |
177 | * g_utf8_strlen: | 229 | * g_utf8_strlen: |
178 | * @p: pointer to the start of a UTF-8 encoded string. | 230 | * @p: pointer to the start of a UTF-8 encoded string |
179 | * @max: the maximum number of bytes to examine. If @max | 231 | * @max: the maximum number of bytes to examine. If @max |
180 | * is less than 0, then the string is assumed to be | 232 | * is less than 0, then the string is assumed to be |
181 | * nul-terminated. If @max is 0, @p will not be examined and | 233 | * nul-terminated. If @max is 0, @p will not be examined and |
182 | * may be %NULL. | 234 | * may be %NULL. |
183 | * | 235 | * |
184 | * Returns the length of the string in characters. | 236 | * Computes the length of the string in characters, not including |
237 | * the terminating nul character. | ||
185 | * | 238 | * |
186 | * Return value: the length of the string in characters | 239 | * Return value: the length of the string in characters |
187 | **/ | 240 | **/ |
188 | static glong | 241 | static glong |
189 | g_utf8_strlen (const gchar * p, gssize max) | 242 | g_utf8_strlen (const gchar * p, gssize max) |
@@ -214,11 +267,11 @@ g_utf8_strlen (const gchar * p, gssize max) | |||
214 | } | 267 | } |
215 | 268 | ||
216 | /* only do the last len increment if we got a complete | 269 | /* only do the last len increment if we got a complete |
217 | * char (don't count partial chars) | 270 | * char (don't count partial chars) |
218 | */ | 271 | */ |
219 | if (p - start == max) | 272 | if (p - start <= max) |
220 | ++len; | 273 | ++len; |
221 | } | 274 | } |
222 | 275 | ||
223 | return len; | 276 | return len; |
224 | } | 277 | } |
@@ -250,11 +303,11 @@ g_utf8_get_char (const gchar * p) | |||
250 | return result; | 303 | return result; |
251 | } | 304 | } |
252 | 305 | ||
253 | /* | 306 | /* |
254 | * g_unichar_to_utf8: | 307 | * g_unichar_to_utf8: |
255 | * @c: a ISO10646 character code | 308 | * @c: a Unicode character code |
256 | * @outbuf: output buffer, must have at least 6 bytes of space. | 309 | * @outbuf: output buffer, must have at least 6 bytes of space. |
257 | * If %NULL, the length will be computed and returned | 310 | * If %NULL, the length will be computed and returned |
258 | * and nothing will be written to @outbuf. | 311 | * and nothing will be written to @outbuf. |
259 | * | 312 | * |
260 | * Converts a single character to UTF-8. | 313 | * Converts a single character to UTF-8. |
@@ -262,10 +315,11 @@ g_utf8_get_char (const gchar * p) | |||
262 | * Return value: number of bytes written | 315 | * Return value: number of bytes written |
263 | **/ | 316 | **/ |
264 | static int | 317 | static int |
265 | g_unichar_to_utf8 (gunichar c, gchar * outbuf) | 318 | g_unichar_to_utf8 (gunichar c, gchar * outbuf) |
266 | { | 319 | { |
320 | /* If this gets modified, also update the copy in g_string_insert_unichar() */ | ||
267 | guint len = 0; | 321 | guint len = 0; |
268 | int first; | 322 | int first; |
269 | int i; | 323 | int i; |
270 | 324 | ||
271 | if (c < 0x80) | 325 | if (c < 0x80) |
@@ -313,29 +367,29 @@ g_unichar_to_utf8 (gunichar c, gchar * outbuf) | |||
313 | } | 367 | } |
314 | 368 | ||
315 | /* | 369 | /* |
316 | * g_utf8_to_ucs4_fast: | 370 | * g_utf8_to_ucs4_fast: |
317 | * @str: a UTF-8 encoded string | 371 | * @str: a UTF-8 encoded string |
318 | * @len: the maximum length of @str to use. If @len < 0, then | 372 | * @len: the maximum length of @str to use, in bytes. If @len < 0, |
319 | * the string is nul-terminated. | 373 | * then the string is nul-terminated. |
320 | * @items_written: location to store the number of characters in the | 374 | * @items_written: location to store the number of characters in the |
321 | * result, or %NULL. | 375 | * result, or %NULL. |
322 | * | 376 | * |
323 | * Convert a string from UTF-8 to a 32-bit fixed width | 377 | * Convert a string from UTF-8 to a 32-bit fixed width |
324 | * representation as UCS-4, assuming valid UTF-8 input. | 378 | * representation as UCS-4, assuming valid UTF-8 input. |
325 | * This function is roughly twice as fast as g_utf8_to_ucs4() | 379 | * This function is roughly twice as fast as g_utf8_to_ucs4() |
326 | * but does no error checking on the input. | 380 | * but does no error checking on the input. A trailing 0 character |
381 | * will be added to the string after the converted text. | ||
327 | * | 382 | * |
328 | * Return value: a pointer to a newly allocated UCS-4 string. | 383 | * Return value: a pointer to a newly allocated UCS-4 string. |
329 | * This value must be freed with g_free(). | 384 | * This value must be freed with g_free(). |
330 | **/ | 385 | **/ |
331 | static gunichar * | 386 | static gunichar * |
332 | g_utf8_to_ucs4_fast (const gchar * str, glong len, glong * items_written) | 387 | g_utf8_to_ucs4_fast (const gchar * str, glong len, glong * items_written) |
333 | { | 388 | { |
334 | gint j, charlen; | ||
335 | gunichar *result; | 389 | gunichar *result; |
336 | gint n_chars, i; | 390 | gsize n_chars, i; |
337 | const gchar *p; | 391 | const gchar *p; |
338 | 392 | ||
339 | g_return_val_if_fail (str != NULL, NULL); | 393 | g_return_val_if_fail (str != NULL, NULL); |
340 | 394 | ||
341 | p = str; | 395 | p = str; |
@@ -355,60 +409,48 @@ g_utf8_to_ucs4_fast (const gchar * str, glong len, glong * items_written) | |||
355 | p = g_utf8_next_char (p); | 409 | p = g_utf8_next_char (p); |
356 | ++n_chars; | 410 | ++n_chars; |
357 | } | 411 | } |
358 | } | 412 | } |
359 | 413 | ||
360 | result = g_new (gunichar, n_chars + 1); | 414 | result = g_malloc (sizeof (gunichar) * (n_chars + 1)); |
361 | if (!result) | 415 | if (!result) |
362 | return NULL; | 416 | return NULL; |
363 | 417 | ||
364 | p = str; | 418 | p = str; |
365 | for (i = 0; i < n_chars; i++) | 419 | for (i = 0; i < n_chars; i++) |
366 | { | 420 | { |
367 | gunichar wc = ((unsigned char *) p)[0]; | 421 | gunichar wc = (guchar) * p++; |
368 | 422 | ||
369 | if (wc < 0x80) | 423 | if (wc < 0x80) |
370 | { | 424 | { |
371 | result[i] = wc; | 425 | result[i] = wc; |
372 | p++; | ||
373 | } | 426 | } |
374 | else | 427 | else |
375 | { | 428 | { |
376 | if (wc < 0xe0) | 429 | gunichar mask = 0x40; |
377 | { | 430 | |
378 | charlen = 2; | 431 | if (G_UNLIKELY ((wc & mask) == 0)) |
379 | wc &= 0x1f; | ||
380 | } | ||
381 | else if (wc < 0xf0) | ||
382 | { | ||
383 | charlen = 3; | ||
384 | wc &= 0x0f; | ||
385 | } | ||
386 | else if (wc < 0xf8) | ||
387 | { | ||
388 | charlen = 4; | ||
389 | wc &= 0x07; | ||
390 | } | ||
391 | else if (wc < 0xfc) | ||
392 | { | ||
393 | charlen = 5; | ||
394 | wc &= 0x03; | ||
395 | } | ||
396 | else | ||
397 | { | 432 | { |
398 | charlen = 6; | 433 | /* It's an out-of-sequence 10xxxxxx byte. |
399 | wc &= 0x01; | 434 | * Rather than making an ugly hash of this and the next byte |
435 | * and overrunning the buffer, it's more useful to treat it | ||
436 | * with a replacement character */ | ||
437 | result[i] = 0xfffd; | ||
438 | continue; | ||
400 | } | 439 | } |
401 | 440 | ||
402 | for (j = 1; j < charlen; j++) | 441 | do |
403 | { | 442 | { |
404 | wc <<= 6; | 443 | wc <<= 6; |
405 | wc |= ((unsigned char *) p)[j] & 0x3f; | 444 | wc |= (guchar) (*p++) & 0x3f; |
445 | mask <<= 5; | ||
406 | } | 446 | } |
447 | while ((wc & mask) != 0); | ||
448 | |||
449 | wc &= mask - 1; | ||
407 | 450 | ||
408 | result[i] = wc; | 451 | result[i] = wc; |
409 | p += charlen; | ||
410 | } | 452 | } |
411 | } | 453 | } |
412 | result[i] = 0; | 454 | result[i] = 0; |
413 | 455 | ||
414 | if (items_written) | 456 | if (items_written) |
@@ -418,32 +460,34 @@ g_utf8_to_ucs4_fast (const gchar * str, glong len, glong * items_written) | |||
418 | } | 460 | } |
419 | 461 | ||
420 | /* | 462 | /* |
421 | * g_ucs4_to_utf8: | 463 | * g_ucs4_to_utf8: |
422 | * @str: a UCS-4 encoded string | 464 | * @str: a UCS-4 encoded string |
423 | * @len: the maximum length of @str to use. If @len < 0, then | 465 | * @len: the maximum length (number of characters) of @str to use. |
424 | * the string is terminated with a 0 character. | 466 | * If @len < 0, then the string is nul-terminated. |
425 | * @items_read: location to store number of characters read read, or %NULL. | 467 | * @items_read: location to store number of characters read, or %NULL. |
426 | * @items_written: location to store number of bytes written or %NULL. | 468 | * @items_written: location to store number of bytes written or %NULL. |
427 | * The value here stored does not include the trailing 0 | 469 | * The value here stored does not include the trailing 0 |
428 | * byte. | 470 | * byte. |
429 | * @error: location to store the error occuring, or %NULL to ignore | 471 | * @error: location to store the error occurring, or %NULL to ignore |
430 | * errors. Any of the errors in #GConvertError other than | 472 | * errors. Any of the errors in #GConvertError other than |
431 | * %G_CONVERT_ERROR_NO_CONVERSION may occur. | 473 | * %G_CONVERT_ERROR_NO_CONVERSION may occur. |
432 | * | 474 | * |
433 | * Convert a string from a 32-bit fixed width representation as UCS-4 | 475 | * Convert a string from a 32-bit fixed width representation as UCS-4 |
434 | * to UTF-8. The result will be terminated with a 0 byte. | 476 | * to UTF-8. The result will be terminated with a 0 byte. |
435 | * | 477 | * |
436 | * Return value: a pointer to a newly allocated UTF-8 string. | 478 | * Return value: a pointer to a newly allocated UTF-8 string. |
437 | * This value must be freed with g_free(). If an | 479 | * This value must be freed with g_free(). If an |
438 | * error occurs, %NULL will be returned and | 480 | * error occurs, %NULL will be returned and |
439 | * @error set. | 481 | * @error set. In that case, @items_read will be |
482 | * set to the position of the first invalid input | ||
483 | * character. | ||
440 | **/ | 484 | **/ |
441 | static gchar * | 485 | static gchar * |
442 | g_ucs4_to_utf8 (const gunichar * str, | 486 | g_ucs4_to_utf8 (const gunichar * str, |
443 | glong len, | 487 | glong len, |
444 | glong * items_read, glong * items_written, GError ** error) | 488 | glong * items_read, glong * items_written) |
445 | { | 489 | { |
446 | gint result_length; | 490 | gint result_length; |
447 | gchar *result = NULL; | 491 | gchar *result = NULL; |
448 | gchar *p; | 492 | gchar *p; |
449 | gint i; | 493 | gint i; |
@@ -453,19 +497,11 @@ g_ucs4_to_utf8 (const gunichar * str, | |||
453 | { | 497 | { |
454 | if (!str[i]) | 498 | if (!str[i]) |
455 | break; | 499 | break; |
456 | 500 | ||
457 | if (str[i] >= 0x80000000) | 501 | if (str[i] >= 0x80000000) |
458 | { | 502 | goto err_out; |
459 | if (items_read) | ||
460 | *items_read = i; | ||
461 | |||
462 | g_set_error (error, G_CONVERT_ERROR, | ||
463 | G_CONVERT_ERROR_ILLEGAL_SEQUENCE, | ||
464 | _("Character out of range for UTF-8")); | ||
465 | goto err_out; | ||
466 | } | ||
467 | 503 | ||
468 | result_length += UTF8_LENGTH (str[i]); | 504 | result_length += UTF8_LENGTH (str[i]); |
469 | } | 505 | } |
470 | 506 | ||
471 | result = g_malloc (result_length + 1); | 507 | result = g_malloc (result_length + 1); |
@@ -489,28 +525,49 @@ err_out: | |||
489 | return result; | 525 | return result; |
490 | } | 526 | } |
491 | 527 | ||
492 | /* Code from GLIB gunidecomp.c starts here. */ | 528 | /* Code from GLIB gunidecomp.c starts here. */ |
493 | 529 | ||
530 | /* decomp.c - Character decomposition. | ||
531 | * | ||
532 | * Copyright (C) 1999, 2000 Tom Tromey | ||
533 | * Copyright 2000 Red Hat, Inc. | ||
534 | * | ||
535 | * The Gnome Library is free software; you can redistribute it and/or | ||
536 | * modify it under the terms of the GNU Lesser General Public License as | ||
537 | * published by the Free Software Foundation; either version 2 of the | ||
538 | * License, or (at your option) any later version. | ||
539 | * | ||
540 | * The Gnome Library is distributed in the hope that it will be useful, | ||
541 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
542 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
543 | * Lesser General Public License for more details. | ||
544 | * | ||
545 | * You should have received a copy of the GNU Lesser General Public | ||
546 | * License along with the Gnome Library; see the file COPYING.LIB. If not, | ||
547 | * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
548 | * Boston, MA 02111-1307, USA. | ||
549 | */ | ||
550 | |||
494 | #include "gunidecomp.h" | 551 | #include "gunidecomp.h" |
495 | #include "gunicomp.h" | 552 | #include "gunicomp.h" |
496 | 553 | ||
497 | #define CC_PART1(Page, Char) \ | 554 | #define CC_PART1(Page, Char) \ |
498 | ((combining_class_table_part1[Page] >= G_UNICODE_MAX_TABLE_INDEX) \ | 555 | ((combining_class_table_part1[Page] >= G_UNICODE_MAX_TABLE_INDEX) \ |
499 | ? (combining_class_table_part1[Page] - G_UNICODE_MAX_TABLE_INDEX) \ | 556 | ? (combining_class_table_part1[Page] - G_UNICODE_MAX_TABLE_INDEX) \ |
500 | : (cclass_data[combining_class_table_part1[Page]][Char])) | 557 | : (cclass_data[combining_class_table_part1[Page]][Char])) |
501 | 558 | ||
502 | #define CC_PART2(Page, Char) \ | 559 | #define CC_PART2(Page, Char) \ |
503 | ((combining_class_table_part2[Page] >= G_UNICODE_MAX_TABLE_INDEX) \ | 560 | ((combining_class_table_part2[Page] >= G_UNICODE_MAX_TABLE_INDEX) \ |
504 | ? (combining_class_table_part2[Page] - G_UNICODE_MAX_TABLE_INDEX) \ | 561 | ? (combining_class_table_part2[Page] - G_UNICODE_MAX_TABLE_INDEX) \ |
505 | : (cclass_data[combining_class_table_part2[Page]][Char])) | 562 | : (cclass_data[combining_class_table_part2[Page]][Char])) |
506 | 563 | ||
507 | #define COMBINING_CLASS(Char) \ | 564 | #define COMBINING_CLASS(Char) \ |
508 | (((Char) <= G_UNICODE_LAST_CHAR_PART1) \ | 565 | (((Char) <= G_UNICODE_LAST_CHAR_PART1) \ |
509 | ? CC_PART1 ((Char) >> 8, (Char) & 0xff) \ | 566 | ? CC_PART1 ((Char) >> 8, (Char) & 0xff) \ |
510 | : (((Char) >= 0xe0000 && (Char) <= G_UNICODE_LAST_CHAR) \ | 567 | : (((Char) >= 0xe0000 && (Char) <= G_UNICODE_LAST_CHAR) \ |
511 | ? CC_PART2 (((Char) - 0xe0000) >> 8, (Char) & 0xff) \ | 568 | ? CC_PART2 (((Char) - 0xe0000) >> 8, (Char) & 0xff) \ |
512 | : 0)) | 569 | : 0)) |
513 | 570 | ||
514 | /* constants for hangul syllable [de]composition */ | 571 | /* constants for hangul syllable [de]composition */ |
515 | #define SBase 0xAC00 | 572 | #define SBase 0xAC00 |
516 | #define LBase 0x1100 | 573 | #define LBase 0x1100 |
@@ -575,39 +632,26 @@ g_unicode_canonical_ordering (gunichar * string, gsize len) | |||
575 | * characters will always be big enough. */ | 632 | * characters will always be big enough. */ |
576 | static void | 633 | static void |
577 | decompose_hangul (gunichar s, gunichar * r, gsize * result_len) | 634 | decompose_hangul (gunichar s, gunichar * r, gsize * result_len) |
578 | { | 635 | { |
579 | gint SIndex = s - SBase; | 636 | gint SIndex = s - SBase; |
637 | gint TIndex = SIndex % TCount; | ||
580 | 638 | ||
581 | /* not a hangul syllable */ | 639 | if (r) |
582 | if (SIndex < 0 || SIndex >= SCount) | ||
583 | { | 640 | { |
584 | if (r) | 641 | r[0] = LBase + SIndex / NCount; |
585 | r[0] = s; | 642 | r[1] = VBase + (SIndex % NCount) / TCount; |
586 | *result_len = 1; | ||
587 | } | 643 | } |
588 | else | ||
589 | { | ||
590 | gunichar L = LBase + SIndex / NCount; | ||
591 | gunichar V = VBase + (SIndex % NCount) / TCount; | ||
592 | gunichar T = TBase + SIndex % TCount; | ||
593 | 644 | ||
645 | if (TIndex) | ||
646 | { | ||
594 | if (r) | 647 | if (r) |
595 | { | 648 | r[2] = TBase + TIndex; |
596 | r[0] = L; | 649 | *result_len = 3; |
597 | r[1] = V; | ||
598 | } | ||
599 | |||
600 | if (T != TBase) | ||
601 | { | ||
602 | if (r) | ||
603 | r[2] = T; | ||
604 | *result_len = 3; | ||
605 | } | ||
606 | else | ||
607 | *result_len = 2; | ||
608 | } | 650 | } |
651 | else | ||
652 | *result_len = 2; | ||
609 | } | 653 | } |
610 | 654 | ||
611 | /* returns a pointer to a null-terminated UTF-8 string */ | 655 | /* returns a pointer to a null-terminated UTF-8 string */ |
612 | static const gchar * | 656 | static const gchar * |
613 | find_decomposition (gunichar ch, gboolean compat) | 657 | find_decomposition (gunichar ch, gboolean compat) |
@@ -665,26 +709,26 @@ combine_hangul (gunichar a, gunichar b, gunichar * result) | |||
665 | { | 709 | { |
666 | *result = SBase + (LIndex * VCount + VIndex) * TCount; | 710 | *result = SBase + (LIndex * VCount + VIndex) * TCount; |
667 | return TRUE; | 711 | return TRUE; |
668 | } | 712 | } |
669 | else if (0 <= SIndex && SIndex < SCount && (SIndex % TCount) == 0 | 713 | else if (0 <= SIndex && SIndex < SCount && (SIndex % TCount) == 0 |
670 | && 0 <= TIndex && TIndex <= TCount) | 714 | && 0 < TIndex && TIndex < TCount) |
671 | { | 715 | { |
672 | *result = a + TIndex; | 716 | *result = a + TIndex; |
673 | return TRUE; | 717 | return TRUE; |
674 | } | 718 | } |
675 | 719 | ||
676 | return FALSE; | 720 | return FALSE; |
677 | } | 721 | } |
678 | 722 | ||
679 | #define CI(Page, Char) \ | 723 | #define CI(Page, Char) \ |
680 | ((compose_table[Page] >= G_UNICODE_MAX_TABLE_INDEX) \ | 724 | ((compose_table[Page] >= G_UNICODE_MAX_TABLE_INDEX) \ |
681 | ? (compose_table[Page] - G_UNICODE_MAX_TABLE_INDEX) \ | 725 | ? (compose_table[Page] - G_UNICODE_MAX_TABLE_INDEX) \ |
682 | : (compose_data[compose_table[Page]][Char])) | 726 | : (compose_data[compose_table[Page]][Char])) |
683 | 727 | ||
684 | #define COMPOSE_INDEX(Char) \ | 728 | #define COMPOSE_INDEX(Char) \ |
685 | ((((Char) >> 8) > (COMPOSE_TABLE_LAST)) ? 0 : CI((Char) >> 8, (Char) & 0xff)) | 729 | (((Char >> 8) > (COMPOSE_TABLE_LAST)) ? 0 : CI((Char) >> 8, (Char) & 0xff)) |
686 | 730 | ||
687 | static gboolean | 731 | static gboolean |
688 | combine (gunichar a, gunichar b, gunichar * result) | 732 | combine (gunichar a, gunichar b, gunichar * result) |
689 | { | 733 | { |
690 | gushort index_a, index_b; | 734 | gushort index_a, index_b; |
@@ -754,11 +798,11 @@ _g_utf8_normalize_wc (const gchar * str, gssize max_len, GNormalizeMode mode) | |||
754 | while ((max_len < 0 || p < str + max_len) && *p) | 798 | while ((max_len < 0 || p < str + max_len) && *p) |
755 | { | 799 | { |
756 | const gchar *decomp; | 800 | const gchar *decomp; |
757 | gunichar wc = g_utf8_get_char (p); | 801 | gunichar wc = g_utf8_get_char (p); |
758 | 802 | ||
759 | if (wc >= 0xac00 && wc <= 0xd7af) | 803 | if (wc >= SBase && wc < SBase + SCount) |
760 | { | 804 | { |
761 | gsize result_len; | 805 | gsize result_len; |
762 | decompose_hangul (wc, NULL, &result_len); | 806 | decompose_hangul (wc, NULL, &result_len); |
763 | n_wc += result_len; | 807 | n_wc += result_len; |
764 | } | 808 | } |
@@ -773,11 +817,11 @@ _g_utf8_normalize_wc (const gchar * str, gssize max_len, GNormalizeMode mode) | |||
773 | } | 817 | } |
774 | 818 | ||
775 | p = g_utf8_next_char (p); | 819 | p = g_utf8_next_char (p); |
776 | } | 820 | } |
777 | 821 | ||
778 | wc_buffer = g_new (gunichar, n_wc + 1); | 822 | wc_buffer = g_malloc (sizeof (gunichar) * (n_wc + 1)); |
779 | if (!wc_buffer) | 823 | if (!wc_buffer) |
780 | return NULL; | 824 | return NULL; |
781 | 825 | ||
782 | last_start = 0; | 826 | last_start = 0; |
783 | n_wc = 0; | 827 | n_wc = 0; |
@@ -787,11 +831,11 @@ _g_utf8_normalize_wc (const gchar * str, gssize max_len, GNormalizeMode mode) | |||
787 | gunichar wc = g_utf8_get_char (p); | 831 | gunichar wc = g_utf8_get_char (p); |
788 | const gchar *decomp; | 832 | const gchar *decomp; |
789 | int cc; | 833 | int cc; |
790 | gsize old_n_wc = n_wc; | 834 | gsize old_n_wc = n_wc; |
791 | 835 | ||
792 | if (wc >= 0xac00 && wc <= 0xd7af) | 836 | if (wc >= SBase && wc < SBase + SCount) |
793 | { | 837 | { |
794 | gsize result_len; | 838 | gsize result_len; |
795 | decompose_hangul (wc, wc_buffer + n_wc, &result_len); | 839 | decompose_hangul (wc, wc_buffer + n_wc, &result_len); |
796 | n_wc += result_len; | 840 | n_wc += result_len; |
797 | } | 841 | } |
@@ -826,11 +870,11 @@ _g_utf8_normalize_wc (const gchar * str, gssize max_len, GNormalizeMode mode) | |||
826 | 870 | ||
827 | if (n_wc > 0) | 871 | if (n_wc > 0) |
828 | { | 872 | { |
829 | g_unicode_canonical_ordering (wc_buffer + last_start, | 873 | g_unicode_canonical_ordering (wc_buffer + last_start, |
830 | n_wc - last_start); | 874 | n_wc - last_start); |
831 | last_start = n_wc; | 875 | // dead assignment: last_start = n_wc; |
832 | } | 876 | } |
833 | 877 | ||
834 | wc_buffer[n_wc] = 0; | 878 | wc_buffer[n_wc] = 0; |
835 | 879 | ||
836 | /* All decomposed and reordered */ | 880 | /* All decomposed and reordered */ |
@@ -882,53 +926,53 @@ _g_utf8_normalize_wc (const gchar * str, gssize max_len, GNormalizeMode mode) | |||
882 | * @mode: the type of normalization to perform. | 926 | * @mode: the type of normalization to perform. |
883 | * | 927 | * |
884 | * Converts a string into canonical form, standardizing | 928 | * Converts a string into canonical form, standardizing |
885 | * such issues as whether a character with an accent | 929 | * such issues as whether a character with an accent |
886 | * is represented as a base character and combining | 930 | * is represented as a base character and combining |
887 | * accent or as a single precomposed character. You | 931 | * accent or as a single precomposed character. The |
888 | * should generally call g_utf8_normalize() before | 932 | * string has to be valid UTF-8, otherwise %NULL is |
889 | * comparing two Unicode strings. | 933 | * returned. You should generally call g_utf8_normalize() |
934 | * before comparing two Unicode strings. | ||
890 | * | 935 | * |
891 | * The normalization mode %G_NORMALIZE_DEFAULT only | 936 | * The normalization mode %G_NORMALIZE_DEFAULT only |
892 | * standardizes differences that do not affect the | 937 | * standardizes differences that do not affect the |
893 | * text content, such as the above-mentioned accent | 938 | * text content, such as the above-mentioned accent |
894 | * representation. %G_NORMALIZE_ALL also standardizes | 939 | * representation. %G_NORMALIZE_ALL also standardizes |
895 | * the "compatibility" characters in Unicode, such | 940 | * the "compatibility" characters in Unicode, such |
896 | * as SUPERSCRIPT THREE to the standard forms | 941 | * as SUPERSCRIPT THREE to the standard forms |
897 | * (in this case DIGIT THREE). Formatting information | 942 | * (in this case DIGIT THREE). Formatting information |
898 | * may be lost but for most text operations such | 943 | * may be lost but for most text operations such |
899 | * characters should be considered the same. | 944 | * characters should be considered the same. |
900 | * For example, g_utf8_collate() normalizes | ||
901 | * with %G_NORMALIZE_ALL as its first step. | ||
902 | * | 945 | * |
903 | * %G_NORMALIZE_DEFAULT_COMPOSE and %G_NORMALIZE_ALL_COMPOSE | 946 | * %G_NORMALIZE_DEFAULT_COMPOSE and %G_NORMALIZE_ALL_COMPOSE |
904 | * are like %G_NORMALIZE_DEFAULT and %G_NORMALIZE_ALL, | 947 | * are like %G_NORMALIZE_DEFAULT and %G_NORMALIZE_ALL, |
905 | * but return a result with composed forms rather | 948 | * but return a result with composed forms rather |
906 | * than a maximally decomposed form. This is often | 949 | * than a maximally decomposed form. This is often |
907 | * useful if you intend to convert the string to | 950 | * useful if you intend to convert the string to |
908 | * a legacy encoding or pass it to a system with | 951 | * a legacy encoding or pass it to a system with |
909 | * less capable Unicode handling. | 952 | * less capable Unicode handling. |
910 | * | 953 | * |
911 | * Return value: a newly allocated string, that is the | 954 | * Return value: a newly allocated string, that is the |
912 | * normalized form of @str. | 955 | * normalized form of @str, or %NULL if @str is not |
956 | * valid UTF-8. | ||
913 | **/ | 957 | **/ |
914 | static gchar * | 958 | static gchar * |
915 | g_utf8_normalize (const gchar * str, gssize len, GNormalizeMode mode) | 959 | g_utf8_normalize (const gchar * str, gssize len, GNormalizeMode mode) |
916 | { | 960 | { |
917 | gunichar *result_wc = _g_utf8_normalize_wc (str, len, mode); | 961 | gunichar *result_wc = _g_utf8_normalize_wc (str, len, mode); |
918 | gchar *result; | 962 | gchar *result; |
919 | 963 | ||
920 | result = g_ucs4_to_utf8 (result_wc, -1, NULL, NULL, NULL); | 964 | result = g_ucs4_to_utf8 (result_wc, -1, NULL, NULL); |
921 | g_free (result_wc); | 965 | g_free (result_wc); |
922 | 966 | ||
923 | return result; | 967 | return result; |
924 | } | 968 | } |
925 | 969 | ||
926 | /* Public Libidn API starts here. */ | 970 | /* Public Libidn API starts here. */ |
927 | 971 | ||
928 | /** | 972 | /** |
929 | * stringprep_utf8_to_unichar - convert UTF-8 to Unicode code point | 973 | * stringprep_utf8_to_unichar: |
930 | * @p: a pointer to Unicode character encoded as UTF-8 | 974 | * @p: a pointer to Unicode character encoded as UTF-8 |
931 | * | 975 | * |
932 | * Converts a sequence of bytes encoded as UTF-8 to a Unicode character. | 976 | * Converts a sequence of bytes encoded as UTF-8 to a Unicode character. |
933 | * If @p does not point to a valid UTF-8 encoded character, results are | 977 | * If @p does not point to a valid UTF-8 encoded character, results are |
934 | * undefined. | 978 | * undefined. |
@@ -940,11 +984,11 @@ stringprep_utf8_to_unichar (const char *p) | |||
940 | { | 984 | { |
941 | return g_utf8_get_char (p); | 985 | return g_utf8_get_char (p); |
942 | } | 986 | } |
943 | 987 | ||
944 | /** | 988 | /** |
945 | * stringprep_unichar_to_utf8 - convert Unicode code point to UTF-8 | 989 | * stringprep_unichar_to_utf8: |
946 | * @c: an ISO10646 character code | 990 | * @c: an ISO10646 character code |
947 | * @outbuf: output buffer, must have at least 6 bytes of space. | 991 | * @outbuf: output buffer, must have at least 6 bytes of space. |
948 | * If %NULL, the length will be computed and returned | 992 | * If %NULL, the length will be computed and returned |
949 | * and nothing will be written to @outbuf. | 993 | * and nothing will be written to @outbuf. |
950 | * | 994 | * |
@@ -956,33 +1000,46 @@ int | |||
956 | stringprep_unichar_to_utf8 (uint32_t c, char *outbuf) | 1000 | stringprep_unichar_to_utf8 (uint32_t c, char *outbuf) |
957 | { | 1001 | { |
958 | return g_unichar_to_utf8 (c, outbuf); | 1002 | return g_unichar_to_utf8 (c, outbuf); |
959 | } | 1003 | } |
960 | 1004 | ||
1005 | #include <unistr.h> | ||
1006 | |||
961 | /** | 1007 | /** |
962 | * stringprep_utf8_to_ucs4 - convert UTF-8 string to UCS-4 | 1008 | * stringprep_utf8_to_ucs4: |
963 | * @str: a UTF-8 encoded string | 1009 | * @str: a UTF-8 encoded string |
964 | * @len: the maximum length of @str to use. If @len < 0, then | 1010 | * @len: the maximum length of @str to use. If @len < 0, then |
965 | * the string is nul-terminated. | 1011 | * the string is nul-terminated. |
966 | * @items_written: location to store the number of characters in the | 1012 | * @items_written: location to store the number of characters in the |
967 | * result, or %NULL. | 1013 | * result, or %NULL. |
968 | * | 1014 | * |
969 | * Convert a string from UTF-8 to a 32-bit fixed width | 1015 | * Convert a string from UTF-8 to a 32-bit fixed width representation |
970 | * representation as UCS-4, assuming valid UTF-8 input. | 1016 | * as UCS-4. The function now performs error checking to verify that |
971 | * This function does no error checking on the input. | 1017 | * the input is valid UTF-8 (before it was documented to not do error |
1018 | * checking). | ||
972 | * | 1019 | * |
973 | * Return value: a pointer to a newly allocated UCS-4 string. | 1020 | * Return value: a pointer to a newly allocated UCS-4 string. |
974 | * This value must be freed with free(). | 1021 | * This value must be deallocated by the caller. |
975 | **/ | 1022 | **/ |
976 | uint32_t * | 1023 | uint32_t * |
977 | stringprep_utf8_to_ucs4 (const char *str, ssize_t len, size_t * items_written) | 1024 | stringprep_utf8_to_ucs4 (const char *str, ssize_t len, size_t * items_written) |
978 | { | 1025 | { |
1026 | size_t n; | ||
1027 | |||
1028 | if (len < 0) | ||
1029 | n = strlen (str); | ||
1030 | else | ||
1031 | n = len; | ||
1032 | |||
1033 | if (u8_check ((const uint8_t *) str, n)) | ||
1034 | return NULL; | ||
1035 | |||
979 | return g_utf8_to_ucs4_fast (str, (glong) len, (glong *) items_written); | 1036 | return g_utf8_to_ucs4_fast (str, (glong) len, (glong *) items_written); |
980 | } | 1037 | } |
981 | 1038 | ||
982 | /** | 1039 | /** |
983 | * stringprep_ucs4_to_utf8 - convert UCS-4 string to UTF-8 | 1040 | * stringprep_ucs4_to_utf8: |
984 | * @str: a UCS-4 encoded string | 1041 | * @str: a UCS-4 encoded string |
985 | * @len: the maximum length of @str to use. If @len < 0, then | 1042 | * @len: the maximum length of @str to use. If @len < 0, then |
986 | * the string is terminated with a 0 character. | 1043 | * the string is terminated with a 0 character. |
987 | * @items_read: location to store number of characters read, or %NULL. | 1044 | * @items_read: location to store number of characters read, or %NULL. |
988 | * @items_written: location to store number of bytes written or %NULL. | 1045 | * @items_written: location to store number of bytes written or %NULL. |
@@ -991,24 +1048,23 @@ stringprep_utf8_to_ucs4 (const char *str, ssize_t len, size_t * items_written) | |||
991 | * | 1048 | * |
992 | * Convert a string from a 32-bit fixed width representation as UCS-4 | 1049 | * Convert a string from a 32-bit fixed width representation as UCS-4 |
993 | * to UTF-8. The result will be terminated with a 0 byte. | 1050 | * to UTF-8. The result will be terminated with a 0 byte. |
994 | * | 1051 | * |
995 | * Return value: a pointer to a newly allocated UTF-8 string. | 1052 | * Return value: a pointer to a newly allocated UTF-8 string. |
996 | * This value must be freed with free(). If an | 1053 | * This value must be deallocated by the caller. |
997 | * error occurs, %NULL will be returned and | 1054 | * If an error occurs, %NULL will be returned. |
998 | * @error set. | ||
999 | **/ | 1055 | **/ |
1000 | char * | 1056 | char * |
1001 | stringprep_ucs4_to_utf8 (const uint32_t * str, ssize_t len, | 1057 | stringprep_ucs4_to_utf8 (const uint32_t * str, ssize_t len, |
1002 | size_t * items_read, size_t * items_written) | 1058 | size_t * items_read, size_t * items_written) |
1003 | { | 1059 | { |
1004 | return g_ucs4_to_utf8 (str, len, (glong *) items_read, | 1060 | return g_ucs4_to_utf8 (str, len, (glong *) items_read, |
1005 | (glong *) items_written, NULL); | 1061 | (glong *) items_written); |
1006 | } | 1062 | } |
1007 | 1063 | ||
1008 | /** | 1064 | /** |
1009 | * stringprep_utf8_nfkc_normalize - normalize Unicode string | 1065 | * stringprep_utf8_nfkc_normalize: |
1010 | * @str: a UTF-8 encoded string. | 1066 | * @str: a UTF-8 encoded string. |
1011 | * @len: length of @str, in bytes, or -1 if @str is nul-terminated. | 1067 | * @len: length of @str, in bytes, or -1 if @str is nul-terminated. |
1012 | * | 1068 | * |
1013 | * Converts a string into canonical form, standardizing | 1069 | * Converts a string into canonical form, standardizing |
1014 | * such issues as whether a character with an accent | 1070 | * such issues as whether a character with an accent |
@@ -1032,22 +1088,22 @@ stringprep_utf8_nfkc_normalize (const char *str, ssize_t len) | |||
1032 | { | 1088 | { |
1033 | return g_utf8_normalize (str, len, G_NORMALIZE_NFKC); | 1089 | return g_utf8_normalize (str, len, G_NORMALIZE_NFKC); |
1034 | } | 1090 | } |
1035 | 1091 | ||
1036 | /** | 1092 | /** |
1037 | * stringprep_ucs4_nfkc_normalize - normalize Unicode string | 1093 | * stringprep_ucs4_nfkc_normalize: |
1038 | * @str: a Unicode string. | 1094 | * @str: a Unicode string. |
1039 | * @len: length of @str array, or -1 if @str is nul-terminated. | 1095 | * @len: length of @str array, or -1 if @str is nul-terminated. |
1040 | * | 1096 | * |
1041 | * Converts UCS4 string into UTF-8 and runs | 1097 | * Converts a UCS4 string into canonical form, see |
1042 | * stringprep_utf8_nfkc_normalize(). | 1098 | * stringprep_utf8_nfkc_normalize() for more information. |
1043 | * | 1099 | * |
1044 | * Return value: a newly allocated Unicode string, that is the NFKC | 1100 | * Return value: a newly allocated Unicode string, that is the NFKC |
1045 | * normalized form of @str. | 1101 | * normalized form of @str. |
1046 | **/ | 1102 | **/ |
1047 | uint32_t * | 1103 | uint32_t * |
1048 | stringprep_ucs4_nfkc_normalize (uint32_t * str, ssize_t len) | 1104 | stringprep_ucs4_nfkc_normalize (const uint32_t * str, ssize_t len) |
1049 | { | 1105 | { |
1050 | char *p; | 1106 | char *p; |
1051 | uint32_t *result_wc; | 1107 | uint32_t *result_wc; |
1052 | 1108 | ||
1053 | p = stringprep_ucs4_to_utf8 (str, len, 0, 0); | 1109 | p = stringprep_ucs4_to_utf8 (str, len, 0, 0); |