From 5664c795f091f1fef4976954e776fd5c87b0d85d Mon Sep 17 00:00:00 2001
From: J Smith
Date: Mon, 7 Nov 2011 00:46:22 -0500
Subject: [PATCH] Fix weirdness when dealing with UTF-8 in buggy libc implementations.

OSX libc has a bug from an older FreeBSD libc that causes it to see certain
characters as spaces incorrectly when using UTF-8.
---
 contrib/unaccent/unaccent.c | 11 ++++++++++-
 1 files changed, 10 insertions(+), 1 deletions(-)

diff --git a/contrib/unaccent/unaccent.c b/contrib/unaccent/unaccent.c
index d9c2eac..e2d01eb 100644
--- a/contrib/unaccent/unaccent.c
+++ b/contrib/unaccent/unaccent.c
@@ -96,6 +96,7 @@ initSuffixTree(char *filename)
 		int srclen;
 		int trglen;
 		char *line = NULL;
+		char *tok = NULL;
 
 		skip = true;
 
@@ -108,8 +109,16 @@ initSuffixTree(char *filename)
 			 */
 			while ((line = tsearch_readline(&trst)) != NULL)
 			{
-				if (sscanf(line, "%s\t%s\n", src, trg) != 2)
+				if ((tok = strchr(line, '\t')) == NULL) {
 					continue;
+				}
+
+				sprintf(src, "%.*s", (int) (tok - line), line);
+				sprintf(trg, "%s", tok + 1);
+
+				if ((tok = strchr(trg, '\n')) != NULL) {
+					tok[0] = '\0';
+				}
 
 				srclen = strlen(src);
 				trglen = strlen(trg);
-- 
1.7.7.2