GnuCash  5.6-150-g038405b370+
gnc-unicode.cpp
1 /********************************************************************
2  * gnc-unicode.cpp -- Unicode string search and comparison with ICU. *
3  * *
4  * Copyright (C) 2025 John Ralls <jralls@ceridwen.us> *
5  * *
6  * This program is free software; you can redistribute it and/or *
7  * modify it under the terms of the GNU General Public License as *
8  * published by the Free Software Foundation; either version 2 of *
9  * the License, or (at your option) any later version. *
10  * *
11  * This program is distributed in the hope that it will be useful, *
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14  * GNU General Public License for more details. *
15  * *
16  * You should have received a copy of the GNU General Public License*
17  * along with this program; if not, contact: *
18  * *
19  * Free Software Foundation Voice: +1-617-542-5942 *
20  * 51 Franklin Street, Fifth Floor Fax: +1-617-542-2652 *
21  * Boston, MA 02110-1301, USA gnu@gnu.org *
22  ********************************************************************/
23 
24 #include "gnc-unicode.h"
25 
26 #include <memory>
27 #include <unicode/stsearch.h>
28 #include <unicode/tblcoll.h>
29 #include <unicode/coll.h>
30 #include "gnc-locale-utils.h"
31 #include <glib-2.0/glib.h>
32 
/* Log domain passed to every g_log() call in this file. */
constexpr const char* logdomain = "gnc.locale";
34 
/** File-local selector for the collation strength used by the search and
 * comparison helpers. Each enumerator is mapped onto the ICU collator
 * strength of the same name by collator_set_strength().
 */
enum class CompareStrength
{
    PRIMARY,    /* mapped to icu::Collator::PRIMARY */
    SECONDARY,  /* mapped to icu::Collator::SECONDARY */
    TERTIARY,   /* mapped to icu::Collator::TERTIARY */
    QUATERNARY, /* mapped to icu::Collator::QUATERNARY */
    IDENTICAL,  /* mapped to icu::Collator::IDENTICAL */
};
42 
43 static void
44 collator_set_strength(icu::Collator* collator, CompareStrength strength)
45 {
46  switch (strength)
47  {
48  case CompareStrength::PRIMARY:
49  collator->setStrength(icu::Collator::PRIMARY);
50  break;
51  case CompareStrength::SECONDARY:
52  collator->setStrength(icu::Collator::SECONDARY);
53  break;
54  case CompareStrength::TERTIARY:
55  collator->setStrength(icu::Collator::TERTIARY);
56  break;
57  case CompareStrength::QUATERNARY:
58  collator->setStrength(icu::Collator::QUATERNARY);
59  break;
60  case CompareStrength::IDENTICAL:
61  collator->setStrength(icu::Collator::IDENTICAL);
62  break;
63  }
64 }
65 
66 static bool
67 unicode_has_substring_internal(const char* needle, const char* haystack,
68  int* position, int* length,
69  CompareStrength strength)
70 {
71  UErrorCode status{U_ZERO_ERROR};
72  auto locale{gnc_locale_name()};
73  auto u_needle{icu::UnicodeString::fromUTF8(needle)};
74  auto u_haystack{icu::UnicodeString::fromUTF8(haystack)};
75  icu::StringSearch search(u_needle, u_haystack, locale, nullptr, status);
76  g_free(locale);
77 
78  if (U_SUCCESS(status))
79  {
80  auto collator = search.getCollator();
81  collator_set_strength(collator, strength);
82  search.reset();
83  }
84 
85  if (U_FAILURE(status))
86  {
87  g_log(logdomain, G_LOG_LEVEL_ERROR,
88  "StringSearch creation failed for %s", haystack);
89  return false;
90  }
91 
92  auto pos{search.first(status)};
93  if (U_FAILURE(status))
94  {
95  g_log(logdomain, G_LOG_LEVEL_ERROR,
96  "StringSearch encountered an error finding %s in %s",
97  needle, haystack);
98  return false;
99  }
100  if (pos == USEARCH_DONE)
101  {
102  g_log(logdomain, G_LOG_LEVEL_DEBUG, "%s not found in %s",
103  needle, haystack);
104  return false;
105  }
106 
107  if (position && length)
108  {
109  *position = pos;
110  *length = search.getMatchedLength();
111  }
112 
113  g_log(logdomain, G_LOG_LEVEL_DEBUG, "%s found in %s at index %d",
114  needle, haystack, pos);
115  return true;
116 }
117 
118 bool
119 gnc_unicode_has_substring_base_chars(const char* needle,
120  const char* haystack,
121  int* position,
122  int* length)
123 {
124  return unicode_has_substring_internal(needle, haystack, position, length,
125  CompareStrength::PRIMARY);
126 }
127 
128 bool
129 gnc_unicode_has_substring_accented_chars(const char* needle,
130  const char* haystack,
131  int* position,
132  int* length)
133 {
134  return unicode_has_substring_internal(needle, haystack, position, length,
135  CompareStrength::SECONDARY);
136 }
137 
138 bool
139 gnc_unicode_has_substring_accented_case_sensitive(const char* needle,
140  const char* haystack,
141  int* position,
142  int* length)
143 {
144  return unicode_has_substring_internal(needle, haystack, position, length,
145  CompareStrength::TERTIARY);
146 }
147 
/** Byte-for-byte search for needle in haystack using strstr().
 *
 * @param needle NUL-terminated text to look for. Null or empty yields false.
 * @param haystack NUL-terminated text to search in. Null yields false.
 * @param position If non-null, receives the byte offset of the first match
 * within haystack.
 * @param length If non-null, receives the byte length of needle.
 * @return true when needle occurs anywhere in haystack, including at its
 * very start; false otherwise.
 */
bool
gnc_unicode_has_substring_identical(const char* needle,
                                    const char* haystack,
                                    int* position,
                                    int* length)
{
    /* strstr() returns haystack itself for an empty needle; treat that as
     * "not found" explicitly instead of rejecting every match at offset 0. */
    if (!needle || !haystack || *needle == '\0')
        return false;

    const char* location = strstr(haystack, needle);
    if (!location)
        return false;

    /* The out-parameters are optional, as in the collation-based searches. */
    if (position)
        *position = static_cast<int>(location - haystack);
    if (length)
        *length = static_cast<int>(strlen(needle));
    return true;
}
163 
164 static int
165 unicode_compare_internal(const char* one, const char* two,
166  CompareStrength strength)
167 {
168  UErrorCode status{U_ZERO_ERROR};
169  auto locale{gnc_locale_name()};
170  std::unique_ptr<icu::Collator> coll(
171  icu::Collator::createInstance(icu::Locale(locale), status));
172 
173  if (U_SUCCESS(status))
174  collator_set_strength(coll.get(), strength);
175 
176  if (U_FAILURE(status))
177  {
178  g_log(logdomain, G_LOG_LEVEL_ERROR,
179  "Failed to create collator for locale %s: %s",
180  locale, u_errorName(status));
181  g_free(locale);
182  return -99;
183  }
184 
185  auto result = coll->compare(one, two, status);
186 
187  if (U_FAILURE(status))
188  {
189  g_log(logdomain, G_LOG_LEVEL_ERROR,
190  "Comparison of %s and %s in locale %s failed: %s",
191  one, two, locale, u_errorName(status));
192  g_free(locale);
193  return -99;
194  }
195 
196  g_free(locale);
197  return result == UCOL_LESS ? -1 : result == UCOL_EQUAL ? 0 : 1;
198 }
199 
200 int
201 gnc_unicode_compare_base_chars(const char* one, const char* two)
202 {
203  return unicode_compare_internal(one, two, CompareStrength::PRIMARY);
204 }
205 
206 int
207 gnc_unicode_compare_accented_chars(const char* one, const char* two)
208 {
209  return unicode_compare_internal(one, two, CompareStrength::SECONDARY);
210 }
211 
212 int
213 gnc_unicode_compare_accented_case_sensitive(const char* one, const char* two)
214 {
215  return unicode_compare_internal(one, two, CompareStrength::TERTIARY);
216 }
217 
218 int
219 gnc_unicode_compare_identical(const char* one, const char* two)
220 {
221  return unicode_compare_internal(one, two, CompareStrength::IDENTICAL);
222 
223 }