-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathUnicode test2.cpp
83 lines (64 loc) · 2.65 KB
/
Unicode test2.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
// Unicode test2.cpp
#include <iostream>
#include <icu.h>
// Number of UChar units in every output buffer handed to transliterate().
constexpr int BUFFER_SIZE{256};

// Sample inputs for the demo. Both point at string-literal storage, so they
// must only ever be read. The C-style casts drop the literal's const (and
// adapt its type if UChar is not char16_t) because transliterate() takes a
// non-const UChar*.
// First sample: "Test string" followed by U+FBB3 and U+2502.
UChar* string1{(UChar*)u"Test string\uFBB3\u2502"};
// Second sample: the same text without the two trailing characters.
UChar* string2{(UChar*)u"Test string"};
// Transliterates `string` with `trans` and writes the trimmed result to `buf`.
//
// Parameters:
//   trans  - transliterator to apply to the text.
//   string - NUL-terminated input text; it is only read.
//   buf    - output buffer with room for at least BUFFER_SIZE UChar units;
//            always left NUL-terminated.
//
// Behaviour:
//   * Input longer than BUFFER_SIZE - 1 units is truncated instead of
//     overflowing `buf`.
//   * If ICU reports a failure (for example a null transliterator or a result
//     that does not fit), `buf` falls back to the untransliterated input.
//   * One leading and one trailing U+0020 are removed from the result.
void transliterate(UTransliterator* trans, UChar* string, UChar* buf) {
    const UChar kSpace = 0x0020;
    // Reserve the last slot so a terminator always fits.
    const int32_t kMaxLength = BUFFER_SIZE - 1;

    // Bounded copy: u_strncpy does not terminate when the source is too long,
    // so the terminator is written explicitly.
    u_strncpy(buf, string, kMaxLength);
    buf[kMaxLength] = 0;

    int32_t limit = u_strlen(buf);
    UErrorCode status = U_ZERO_ERROR;
    utrans_transUChars(trans, buf, nullptr, kMaxLength, 0, &limit, &status);
    if (U_FAILURE(status)) {
        // Buffer contents cannot be trusted after a failure; restore the input.
        u_strncpy(buf, string, kMaxLength);
    }
    // A result that exactly fills the capacity is returned unterminated.
    buf[kMaxLength] = 0;

    // Remove any empty space from start and end of string
    int32_t length = u_strlen(buf);
    if (length > 0 && buf[0] == kSpace) {
        // The ranges overlap, so a memmove-style copy is required. Moving
        // `length` units shifts the remaining text plus its terminator.
        u_memmove(buf, buf + 1, length);
        --length;
    }
    // The length guard keeps an empty result from reading buf[-1].
    if (length > 0 && buf[length - 1] == kSpace) {
        buf[length - 1] = 0;
    }
}
int main()
{
// UChar* id = (UChar*)u"NFKD; [[:C:][:M:]] Remove;"; // Using rules instead of ID for filter
UChar* id = (UChar*)u"My transliterator";
// Rules:
//
// Remove some empty space characters that are not displayed in TF2 and replace some with empty space '\u0020' that are not normalized.
//
// Replace \u00A1 with i since they look almost the same in TF2 but won't show up as confusable (may need to do this with other characters as well?)
//
// Normalize to NFKD form
//
// Filter out all characters in the categories:
// C (Other), Lm (Letter, modifier), Lo (Letter, other), M (Mark), P (Punctuation), Sk (Symbol, modifier), Sm (Symbol, math) and So (Symbol, other)
//
// Lastly remove any spaces that are followed by another space.
//
UChar* rules = (UChar*)u"\u200A > ; \u2009 > ; \u202F > ; \u205F > ; \u2006 > ; \u1680 > '\u0020' ; '\u2029' > '\u0020'; '\u2028' > '\u0020'; \u00A1 > i; ::NFKD; [[:C:][:Lm:][:Lo:][:M:][:P:][:Sk:][:Sm:][:So:]] > ; '\u0020' } '\u0020' > ; ";
UErrorCode status = U_ZERO_ERROR;
UParseError pe;
UTransliterator* my_trans = utrans_openU(id, -1, UTRANS_FORWARD, rules, -1, &pe, &status);
UChar buf1[BUFFER_SIZE];
transliterate(my_trans, string1, buf1);
UChar buf2[BUFFER_SIZE];
transliterate(my_trans, string2, buf2);
utrans_close(my_trans);
USpoofChecker* sc = uspoof_open(&status);
int confuse = uspoof_areConfusable(sc, buf1, -1, buf2, -1, &status);
uspoof_close(sc);
wprintf(L"string1 = %s\n", string1);
wprintf(L"string2 = %s\n", string2);
wprintf(L"trans1 = %s\n", buf1);
wprintf(L"trans2 = %s\n", buf2);
if (confuse != 0) {
std::cout << "\nStrings are confusable! \n";
}
else {
std::cout << "\nStrings are NOT confusable. \n";
}
return 0;
}