@@ -63,28 +63,49 @@ std::vector<MisspelledRange> HunspellSpellchecker::CheckSpelling(const uint16_t
63
63
64
64
std::vector<char > utf8_buffer (256 );
65
65
66
- size_t word_start = 0 ;
67
- bool within_word = false ;
68
- for (size_t i = 0 ; i < utf16_length; i++) {
66
+ enum {
67
+ unknown,
68
+ in_separator,
69
+ in_word,
70
+ } state = in_separator;
71
+
72
+ for (size_t word_start = 0 , i = 0 ; i < utf16_length; i++) {
69
73
uint16_t c = utf16_text[i];
70
- bool is_word_character = iswalpha (c);
71
- if (within_word) {
72
- if (!is_word_character) {
73
- within_word = false ;
74
-
75
- bool converted = TranscodeUTF16ToUTF8 (transcoder, (char *)utf8_buffer.data (), utf8_buffer.size (), utf16_text + word_start, i - word_start);
76
- if (converted) {
77
- if (hunspell->spell (utf8_buffer.data ()) == 0 ) {
78
- MisspelledRange range;
79
- range.start = word_start;
80
- range.end = i;
81
- result.push_back (range);
74
+
75
+ switch (state) {
76
+ case unknown:
77
+ if (iswpunct (c) || iswspace (c)) {
78
+ state = in_separator;
79
+ }
80
+ break ;
81
+
82
+ case in_separator:
83
+ if (iswalpha (c)) {
84
+ word_start = i;
85
+ state = in_word;
86
+ } else if (!iswpunct (c) && !iswspace (c)) {
87
+ state = unknown;
88
+ }
89
+ break ;
90
+
91
+ case in_word:
92
+ if (c == ' \' ' && iswalpha (utf16_text[i + 1 ])) {
93
+ i++;
94
+ } else if (c == 0 || iswpunct (c) || iswspace (c)) {
95
+ state = in_separator;
96
+ bool converted = TranscodeUTF16ToUTF8 (transcoder, (char *)utf8_buffer.data (), utf8_buffer.size (), utf16_text + word_start, i - word_start);
97
+ if (converted) {
98
+ if (hunspell->spell (utf8_buffer.data ()) == 0 ) {
99
+ MisspelledRange range;
100
+ range.start = word_start;
101
+ range.end = i;
102
+ result.push_back (range);
103
+ }
82
104
}
105
+ } else if (!iswalpha (c)) {
106
+ state = unknown;
83
107
}
84
- }
85
- } else if (is_word_character) {
86
- word_start = i;
87
- within_word = true ;
108
+ break ;
88
109
}
89
110
}
90
111
0 commit comments