-
Notifications
You must be signed in to change notification settings - Fork 108
/
Copy pathGraphemes.g4
55 lines (51 loc) · 2.14 KB
/
Graphemes.g4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
grammar Graphemes;
// Token: a single code point whose Grapheme_Cluster_Break property is Extend.
Extend
    : [\p{Grapheme_Cluster_Break=Extend}]
    ;
// Token: the single code point U+200D. Besides being a token for the parser,
// it is referenced by EmojiZWJSequence as the joiner between elements.
ZWJ
    : '\u200D'
    ;
// Token: a single code point whose Grapheme_Cluster_Break property is
// SpacingMark.
SpacingMark
    : [\p{Grapheme_Cluster_Break=SpacingMark}]
    ;
// Lexer helper (a fragment, so it never becomes a token by itself): the
// single code point U+FE0E. Used by TextPresentationSequence.
fragment VS15
    : '\uFE0E'
    ;
// Lexer helper (a fragment, so it never becomes a token by itself): the
// single code point U+FE0F. Used by EmojiPresentationSequence.
fragment VS16
    : '\uFE0F'
    ;
// Lexer helper: a single code point matching the Nonspacing_Mark Unicode
// property. Used as the optional tail of EmojiCombiningSequence.
fragment NonspacingMark
    : [\p{Nonspacing_Mark}]
    ;
// Lexer helper: a single code point for which EmojiPresentation=TextDefault.
fragment TextPresentationCharacter
    : [\p{EmojiPresentation=TextDefault}]
    ;
// Lexer helper: a single code point for which EmojiPresentation=EmojiDefault.
fragment EmojiPresentationCharacter
    : [\p{EmojiPresentation=EmojiDefault}]
    ;
// Lexer helper: an EmojiPresentationCharacter immediately followed by VS15.
// NOTE(review): the emoji-default character paired with VS15 looks crossed
// but is presumably intentional (the selector overriding the default
// presentation) - confirm against the emoji specification.
fragment TextPresentationSequence
    : EmojiPresentationCharacter VS15
    ;
// Lexer helper: a TextPresentationCharacter immediately followed by VS16.
// NOTE(review): the text-default character paired with VS16 looks crossed
// but is presumably intentional (the selector overriding the default
// presentation) - confirm against the emoji specification.
fragment EmojiPresentationSequence
    : TextPresentationCharacter VS16
    ;
// Lexer helper: exactly two code points - a base whose Grapheme_Cluster_Break
// is E_Base or E_Base_GAZ, then a modifier whose Grapheme_Cluster_Break is
// E_Modifier.
fragment EmojiModifierSequence
    : ( [\p{Grapheme_Cluster_Break=E_Base}]
      | [\p{Grapheme_Cluster_Break=E_Base_GAZ}]
      )
      [\p{Grapheme_Cluster_Break=E_Modifier}]
    ;
// Lexer helper: exactly two consecutive code points, each with
// Grapheme_Cluster_Break=Regional_Indicator.
fragment EmojiFlagSequence
    : [\p{Grapheme_Cluster_Break=Regional_Indicator}]
      [\p{Grapheme_Cluster_Break=Regional_Indicator}]
    ;
// Lexer helper: a single code point with the Extended_Pictographic property.
// Only referenced from EmojiZWJElement.
fragment ExtendedPictographic
    : [\p{Extended_Pictographic}]
    ;
// Lexer helper: a single code point matching the EmojiNRK property.
// Only referenced from EmojiZWJElement.
fragment EmojiNRK
    : [\p{EmojiNRK}]
    ;
// Lexer helper: one of the three head forms below, followed by zero or more
// NonspacingMark code points. The two-code-point heads are listed before the
// bare character; the lexer takes the longest match regardless of order.
fragment EmojiCombiningSequence
    : ( EmojiPresentationSequence
      | TextPresentationSequence
      | EmojiPresentationCharacter
      )
      NonspacingMark*
    ;
// Token: any one of the modifier, combining, or flag sequences defined by the
// fragments of the same names. Consumed by the emojiSequence parser rule.
EmojiCoreSequence
    : EmojiModifierSequence
    | EmojiCombiningSequence
    | EmojiFlagSequence
    ;
// Lexer helper: one element of a ZWJ-joined sequence. Any of the five
// alternatives below qualifies; see EmojiZWJSequence for how elements are
// joined.
fragment EmojiZWJElement
    : EmojiModifierSequence
    | EmojiPresentationSequence
    | EmojiPresentationCharacter
    | ExtendedPictographic
    | EmojiNRK
    ;
// Token: two or more EmojiZWJElement items with a ZWJ between each adjacent
// pair. The '+' requires at least one joiner, so a lone element is never an
// EmojiZWJSequence.
EmojiZWJSequence
    : EmojiZWJElement ( ZWJ EmojiZWJElement )+
    ;
// Parser rule: one EmojiZWJSequence or EmojiCoreSequence token, followed by
// any number of Extend, ZWJ, or SpacingMark tokens.
// NOTE(review): graphemeCluster appends the same optional tail after this
// rule, so trailing Extend/ZWJ/SpacingMark tokens can be matched at either
// level; the grouping is kept as written to preserve the parse-tree shape.
emojiSequence
    : ( EmojiZWJSequence
      | EmojiCoreSequence
      )
      ( Extend
      | ZWJ
      | SpacingMark
      )*
    ;
// Token: a single code point whose Grapheme_Cluster_Break property is
// Prepend. graphemeCluster allows any number of these before the cluster core.
Prepend
    : [\p{Grapheme_Cluster_Break=Prepend}]
    ;
// Token: a single code point whose Grapheme_Cluster_Break property is NOT
// Control (upper-case \P is the negated form of \p).
// NOTE(review): CR and LF presumably have their own Grapheme_Cluster_Break
// values and would therefore also match here when not part of a CRLF pair;
// no rule in this grammar matches Control code points at all - confirm both
// are intended.
NonControl
    : [\P{Grapheme_Cluster_Break=Control}]
    ;
// Token: a code point with Grapheme_Cluster_Break=CR immediately followed by
// one with Grapheme_Cluster_Break=LF, matched together as a single token.
CRLF
    : [\p{Grapheme_Cluster_Break=CR}]
      [\p{Grapheme_Cluster_Break=LF}]
    ;
// Token: a run of code points classified by the Grapheme_Cluster_Break values
// L, V, T, LV and LVT, in one of the five shapes below.
HangulSyllable
    // L* V+ T*
    : [\p{Grapheme_Cluster_Break=L}]*
      [\p{Grapheme_Cluster_Break=V}]+
      [\p{Grapheme_Cluster_Break=T}]*
    // L* LV V* T*
    | [\p{Grapheme_Cluster_Break=L}]*
      [\p{Grapheme_Cluster_Break=LV}]
      [\p{Grapheme_Cluster_Break=V}]*
      [\p{Grapheme_Cluster_Break=T}]*
    // L* LVT T*
    | [\p{Grapheme_Cluster_Break=L}]*
      [\p{Grapheme_Cluster_Break=LVT}]
      [\p{Grapheme_Cluster_Break=T}]*
    // L+ (leading consonants only)
    | [\p{Grapheme_Cluster_Break=L}]+
    // T+ (trailing consonants only)
    | [\p{Grapheme_Cluster_Break=T}]+
    ;
// Parser rule: one grapheme cluster. Either a CRLF token by itself, or:
//   - any number of Prepend tokens,
//   - exactly one core (emojiSequence, HangulSyllable, or NonControl),
//   - any number of trailing Extend, ZWJ, or SpacingMark tokens.
graphemeCluster
    : CRLF
    | Prepend*
      ( emojiSequence
      | HangulSyllable
      | NonControl
      )
      ( Extend
      | ZWJ
      | SpacingMark
      )*
    ;
// Parser entry rule: zero or more graphemeCluster items followed by EOF, so
// the whole input must be consumed for the parse to succeed.
graphemes
    : graphemeCluster* EOF
    ;