Skip to content

Commit f12d94e

Browse files
committed
xkb: treat pattern_white_space as whitespace
1 parent 16d5a8e commit f12d94e

File tree

14 files changed

+119
-21
lines changed

14 files changed

+119
-21
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
xkb_keymap {
2+
xkb_keycodes {
3+
minimum = 8;
4+
maximum = 255;
5+
6+
indicator 1 = "DUMMY";
7+
};
8+
9+
xkb_types {
10+
virtual_modifiers Dummy;
11+
};
12+
13+
xkb_compat {
14+
interpret VoidSymbol {
15+
repeat = false;
16+
};
17+
};
18+
19+
xkb_symbols {
20+
};
21+
};
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
2+
… ‎ ‏ 
 

3+
// ^tab ^vertical tab ^form feed ^carriage return ^next line ^left-to-right mark ^right-to-left mark ^line separator ^paragraph separator
4+
xkb_keymap { };
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
keycodes = ""
2+
types = ""
3+
compat = ""
4+
symbols = "xyz"
5+
geometry = ""
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
rules = "a"
2+
3+
[[groups]]
4+
layout = "X"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
2+
… ‎ ‏ 
 

3+
// ^tab ^vertical tab ^form feed ^carriage return ^next line ^left-to-right mark ^right-to-left mark ^line separator ^paragraph separator
4+
5+
! layout[any] = symbols
6+
* = xyz

compose-tests/testcases/t00/t004/t0046/XCompose

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
… ‎ ‏ 
 
 <a>: X
2+
// ^tab ^vertical tab ^form feed ^carriage return ^next line ^left-to-right mark ^right-to-left mark ^line separator ^paragraph separator
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
a
2+
composed
3+
X
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
a

kbvm/release-notes.md

+15
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,21 @@
5252
```
5353
- Fixed an arithmetic underflow when printing diagnostic messages containing
5454
non-ascii text.
55+
- The following characters are now always treated as insignificant whitespace
56+
between tokens:
57+
- U+0009 - tab
58+
- U+000b - vertical tab
59+
- U+000c - form feed
60+
- U+000d - carriage return
61+
- U+0020 - space
62+
- U+0085 - next line
63+
- U+200e - left-to-right mark
64+
- U+200f - right-to-left mark
65+
- U+2028 - line separator
66+
- U+2029 - paragraph separator
67+
In xkb files, the following character is also insignificant whitespace:
68+
- U+000a - line feed
69+
In RMLVO and compose files, line feeds are significant.
5570

5671
# 0.1.3 (2025-02-13)
5772

kbvm/src/xkb.rs

+1
Original file line numberDiff line numberDiff line change
@@ -86,5 +86,6 @@ mod resolved;
8686
pub mod rmlvo;
8787
pub(crate) mod span;
8888
mod string_cooker;
89+
mod whitespace;
8990
#[cfg(feature = "x11")]
9091
pub mod x11;

kbvm/src/xkb/compose/lexer.rs

+2-7
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ use {
1010
diagnostic::{DiagnosticKind, DiagnosticSink},
1111
interner::Interner,
1212
span::{SpanExt, SpanUnit, Spanned},
13+
whitespace::consume_whitespace,
1314
},
1415
kbvm_proc::ad_hoc_display,
1516
std::sync::Arc,
@@ -111,13 +112,7 @@ impl LineLexer<'_, '_, '_> {
111112

112113
fn lex_one(&mut self) -> Result<One, Spanned<LexerError>> {
113114
use LexerError::*;
114-
while self.pos < self.code.len() {
115-
if matches!(self.code[self.pos], b' ' | b'\t' | b'\r') {
116-
self.pos += 1;
117-
} else {
118-
break;
119-
}
120-
}
115+
consume_whitespace(&mut self.pos, &self.code, true);
121116
let mut b = match self.code.get(self.pos) {
122117
Some(c) => *c,
123118
_ => return Ok(One::Eof),

kbvm/src/xkb/kccgst/lexer.rs

+2-7
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ use {
1414
Token::{self, Float, Ident, Integer, KeyName, String},
1515
},
1616
span::{Span, SpanExt, SpanUnit, Spanned},
17+
whitespace::consume_whitespace,
1718
},
1819
},
1920
std::{num::ParseFloatError, path::PathBuf, str::FromStr, sync::Arc},
@@ -125,13 +126,7 @@ impl ItemLexer<'_> {
125126
use LexerError::*;
126127
let mut b;
127128
loop {
128-
while self.pos < self.code.len() {
129-
if matches!(self.code[self.pos], b' ' | b'\t'..=b'\r') {
130-
self.pos += 1;
131-
} else {
132-
break;
133-
}
134-
}
129+
consume_whitespace(&mut self.pos, &self.code, false);
135130
match self.code.get(self.pos) {
136131
Some(c) => b = *c,
137132
_ => return Ok(None),

kbvm/src/xkb/rmlvo/lexer.rs

+2-7
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ use {
99
interner::Interner,
1010
rmlvo::token::Token,
1111
span::{SpanExt, SpanUnit, Spanned},
12+
whitespace::consume_whitespace,
1213
},
1314
std::{path::PathBuf, sync::Arc},
1415
thiserror::Error,
@@ -104,13 +105,7 @@ impl LineLexer<'_> {
104105
use LexerError::*;
105106
let mut b;
106107
loop {
107-
while self.pos < self.code.len() {
108-
if matches!(self.code[self.pos], b' ' | b'\t' | b'\r') {
109-
self.pos += 1;
110-
} else {
111-
break;
112-
}
113-
}
108+
consume_whitespace(&mut self.pos, &self.code, true);
114109
b = match self.code.get(self.pos) {
115110
Some(c) => *c,
116111
_ => return Ok(One::Eof),

kbvm/src/xkb/whitespace.rs

+51
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
/// Advances `*pos` past a run of insignificant whitespace in `c`.
///
/// The following characters (matched by their UTF-8 encoding) are skipped:
///
/// - U+0009 tab, U+000b vertical tab, U+000c form feed, U+000d carriage return
/// - U+0020 space
/// - U+0085 next line
/// - U+200e left-to-right mark, U+200f right-to-left mark
/// - U+2028 line separator, U+2029 paragraph separator
///
/// U+000a (line feed) is skipped only when `significant_newline` is `false`;
/// otherwise the scan stops in front of it so that the caller can see it.
///
/// If `*pos` is at or beyond the end of `c`, it is left unchanged.
pub(crate) fn consume_whitespace(pos: &mut usize, c: &[u8], significant_newline: bool) {
    let mut cursor = *pos;
    // `get` yields `None` for a start position past the end of the input, in
    // which case there is nothing to consume.
    while let Some(rest) = c.get(cursor..) {
        // Number of bytes occupied by the whitespace character at the cursor.
        let width = match rest {
            // U+0020 - space
            // U+0009 - tab
            [0x20 | 0x09, ..] => 1,
            // U+000a - line feed, which the caller wants to handle itself
            [0x0a, ..] if significant_newline => break,
            // U+000a - line feed
            // U+000b - vertical tab
            // U+000c - form feed
            // U+000d - carriage return
            [0x0a..=0x0d, ..] => 1,
            // U+0085 - next line
            [0xc2, 0x85, ..] => 2,
            // U+200e - left-to-right mark
            // U+200f - right-to-left mark
            // U+2028 - line separator
            // U+2029 - paragraph separator
            [0xe2, 0x80, 0x8e | 0x8f | 0xa8 | 0xa9, ..] => 3,
            // End of input, a truncated sequence, or a non-whitespace byte.
            _ => break,
        };
        cursor += width;
    }
    *pos = cursor;
}

0 commit comments

Comments
 (0)