From 9131a310e4081fba942cee23a42599805169ddc4 Mon Sep 17 00:00:00 2001 From: Juarez Rudsatz Date: Sun, 19 Jan 2025 17:26:54 -0300 Subject: [PATCH 1/4] column: handle OSC 8 hyperlink escape sequences --- text-utils/column.c | 70 +++++++++++++++++++++++++++++---------------- 1 file changed, 45 insertions(+), 25 deletions(-) diff --git a/text-utils/column.c b/text-utils/column.c index a0a94867f11..2524c984859 100644 --- a/text-utils/column.c +++ b/text-utils/column.c @@ -112,18 +112,19 @@ typedef enum { ANSI_ESC = 0x1b, ANSI_SGR = '[', ANSI_OSC = ']', - ANSI_APC = '_', - ANSI_BSL = '\\' + ANSI_LNK = '8', + ANSI_LBL = 0x7, + ANSI_END = '\\' } ansi_esc_states; /** * Count how many characters are non-printable due to ANSI X3.41 escape codes. * - * It detects and count only Fe Escape sequences. These sequences contains characters - * that normally are printable, but due to being part of a escape sequence are ignored - * when displayed in console terminals. + * It detects and counts Fe Escape and OSC 8 link sequences. These sequences contain + * characters that normally are printable, but due to being part of an escape sequence + * are ignored when displayed in console terminals. 
*/ -static inline size_t ansi_esc_width(ansi_esc_states *state, size_t *found, const wchar_t *str) +static inline size_t ansi_esc_width(ansi_esc_states *state, size_t *found, const wchar_t *str, int chw) { switch (*state) { case ANSI_CHR: @@ -144,7 +145,7 @@ static inline size_t ansi_esc_width(ansi_esc_states *state, size_t *found, const case '_': // APC - Application Program Command case 'P': // DCS - Device Control String case '^': // PM - Privacy Message - *state = ANSI_APC; + *state = ANSI_END; break; default: *state = ANSI_CHR; @@ -153,7 +154,7 @@ static inline size_t ansi_esc_width(ansi_esc_states *state, size_t *found, const *found = 1; return 0; case ANSI_SGR: - *found += 1; + *found += chw; // Fe escape sequences allows the range 0x30-0x3f // However SGR (Select Graphic Rendition) only uses: 0-9 ';' ':' if (*str >= '0' && *str <= '?') @@ -162,25 +163,45 @@ static inline size_t ansi_esc_width(ansi_esc_states *state, size_t *found, const if (*str <= '@' && *str >= '~') *found = 0; break; - case ANSI_APC: case ANSI_OSC: - *found += 1; -#ifdef HAVE_WIDECHAR - if (*str == 0x9c || *str == 0x7) // ends with ST (String Terminator) or BEL (\a) + *found += chw; + if (*str == ANSI_LNK) // OSC8-Link + *state = ANSI_LNK; + else + *state = ANSI_END; // other command sequences are ignored + return 0; + case ANSI_LNK: // OSC8 Terminal Hyperlink Sequence + switch (*str) { + case 0x7: // Separated by BEL + *state = ANSI_LBL; //# \e]8;;LINK\aTEXT\e]8;;\a # break; -#else - if (((unsigned char)*str) == 0x9c || *str == 0x7) + case 0x1b: // OSC8-Link separated by ESC BACKSLASH + *found += 2; + *state = ANSI_LBL; //# \e]8;;LINK\e\\TEXT\e]8;;\e\\ # break; -#endif - else if (*str == 0x1b) // ends with ESC BACKSLASH - *state = ANSI_BSL; + default: + *found += 1; + } + return 0; // ignore link width + case ANSI_LBL: + if (*str == 0x1b) { // Link label goes until ESC BACKSLASH + *found += chw; + *state = ANSI_END; + } return 0; - case ANSI_BSL: - if (*str == '\\') // ends with 
BACKSLASH + case ANSI_END: + switch (*str) { + case 0x1b: // APC/OSC8-Links ends with ESC BACKSLASH + *found += chw; break; - *found = 0; + case 0x7: // APC/OSC/OSC8-Links ends with BEL + case 0x9c: // APC/DCS/PM ends with ST (String Terminator) + break; + default: + *found += chw; return 0; } + } size_t res = *found; *state = ANSI_CHR; *found = 0; @@ -196,13 +217,12 @@ static size_t width(const wchar_t *str) for (; *str != '\0'; str++) { #ifdef HAVE_WIDECHAR int x = wcwidth(*str); /* don't use wcswidth(), need to ignore non-printable */ - if (x > 0) - count += x; #else - if (isprint(*str)) - count++; + int x = isprint(*str) ? 1 : 0; #endif - count -= ansi_esc_width(&state, &found, str); + int chw = x > 0 ? x : 0; + size_t nonpr = ansi_esc_width(&state, &found, str, chw); + count += chw - nonpr; } return count; } From 38266f3c1b5ceda351fad52a5e8fe9e427e9ac79 Mon Sep 17 00:00:00 2001 From: Juarez Rudsatz Date: Sun, 19 Jan 2025 17:28:07 -0300 Subject: [PATCH 2/4] column: test OSC 8 hyperlink escape sequences --- tests/expected/column/ansiescape | 7 ++++--- tests/ts/column/files/ansiescape | 3 ++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/expected/column/ansiescape b/tests/expected/column/ansiescape index bb2de8a0e0f..61a73501a77 100644 --- a/tests/expected/column/ansiescape +++ b/tests/expected/column/ansiescape @@ -1,3 +1,4 @@ -A1234567890a D123^[[1;48;5;3m456789^[[49m0d -B1^[[48;5;1m23456789^[[49m0b E12345^[[m67890e -C12^[[48;5;3m3456789^[[49m0c F^[]8;;http://x.com^[\LINK^[]8;;^[\f +A1234567890a E12345^[[m67890e +B1^[[48;5;1m23456789^[[49m0b F^[]8;;file://COPYING^GCOPYING^[]8;;^Gf +C12^[[48;5;3m3456789^[[49m0c G^[]8;;file://AUTHORS^[\AUTHORS^[]8;;^[\g +D123^[[1;48;5;3m456789^[[49m0d diff --git a/tests/ts/column/files/ansiescape b/tests/ts/column/files/ansiescape index 4ca85d46d61..df183eb194f 100644 --- a/tests/ts/column/files/ansiescape +++ b/tests/ts/column/files/ansiescape @@ -3,4 +3,5 @@ B1\033[48;5;1m23456789\033[49m0b 
C12\033[48;5;3m3456789\033[49m0c D123\033[1;48;5;3m456789\033[49m0d E12345\e[m67890e -F\e]8;;http://x.com\e\\LINK\e]8;;\e\\f +F\e]8;;file://COPYING\aCOPYING\e]8;;\af +G\e]8;;file://AUTHORS\e\\AUTHORS\e]8;;\e\\g From b787803e17de115bf6a618d2242ac2cca4f3a380 Mon Sep 17 00:00:00 2001 From: Juarez Rudsatz Date: Sun, 19 Jan 2025 19:10:53 -0300 Subject: [PATCH 3/4] column: handle ANSI SGR colors inside OSC 8 hyperlink escape codes --- text-utils/column.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/text-utils/column.c b/text-utils/column.c index 2524c984859..85a303455c3 100644 --- a/text-utils/column.c +++ b/text-utils/column.c @@ -114,6 +114,8 @@ typedef enum { ANSI_OSC = ']', ANSI_LNK = '8', ANSI_LBL = 0x7, + ANSI_LSP = ';', + ANSI_LSG = 'M', ANSI_END = '\\' } ansi_esc_states; @@ -175,23 +177,35 @@ static inline size_t ansi_esc_width(ansi_esc_states *state, size_t *found, const case 0x7: // Separated by BEL *state = ANSI_LBL; //# \e]8;;LINK\aTEXT\e]8;;\a # break; - case 0x1b: // OSC8-Link separated by ESC BACKSLASH + case 0x1b: // OSC8-Link separated by ESC-BACKSLASH *found += 2; *state = ANSI_LBL; //# \e]8;;LINK\e\\TEXT\e]8;;\e\\ # break; default: - *found += 1; + *found += chw; } return 0; // ignore link width case ANSI_LBL: if (*str == 0x1b) { // Link label goes until ESC BACKSLASH *found += chw; - *state = ANSI_END; + *state = ANSI_LSP; } return 0; + case ANSI_LSP: + *found += chw; + if (*str == '[') // SGR FG/BG colors nested inside OSC8-Link sequence + *state = ANSI_LSG; + else + *state = ANSI_END; //# Link label ends with \e]8;;\e\\ # + return 0; + case ANSI_LSG: //# \e]8;;LINK\e\\\e[1;34mTEXT\e[0m\e]8;;\e\\ # + *found += chw; + if (*str < '0' || *str > '?') // SGR color sequence ends with 'm' + *state = ANSI_LBL; + return 0; case ANSI_END: switch (*str) { - case 0x1b: // APC/OSC8-Links ends with ESC BACKSLASH + case 0x1b: // APC/OSC8-Links ends with ESC-BACKSLASH *found += chw; break; case 0x7: // APC/OSC/OSC8-Links 
ends with BEL From 25ec10771f7e11003e32a783dd5b60871346e36c Mon Sep 17 00:00:00 2001 From: Juarez Rudsatz Date: Sun, 19 Jan 2025 19:11:35 -0300 Subject: [PATCH 4/4] column: test ANSI SGR colors inside OSC 8 hyperlink escape codes --- tests/expected/column/ansiescape | 2 +- tests/ts/column/files/ansiescape | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/expected/column/ansiescape b/tests/expected/column/ansiescape index 61a73501a77..670b4f77895 100644 --- a/tests/expected/column/ansiescape +++ b/tests/expected/column/ansiescape @@ -1,4 +1,4 @@ A1234567890a E12345^[[m67890e B1^[[48;5;1m23456789^[[49m0b F^[]8;;file://COPYING^GCOPYING^[]8;;^Gf C12^[[48;5;3m3456789^[[49m0c G^[]8;;file://AUTHORS^[\AUTHORS^[]8;;^[\g -D123^[[1;48;5;3m456789^[[49m0d +D123^[[1;48;5;3m456789^[[49m0d H^[]8;;file://ABOUT-NLS^[\^[[1;34mABOUT-NLS^[[0m^[]8;;^[\h diff --git a/tests/ts/column/files/ansiescape b/tests/ts/column/files/ansiescape index df183eb194f..995bf1579ea 100644 --- a/tests/ts/column/files/ansiescape +++ b/tests/ts/column/files/ansiescape @@ -5,3 +5,4 @@ D123\033[1;48;5;3m456789\033[49m0d E12345\e[m67890e F\e]8;;file://COPYING\aCOPYING\e]8;;\af G\e]8;;file://AUTHORS\e\\AUTHORS\e]8;;\e\\g +H\e]8;;file://ABOUT-NLS\e\\\e[1;34mABOUT-NLS\e[0m\e]8;;\e\\h