Skip to content

Commit

Permalink
Merge branch 'column-handle-osc8-links' of https://github.com/juarezr…
Browse files Browse the repository at this point in the history
…/util-linux

* 'column-handle-osc8-links' of https://github.com/juarezr/util-linux:
  column: test ANSI SGR colors inside OSC 8 hyperlink escape codes
  column: handle ANSI SGR colors inside OSC 8 hyperlink escape codes
  column: test OSC 8 hyperlink escape sequences
  column: handle OSC 8 hyperlink escape sequences
  • Loading branch information
karelzak committed Jan 22, 2025
2 parents ed14684 + 25ec107 commit eba4f37
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 29 deletions.
7 changes: 4 additions & 3 deletions tests/expected/column/ansiescape
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
A1234567890a D123^[[1;48;5;3m456789^[[49m0d
B1^[[48;5;1m23456789^[[49m0b E12345^[[m67890e
C12^[[48;5;3m3456789^[[49m0c F^[]8;;http://x.com^[\LINK^[]8;;^[\f
A1234567890a E12345^[[m67890e
B1^[[48;5;1m23456789^[[49m0b F^[]8;;file://COPYING^GCOPYING^[]8;;^Gf
C12^[[48;5;3m3456789^[[49m0c G^[]8;;file://AUTHORS^[\AUTHORS^[]8;;^[\g
D123^[[1;48;5;3m456789^[[49m0d H^[]8;;file://ABOUT-NLS^[\^[[1;34mABOUT-NLS^[[0m^[]8;;^[\h
4 changes: 3 additions & 1 deletion tests/ts/column/files/ansiescape
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,6 @@ B1\033[48;5;1m23456789\033[49m0b
C12\033[48;5;3m3456789\033[49m0c
D123\033[1;48;5;3m456789\033[49m0d
E12345\e[m67890e
F\e]8;;http://x.com\e\\LINK\e]8;;\e\\f
F\e]8;;file://COPYING\aCOPYING\e]8;;\af
G\e]8;;file://AUTHORS\e\\AUTHORS\e]8;;\e\\g
H\e]8;;file://ABOUT-NLS\e\\\e[1;34mABOUT-NLS\e[0m\e]8;;\e\\h
84 changes: 59 additions & 25 deletions text-utils/column.c
Original file line number Diff line number Diff line change
Expand Up @@ -112,18 +112,21 @@ typedef enum {
ANSI_ESC = 0x1b,
ANSI_SGR = '[',
ANSI_OSC = ']',
ANSI_APC = '_',
ANSI_BSL = '\\'
ANSI_LNK = '8',
ANSI_LBL = 0x7,
ANSI_LSP = ';',
ANSI_LSG = 'M',
ANSI_END = '\\'
} ansi_esc_states;

/**
* Count how many characters are non-printable due to ANSI X3.41 escape codes.
*
* It detects and counts only Fe escape sequences. These sequences contain characters
* that are normally printable but, because they are part of an escape sequence, are
* not rendered when displayed in console terminals.
* It detects and counts Fe escape sequences and OSC 8 hyperlink sequences. These
* sequences contain characters that are normally printable but, because they are part
* of an escape sequence, are not rendered when displayed in console terminals.
*/
static inline size_t ansi_esc_width(ansi_esc_states *state, size_t *found, const wchar_t *str)
static inline size_t ansi_esc_width(ansi_esc_states *state, size_t *found, const wchar_t *str, int chw)
{
switch (*state) {
case ANSI_CHR:
Expand All @@ -144,7 +147,7 @@ static inline size_t ansi_esc_width(ansi_esc_states *state, size_t *found, const
case '_': // APC - Application Program Command
case 'P': // DCS - Device Control String
case '^': // PM - Privacy Message
*state = ANSI_APC;
*state = ANSI_END;
break;
default:
*state = ANSI_CHR;
Expand All @@ -153,7 +156,7 @@ static inline size_t ansi_esc_width(ansi_esc_states *state, size_t *found, const
*found = 1;
return 0;
case ANSI_SGR:
*found += 1;
*found += chw;
// Fe escape sequences allows the range 0x30-0x3f
// However SGR (Select Graphic Rendition) only uses: 0-9 ';' ':'
if (*str >= '0' && *str <= '?')
Expand All @@ -162,25 +165,57 @@ static inline size_t ansi_esc_width(ansi_esc_states *state, size_t *found, const
if (*str <= '@' && *str >= '~')
*found = 0;
break;
case ANSI_APC:
case ANSI_OSC:
*found += 1;
#ifdef HAVE_WIDECHAR
if (*str == 0x9c || *str == 0x7) // ends with ST (String Terminator) or BEL (\a)
*found += chw;
if (*str == ANSI_LNK) // OSC8-Link
*state = ANSI_LNK;
else
*state = ANSI_END; // other command sequences are ignored
return 0;
case ANSI_LNK: // OSC8 Terminal Hiperlink Sequence
switch (*str) {
case 0x7: // Separated by BEL
*state = ANSI_LBL; //# \e]8;;LINK\aTEXT\e]8;;\a #
break;
#else
if (((unsigned char)*str) == 0x9c || *str == 0x7)
case 0x1b: // OSC8-Link separated by ESC-BACKSLASH
*found += 2;
*state = ANSI_LBL; //# \e]8;;LINK\e\\TEXT\e]8;;\e\\ #
break;
#endif
else if (*str == 0x1b) // ends with ESC BACKSLASH
*state = ANSI_BSL;
default:
*found += chw;
}
return 0; // ignore link width
case ANSI_LBL:
if (*str == 0x1b) { // Link label goes until ESC BACKSLASH
*found += chw;
*state = ANSI_LSP;
}
return 0;
case ANSI_LSP:
*found += chw;
if (*str == '[') // SGR FG/BG colors nested inside OSC8-Link sequence
*state = ANSI_LSG;
else
*state = ANSI_END; //# Link label ends with \e[8;;\e\\ #
return 0;
case ANSI_LSG: //# \e]8;;LINK\e\\\e[1;34mTEXT\e[0m\e]8;;\e\\ #
*found += chw;
if (*str < '0' || *str > '?') // SGR color sequence ends with 'm'
*state = ANSI_LBL;
return 0;
case ANSI_BSL:
if (*str == '\\') // ends with BACKSLASH
case ANSI_END:
switch (*str) {
case 0x1b: // APC/OSC8-Links ends with ESC-BACKSLASH
*found += chw;
break;
case 0x7: // APC/OSC/OSC8-Links ends with BEL
case 0x9c: // APC/DCS/DM ends with ST (String Terminator)
break;
*found = 0;
default:
*found += chw;
return 0;
}
}
size_t res = *found;
*state = ANSI_CHR;
*found = 0;
Expand All @@ -196,13 +231,12 @@ static size_t width(const wchar_t *str)
for (; *str != '\0'; str++) {
#ifdef HAVE_WIDECHAR
int x = wcwidth(*str); /* don't use wcswidth(), need to ignore non-printable */
if (x > 0)
count += x;
#else
if (isprint(*str))
count++;
int x = isprint(*str) ? 1 : 0;
#endif
count -= ansi_esc_width(&state, &found, str);
int chw = x > 0 ? x : 0;
size_t nonpr = ansi_esc_width(&state, &found, str, chw);
count += chw - nonpr;
}
return count;
}
Expand Down

0 comments on commit eba4f37

Please sign in to comment.