From 9fa86512f3f37cc35e73620990e08e981cb06dc0 Mon Sep 17 00:00:00 2001 From: Jason Hood Date: Tue, 18 Feb 2014 21:34:39 +1000 Subject: [PATCH] Better parsing of escape & CSI sequences. Recognise general Parameter & Intermediate Bytes, rather than testing specific characters. Explicitly ignore xterm 38 & 48 SGR values. --- ANSI.c | 94 +++++++++++++++++++++++++++++++++++++++++++-------- ansicon.c | 2 +- readme.txt | 23 ++++++++----- sequences.txt | 1 + 4 files changed, 97 insertions(+), 23 deletions(-) diff --git a/ANSI.c b/ANSI.c index a983415..1c3940d 100644 --- a/ANSI.c +++ b/ANSI.c @@ -111,7 +111,7 @@ v1.66, 20 & 21 September, 2013: fix 32-bit process trying to detect 64-bit process. - v1.70, 25 January to 10 February, 2014: + v1.70, 25 January to 18 February, 2014: don't hook ourself from LoadLibrary or LoadLibraryEx; update the LoadLibraryEx flags that should not cause hooking; inject by manipulating the import directory table; for 64-bit AnyCPU use @@ -123,7 +123,10 @@ exclude entire programs, by not using an extension in ANSICON_EXC; hook modules injected via CreateRemoteThread+LoadLibrary; hook all modules loaded due to LoadLibrary, not just the specified; - don't hook a module that's already hooked us. + don't hook a module that's already hooked us; + better parsing of escape & CSI sequences; + ignore xterm 38 & 48 SGR values; + change G1 blank from space to U+00A0 - No-Break Space. */ #include "ansicon.h" @@ -147,6 +150,7 @@ int state; // automata state TCHAR prefix; // escape sequence prefix ( '[', ']' or '(' ); TCHAR prefix2; // secondary prefix ( '?' or '>' ); TCHAR suffix; // escape sequence suffix +TCHAR suffix2; // escape sequence secondary suffix int es_argc; // escape sequence args count int es_argv[MAX_ARG]; // escape sequence args TCHAR Pt_arg[MAX_PATH*2]; // text parameter for Operating System Command @@ -160,7 +164,7 @@ BOOL shifted; // particular, the Control Pictures probably won't work at all). const WCHAR G1[] = { - ' ', // _ - blank + L'\x00a0', // _ - No-Break Space L'\x2666', // ` - Black Diamond Suit L'\x2592', // a - Medium Shade L'\x2409', // b - HT @@ -481,7 +485,7 @@ void InterpretEscSeq( void ) return; } } - // Ignore any other \e[? or \e[> sequences. + // Ignore any other private sequences. if (prefix2 != 0) return; @@ -494,9 +498,28 @@ void InterpretEscSeq( void ) for (i = 0; i < es_argc; i++) { if (30 <= es_argv[i] && es_argv[i] <= 37) + { pState->foreground = es_argv[i] - 30; + } else if (40 <= es_argv[i] && es_argv[i] <= 47) + { pState->background = es_argv[i] - 40; + } + else if (es_argv[i] == 38 || es_argv[i] == 48) + { + // This is technically incorrect, but it's what xterm does, so + // that's what we do. According to T.416 (ISO 8613-6), there is + // only one parameter, which is divided into elements. So where + // xterm does "38;2;R;G;B" it should really be "38;2:I:R:G:B" (I is + // a colour space identifier). + if (i+1 < es_argc) + { + if (es_argv[i+1] == 2) // rgb + i += 4; + else if (es_argv[i+1] == 5) // index + i += 2; + } + } else switch (es_argv[i]) { case 0: @@ -888,11 +911,12 @@ void InterpretEscSeq( void ) } else // (prefix == ']') { - // Ignore any \e]? or \e]> sequences. + // Ignore any "private" sequences. if (prefix2 != 0) return; - if (es_argc == 1 && es_argv[0] == 0) // ESC]0;titleST + if (es_argc == 1 && (es_argv[0] == 0 || // ESC]0;titleST - icon (ignored) & + es_argv[0] == 2)) // ESC]2;titleST - window { SetConsoleTitle( Pt_arg ); } @@ -928,24 +952,40 @@ ParseAndPrintString( HANDLE hDev, { if (state == 1) { - if (*s == ESC) state = 2; + if (*s == ESC) + { + suffix2 = 0; + state = 2; + } else if (*s == SO) shifted = TRUE; else if (*s == SI) shifted = FALSE; else PushBuffer( *s ); } else if (state == 2) { - if (*s == ESC) ; // \e\e...\e == \e - else if ((*s == '[') || (*s == ']')) + if (*s == ESC) ; // \e\e...\e == \e + else if (*s >= '\x20' && *s <= '\x2f') + suffix2 = *s; + else if (suffix2 != 0) + state = 1; + else if (*s == '[' || // CSI Control Sequence Introducer + *s == ']') // OSC Operating System Command { FlushBuffer(); prefix = *s; prefix2 = 0; - state = 3; Pt_len = 0; *Pt_arg = '\0'; + state = 3; + } + else if (*s == 'P' || // DCS Device Control String + *s == 'X' || // SOS Start Of String + *s == '^' || // PM Privacy Message + *s == '_') // APC Application Program Command + { + *Pt_arg = '\0'; + state = 6; } - else if (*s == ')' || *s == '(') state = 6; else state = 1; } else if (state == 3) @@ -963,10 +1003,22 @@ ParseAndPrintString( HANDLE hDev, es_argv[1] = 0; state = 4; } - else if (*s == '?' || *s == '>') + else if (*s == ':') + { + // ignore it + } + else if (*s >= '\x3b' && *s <= '\x3f') { prefix2 = *s; } + else if (*s >= '\x20' && *s <= '\x2f') + { + suffix2 = *s; + } + else if (suffix2 != 0) + { + state = 1; + } else { es_argc = 0; @@ -988,6 +1040,18 @@ ParseAndPrintString( HANDLE hDev, if (prefix == ']') state = 5; } + else if (*s >= '\x3a' && *s <= '\x3f') + { + // ignore 'em + } + else if (*s >= '\x20' && *s <= '\x2f') + { + suffix2 = *s; + } + else if (suffix2 != 0) + { + state = 1; + } else { es_argc++; @@ -1015,8 +1079,10 @@ ParseAndPrintString( HANDLE hDev, } else if (state == 6) { - // Ignore it (ESC ) 0 is implicit; nothing else is supported). - state = 1; + if (*s == BEL || (*s == '\\' && *Pt_arg == ESC)) + state = 1; + else + *Pt_arg = *s; } } FlushBuffer(); diff --git a/ansicon.c b/ansicon.c index 77391a7..a85fb26 100644 --- a/ansicon.c +++ b/ansicon.c @@ -87,7 +87,7 @@ add error codes to some message. */ -#define PDATE L"10 February, 2014" +#define PDATE L"18 February, 2014" #include "ansicon.h" #include "version.h" diff --git a/readme.txt b/readme.txt index c3dd787..a0e2df3 100644 --- a/readme.txt +++ b/readme.txt @@ -95,8 +95,11 @@ Usage The log option will not work with '-p'; set the environment variable ANSICON_LOG (to the number) instead. The variable is only read once when a process is started; changing it won't affect running processes. If you - identify a module that causes problems, add it to the ANSICON_EXC environ- - ment variable (see ANSICON_API below, but the extension is required). + identify a program or module that causes problems, add it to the + ANSICON_EXC environment variable (see ANSICON_API below; add the extension + to exclude a single module). Be aware that excluding a program will also + exclude any programs it creates (alghough excluding "program.exe" may still + hook created programs run through its DLLs). E.g.: 'ansicon -l5' will start a new command processor, logging every pro- cess it starts along with their output. @@ -144,7 +147,8 @@ Sequences Recognised The following escape sequences are recognised. - \e]0;titleBEL Set (xterm) window's title (and icon) + \e]0;titleBEL Set (xterm) window's title (and icon, ignored) + \e]2;titleBEL Set (xterm) window's title \e[21t Report (xterm) window's title \e[s Save Cursor \e[u Restore Cursor @@ -214,7 +218,7 @@ DEC Special Graphics Character Set Char Unicode Code Point & Name ---- ------------------------- - _ U+0020 Space (blank) + _ U+00A0 No-Break Space (blank) ` U+2666 Black Diamond Suit a U+2592 Medium Shade b U+2409 Symbol For Horizontal Tabulation @@ -273,17 +277,20 @@ Version History Legend: + added, - bug-fixed, * changed. - 1.70 - 8 February, 2014: + 1.70 - 18 February, 2014: - don't hook again if using LoadLibrary or LoadLibraryEx; - update the LoadLibraryEx flags that shouldn't hook; - restore original attributes on detach (for LoadLibrary/FreeLibrary usage); - ansicon.exe will start with ANSICON_DEF (if defined and -m not used); - an installed ansicon.exe will restore current (not default) attributes; - attributes and saved position are local to each console window; + - improved recognition of unsupported sequences; * inject into a created process by modifying the import descriptor table (-p will use CreateRemoteThread); * log: remove the quotes around the CreateProcess command line; - add an underscore in 64-bit addresses to distinguish 8-digit groups. + add an underscore in 64-bit addresses to distinguish 8-digit groups; + * ANSICON_EXC can exclude entire programs; + * switch G1 blank from space (U+0020) to No-Break Space (U+00A0). 1.66 - 20 September, 2013: - fix 32-bit process trying to detect 64-bit process. @@ -470,5 +477,5 @@ Distribution in LICENSE.txt. -============================= -Jason Hood, 8 February, 2014. +============================== +Jason Hood, 18 February, 2014. diff --git a/sequences.txt b/sequences.txt index f994445..d62aae1 100644 --- a/sequences.txt +++ b/sequences.txt @@ -114,5 +114,6 @@ roughly ordered by function. The initial escape character is assumed. [6n sends "\e[#;#R" (line & column) to console input [21t sends "\e]lTitle\e\" (the console's window title) to console input ]0;TitleST +]2;TitleST sets the console title to "Title"; ST (string terminator) is either character 7 (BEL) or escape and backslash