Better parsing of escape & CSI sequences.

Recognise general Parameter & Intermediate Bytes, rather than testing
specific characters.  Explicitly ignore xterm 38 & 48 SGR values.
This commit is contained in:
Jason Hood 2014-02-18 21:34:39 +10:00
parent bd696b55c8
commit 9fa86512f3
4 changed files with 97 additions and 23 deletions

94
ANSI.c
View File

@ -111,7 +111,7 @@
v1.66, 20 & 21 September, 2013: v1.66, 20 & 21 September, 2013:
fix 32-bit process trying to detect 64-bit process. fix 32-bit process trying to detect 64-bit process.
v1.70, 25 January to 10 February, 2014: v1.70, 25 January to 18 February, 2014:
don't hook ourself from LoadLibrary or LoadLibraryEx; don't hook ourself from LoadLibrary or LoadLibraryEx;
update the LoadLibraryEx flags that should not cause hooking; update the LoadLibraryEx flags that should not cause hooking;
inject by manipulating the import directory table; for 64-bit AnyCPU use inject by manipulating the import directory table; for 64-bit AnyCPU use
@ -123,7 +123,10 @@
exclude entire programs, by not using an extension in ANSICON_EXC; exclude entire programs, by not using an extension in ANSICON_EXC;
hook modules injected via CreateRemoteThread+LoadLibrary; hook modules injected via CreateRemoteThread+LoadLibrary;
hook all modules loaded due to LoadLibrary, not just the specified; hook all modules loaded due to LoadLibrary, not just the specified;
don't hook a module that's already hooked us. don't hook a module that's already hooked us;
better parsing of escape & CSI sequences;
ignore xterm 38 & 48 SGR values;
change G1 blank from space to U+00A0 - No-Break Space.
*/ */
#include "ansicon.h" #include "ansicon.h"
@ -147,6 +150,7 @@ int state; // automata state
TCHAR prefix; // escape sequence prefix ( '[', ']' or '(' ); TCHAR prefix; // escape sequence prefix ( '[', ']' or '(' );
TCHAR prefix2; // secondary prefix ( '?' or '>' ); TCHAR prefix2; // secondary prefix ( '?' or '>' );
TCHAR suffix; // escape sequence suffix TCHAR suffix; // escape sequence suffix
TCHAR suffix2; // escape sequence secondary suffix
int es_argc; // escape sequence args count int es_argc; // escape sequence args count
int es_argv[MAX_ARG]; // escape sequence args int es_argv[MAX_ARG]; // escape sequence args
TCHAR Pt_arg[MAX_PATH*2]; // text parameter for Operating System Command TCHAR Pt_arg[MAX_PATH*2]; // text parameter for Operating System Command
@ -160,7 +164,7 @@ BOOL shifted;
// particular, the Control Pictures probably won't work at all). // particular, the Control Pictures probably won't work at all).
const WCHAR G1[] = const WCHAR G1[] =
{ {
' ', // _ - blank L'\x00a0', // _ - No-Break Space
L'\x2666', // ` - Black Diamond Suit L'\x2666', // ` - Black Diamond Suit
L'\x2592', // a - Medium Shade L'\x2592', // a - Medium Shade
L'\x2409', // b - HT L'\x2409', // b - HT
@ -481,7 +485,7 @@ void InterpretEscSeq( void )
return; return;
} }
} }
// Ignore any other \e[? or \e[> sequences. // Ignore any other private sequences.
if (prefix2 != 0) if (prefix2 != 0)
return; return;
@ -494,9 +498,28 @@ void InterpretEscSeq( void )
for (i = 0; i < es_argc; i++) for (i = 0; i < es_argc; i++)
{ {
if (30 <= es_argv[i] && es_argv[i] <= 37) if (30 <= es_argv[i] && es_argv[i] <= 37)
{
pState->foreground = es_argv[i] - 30; pState->foreground = es_argv[i] - 30;
}
else if (40 <= es_argv[i] && es_argv[i] <= 47) else if (40 <= es_argv[i] && es_argv[i] <= 47)
{
pState->background = es_argv[i] - 40; pState->background = es_argv[i] - 40;
}
else if (es_argv[i] == 38 || es_argv[i] == 48)
{
// This is technically incorrect, but it's what xterm does, so
// that's what we do. According to T.416 (ISO 8613-6), there is
// only one parameter, which is divided into elements. So where
// xterm does "38;2;R;G;B" it should really be "38;2:I:R:G:B" (I is
// a colour space identifier).
if (i+1 < es_argc)
{
if (es_argv[i+1] == 2) // rgb
i += 4;
else if (es_argv[i+1] == 5) // index
i += 2;
}
}
else switch (es_argv[i]) else switch (es_argv[i])
{ {
case 0: case 0:
@ -888,11 +911,12 @@ void InterpretEscSeq( void )
} }
else // (prefix == ']') else // (prefix == ']')
{ {
// Ignore any \e]? or \e]> sequences. // Ignore any "private" sequences.
if (prefix2 != 0) if (prefix2 != 0)
return; return;
if (es_argc == 1 && es_argv[0] == 0) // ESC]0;titleST if (es_argc == 1 && (es_argv[0] == 0 || // ESC]0;titleST - icon (ignored) &
es_argv[0] == 2)) // ESC]2;titleST - window
{ {
SetConsoleTitle( Pt_arg ); SetConsoleTitle( Pt_arg );
} }
@ -928,24 +952,40 @@ ParseAndPrintString( HANDLE hDev,
{ {
if (state == 1) if (state == 1)
{ {
if (*s == ESC) state = 2; if (*s == ESC)
{
suffix2 = 0;
state = 2;
}
else if (*s == SO) shifted = TRUE; else if (*s == SO) shifted = TRUE;
else if (*s == SI) shifted = FALSE; else if (*s == SI) shifted = FALSE;
else PushBuffer( *s ); else PushBuffer( *s );
} }
else if (state == 2) else if (state == 2)
{ {
if (*s == ESC) ; // \e\e...\e == \e if (*s == ESC) ; // \e\e...\e == \e
else if ((*s == '[') || (*s == ']')) else if (*s >= '\x20' && *s <= '\x2f')
suffix2 = *s;
else if (suffix2 != 0)
state = 1;
else if (*s == '[' || // CSI Control Sequence Introducer
*s == ']') // OSC Operating System Command
{ {
FlushBuffer(); FlushBuffer();
prefix = *s; prefix = *s;
prefix2 = 0; prefix2 = 0;
state = 3;
Pt_len = 0; Pt_len = 0;
*Pt_arg = '\0'; *Pt_arg = '\0';
state = 3;
}
else if (*s == 'P' || // DCS Device Control String
*s == 'X' || // SOS Start Of String
*s == '^' || // PM Privacy Message
*s == '_') // APC Application Program Command
{
*Pt_arg = '\0';
state = 6;
} }
else if (*s == ')' || *s == '(') state = 6;
else state = 1; else state = 1;
} }
else if (state == 3) else if (state == 3)
@ -963,10 +1003,22 @@ ParseAndPrintString( HANDLE hDev,
es_argv[1] = 0; es_argv[1] = 0;
state = 4; state = 4;
} }
else if (*s == '?' || *s == '>') else if (*s == ':')
{
// ignore it
}
else if (*s >= '\x3b' && *s <= '\x3f')
{ {
prefix2 = *s; prefix2 = *s;
} }
else if (*s >= '\x20' && *s <= '\x2f')
{
suffix2 = *s;
}
else if (suffix2 != 0)
{
state = 1;
}
else else
{ {
es_argc = 0; es_argc = 0;
@ -988,6 +1040,18 @@ ParseAndPrintString( HANDLE hDev,
if (prefix == ']') if (prefix == ']')
state = 5; state = 5;
} }
else if (*s >= '\x3a' && *s <= '\x3f')
{
// ignore 'em
}
else if (*s >= '\x20' && *s <= '\x2f')
{
suffix2 = *s;
}
else if (suffix2 != 0)
{
state = 1;
}
else else
{ {
es_argc++; es_argc++;
@ -1015,8 +1079,10 @@ ParseAndPrintString( HANDLE hDev,
} }
else if (state == 6) else if (state == 6)
{ {
// Ignore it (ESC ) 0 is implicit; nothing else is supported). if (*s == BEL || (*s == '\\' && *Pt_arg == ESC))
state = 1; state = 1;
else
*Pt_arg = *s;
} }
} }
FlushBuffer(); FlushBuffer();

View File

@ -87,7 +87,7 @@
add error codes to some messages. add error codes to some messages.
*/ */
#define PDATE L"10 February, 2014" #define PDATE L"18 February, 2014"
#include "ansicon.h" #include "ansicon.h"
#include "version.h" #include "version.h"

View File

@ -95,8 +95,11 @@ Usage
The log option will not work with '-p'; set the environment variable The log option will not work with '-p'; set the environment variable
ANSICON_LOG (to the number) instead. The variable is only read once when a ANSICON_LOG (to the number) instead. The variable is only read once when a
process is started; changing it won't affect running processes. If you process is started; changing it won't affect running processes. If you
identify a module that causes problems, add it to the ANSICON_EXC environ- identify a program or module that causes problems, add it to the
ment variable (see ANSICON_API below, but the extension is required). ANSICON_EXC environment variable (see ANSICON_API below; add the extension
to exclude a single module). Be aware that excluding a program will also
exclude any programs it creates (although excluding "program.exe" may still
hook created programs run through its DLLs).
E.g.: 'ansicon -l5' will start a new command processor, logging every pro- E.g.: 'ansicon -l5' will start a new command processor, logging every pro-
cess it starts along with their output. cess it starts along with their output.
@ -144,7 +147,8 @@ Sequences Recognised
The following escape sequences are recognised. The following escape sequences are recognised.
\e]0;titleBEL Set (xterm) window's title (and icon) \e]0;titleBEL Set (xterm) window's title (and icon, ignored)
\e]2;titleBEL Set (xterm) window's title
\e[21t Report (xterm) window's title \e[21t Report (xterm) window's title
\e[s Save Cursor \e[s Save Cursor
\e[u Restore Cursor \e[u Restore Cursor
@ -214,7 +218,7 @@ DEC Special Graphics Character Set
Char Unicode Code Point & Name Char Unicode Code Point & Name
---- ------------------------- ---- -------------------------
_ U+0020 Space (blank) _ U+00A0 No-Break Space (blank)
` U+2666 Black Diamond Suit ` U+2666 Black Diamond Suit
a U+2592 Medium Shade a U+2592 Medium Shade
b U+2409 Symbol For Horizontal Tabulation b U+2409 Symbol For Horizontal Tabulation
@ -273,17 +277,20 @@ Version History
Legend: + added, - bug-fixed, * changed. Legend: + added, - bug-fixed, * changed.
1.70 - 8 February, 2014: 1.70 - 18 February, 2014:
- don't hook again if using LoadLibrary or LoadLibraryEx; - don't hook again if using LoadLibrary or LoadLibraryEx;
- update the LoadLibraryEx flags that shouldn't hook; - update the LoadLibraryEx flags that shouldn't hook;
- restore original attributes on detach (for LoadLibrary/FreeLibrary usage); - restore original attributes on detach (for LoadLibrary/FreeLibrary usage);
- ansicon.exe will start with ANSICON_DEF (if defined and -m not used); - ansicon.exe will start with ANSICON_DEF (if defined and -m not used);
- an installed ansicon.exe will restore current (not default) attributes; - an installed ansicon.exe will restore current (not default) attributes;
- attributes and saved position are local to each console window; - attributes and saved position are local to each console window;
- improved recognition of unsupported sequences;
* inject into a created process by modifying the import descriptor table * inject into a created process by modifying the import descriptor table
(-p will use CreateRemoteThread); (-p will use CreateRemoteThread);
* log: remove the quotes around the CreateProcess command line; * log: remove the quotes around the CreateProcess command line;
add an underscore in 64-bit addresses to distinguish 8-digit groups. add an underscore in 64-bit addresses to distinguish 8-digit groups;
* ANSICON_EXC can exclude entire programs;
* switch G1 blank from space (U+0020) to No-Break Space (U+00A0).
1.66 - 20 September, 2013: 1.66 - 20 September, 2013:
- fix 32-bit process trying to detect 64-bit process. - fix 32-bit process trying to detect 64-bit process.
@ -470,5 +477,5 @@ Distribution
in LICENSE.txt. in LICENSE.txt.
============================= ==============================
Jason Hood, 8 February, 2014. Jason Hood, 18 February, 2014.

View File

@ -114,5 +114,6 @@ roughly ordered by function. The initial escape character is assumed.
[6n sends "\e[#;#R" (line & column) to console input [6n sends "\e[#;#R" (line & column) to console input
[21t sends "\e]lTitle\e\" (the console's window title) to console input [21t sends "\e]lTitle\e\" (the console's window title) to console input
]0;TitleST ]0;TitleST
]2;TitleST
sets the console title to "Title"; ST (string terminator) is either sets the console title to "Title"; ST (string terminator) is either
character 7 (BEL) or escape and backslash character 7 (BEL) or escape and backslash