Better parsing of escape & CSI sequences.

Recognise general Parameter & Intermediate Bytes, rather than testing
specific characters.  Explicitly ignore xterm 38 & 48 SGR values.
This commit is contained in:
Jason Hood 2014-02-18 21:34:39 +10:00
parent bd696b55c8
commit 9fa86512f3
4 changed files with 97 additions and 23 deletions

90
ANSI.c
View File

@ -111,7 +111,7 @@
v1.66, 20 & 21 September, 2013:
fix 32-bit process trying to detect 64-bit process.
v1.70, 25 January to 10 February, 2014:
v1.70, 25 January to 18 February, 2014:
don't hook ourself from LoadLibrary or LoadLibraryEx;
update the LoadLibraryEx flags that should not cause hooking;
inject by manipulating the import directory table; for 64-bit AnyCPU use
@ -123,7 +123,10 @@
exclude entire programs, by not using an extension in ANSICON_EXC;
hook modules injected via CreateRemoteThread+LoadLibrary;
hook all modules loaded due to LoadLibrary, not just the specified;
don't hook a module that's already hooked us.
don't hook a module that's already hooked us;
better parsing of escape & CSI sequences;
ignore xterm 38 & 48 SGR values;
change G1 blank from space to U+00A0 - No-Break Space.
*/
#include "ansicon.h"
@ -147,6 +150,7 @@ int state; // automata state
TCHAR prefix; // escape sequence prefix ( '[', ']' or '(' );
TCHAR prefix2; // secondary prefix ( '?' or '>' );
TCHAR suffix; // escape sequence suffix
TCHAR suffix2; // escape sequence secondary suffix
int es_argc; // escape sequence args count
int es_argv[MAX_ARG]; // escape sequence args
TCHAR Pt_arg[MAX_PATH*2]; // text parameter for Operating System Command
@ -160,7 +164,7 @@ BOOL shifted;
// particular, the Control Pictures probably won't work at all).
const WCHAR G1[] =
{
' ', // _ - blank
L'\x00a0', // _ - No-Break Space
L'\x2666', // ` - Black Diamond Suit
L'\x2592', // a - Medium Shade
L'\x2409', // b - HT
@ -481,7 +485,7 @@ void InterpretEscSeq( void )
return;
}
}
// Ignore any other \e[? or \e[> sequences.
// Ignore any other private sequences.
if (prefix2 != 0)
return;
@ -494,9 +498,28 @@ void InterpretEscSeq( void )
for (i = 0; i < es_argc; i++)
{
if (30 <= es_argv[i] && es_argv[i] <= 37)
{
pState->foreground = es_argv[i] - 30;
}
else if (40 <= es_argv[i] && es_argv[i] <= 47)
{
pState->background = es_argv[i] - 40;
}
else if (es_argv[i] == 38 || es_argv[i] == 48)
{
// This is technically incorrect, but it's what xterm does, so
// that's what we do. According to T.416 (ISO 8613-6), there is
// only one parameter, which is divided into elements. So where
// xterm does "38;2;R;G;B" it should really be "38:2:I:R:G:B" (I is
// a colour space identifier).
if (i+1 < es_argc)
{
if (es_argv[i+1] == 2) // rgb
i += 4;
else if (es_argv[i+1] == 5) // index
i += 2;
}
}
else switch (es_argv[i])
{
case 0:
@ -888,11 +911,12 @@ void InterpretEscSeq( void )
}
else // (prefix == ']')
{
// Ignore any \e]? or \e]> sequences.
// Ignore any "private" sequences.
if (prefix2 != 0)
return;
if (es_argc == 1 && es_argv[0] == 0) // ESC]0;titleST
if (es_argc == 1 && (es_argv[0] == 0 || // ESC]0;titleST - icon (ignored) &
es_argv[0] == 2)) // ESC]2;titleST - window
{
SetConsoleTitle( Pt_arg );
}
@ -928,7 +952,11 @@ ParseAndPrintString( HANDLE hDev,
{
if (state == 1)
{
if (*s == ESC) state = 2;
if (*s == ESC)
{
suffix2 = 0;
state = 2;
}
else if (*s == SO) shifted = TRUE;
else if (*s == SI) shifted = FALSE;
else PushBuffer( *s );
@ -936,16 +964,28 @@ ParseAndPrintString( HANDLE hDev,
else if (state == 2)
{
if (*s == ESC) ; // \e\e...\e == \e
else if ((*s == '[') || (*s == ']'))
else if (*s >= '\x20' && *s <= '\x2f')
suffix2 = *s;
else if (suffix2 != 0)
state = 1;
else if (*s == '[' || // CSI Control Sequence Introducer
*s == ']') // OSC Operating System Command
{
FlushBuffer();
prefix = *s;
prefix2 = 0;
state = 3;
Pt_len = 0;
*Pt_arg = '\0';
state = 3;
}
else if (*s == 'P' || // DCS Device Control String
*s == 'X' || // SOS Start Of String
*s == '^' || // PM Privacy Message
*s == '_') // APC Application Program Command
{
*Pt_arg = '\0';
state = 6;
}
else if (*s == ')' || *s == '(') state = 6;
else state = 1;
}
else if (state == 3)
@ -963,10 +1003,22 @@ ParseAndPrintString( HANDLE hDev,
es_argv[1] = 0;
state = 4;
}
else if (*s == '?' || *s == '>')
else if (*s == ':')
{
// ignore it
}
else if (*s >= '\x3b' && *s <= '\x3f')
{
prefix2 = *s;
}
else if (*s >= '\x20' && *s <= '\x2f')
{
suffix2 = *s;
}
else if (suffix2 != 0)
{
state = 1;
}
else
{
es_argc = 0;
@ -988,6 +1040,18 @@ ParseAndPrintString( HANDLE hDev,
if (prefix == ']')
state = 5;
}
else if (*s >= '\x3a' && *s <= '\x3f')
{
// ignore 'em
}
else if (*s >= '\x20' && *s <= '\x2f')
{
suffix2 = *s;
}
else if (suffix2 != 0)
{
state = 1;
}
else
{
es_argc++;
@ -1015,8 +1079,10 @@ ParseAndPrintString( HANDLE hDev,
}
else if (state == 6)
{
// Ignore it (ESC ) 0 is implicit; nothing else is supported).
if (*s == BEL || (*s == '\\' && *Pt_arg == ESC))
state = 1;
else
*Pt_arg = *s;
}
}
FlushBuffer();

View File

@ -87,7 +87,7 @@
add error codes to some messages.
*/
#define PDATE L"10 February, 2014"
#define PDATE L"18 February, 2014"
#include "ansicon.h"
#include "version.h"

View File

@ -95,8 +95,11 @@ Usage
The log option will not work with '-p'; set the environment variable
ANSICON_LOG (to the number) instead. The variable is only read once when a
process is started; changing it won't affect running processes. If you
identify a module that causes problems, add it to the ANSICON_EXC environ-
ment variable (see ANSICON_API below, but the extension is required).
identify a program or module that causes problems, add it to the
ANSICON_EXC environment variable (see ANSICON_API below; add the extension
to exclude a single module). Be aware that excluding a program will also
exclude any programs it creates (although excluding "program.exe" may still
hook created programs run through its DLLs).
E.g.: 'ansicon -l5' will start a new command processor, logging every pro-
cess it starts along with their output.
@ -144,7 +147,8 @@ Sequences Recognised
The following escape sequences are recognised.
\e]0;titleBEL Set (xterm) window's title (and icon)
\e]0;titleBEL Set (xterm) window's title (and icon, ignored)
\e]2;titleBEL Set (xterm) window's title
\e[21t Report (xterm) window's title
\e[s Save Cursor
\e[u Restore Cursor
@ -214,7 +218,7 @@ DEC Special Graphics Character Set
Char Unicode Code Point & Name
---- -------------------------
_ U+0020 Space (blank)
_ U+00A0 No-Break Space (blank)
` U+2666 Black Diamond Suit
a U+2592 Medium Shade
b U+2409 Symbol For Horizontal Tabulation
@ -273,17 +277,20 @@ Version History
Legend: + added, - bug-fixed, * changed.
1.70 - 8 February, 2014:
1.70 - 18 February, 2014:
- don't hook again if using LoadLibrary or LoadLibraryEx;
- update the LoadLibraryEx flags that shouldn't hook;
- restore original attributes on detach (for LoadLibrary/FreeLibrary usage);
- ansicon.exe will start with ANSICON_DEF (if defined and -m not used);
- an installed ansicon.exe will restore current (not default) attributes;
- attributes and saved position are local to each console window;
- improved recognition of unsupported sequences;
* inject into a created process by modifying the import descriptor table
(-p will use CreateRemoteThread);
* log: remove the quotes around the CreateProcess command line;
add an underscore in 64-bit addresses to distinguish 8-digit groups.
add an underscore in 64-bit addresses to distinguish 8-digit groups;
* ANSICON_EXC can exclude entire programs;
* switch G1 blank from space (U+0020) to No-Break Space (U+00A0).
1.66 - 20 September, 2013:
- fix 32-bit process trying to detect 64-bit process.
@ -470,5 +477,5 @@ Distribution
in LICENSE.txt.
=============================
Jason Hood, 8 February, 2014.
==============================
Jason Hood, 18 February, 2014.

View File

@ -114,5 +114,6 @@ roughly ordered by function. The initial escape character is assumed.
[6n sends "\e[#;#R" (line & column) to console input
[21t sends "\e]lTitle\e\" (the console's window title) to console input
]0;TitleST
]2;TitleST
sets the console title to "Title"; ST (string terminator) is either
character 7 (BEL) or escape and backslash