Skip to content

Commit

Permalink
[xtrace] argv array is printed as valid shell strings.
Browse files Browse the repository at this point in the history
This is done with the "QSN" encoder.

Addresses issue #700.
  • Loading branch information
Andy Chu committed Apr 10, 2020
1 parent afe8b6d commit 98e2b88
Show file tree
Hide file tree
Showing 6 changed files with 60 additions and 22 deletions.
3 changes: 2 additions & 1 deletion core/dev.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from asdl import pretty
from core import error
from core import optview
from core import qsn
from core.util import log
from osh import word_
from pylib import os_path
Expand Down Expand Up @@ -241,7 +242,7 @@ def OnSimpleCommand(self, argv):
return

first_char, prefix = self._EvalPS4()
tmp = [pretty.String(a) for a in argv]
tmp = [qsn.maybe_shell_encode(a) for a in argv]
cmd = ' '.join(tmp)
self.f.log('%s%s%s', first_char, prefix, cmd)

Expand Down
52 changes: 35 additions & 17 deletions core/qsn.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,21 +91,35 @@
help-bash thread:
Why not make ${var@Q} the same as 'printf %q' output?
It was an accident.
Embedding within JSON strings:
"'\\x00\\''"
Can be shortened to:
"\\x00\\'"
In other words, we don't need the leading and trailing quotes. Note that
backslashes are doubled.
"""
from __future__ import print_function

import re

BIT8_RAW = 0 # pass through
BIT8_X = 1 # escape everything as \xff
BIT8_U = 2 # decode and escape as \u03bc. Not implemented yet.

def maybe_shell_encode(s, bit8_display='p'):

def maybe_shell_encode(s, bit8_display=BIT8_RAW):
"""Encode simple strings to a "bare" word, and complex ones to a QSN literal.
Shell strings sometimes need the $'' prefix, e.g. for $'\x00'.
This technically isn't part of QSN, but shell can understand QSN, as long as
it doesn't have \u{}, e.g. bit8_display != 'u'.
it doesn't have \u{}, i.e. bit8_display != BIT8_U.
"""

quote = 0 # no quotes

if len(s) == 0: # empty string DOES need quotes!
Expand All @@ -121,13 +135,16 @@ def maybe_shell_encode(s, bit8_display='p'):

quote = 1

if (ch in '\\\'\r\n\t\0' or
ch < ' ' or
ch > '\x7F'):
if ch in '\\\'\r\n\t\0' or ch < ' ':
# It needs quotes like $''
quote = 2 # max quote, so don't look at the rest of the string
break

# Raw strings can use '', but \xff and \u{03bc} escapes require $''.
if ch > '\x7F' and bit8_display != BIT8_RAW:
quote = 2
break

# should we also figure out the length?
parts = []

Expand All @@ -145,7 +162,7 @@ def maybe_shell_encode(s, bit8_display='p'):
return ''.join(parts)


def maybe_encode(s, bit8_display='p'):
def maybe_encode(s, bit8_display=BIT8_RAW):
"""Encode simple strings to a "bare" word, and complex ones to a QSN literal.
"""
quote = 0
Expand All @@ -163,19 +180,17 @@ def maybe_encode(s, bit8_display='p'):

quote = 1

parts = []

if quote:
parts.append("'")
_encode_bytes(s, bit8_display, parts)
parts.append("'")
else:
if not quote:
return s

parts = []
parts.append("'")
_encode_bytes(s, bit8_display, parts)
parts.append("'")
return ''.join(parts)


def encode(s, bit8_display='p'):
def encode(s, bit8_display=BIT8_RAW):
parts = []
parts.append("'")
_encode_bytes(s, bit8_display, parts)
Expand Down Expand Up @@ -207,7 +222,7 @@ def _encode_bytes(s, bit8_display, parts):
part = '\\x%02x' % ord(ch)

elif ch >= '\x7f':
if bit8_display == 'x':
if bit8_display == BIT8_X:
# TODO: Print this LITERALLY if
# unicode=DO_NOT_TOUCH
# unicode=ESCAPE : gives you \x
Expand All @@ -218,7 +233,7 @@ def _encode_bytes(s, bit8_display, parts):
# high_bit='u' high_bit='x' high_bit=p : pass through

part = '\\x%02x' % ord(ch)
elif bit8_display == 'u':
elif bit8_display == BIT8_U:
# need utf-8 decoder
raise NotImplementedError()
else:
Expand Down Expand Up @@ -246,6 +261,9 @@ def decode(s):
pos = 0
n = len(s)

# TODO: This should be factored into maybe_decode
#assert s.startswith("'"), s

need_quote = False
if s.startswith("'"):
need_quote = True
Expand Down
2 changes: 1 addition & 1 deletion core/qsn_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def testEncodeDecode(self):
sh = qsn.maybe_shell_encode(c)
q1 = qsn.maybe_encode(c)
q2 = qsn.encode(c)
q3 = qsn.encode(c, bit8_display='x')
q3 = qsn.encode(c, bit8_display=qsn.BIT8_X)

print(' sh %s' % sh)
print('qsn maybe %s' % q1)
Expand Down
7 changes: 7 additions & 0 deletions doc/qsn.md
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,13 @@ c'\x01\n' # explicit CSTR
- `\r` `\n` `\t` `\0` (subset of C and shell; Rust has this)
- Everything else is either `\xFF` or `\u03bc`

## Extensions

- QTSV for tables. This is a priority for Oil.
- JSON-like dicts and lists. Someone else should run with this!
- warning: "\\x00\\'" will confuse people. Programmers don't understand
backslashes, and language implementers often don't either.

## Links

- <https://doc.rust-lang.org/reference/tokens.html#string-literals> - Rust is
Expand Down
16 changes: 14 additions & 2 deletions spec/xtrace.test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,13 @@ case $SH in (dash) exit ;; esac
s=$'a\x03b\004c\x00d'
set -o xtrace
echo "$s"
## stdout-repr: 'a\x03b\x04c\n'
## stderr-repr: "+ echo $'a\\003b\\004c'\n"
## stdout-repr: 'a\x03b\x04c\x00d\n'
## STDERR:
+ echo $'a\x03b\x04c\0d'
## END
## OK bash stdout-repr: 'a\x03b\x04c\n'
## OK bash stderr-repr: "+ echo $'a\\003b\\004c'\n"

# nonsensical output?
## BUG mksh stdout-repr: 'a;\x04c\r\n'
## BUG mksh stderr-repr: "+ echo $'a;\\004c\\r'\n"
Expand Down Expand Up @@ -75,9 +80,16 @@ echo '1 2' \' \" \\
## STDOUT:
1 2 ' " \
## END
# Oil is different because backslashes require $'\\' and not '\', but that's OK
## STDERR:
+ echo '1 2' $'\'' '"' $'\\'
## END
## OK bash/mksh STDERR:
+ echo '1 2' \' '"' '\'
## END

## BUG dash STDERR:
+ echo 1 2 ' " \
## END
Expand Down
2 changes: 1 addition & 1 deletion test/spec.sh
Original file line number Diff line number Diff line change
Expand Up @@ -602,7 +602,7 @@ sh-options() {
}

xtrace() {
sh-spec spec/xtrace.test.sh --osh-failures-allowed 8 \
sh-spec spec/xtrace.test.sh --osh-failures-allowed 3 \
${REF_SHELLS[@]} $OSH_LIST "$@"
}

Expand Down

0 comments on commit 98e2b88

Please sign in to comment.