Skip to content

Commit 5babc4b

Browse files
committed
Fix argument parsing and path handling on Windows.
1 parent 9295338 commit 5babc4b

File tree

1 file changed

+54
-6
lines changed

1 file changed

+54
-6
lines changed

compiledb/parser.py

Lines changed: 54 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import bashlex
2222
import re
2323
import logging
24+
import sys
2425

2526
from compiledb.compiler import get_compiler
2627
from compiledb.utils import run_cmd
@@ -137,7 +138,7 @@ def skip_line(cmd, reason):
137138

138139
# add entry to database
139140
tokens = c['tokens']
140-
arguments = [unescape(a) for a in tokens[len(wrappers):]]
141+
arguments = [a for a in tokens[len(wrappers):]]
141142

142143
compiler = get_compiler(arguments[0])
143144

@@ -178,12 +179,63 @@ def visitcommandsubstitution(self, n, cmd):
178179
self.substs.append(n)
179180
return False
180181

182+
def cross_platform_argline(s, platform='this'):
    """Split a command line into arguments using per-platform quoting rules.

    Multi-platform variant of ``shlex.split()`` driven by a single regular
    expression per platform.

    Args:
        s: The command line to split.
        platform: ``'this'`` selects the rules from ``sys.platform``;
            ``1`` = POSIX; ``0`` = Windows/CMD (other values are reserved).

    Returns:
        A list of argument strings. Adjacent pieces that are not separated
        by whitespace or an operator are concatenated into one argument.
        Unquoted words, backslash escapes and single-quoted strings
        contribute their bare text; a non-empty double-quoted string
        contributes its content re-wrapped in a quote character (``'``
        unless the content contains ``'`` but no ``"``).
        Pipe/redirection operators (``&&``, ``|``, ``>``, ``<`` ...) are
        returned as separate items.

    Raises:
        AssertionError: If ``platform`` is not one of the supported values.
        ValueError: If a character matches no lexical rule (for example an
            unterminated quote under POSIX rules).

    Kudos: https://stackoverflow.com/a/35900070/2349761
    """
    if platform == 'this':
        # bool result: True compares equal to 1 (POSIX), False to 0 (Windows)
        platform = (sys.platform != 'win32')
    if platform == 1:
        RE_CMD_LEX = r'''"((?:\\["\\]|[^"])*)"|'([^']*)'|(\\.)|(&&?|\|\|?|\d?\>|[<])|([^\s'"\\&|<>]+)|(\s+)|(.)'''
    elif platform == 0:
        RE_CMD_LEX = r'''"((?:""|\\["\\]|[^"])*)"?()|(\\\\(?=\\*")|\\")|(&&?|\|\|?|\d?>|[<])|([^\s"&|<>]+)|(\s+)|(.)'''
    else:
        raise AssertionError('unkown platform %r' % platform)

    args = []
    accu = None  # collects the pieces of the argument currently being built
    for qs, qss, esc, pipe, word, white, fail in re.findall(RE_CMD_LEX, s):
        if word:
            if platform == 0:
                # Normalise every backslash (single or already doubled) to a
                # doubled backslash.
                word = word.replace('\\\\', '\\').replace('\\', '\\\\')
        elif esc:
            # Keep only the escaped character, dropping the backslash.
            word = esc[1]
        elif white or pipe:
            # Whitespace and operators both terminate the current argument.
            if accu is not None:
                args.append(accu)
            if pipe:
                args.append(pipe)
            accu = None
            continue
        elif fail:
            raise ValueError("invalid or incomplete shell string")
        elif qs:
            if platform == 0:
                # Inside a Windows double-quoted string, "" is an escaped ".
                qs = qs.replace('""', '"')
            # Re-wrap the content in the quote character repr() would pick,
            # without round-tripping through the 'unicode_escape' codec,
            # which corrupts printable non-ASCII characters.
            quote = '"' if ("'" in qs and '"' not in qs) else "'"
            word = quote + qs + quote
        else:
            word = qss  # may be even empty; must be last

        accu = (accu or '') + word

    if accu is not None:
        args.append(accu)

    return args
181232

182233
class CommandProcessor(bashlex.ast.nodevisitor):
183234
"""Uses bashlex to parse and traverse the resulting bash AST
184235
looking for and extracting compilation commands."""
185-
@staticmethod
186236
def process(line, wd):
237+
args = cross_platform_argline(line)
238+
line = " ".join(args)
187239
trees = bashlex.parser.parse(line)
188240
if not trees:
189241
return []
@@ -254,8 +306,4 @@ def check_last_cmd(self):
254306
# reset state to process new command
255307
self.reset()
256308

257-
258-
def unescape(s):
    """Return *s* with Python-style backslash escapes decoded.

    Sequences such as ``\\n``, ``\\t`` or ``\\x41`` are replaced by the
    characters they denote; every other character is returned unchanged.
    """
    # The 'unicode_escape' decoder reads its input bytes as Latin-1, so the
    # text must be encoded as Latin-1 too. 'backslashreplace' turns anything
    # above U+00FF into a \uXXXX escape that the decoder restores. A plain
    # UTF-8 encode() would turn e.g. 'é' into 'Ã©'.
    return s.encode('latin-1', 'backslashreplace').decode('unicode_escape')
260-
261309
# ex: ts=2 sw=4 et filetype=python

0 commit comments

Comments
 (0)