|
21 | 21 | import bashlex |
22 | 22 | import re |
23 | 23 | import logging |
| 24 | +import sys |
24 | 25 |
|
25 | 26 | from compiledb.compiler import get_compiler |
26 | 27 | from compiledb.utils import run_cmd |
@@ -137,7 +138,7 @@ def skip_line(cmd, reason): |
137 | 138 |
|
138 | 139 | # add entry to database |
139 | 140 | tokens = c['tokens'] |
140 | | - arguments = [unescape(a) for a in tokens[len(wrappers):]] |
| 141 | + arguments = [a for a in tokens[len(wrappers):]] |
141 | 142 |
|
142 | 143 | compiler = get_compiler(arguments[0]) |
143 | 144 |
|
@@ -178,12 +179,63 @@ def visitcommandsubstitution(self, n, cmd): |
178 | 179 | self.substs.append(n) |
179 | 180 | return False |
180 | 181 |
|
def cross_platform_argline(s, platform='this'):
    """Split a command line into arguments using platform-specific rules.

    Regex-based, multi-platform variant of shlex.split(), adapted so that the
    resulting pieces can be re-joined with spaces and parsed again:

    * shell operators (pipes, redirections, ``&&``/``||``) are returned as
      separate items;
    * a non-empty double-quoted segment is re-emitted wrapped in quotes (the
      quote character is the one Python's ``repr()`` picks), so embedded
      whitespace stays inside one argument;
    * on Windows, every backslash (or doubled backslash) in an unquoted word
      is emitted as a doubled backslash, and a doubled double-quote inside a
      quoted segment is collapsed to a single double-quote.

    Args:
        s: The command line to split.
        platform: 'this' = auto-detect from the current platform;
                  1 = POSIX;
                  0 = Windows/CMD
                  (other values reserved).

    Returns:
        A list of argument strings, in order of appearance.

    Raises:
        AssertionError: if ``platform`` is not one of the supported values.
        ValueError: if ``s`` contains a character no lexer rule accepts
            (for example an unterminated quote on POSIX).

    Kudos: https://stackoverflow.com/a/35900070/2349761
    """
    if platform == 'this':
        # bool result; True/False compare equal to 1/0 below
        platform = (sys.platform != 'win32')
    if platform == 1:
        RE_CMD_LEX = r'''"((?:\\["\\]|[^"])*)"|'([^']*)'|(\\.)|(&&?|\|\|?|\d?\>|[<])|([^\s'"\\&|<>]+)|(\s+)|(.)'''
    elif platform == 0:
        RE_CMD_LEX = r'''"((?:""|\\["\\]|[^"])*)"?()|(\\\\(?=\\*")|\\")|(&&?|\|\|?|\d?>|[<])|([^\s"&|<>]+)|(\s+)|(.)'''
    else:
        raise AssertionError('unkown platform %r' % platform)

    args = []
    accu = None   # collects pieces of one arg; None means "no arg in progress"
    for qs, qss, esc, pipe, word, white, fail in re.findall(RE_CMD_LEX, s):
        if word:
            if platform == 0:
                # normalise: collapse already-doubled backslashes first, then
                # double every backslash so none is lost when re-parsed
                word = word.replace('\\\\', '\\').replace('\\', '\\\\')
        elif esc:
            # backslash escape: keep only the escaped character
            word = esc[1]
        elif white or pipe:
            # whitespace or an operator terminates the current argument
            if accu is not None:
                args.append(accu)
            if pipe:
                args.append(pipe)
            accu = None
            continue
        elif fail:
            raise ValueError("invalid or incomplete shell string")
        elif qs:
            if platform == 0:
                # CMD-style escaped quote inside a quoted string: "" -> "
                qs = qs.replace('""', '"')
            # Re-wrap the content in quotes via repr(), then undo repr()'s
            # escaping. latin-1 + backslashreplace keeps the round trip
            # lossless for non-ASCII text (a UTF-8 encode would be decoded
            # byte-by-byte by 'unicode_escape' and produce mojibake).
            word = (repr(qs)
                    .encode('latin-1', 'backslashreplace')
                    .decode('unicode_escape'))
        else:
            word = qss   # may be even empty; must be last

        accu = (accu or '') + word

    if accu is not None:
        args.append(accu)

    return args
181 | 232 |
|
182 | 233 | class CommandProcessor(bashlex.ast.nodevisitor): |
183 | 234 | """Uses bashlex to parse and traverse the resulting bash AST |
184 | 235 | looking for and extracting compilation commands.""" |
185 | | - @staticmethod |
186 | 236 | def process(line, wd): |
| 237 | + args = cross_platform_argline(line) |
| 238 | + line = " ".join(args) |
187 | 239 | trees = bashlex.parser.parse(line) |
188 | 240 | if not trees: |
189 | 241 | return [] |
@@ -254,8 +306,4 @@ def check_last_cmd(self): |
254 | 306 | # reset state to process new command |
255 | 307 | self.reset() |
256 | 308 |
|
257 | | - |
258 | | -def unescape(s): |
259 | | - return s.encode().decode('unicode_escape') |
260 | | - |
261 | 309 | # ex: ts=2 sw=4 et filetype=python |
0 commit comments