Skip to content

Commit 1881413

Browse files
committed
Migrate to tree-sitter-language-pack: expand language support and address maintenance
Resolves Aider-AI#7. This commit replaces the tree-sitter language pack from grantjenks/py-tree-sitter-languages with Goldziher/tree-sitter-language-pack, significantly expanding language support and addressing maintenance issues. Key changes include: 1. Greatly increases the number of supported languages, including Swift and Svelte. 2. Resolves dependency on an unmaintained package that was forcing grep-ast to use an old tree-sitter version (0.21). 3. Unlocks the ability to use more recent tree-sitter versions. 4. Updates requirements.txt to use tree-sitter-language-pack>=0.2.0. 5. Increments the version number to 0.3.4-dev in setup.py. 6. Adds extensive test cases for parsing various languages in test_parsers.py. Notable changes: - Removed support for DOT, OCaml, ql (GitHub CodeQL), and tsq (Tree Sitter Query) due to their absence in the new pack. - Removed potentially incorrect mappings for .gomod, .sqlite, and .regex extensions. - Replaced the uncommon ".et" mapping for "embeddedtemplate" with mappings for ERB and EJS, which are common uses of embedded templates. - Re-enabled markdown as the new pack uses a different markdown grammar that likely doesn't suffer from previous bugs.
1 parent a5dd50c commit 1881413

File tree

5 files changed

+2138
-22
lines changed

5 files changed

+2138
-22
lines changed

Diff for: grep_ast/grep_ast.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import re
44

5-
from tree_sitter_languages import get_parser
5+
from tree_sitter_language_pack import get_parser
66

77
from .dump import dump # noqa: F401
88
from .parsers import filename_to_lang

Diff for: grep_ast/parsers.py

+149-17
Original file line numberDiff line numberDiff line change
@@ -1,57 +1,189 @@
11
import os
22

3-
# Updated mapping of file extensions to parsers
43
PARSERS = {
5-
".py": "python",
6-
".js": "javascript",
7-
".mjs": "javascript", # mjs file extension stands for "module JavaScript."
8-
".go": "go",
4+
".as": "actionscript",
5+
".adb": "ada",
6+
".ads": "ada",
7+
".agda": "agda",
8+
".ino": "arduino",
9+
".s": "asm",
10+
".asm": "asm",
11+
".astro": "astro",
12+
".sh": "bash",
913
".bash": "bash",
14+
".beancount": "beancount",
15+
".bib": "bibtex",
16+
".bicep": "bicep",
17+
".bb": "bitbake",
1018
".c": "c",
11-
".cc": "cpp",
12-
".cs": "c_sharp",
19+
".h": "c",
20+
".cairo": "cairo",
21+
".capnp": "capnp",
22+
".chatito": "chatito",
23+
".clar": "clarity",
24+
".clj": "clojure",
25+
".cljs": "clojure",
26+
".cmake": "cmake",
27+
".cmake.in": "cmake",
28+
".lisp": "commonlisp",
1329
".cl": "commonlisp",
30+
".cpon": "cpon",
1431
".cpp": "cpp",
32+
".cc": "cpp",
33+
".hpp": "cpp",
34+
".hh": "cpp",
35+
".cs": "csharp",
1536
".css": "css",
37+
".csv": "csv",
38+
".cu": "cuda",
39+
".cuh": "cuda",
40+
".d": "d",
41+
".dart": "dart",
1642
".dockerfile": "dockerfile",
17-
".dot": "dot",
43+
".dox": "doxygen",
44+
".dtd": "dtd",
1845
".el": "elisp",
1946
".ex": "elixir",
47+
".exs": "elixir",
2048
".elm": "elm",
21-
".et": "embedded_template",
49+
".erb": "embeddedtemplate", # ERB (Embedded Ruby) templates
50+
".ejs": "embeddedtemplate", # EJS (Embedded JavaScript) templates
2251
".erl": "erlang",
23-
".gomod": "gomod",
52+
".hrl": "erlang",
53+
".fnl": "fennel",
54+
".fir": "firrtl",
55+
".fish": "fish",
56+
".f90": "fortran",
57+
".f95": "fortran",
58+
".f03": "fortran",
59+
".f08": "fortran",
60+
".fun": "func",
61+
".gd": "gdscript",
62+
".gitattributes": "gitattributes",
63+
".gitcommit": "gitcommit",
64+
".gitignore": "gitignore",
65+
".gleam": "gleam",
66+
".glsl": "glsl",
67+
".gn": "gn",
68+
".go": "go",
69+
".mod": "gomod",
70+
".sum": "gosum",
71+
".groovy": "groovy",
72+
".launch": "gstlaunch",
2473
".hack": "hack",
74+
".ha": "hare",
2575
".hs": "haskell",
76+
".hx": "haxe",
2677
".hcl": "hcl",
78+
".tf": "hcl",
79+
".heex": "heex",
80+
".hlsl": "hlsl",
2781
".html": "html",
82+
".htm": "html",
83+
".hypr": "hyprlang",
84+
".ispc": "ispc",
85+
".janet": "janet",
2886
".java": "java",
87+
".js": "javascript",
88+
".mjs": "javascript", # mjs file extension stands for "module JavaScript."
2989
".jsdoc": "jsdoc",
3090
".json": "json",
91+
".jsonnet": "jsonnet",
3192
".jl": "julia",
93+
".kconfig": "kconfig",
94+
".kdl": "kdl",
3295
".kt": "kotlin",
96+
".ld": "linkerscript",
97+
".ll": "llvm",
3398
".lua": "lua",
99+
".luadoc": "luadoc",
100+
# ".???": "luap", # "luap" is not a standalone language
101+
".luau": "luau",
102+
".magik": "magik",
34103
".mk": "make",
35-
# ".md": "markdown", # https://github.com/ikatyang/tree-sitter-markdown/issues/59
104+
".makefile": "make",
105+
".md": "markdown", # TODO: verify that markdown grammar used by Goldziher’s tree-sitter languages pack doesn't suffer from https://github.com/ikatyang/tree-sitter-markdown/issues/59
106+
# ".m": "matlab", # both matlab and objc use ".m" extension; we choose to map to objc
107+
".mermaid": "mermaid",
108+
".mmd": "mermaid",
109+
".meson": "meson",
110+
".ninja": "ninja",
111+
".nix": "nix",
112+
".nqc": "nqc",
36113
".m": "objc",
37-
".ml": "ocaml",
114+
".mm": "objc",
115+
".odin": "odin",
116+
".org": "org",
117+
".pas": "pascal",
118+
".pem": "pem",
38119
".pl": "perl",
120+
".pm": "perl",
121+
".pgn": "pgn",
39122
".php": "php",
40-
".ql": "ql",
123+
".po": "po",
124+
".pony": "pony",
125+
".ps1": "powershell",
126+
".psm1": "powershell",
127+
".printf": "printf",
128+
".prisma": "prisma",
129+
".properties": "properties",
130+
".psv": "psv",
131+
".pp": "puppet",
132+
".purs": "purescript",
133+
".in": "pymanifest",
134+
".py": "python",
135+
".qmldir": "qmldir",
136+
".qml": "qmljs",
41137
".r": "r",
42138
".R": "r",
43-
".regex": "regex",
139+
".rkt": "racket",
140+
# ".???": "re2c", # re2c is not a standalone language
141+
".inputrc": "readline",
142+
".requirements": "requirements",
143+
".ron": "ron",
44144
".rst": "rst",
45145
".rb": "ruby",
46146
".rs": "rust",
47147
".scala": "scala",
148+
".sc": "scala",
149+
".scm": "scheme",
150+
".ss": "scheme",
151+
".scss": "scss",
152+
".smali": "smali",
153+
".smithy": "smithy",
154+
".sol": "solidity",
48155
".sql": "sql",
49-
".sqlite": "sqlite",
156+
".nut": "squirrel",
157+
".star": "starlark",
158+
".svelte": "svelte",
159+
".swift": "swift",
160+
".td": "tablegen",
161+
".tcl": "tcl",
162+
".thrift": "thrift",
50163
".toml": "toml",
51-
".tsq": "tsq",
52-
".tsx": "typescript",
164+
".tsv": "tsv",
165+
".tsx": "tsx",
166+
".twig": "twig",
53167
".ts": "typescript",
168+
".typ": "typst",
169+
".rules": "udev",
170+
".ungram": "ungrammar",
171+
".tal": "uxntal",
172+
# ".v": "v", # ".v" is overloaded: vlang, verilog (and coq). vlang grammar has 24 stars as of 2024-08-22
173+
".v": "verilog", # verilog grammar has 90 stars as of 2024-08-22
174+
".sv": "verilog",
175+
".svh": "verilog",
176+
".vhd": "vhdl",
177+
".vhdl": "vhdl",
178+
".vim": "vim",
179+
".vue": "vue",
180+
".wgsl": "wgsl",
181+
".XCompose": "xcompose",
182+
".xml": "xml",
54183
".yaml": "yaml",
184+
".yml": "yaml",
185+
".yuck": "yuck",
186+
".zig": "zig",
55187
}
56188

57189

Diff for: requirements.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
tree-sitter-languages>=1.8.0
1+
tree-sitter-language-pack>=0.2.0
22
pathspec

Diff for: setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
setup(
1313
name="grep-ast",
14-
version="0.3.3",
14+
version="0.3.4-dev",
1515
description="A tool to grep through the AST of a source file",
1616
url="https://github.com/paul-gauthier/grep-ast",
1717
long_description=long_description,

0 commit comments

Comments
 (0)