import os

# Mapping of file extensions to parser (grammar) names.
#
# Keys are file suffixes including the leading dot; values are the name of the
# parser to use for files with that suffix. Entries are grouped by parser name
# in roughly alphabetical order.
#
# NOTE(review): the code that consumes this table is not visible here.
#   * If the suffix is obtained with os.path.splitext(), a multi-dot key such
#     as ".cmake.in" can never match (splitext yields ".in") - confirm.
#   * Keys such as ".gitignore", ".gitattributes", ".inputrc" and ".XCompose"
#     look like whole dotfile names rather than suffixes - confirm that the
#     lookup handles them.
#   * Both ".r" and ".R" are listed, which suggests the lookup is
#     case-sensitive - confirm.
PARSERS = {
    ".as": "actionscript",
    ".adb": "ada",
    ".ads": "ada",
    ".agda": "agda",
    ".ino": "arduino",
    ".s": "asm",
    ".asm": "asm",
    ".astro": "astro",
    ".sh": "bash",
    ".bash": "bash",
    ".beancount": "beancount",
    ".bib": "bibtex",
    ".bicep": "bicep",
    ".bb": "bitbake",
    ".c": "c",
    ".h": "c",
    ".cairo": "cairo",
    ".capnp": "capnp",
    ".chatito": "chatito",
    ".clar": "clarity",
    ".clj": "clojure",
    ".cljs": "clojure",
    ".cmake": "cmake",
    ".cmake.in": "cmake",
    ".lisp": "commonlisp",
    ".cl": "commonlisp",
    ".cpon": "cpon",
    ".cpp": "cpp",
    ".cc": "cpp",
    ".hpp": "cpp",
    ".hh": "cpp",
    ".cs": "csharp",
    ".css": "css",
    ".csv": "csv",
    ".cu": "cuda",
    ".cuh": "cuda",
    ".d": "d",
    ".dart": "dart",
    ".dockerfile": "dockerfile",
    ".dox": "doxygen",
    ".dtd": "dtd",
    ".el": "elisp",
    ".ex": "elixir",
    ".exs": "elixir",
    ".elm": "elm",
    ".erb": "embeddedtemplate",  # ERB (Embedded Ruby) templates
    ".ejs": "embeddedtemplate",  # EJS (Embedded JavaScript) templates
    ".erl": "erlang",
    ".hrl": "erlang",
    ".fnl": "fennel",
    ".fir": "firrtl",
    ".fish": "fish",
    ".f90": "fortran",
    ".f95": "fortran",
    ".f03": "fortran",
    ".f08": "fortran",
    ".fun": "func",
    ".gd": "gdscript",
    ".gitattributes": "gitattributes",
    ".gitcommit": "gitcommit",
    ".gitignore": "gitignore",
    ".gleam": "gleam",
    ".glsl": "glsl",
    ".gn": "gn",
    ".go": "go",
    ".mod": "gomod",
    ".sum": "gosum",
    ".groovy": "groovy",
    ".launch": "gstlaunch",
    ".hack": "hack",
    ".ha": "hare",
    ".hs": "haskell",
    ".hx": "haxe",
    ".hcl": "hcl",
    ".tf": "hcl",
    ".heex": "heex",
    ".hlsl": "hlsl",
    ".html": "html",
    ".htm": "html",
    ".hypr": "hyprlang",
    ".ispc": "ispc",
    ".janet": "janet",
    ".java": "java",
    ".js": "javascript",
    ".mjs": "javascript",  # mjs file extension stands for "module JavaScript."
    ".jsdoc": "jsdoc",
    ".json": "json",
    ".jsonnet": "jsonnet",
    ".jl": "julia",
    ".kconfig": "kconfig",
    ".kdl": "kdl",
    ".kt": "kotlin",
    ".ld": "linkerscript",
    ".ll": "llvm",
    ".lua": "lua",
    ".luadoc": "luadoc",
    # ".???": "luap",  # "luap" is not a standalone language
    ".luau": "luau",
    ".magik": "magik",
    ".mk": "make",
    ".makefile": "make",
    # TODO: verify that the markdown grammar used by Goldziher's tree-sitter
    # languages pack doesn't suffer from
    # https://github.com/ikatyang/tree-sitter-markdown/issues/59
    ".md": "markdown",
    # ".m": "matlab",  # both matlab and objc use ".m" extension; we choose to map to objc
    ".mermaid": "mermaid",
    ".mmd": "mermaid",
    ".meson": "meson",
    ".ninja": "ninja",
    ".nix": "nix",
    ".nqc": "nqc",
    ".m": "objc",
    ".mm": "objc",
    ".odin": "odin",
    ".org": "org",
    ".pas": "pascal",
    ".pem": "pem",
    ".pl": "perl",
    ".pm": "perl",
    ".pgn": "pgn",
    ".php": "php",
    ".po": "po",
    ".pony": "pony",
    ".ps1": "powershell",
    ".psm1": "powershell",
    ".printf": "printf",
    ".prisma": "prisma",
    ".properties": "properties",
    ".psv": "psv",
    ".pp": "puppet",
    ".purs": "purescript",
    ".in": "pymanifest",
    ".py": "python",
    ".qmldir": "qmldir",
    ".qml": "qmljs",
    ".r": "r",
    ".R": "r",
    ".rkt": "racket",
    # ".???": "re2c",  # re2c is not a standalone language
    ".inputrc": "readline",
    ".requirements": "requirements",
    ".ron": "ron",
    ".rst": "rst",
    ".rb": "ruby",
    ".rs": "rust",
    ".scala": "scala",
    ".sc": "scala",
    ".scm": "scheme",
    ".ss": "scheme",
    ".scss": "scss",
    ".smali": "smali",
    ".smithy": "smithy",
    ".sol": "solidity",
    ".sql": "sql",
    ".nut": "squirrel",
    ".star": "starlark",
    ".svelte": "svelte",
    ".swift": "swift",
    ".td": "tablegen",
    ".tcl": "tcl",
    ".thrift": "thrift",
    ".toml": "toml",
    ".tsv": "tsv",
    ".tsx": "tsx",
    ".twig": "twig",
    ".ts": "typescript",
    ".typ": "typst",
    ".rules": "udev",
    ".ungram": "ungrammar",
    ".tal": "uxntal",
    # ".v": "v",  # ".v" is overloaded: vlang, verilog (and coq). vlang grammar has 24 stars as of 2024-08-22
    ".v": "verilog",  # verilog grammar has 90 stars as of 2024-08-22
    ".sv": "verilog",
    ".svh": "verilog",
    ".vhd": "vhdl",
    ".vhdl": "vhdl",
    ".vim": "vim",
    ".vue": "vue",
    ".wgsl": "wgsl",
    ".XCompose": "xcompose",
    ".xml": "xml",
    ".yaml": "yaml",
    ".yml": "yaml",
    ".yuck": "yuck",
    ".zig": "zig",
}