From 523dd033197beafa09d5d21f1335a1b7fd8d8216 Mon Sep 17 00:00:00 2001 From: midichef <67946319+midichef@users.noreply.github.com> Date: Mon, 23 Sep 2024 22:02:14 -0700 Subject: [PATCH] [grep-] add loader for output of grep/ripgrep (#2443) -H will make a filename appear even in the case of grepping only 1 file. --------- Co-authored-by: anjakefala --- dev/formats.jsonl | 1 + docs/formats.md | 10 +++ sample_data/smiths-json.grep | 35 ++++++++ sample_data/smiths-standard.grep | 16 ++++ tests/golden/load-grep-json.grep | 16 ++++ tests/golden/load-grep-standard.grep | 16 ++++ tests/load-grep-json.vd | 2 + tests/load-grep-standard.vd | 2 + visidata/guide.py | 1 + visidata/guides/GrepSheet.md | 28 +++++++ visidata/loaders/grep.py | 121 +++++++++++++++++++++++++++ 11 files changed, 248 insertions(+) create mode 100644 sample_data/smiths-json.grep create mode 100644 sample_data/smiths-standard.grep create mode 100644 tests/golden/load-grep-json.grep create mode 100644 tests/golden/load-grep-standard.grep create mode 100644 tests/load-grep-json.vd create mode 100644 tests/load-grep-standard.vd create mode 100644 visidata/guides/GrepSheet.md create mode 100644 visidata/loaders/grep.py diff --git a/dev/formats.jsonl b/dev/formats.jsonl index 9f43c44ae..f2d798c35 100644 --- a/dev/formats.jsonl +++ b/dev/formats.jsonl @@ -46,3 +46,4 @@ {"filetype": "parquet", "requirements": "pyarrow or pandas", "format": "Apache Parquet", "loader": "yes", "version_added": "1.3", "created": "2013", "creator": "Apache Software Foundation", "open format": "yes", "format_url": "https://parquet.apache.org/"} {"filetype": "toml", "filetype_url": "#toml", "aliases": "", "requirements": "", "format": "Tom's Obvious, Minimal Language (TOML)", "loader": "yes", "VisiData saver": "", "version_added": "2.12", "created": "2013", "creator": "Tom Preston-Werner", "description": "minimal configuration file format that's easy to read", "open format": "yes", "format_url": "https://toml.io/en/latest"} 
{"filetype": "msgpack", "aliases": "msgpackz", "requirements": "msgpack", "format": "MessagePack", "loader": "yes", "version_added": "3.1", "created": "2008", "creator": "Sadayuki Furuhashi", "description": "efficient binary serialization format.", "open format": "yes", "nestable": "nestable", "format_url": "https://github.com/msgpack/msgpack/blob/master/spec.md"} +{"filetype": "grep", "format": "grep-printer", "loader": "yes", "version_added": "3.1", "created": "", "creator": "Andrew Gallant", "description": "search results from ripgrep in JSONL", "open format": "yes", "format_url": "https://docs.rs/grep-printer/latest/grep_printer/struct.JSON.html"} diff --git a/docs/formats.md b/docs/formats.md index 63257068f..ccba166df 100644 --- a/docs/formats.md +++ b/docs/formats.md @@ -69,6 +69,7 @@ eleventyNavigation: | f5log | Parser for f5 logs | 3\.0 | | | | f5 | | | toml | Tom's Obvious Minimal Language | 3\.0 | | | | Tom Preston-Werner | tomllib | | conll | CoNLL annotation scheme | 3\.0 | | | | Conference on Natural Language Learning | pyconll | +| [grep](#grep) | grep command-line utility | 3\.1 | | | 1973 | AT&T Bell Laboratories | | # Extra notes about formats @@ -182,3 +183,12 @@ This should similarly work for any format that has a `pandas.read_format()` func - loader-specific requirements - require setting authentication information in `~/.visidatarc` or on the CLI - launch the loader with `-f loadername` for steps to obtain and configure authentication credentials + +## Grep {#grep} +A .grep file is a JSON lines file. It can be in two formats: +1) A simple container with three fields: + file - a string with the path to the file where the match was found (absolute or relative path) + line_no - an integer with the line number in the file where the match was found, + text - a string with the text of the line that matched. 
+2) ripgrep `grep_printer` format, described here: +https://docs.rs/grep-printer/latest/grep_printer/struct.JSON.html diff --git a/sample_data/smiths-json.grep b/sample_data/smiths-json.grep new file mode 100644 index 000000000..4826b37a8 --- /dev/null +++ b/sample_data/smiths-json.grep @@ -0,0 +1,35 @@ +{"type":"begin","data":{"path":{"text":"benchmark.csv"}}} +{"type":"match","data":{"path":{"text":"benchmark.csv"},"lines":{"text":"8/16/2018 5:15p,Michael Smith,BIRD160,\"Parakeet, Blue (Melopsittacus undulatus)\",1,29.95,$31.85\r\n"},"line_number":36,"absolute_offset":3080,"submatches":[{"match":{"text":"Smith"},"start":24,"end":29}]}} +{"type":"end","data":{"path":{"text":"benchmark.csv"},"binary_offset":null,"stats":{"elapsed":{"secs":0,"nanos":13540,"human":"0.000014s"},"searches":1,"searches_with_match":1,"bytes_searched":4668,"bytes_printed":339,"matched_lines":1,"matches":1}}} +{"type":"begin","data":{"path":{"text":"benchmark.jsonl"}}} +{"type":"match","data":{"path":{"text":"benchmark.jsonl"},"lines":{"text":"{\"Date\": \"8/16/2018 5:15p\", \"Customer\": \"Michael Smith\", \"SKU\": \"BIRD160\", \"Item\": \"Parakeet, Blue (Melopsittacus undulatus)\", \"Quantity\": \"1\", \"Unit\": \"29.95\", \"Paid\": \"$31.85\"}\n"},"line_number":36,"absolute_offset":5935,"submatches":[{"match":{"text":"Smith"},"start":49,"end":54}]}} +{"type":"end","data":{"path":{"text":"benchmark.jsonl"},"binary_offset":null,"stats":{"elapsed":{"secs":0,"nanos":10708,"human":"0.000011s"},"searches":1,"searches_with_match":1,"bytes_searched":9068,"bytes_printed":450,"matched_lines":1,"matches":1}}} +{"type":"begin","data":{"path":{"text":"benchmark.lsv"}}} +{"type":"match","data":{"path":{"text":"benchmark.lsv"},"lines":{"text":"Customer: Michael Smith\n"},"line_number":266,"absolute_offset":4519,"submatches":[{"match":{"text":"Smith"},"start":18,"end":23}]}} 
+{"type":"end","data":{"path":{"text":"benchmark.lsv"},"binary_offset":null,"stats":{"elapsed":{"secs":0,"nanos":11773,"human":"0.000012s"},"searches":1,"searches_with_match":1,"bytes_searched":6875,"bytes_printed":264,"matched_lines":1,"matches":1}}} +{"type":"begin","data":{"path":{"text":"benchmark.xml"}}} +{"type":"match","data":{"path":{"text":"benchmark.xml"},"lines":{"text":" Michael Smith\n"},"line_number":346,"absolute_offset":8910,"submatches":[{"match":{"text":"Smith"},"start":22,"end":27}]}} +{"type":"end","data":{"path":{"text":"benchmark.xml"},"binary_offset":null,"stats":{"elapsed":{"secs":0,"nanos":11228,"human":"0.000011s"},"searches":1,"searches_with_match":1,"bytes_searched":13335,"bytes_printed":279,"matched_lines":1,"matches":1}}} +{"type":"begin","data":{"path":{"text":"benchmark.yml"}}} +{"type":"match","data":{"path":{"text":"benchmark.yml"},"lines":{"text":"- customer: Michael Smith\n"},"line_number":239,"absolute_offset":5230,"submatches":[{"match":{"text":"Smith"},"start":20,"end":25}]}} +{"type":"end","data":{"path":{"text":"benchmark.yml"},"binary_offset":null,"stats":{"elapsed":{"secs":0,"nanos":8369,"human":"0.000008s"},"searches":1,"searches_with_match":1,"bytes_searched":8092,"bytes_printed":266,"matched_lines":1,"matches":1}}} +{"type":"begin","data":{"path":{"text":"numeric-cols.tsv"}}} +{"type":"match","data":{"path":{"text":"numeric-cols.tsv"},"lines":{"text":"2016-09-01\tCentral\tSmith\tDesk\t2\t125.00\t250.00\n"},"line_number":16,"absolute_offset":684,"submatches":[{"match":{"text":"Smith"},"start":19,"end":24}]}} +{"type":"match","data":{"path":{"text":"numeric-cols.tsv"},"lines":{"text":"2016-12-12\tCentral\tSmith\tPencil\t67\t1.29\t86.43\n"},"line_number":22,"absolute_offset":957,"submatches":[{"match":{"text":"Smith"},"start":19,"end":24}]}} 
+{"type":"match","data":{"path":{"text":"numeric-cols.tsv"},"lines":{"text":"2017-02-01\tCentral\tSmith\tBinder\t87\t15.00\t1305.00\n"},"line_number":25,"absolute_offset":1097,"submatches":[{"match":{"text":"Smith"},"start":19,"end":24}]}} +{"type":"end","data":{"path":{"text":"numeric-cols.tsv"},"binary_offset":null,"stats":{"elapsed":{"secs":0,"nanos":12169,"human":"0.000012s"},"searches":1,"searches_with_match":1,"bytes_searched":2035,"bytes_printed":770,"matched_lines":3,"matches":3}}} +{"type":"begin","data":{"path":{"text":"sample.tsv"}}} +{"type":"match","data":{"path":{"text":"sample.tsv"},"lines":{"text":"2016-09-01\tCentral\tSmith\tDesk\t2\t125.00\t250.00\n"},"line_number":17,"absolute_offset":705,"submatches":[{"match":{"text":"Smith"},"start":19,"end":24}]}} +{"type":"match","data":{"path":{"text":"sample.tsv"},"lines":{"text":"2016-12-12\tCentral\tSmith\tPencil\t67\t1.29\t86.43\n"},"line_number":23,"absolute_offset":978,"submatches":[{"match":{"text":"Smith"},"start":19,"end":24}]}} +{"type":"match","data":{"path":{"text":"sample.tsv"},"lines":{"text":"2017-02-01\tCentral\tSmith\tBinder\t87\t15.00\t1305.00\n"},"line_number":26,"absolute_offset":1118,"submatches":[{"match":{"text":"Smith"},"start":19,"end":24}]}} +{"type":"end","data":{"path":{"text":"sample.tsv"},"binary_offset":null,"stats":{"elapsed":{"secs":0,"nanos":11934,"human":"0.000012s"},"searches":1,"searches_with_match":1,"bytes_searched":2056,"bytes_printed":746,"matched_lines":3,"matches":3}}} +{"type":"begin","data":{"path":{"text":"sample.vds"}}} +{"type":"match","data":{"path":{"text":"sample.vds"},"lines":{"text":"{\"OrderDate\": \"2016-09-01\", \"Region\": \"Central\", \"Rep\": \"Smith\", \"Item\": \"Desk\", \"Units\": \"2\", \"Unit_Cost\": \"125.00\", \"Total\": \"250.00\"}\n"},"line_number":23,"absolute_offset":3094,"submatches":[{"match":{"text":"Smith"},"start":57,"end":62}]}} +{"type":"match","data":{"path":{"text":"sample.vds"},"lines":{"text":"{\"OrderDate\": \"2016-12-12\", 
\"Region\": \"Central\", \"Rep\": \"Smith\", \"Item\": \"Pencil\", \"Units\": \"67\", \"Unit_Cost\": \"1.29\", \"Total\": \"86.43\"}\n"},"line_number":29,"absolute_offset":3913,"submatches":[{"match":{"text":"Smith"},"start":57,"end":62}]}} +{"type":"match","data":{"path":{"text":"sample.vds"},"lines":{"text":"{\"OrderDate\": \"2017-02-01\", \"Region\": \"Central\", \"Rep\": \"Smith\", \"Item\": \"Binder\", \"Units\": \"87\", \"Unit_Cost\": \"15.00\", \"Total\": \"1305.00\"}\n"},"line_number":32,"absolute_offset":4326,"submatches":[{"match":{"text":"Smith"},"start":57,"end":62}]}} +{"type":"match","data":{"path":{"text":"sample.vds"},"lines":{"text":"{\"Date\": \"2018-08-16 17:15\", \"Customer\": \"Michael Smith\", \"SKU\": \"BIRD160\", \"Item\": \"Parakeet, Blue (Melopsittacus undulatus)\", \"Quantity\": \"1\", \"Unit\": \"29.95\", \"Paid\": \"31.85\"}\n"},"line_number":94,"absolute_offset":14126,"submatches":[{"match":{"text":"Smith"},"start":50,"end":55}]}} +{"type":"end","data":{"path":{"text":"sample.vds"},"binary_offset":null,"stats":{"elapsed":{"secs":0,"nanos":18940,"human":"0.000019s"},"searches":1,"searches_with_match":1,"bytes_searched":16704,"bytes_printed":1473,"matched_lines":4,"matches":4}}} +{"type":"begin","data":{"path":{"text":"sunshinelist.html"}}} +{"type":"match","data":{"path":{"text":"sunshinelist.html"},"lines":{"text":"\t\t\t \t\t\tKevin Smith\n"},"line_number":194,"absolute_offset":7276,"submatches":[{"match":{"text":"Smith"},"start":20,"end":25}]}} +{"type":"end","data":{"path":{"text":"sunshinelist.html"},"binary_offset":null,"stats":{"elapsed":{"secs":0,"nanos":22646,"human":"0.000023s"},"searches":1,"searches_with_match":1,"bytes_searched":33309,"bytes_printed":285,"matched_lines":1,"matches":1}}} 
+{"data":{"elapsed_total":{"human":"0.007334s","nanos":7333949,"secs":0},"stats":{"bytes_printed":4872,"bytes_searched":96142,"elapsed":{"human":"0.000121s","nanos":121307,"secs":0},"matched_lines":16,"matches":16,"searches":9,"searches_with_match":9}},"type":"summary"} diff --git a/sample_data/smiths-standard.grep b/sample_data/smiths-standard.grep new file mode 100644 index 000000000..813ae14f0 --- /dev/null +++ b/sample_data/smiths-standard.grep @@ -0,0 +1,16 @@ +benchmark.csv:36:8/16/2018 5:15p,Michael Smith,BIRD160,"Parakeet, Blue (Melopsittacus undulatus)",1,29.95,$31.85 +benchmark.jsonl:36:{"Date": "8/16/2018 5:15p", "Customer": "Michael Smith", "SKU": "BIRD160", "Item": "Parakeet, Blue (Melopsittacus undulatus)", "Quantity": "1", "Unit": "29.95", "Paid": "$31.85"} +benchmark.lsv:266:Customer: Michael Smith +benchmark.xml:346: Michael Smith +benchmark.yml:239:- customer: Michael Smith +numeric-cols.tsv:16:2016-09-01 Central Smith Desk 2 125.00 250.00 +numeric-cols.tsv:22:2016-12-12 Central Smith Pencil 67 1.29 86.43 +numeric-cols.tsv:25:2017-02-01 Central Smith Binder 87 15.00 1305.00 +sample.tsv:17:2016-09-01 Central Smith Desk 2 125.00 250.00 +sample.tsv:23:2016-12-12 Central Smith Pencil 67 1.29 86.43 +sample.tsv:26:2017-02-01 Central Smith Binder 87 15.00 1305.00 +sample.vds:23:{"OrderDate": "2016-09-01", "Region": "Central", "Rep": "Smith", "Item": "Desk", "Units": "2", "Unit_Cost": "125.00", "Total": "250.00"} +sample.vds:29:{"OrderDate": "2016-12-12", "Region": "Central", "Rep": "Smith", "Item": "Pencil", "Units": "67", "Unit_Cost": "1.29", "Total": "86.43"} +sample.vds:32:{"OrderDate": "2017-02-01", "Region": "Central", "Rep": "Smith", "Item": "Binder", "Units": "87", "Unit_Cost": "15.00", "Total": "1305.00"} +sample.vds:94:{"Date": "2018-08-16 17:15", "Customer": "Michael Smith", "SKU": "BIRD160", "Item": "Parakeet, Blue (Melopsittacus undulatus)", "Quantity": "1", "Unit": "29.95", "Paid": "31.85"} +sunshinelist.html:194: Kevin Smith diff --git 
a/tests/golden/load-grep-json.grep b/tests/golden/load-grep-json.grep new file mode 100644 index 000000000..359dfb5ab --- /dev/null +++ b/tests/golden/load-grep-json.grep @@ -0,0 +1,16 @@ +{"file": "benchmark.csv", "line_no": 36, "text": "8/16/2018 5:15p,Michael Smith,BIRD160,\"Parakeet, Blue (Melopsittacus undulatus)\",1,29.95,$31.85\r"} +{"file": "benchmark.jsonl", "line_no": 36, "text": "{\"Date\": \"8/16/2018 5:15p\", \"Customer\": \"Michael Smith\", \"SKU\": \"BIRD160\", \"Item\": \"Parakeet, Blue (Melopsittacus undulatus)\", \"Quantity\": \"1\", \"Unit\": \"29.95\", \"Paid\": \"$31.85\"}"} +{"file": "benchmark.lsv", "line_no": 266, "text": "Customer: Michael Smith"} +{"file": "benchmark.xml", "line_no": 346, "text": " Michael Smith"} +{"file": "benchmark.yml", "line_no": 239, "text": "- customer: Michael Smith"} +{"file": "numeric-cols.tsv", "line_no": 16, "text": "2016-09-01\tCentral\tSmith\tDesk\t2\t125.00\t250.00"} +{"file": "numeric-cols.tsv", "line_no": 22, "text": "2016-12-12\tCentral\tSmith\tPencil\t67\t1.29\t86.43"} +{"file": "numeric-cols.tsv", "line_no": 25, "text": "2017-02-01\tCentral\tSmith\tBinder\t87\t15.00\t1305.00"} +{"file": "sample.tsv", "line_no": 17, "text": "2016-09-01\tCentral\tSmith\tDesk\t2\t125.00\t250.00"} +{"file": "sample.tsv", "line_no": 23, "text": "2016-12-12\tCentral\tSmith\tPencil\t67\t1.29\t86.43"} +{"file": "sample.tsv", "line_no": 26, "text": "2017-02-01\tCentral\tSmith\tBinder\t87\t15.00\t1305.00"} +{"file": "sample.vds", "line_no": 23, "text": "{\"OrderDate\": \"2016-09-01\", \"Region\": \"Central\", \"Rep\": \"Smith\", \"Item\": \"Desk\", \"Units\": \"2\", \"Unit_Cost\": \"125.00\", \"Total\": \"250.00\"}"} +{"file": "sample.vds", "line_no": 29, "text": "{\"OrderDate\": \"2016-12-12\", \"Region\": \"Central\", \"Rep\": \"Smith\", \"Item\": \"Pencil\", \"Units\": \"67\", \"Unit_Cost\": \"1.29\", \"Total\": \"86.43\"}"} +{"file": "sample.vds", "line_no": 32, "text": "{\"OrderDate\": \"2017-02-01\", \"Region\": 
\"Central\", \"Rep\": \"Smith\", \"Item\": \"Binder\", \"Units\": \"87\", \"Unit_Cost\": \"15.00\", \"Total\": \"1305.00\"}"} +{"file": "sample.vds", "line_no": 94, "text": "{\"Date\": \"2018-08-16 17:15\", \"Customer\": \"Michael Smith\", \"SKU\": \"BIRD160\", \"Item\": \"Parakeet, Blue (Melopsittacus undulatus)\", \"Quantity\": \"1\", \"Unit\": \"29.95\", \"Paid\": \"31.85\"}"} +{"file": "sunshinelist.html", "line_no": 194, "text": "\t\t\t \t\t\tKevin Smith"} diff --git a/tests/golden/load-grep-standard.grep b/tests/golden/load-grep-standard.grep new file mode 100644 index 000000000..c31d0e123 --- /dev/null +++ b/tests/golden/load-grep-standard.grep @@ -0,0 +1,16 @@ +{"file": "benchmark.csv", "line_no": 36, "text": "8/16/2018 5:15p,Michael Smith,BIRD160,\"Parakeet, Blue (Melopsittacus undulatus)\",1,29.95,$31.85"} +{"file": "benchmark.jsonl", "line_no": 36, "text": "{\"Date\": \"8/16/2018 5:15p\", \"Customer\": \"Michael Smith\", \"SKU\": \"BIRD160\", \"Item\": \"Parakeet, Blue (Melopsittacus undulatus)\", \"Quantity\": \"1\", \"Unit\": \"29.95\", \"Paid\": \"$31.85\"}"} +{"file": "benchmark.lsv", "line_no": 266, "text": "Customer: Michael Smith"} +{"file": "benchmark.xml", "line_no": 346, "text": " Michael Smith"} +{"file": "benchmark.yml", "line_no": 239, "text": "- customer: Michael Smith"} +{"file": "numeric-cols.tsv", "line_no": 16, "text": "2016-09-01\tCentral\tSmith\tDesk\t2\t125.00\t250.00"} +{"file": "numeric-cols.tsv", "line_no": 22, "text": "2016-12-12\tCentral\tSmith\tPencil\t67\t1.29\t86.43"} +{"file": "numeric-cols.tsv", "line_no": 25, "text": "2017-02-01\tCentral\tSmith\tBinder\t87\t15.00\t1305.00"} +{"file": "sample.tsv", "line_no": 17, "text": "2016-09-01\tCentral\tSmith\tDesk\t2\t125.00\t250.00"} +{"file": "sample.tsv", "line_no": 23, "text": "2016-12-12\tCentral\tSmith\tPencil\t67\t1.29\t86.43"} +{"file": "sample.tsv", "line_no": 26, "text": "2017-02-01\tCentral\tSmith\tBinder\t87\t15.00\t1305.00"} +{"file": "sample.vds", "line_no": 23, "text": 
"{\"OrderDate\": \"2016-09-01\", \"Region\": \"Central\", \"Rep\": \"Smith\", \"Item\": \"Desk\", \"Units\": \"2\", \"Unit_Cost\": \"125.00\", \"Total\": \"250.00\"}"} +{"file": "sample.vds", "line_no": 29, "text": "{\"OrderDate\": \"2016-12-12\", \"Region\": \"Central\", \"Rep\": \"Smith\", \"Item\": \"Pencil\", \"Units\": \"67\", \"Unit_Cost\": \"1.29\", \"Total\": \"86.43\"}"} +{"file": "sample.vds", "line_no": 32, "text": "{\"OrderDate\": \"2017-02-01\", \"Region\": \"Central\", \"Rep\": \"Smith\", \"Item\": \"Binder\", \"Units\": \"87\", \"Unit_Cost\": \"15.00\", \"Total\": \"1305.00\"}"} +{"file": "sample.vds", "line_no": 94, "text": "{\"Date\": \"2018-08-16 17:15\", \"Customer\": \"Michael Smith\", \"SKU\": \"BIRD160\", \"Item\": \"Parakeet, Blue (Melopsittacus undulatus)\", \"Quantity\": \"1\", \"Unit\": \"29.95\", \"Paid\": \"31.85\"}"} +{"file": "sunshinelist.html", "line_no": 194, "text": "\t\t\t \t\t\tKevin Smith"} diff --git a/tests/load-grep-json.vd b/tests/load-grep-json.vd new file mode 100644 index 000000000..43bd9d11c --- /dev/null +++ b/tests/load-grep-json.vd @@ -0,0 +1,2 @@ +sheet col row longname input keystrokes comment + open-file sample_data/smiths-json.grep o diff --git a/tests/load-grep-standard.vd b/tests/load-grep-standard.vd new file mode 100644 index 000000000..5e57098c7 --- /dev/null +++ b/tests/load-grep-standard.vd @@ -0,0 +1,2 @@ +sheet col row longname input keystrokes comment + open-file sample_data/smiths-standard.grep o diff --git a/visidata/guide.py b/visidata/guide.py index 91a2815f1..d42b0c268 100644 --- a/visidata/guide.py +++ b/visidata/guide.py @@ -65,6 +65,7 @@ # Specific use cases XsvGuide ("CSV/TSV and other text-delimited formats") +GrepSheet ("Load output of grep-like tools") # advanced usage and developers diff --git a/visidata/guides/GrepSheet.md b/visidata/guides/GrepSheet.md new file mode 100644 index 000000000..f07f58fb9 --- /dev/null +++ b/visidata/guides/GrepSheet.md @@ -0,0 +1,28 @@ +--- +sheettype: 
#!/usr/bin/python3
'''Loader for the output of grep-like line-search tools.

Typical usage:
    grep -H -n pat file1 file2 | vd -f grep
    rg --sort path --json pat file1 file2 | vd -f grep
    git grep -n class | vd -f grep

Three input shapes are accepted:
  - ripgrep 'rg --json' output (JSON Lines in the grep_printer format),
  - JSON Lines objects holding the keys 'file', 'line_no', and 'text'
    (this is also what save_grep writes),
  - plain text lines shaped like 'file:line_no:text' or 'file:text'.

Pressing ENTER on a row opens the file in the external $EDITOR at the matched
line.  Relative paths are resolved against options.grep_base_dir when set.
'''

from visidata import vd, VisiData, JsonSheet, ColumnAttr, Path, ENTER, AttrDict, ExpectedException, stacktrace, TypedExceptionWrapper
import base64
import json
import os
from os import linesep


@VisiData.api
def open_grep(vd, p):
    '''Return a GrepSheet for the path *p*, named after p.base_stem.'''
    return GrepSheet(p.base_stem, source=p)


@VisiData.api
def save_grep(vd, p, *vsheets):
    '''Save *vsheets* to *p* as JSON Lines, by delegating to vd.save_jsonl.'''
    vd.save_jsonl(p, *vsheets)


def _rg_text(obj):
    '''Return the string held by a ripgrep "arbitrary data" object, or None.

    ripgrep writes {"text": ...} for valid UTF-8 and {"bytes": <base64>}
    otherwise; undecodable bytes are replaced rather than raising.
    '''
    if not obj:
        return None
    if 'text' in obj:
        return obj['text']
    if 'bytes' in obj:
        return base64.b64decode(obj['bytes']).decode('utf-8', errors='replace')
    return None


def format_row(rowdict):
    '''Convert one decoded JSON object into a row, or return None to skip it.

    rowdict: a dict decoded from one input line.  It is not modified.
    Returns an AttrDict with the keys 'file', 'line_no', and 'text' for
    ripgrep 'match' records, an AttrDict copy of *rowdict* (with trailing line
    separators stripped from a string 'text') when it has a 'line_no' key, and
    None for anything else (e.g. ripgrep begin/end/summary records).
    '''
    # handle rows that are output of 'rg --json'
    if rowdict.get('type') == 'match':
        match_data = rowdict.get('data') or {}
        text = _rg_text(match_data.get('lines'))
        if text is not None:
            text = text.rstrip(linesep)
        return AttrDict({
            'file': _rg_text(match_data.get('path')),
            'line_no': match_data.get('line_number'),  # absent when rg does not report line numbers
            'text': text,
        })
    # handle a .grep file that was saved by visidata, or
    # ripgrep rows that were preprocessed by jq: 'rg --json |jq [...]'
    if 'line_no' in rowdict:
        d = dict(rowdict)  # copy, so the caller's dict is left untouched
        if isinstance(d.get('text'), str):
            d['text'] = d['text'].rstrip(linesep)
        return AttrDict(d)
    return None


def _parse_grep_lines(lines):
    '''Yield AttrDict rows parsed from plain grep output.

    Each line is first tried as 'file:line_no:text' (output of 'grep -n').
    The first line without a positive integer line_no followed by ':' switches
    the parser, for that line and every later one, to 'file:text' with
    line_no set to None.  The file name is everything before the first ':'.
    Raises ValueError for a line that contains no ':' at all.
    '''
    extract_line_no = True
    for L in lines:
        L = L.rstrip(linesep)
        sep1 = L.index(':')  # raises ValueError when there is no separator
        line_no = None
        text = L[sep1 + 1:]
        if extract_line_no:
            sep2 = L.find(':', sep1 + 1)
            try:
                n = int(L[sep1 + 1:sep2]) if sep2 != -1 else 0
            except ValueError:
                n = 0
            if n >= 1:
                line_no = n
                text = L[sep2 + 1:]
            else:
                # no usable line number: stop looking for one on later lines
                extract_line_no = False
        yield AttrDict({'file': L[:sep1],
                        'line_no': line_no,
                        'text': text})


class GrepSheet(JsonSheet):
    # The input file is normally in JSON Lines format, where each line describes a JSON object.
    # The JSON objects are either in the ripgrep grep_printer format:
    #    https://docs.rs/grep-printer/0.1.0/grep_printer/struct.JSON.html
    # or contain the keys 'file', 'line_no', and 'text'.
    # Input whose first non-blank line is not JSON is parsed as plain grep output instead.
    _rowtype = 'lines'  # rowdef: AttrDict

    columns = [
        ColumnAttr('file', type=str),
        ColumnAttr('line_no', type=int),
        ColumnAttr('text', type=str)
    ]
    nKeys = 2

    def iterload(self):
        '''Yield one row per match found in the source.

        Lines are read as JSON first.  If a line fails to parse before any
        JSON object has been read, the source is reopened and parsed as plain
        grep output.  A bad line after JSON has been seen yields an error row
        and loading continues.
        '''
        saw_json = False      # True once any line has decoded to a JSON object
        use_fallback = False
        with self.open_text_source() as fp:
            for L in fp:
                if not L.strip():  # skip blank lines (L still carries its newline)
                    continue
                try:
                    json_obj = json.loads(L)
                    if not isinstance(json_obj, dict):
                        vd.fail(f'line does not hold a JSON object: {L}')
                    saw_json = True
                    row = format_row(json_obj)
                except ValueError as e:
                    if saw_json:
                        e.stacktrace = stacktrace()
                        yield TypedExceptionWrapper(json.loads, L, exception=e)  # an error on one line
                        continue
                    use_fallback = True
                    break
                if not row:  # skip lines that do not contain match data
                    continue
                yield row

        if use_fallback:
            # If input is not JSON, parse it as output of 'grep -n':  file:line_no:text
            # If that does not parse, parse it as output of typical 'grep':  file:text
            with self.open_text_source() as fp:
                try:
                    yield from _parse_grep_lines(fp)
                except ValueError:
                    vd.fail('file is not grep output')

    def afterLoad(self):
        '''Show a status message when loading produced no rows.'''
        if self.nRows == 0:
            vd.status('no grep results found in input data')


@GrepSheet.api
def sysopen_row(sheet, row):
    '''Open the file named by *row* in the external editor, at the row's line.

    Does nothing on an empty sheet.  A relative path is joined onto
    options.grep_base_dir when that option is set.  Fails with a message when
    the row has no usable file name or the file does not exist.
    '''
    if sheet.nRows == 0:
        return
    given = getattr(row, 'file', None)
    if not isinstance(given, str):  # e.g. an error row, or a row with no path
        vd.fail(f'cannot open row: {given}')
    base_dir = vd.options.grep_base_dir
    if base_dir and not os.path.isabs(given):
        given = os.path.join(base_dir, given)
    p = Path(given)
    if not p.exists():
        vd.fail(f'cannot find file: {p.given}')
    # works for vim and emacsclient
    if row.line_no is not None:
        vd.launchEditor(p.given, f'+{row.line_no:d}')
    else:
        vd.launchEditor(p.given)


GrepSheet.addCommand(ENTER, 'sysopen-row', 'sysopen_row(cursorRow)', 'open current file in external $EDITOR, at the line')

vd.addGlobals({
    'GrepSheet': GrepSheet,
})
vd.option('grep_base_dir', None, 'base directory for relative paths opened with sysopen-row')