From 9011633db01ced79c1bd9db33fee42a96d660e72 Mon Sep 17 00:00:00 2001 From: Jud Dagnall <github@dagnall.net> Date: Tue, 8 Oct 2024 23:24:28 -0700 Subject: [PATCH 1/5] Add a new regex guide. --- visidata/guides/RegexGuide.md | 62 +++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 visidata/guides/RegexGuide.md diff --git a/visidata/guides/RegexGuide.md b/visidata/guides/RegexGuide.md new file mode 100644 index 000000000..bbd220f3e --- /dev/null +++ b/visidata/guides/RegexGuide.md @@ -0,0 +1,62 @@ +# Matching And Transforming Strings With Regexes + +Visidata has built-in support for Regular Expressions (regexes). There are many commands that enable you to search and transform your data using these patterns. + + +## Select Rows + +- {help.commands.select-col-regex} +- {help.commands.select-cols-regex} + +- {help.commands.unselect-col-regex} +- {help.commands.unselect-cols-regex} + +## Search + +### Column Name Search + +- {help.commands.go-col-regex} + +### Search Within Cells + +- {help.commands.search-col} +- {help.commands.search-cols} + +- {help.commands.searchr-col} +- {help.commands.searchr-cols} + +- {help.commands.search-next} +- {help.commands.searchr-next} + +### Search Within Key Column(s) + +- {help.commands.search-keys} + +This command limits searches only to the key columns + +## Substitution + +- {help.commands.setcol-regex-subst} +- {help.commands.setcol-regex-subst-all} + +Note that after starting these command, there are three steps: +1. Enter the search regex +2. Press Tab, and enter replacement string +3. Press Enter to activate the replacement + +Pressing enter after step 1 will immediately execute the replacement, REMOVING whatever was matched. 
+ +# Column Creation + +- {help.commands.addcol-regex-subst} + +- {help.commands.addcol-split} + +Example: split directory paths /foo/bar/baz.jpg + +- {help.commands.addcol-capture} + +Example: extract the values from 'key=value key2=val2' pairs + + + From a65c8513103d72c5ae922f12a0d0393559e59fa7 Mon Sep 17 00:00:00 2001 From: anjakefala <anja.kefala@gmail.com> Date: Thu, 10 Oct 2024 22:36:18 -0700 Subject: [PATCH 2/5] Clean up regex guide --- visidata/features/regex.py | 10 +++++----- visidata/guides/RegexGuide.md | 23 +++-------------------- 2 files changed, 8 insertions(+), 25 deletions(-) diff --git a/visidata/features/regex.py b/visidata/features/regex.py index 94983aceb..7fc965cc9 100644 --- a/visidata/features/regex.py +++ b/visidata/features/regex.py @@ -126,12 +126,12 @@ def inputRegexSubst(vd, prompt): after=dict(type='regex-replace', prompt='replace: ', help=prompt)) -Sheet.addCommand(':', 'addcol-split', 'addColumnAtCursor(RegexColumn(makeRegexSplitter, cursorCol, inputRegex("split regex: ", type="regex-split")))', 'Add column split by regex') -Sheet.addCommand(';', 'addcol-capture', 'addColumnAtCursor(RegexColumn(makeRegexMatcher, cursorCol, inputRegex("capture regex: ", type="regex-capture")))', 'Add column captured by regex') +Sheet.addCommand(':', 'addcol-split', 'addColumnAtCursor(RegexColumn(makeRegexSplitter, cursorCol, inputRegex("split regex: ", type="regex-split")))', 'add column split by regex') +Sheet.addCommand(';', 'addcol-capture', 'addColumnAtCursor(RegexColumn(makeRegexMatcher, cursorCol, inputRegex("capture regex: ", type="regex-capture")))', 'add column captured by regex') -Sheet.addCommand('*', 'addcol-regex-subst', 'addColumnAtCursor(Column(cursorCol.name + "_re", getter=regexTransform(cursorCol, **inputRegexSubst("regex transform column"))))', 'add column derived from current column, replacing regex with subst (may include \1 backrefs)') -Sheet.addCommand('g*', 'setcol-regex-subst', 'setValuesFromRegex([cursorCol], 
someSelectedRows, **inputRegexSubst("regex transform column"))', 'regex/subst - modify selected rows in current column, replacing regex with subst, (may include backreferences \\1 etc)') -Sheet.addCommand('gz*', 'setcol-regex-subst-all', 'setValuesFromRegex(visibleCols, someSelectedRows, **inputRegexSubst(f"regex transform {nVisibleCols} columns"))', 'modify selected rows in all visible columns, replacing regex with subst (may include \\1 backrefs)') +Sheet.addCommand('*', 'addcol-regex-subst', 'addColumnAtCursor(Column(cursorCol.name + "_re", getter=regexTransform(cursorCol, **inputRegexSubst("regex transform column"))))', 'add column derived from current column, replacing `search` regex with `replace` (may include \1 backrefs)') +Sheet.addCommand('g*', 'setcol-regex-subst', 'setValuesFromRegex([cursorCol], someSelectedRows, **inputRegexSubst("regex transform column"))', 'modify selected rows in current column, replacing `search` regex with `replace`, (may include backreferences \\1 etc)') +Sheet.addCommand('gz*', 'setcol-regex-subst-all', 'setValuesFromRegex(visibleCols, someSelectedRows, **inputRegexSubst(f"regex transform {nVisibleCols} columns"))', 'modify selected rows in all visible columns, replacing `search` regex with `replace` (may include \\1 backrefs)') vd.addMenuItems(''' diff --git a/visidata/guides/RegexGuide.md b/visidata/guides/RegexGuide.md index bbd220f3e..abb112715 100644 --- a/visidata/guides/RegexGuide.md +++ b/visidata/guides/RegexGuide.md @@ -1,7 +1,6 @@ # Matching And Transforming Strings With Regexes -Visidata has built-in support for Regular Expressions (regexes). There are many commands that enable you to search and transform your data using these patterns. - +Visidata has built-in support for using Regular Expressions as input for some commands. This includes many commands that enable you to search and transform your data using these patterns. 
## Select Rows @@ -13,12 +12,8 @@ Visidata has built-in support for Regular Expressions (regexes). There are many ## Search -### Column Name Search - - {help.commands.go-col-regex} -### Search Within Cells - - {help.commands.search-col} - {help.commands.search-cols} @@ -28,23 +23,15 @@ Visidata has built-in support for Regular Expressions (regexes). There are many - {help.commands.search-next} - {help.commands.searchr-next} -### Search Within Key Column(s) - - {help.commands.search-keys} -This command limits searches only to the key columns - ## Substitution - {help.commands.setcol-regex-subst} - {help.commands.setcol-regex-subst-all} -Note that after starting these command, there are three steps: -1. Enter the search regex -2. Press Tab, and enter replacement string -3. Press Enter to activate the replacement - -Pressing enter after step 1 will immediately execute the replacement, REMOVING whatever was matched. +Press `Tab` to move between `search` and `replace` inputs. +Only including a `search` will remove whatever was matched. # Column Creation @@ -52,11 +39,7 @@ Pressing enter after step 1 will immediately execute the replacement, REMOVING w - {help.commands.addcol-split} -Example: split directory paths /foo/bar/baz.jpg - - {help.commands.addcol-capture} -Example: extract the values from 'key=value key2=val2' pairs - From b6e5c942a62458e789b37ecd7dfc089ae0983ff6 Mon Sep 17 00:00:00 2001 From: anjakefala <anja.kefala@gmail.com> Date: Fri, 11 Oct 2024 19:38:49 -0700 Subject: [PATCH 3/5] Add examples --- visidata/guides/RegexGuide.md | 37 +++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/visidata/guides/RegexGuide.md b/visidata/guides/RegexGuide.md index abb112715..12dd4a1af 100644 --- a/visidata/guides/RegexGuide.md +++ b/visidata/guides/RegexGuide.md @@ -1,3 +1,6 @@ +--- +sheet: Sheet +--- # Matching And Transforming Strings With Regexes Visidata has built-in support for using Regular Expressions as input for some commands. 
This includes many commands that enable you to search and transform your data using these patterns. @@ -41,5 +44,39 @@ Only including a `search` will remove whatever was matched. - {help.commands.addcol-capture} +## Examples +Sample input sheet **sales**: + + date price + ---------- ----- + 2024-09-01 30 + 2024-09-02 28 + 2024-09-03 100 + +1. [:keys]:[/] (`addcol-split`) on **date** column, followed by `-` to split on `-` character. + + date date_re price + ---------- ---------------- ----- + 2024-09-01 [3] 2024; 09; 01 30 + 2024-09-02 [3] 2024; 09; 02 28 + 2024-09-03 [3] 2024; 09; 03 100 + +Note that the resulting `date_re` column is of type **List**. + +2. Press [:keys]([/] (`expand-col`) to expand it to multiple individual columns. + + date date_re[0] date_re[1] date_re[2] price + ---------- ---------- ---------- ---------- ----- + 2024-09-01 2024 09 01 30 + 2024-09-02 2024 09 02 28 + 2024-09-03 2024 09 03 100 + +3. On the **date** column, press [:keys]*[/] (`addcol-regex-subst`). Beside search type `-`. Press `Tab` and beside replace type `,` to replace all `-` with `,`. + + date date_re price + ---------- ---------- ----- + 2024-09-01 2024,09,01 30 + 2024-09-02 2024,09,02 28 + 2024-09-03 2024,09,03 100 From 41abe0b2652ad30ef6577de35aa83cf51ddeaf4e Mon Sep 17 00:00:00 2001 From: anjakefala <anja.kefala@gmail.com> Date: Fri, 11 Oct 2024 20:36:29 -0700 Subject: [PATCH 4/5] Add capture example; cleanup by Saul --- visidata/guides/RegexGuide.md | 41 ++++++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/visidata/guides/RegexGuide.md b/visidata/guides/RegexGuide.md index 12dd4a1af..2c03149e6 100644 --- a/visidata/guides/RegexGuide.md +++ b/visidata/guides/RegexGuide.md @@ -1,9 +1,9 @@ --- sheet: Sheet --- -# Matching And Transforming Strings With Regexes +# Matching and Transforming Strings with Regex -Visidata has built-in support for using Regular Expressions as input for some commands. 
This includes many commands that enable you to search and transform your data using these patterns. +Some commands for selecting, searching, and transforming data accept a regular expression as input. ## Select Rows @@ -33,19 +33,19 @@ Visidata has built-in support for using Regular Expressions as input for some co - {help.commands.setcol-regex-subst} - {help.commands.setcol-regex-subst-all} -Press `Tab` to move between `search` and `replace` inputs. -Only including a `search` will remove whatever was matched. +`Tab` to move between `search` and `replace` inputs. +An empty `replace` removes the matching string. # Column Creation - {help.commands.addcol-regex-subst} - - {help.commands.addcol-split} - - {help.commands.addcol-capture} ## Examples +### Split + Sample input sheet **sales**: date price @@ -54,7 +54,7 @@ Sample input sheet **sales**: 2024-09-02 28 2024-09-03 100 -1. [:keys]:[/] (`addcol-split`) on **date** column, followed by `-` to split on `-` character. +1. [:code]:[/] (`addcol-split`) on **date** column, followed by `-` to split on hyphens. date date_re price ---------- ---------------- ----- @@ -62,9 +62,9 @@ Sample input sheet **sales**: 2024-09-02 [3] 2024; 09; 02 28 2024-09-03 [3] 2024; 09; 03 100 -Note that the resulting `date_re` column is of type **List**. +Note that the results in the `date_re` column are lists of length 3. -2. Press [:keys]([/] (`expand-col`) to expand it to multiple individual columns. +2. [:code]([/] (`expand-col`) to expand a column with lists into multiple columns with the list elements. date date_re[0] date_re[1] date_re[2] price ---------- ---------- ---------- ---------- ----- @@ -72,7 +72,9 @@ Note that the resulting `date_re` column is of type **List**. 2024-09-02 2024 09 02 28 2024-09-03 2024 09 03 100 -3. On the **date** column, press [:keys]*[/] (`addcol-regex-subst`). Beside search type `-`. Press `Tab` and beside replace type `,` to replace all `-` with `,`. +### Substitution + +1. 
On the **date** column, [:code]*[/] (`addcol-regex-subst`) and type `-`, then `Tab` to "replace" and type `,`. Then `Enter` to replace all `-` with `,`. date date_re price ---------- ---------- ----- @@ -80,3 +82,22 @@ Note that the resulting `date_re` column is of type **List**. 2024-09-02 2024,09,02 28 2024-09-03 2024,09,03 100 +### Capture + +1. On the **date** column, [:code];[/] (`addcol-capture`) and type `(\d\d\d\d)` to capture and pull out the year. + + date date_re price + ---------- -------- ----- + 2024-09-01 [1] 2024 30 + 2024-09-02 [1] 2024 28 + 2024-09-03 [1] 2024 100 + +Note that the results in the `date_re` column are lists of length 1. + +2. [:code]([/] (`expand-col`) to expand a column with lists into multiple columns with the list elements. + + date date_re[0] price + ---------- ---------- ----- + 2024-09-01 2024 30 + 2024-09-02 2024 28 + 2024-09-03 2024 100 From fe2cb11ca178b89cbb1c2d0f54e652e2156e9c04 Mon Sep 17 00:00:00 2001 From: anjakefala <anja.kefala@gmail.com> Date: Fri, 11 Oct 2024 20:39:41 -0700 Subject: [PATCH 5/5] Missing escape char --- visidata/features/regex.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/visidata/features/regex.py b/visidata/features/regex.py index 7fc965cc9..3ec3990f0 100644 --- a/visidata/features/regex.py +++ b/visidata/features/regex.py @@ -129,7 +129,7 @@ def inputRegexSubst(vd, prompt): Sheet.addCommand(':', 'addcol-split', 'addColumnAtCursor(RegexColumn(makeRegexSplitter, cursorCol, inputRegex("split regex: ", type="regex-split")))', 'add column split by regex') Sheet.addCommand(';', 'addcol-capture', 'addColumnAtCursor(RegexColumn(makeRegexMatcher, cursorCol, inputRegex("capture regex: ", type="regex-capture")))', 'add column captured by regex') -Sheet.addCommand('*', 'addcol-regex-subst', 'addColumnAtCursor(Column(cursorCol.name + "_re", getter=regexTransform(cursorCol, **inputRegexSubst("regex transform column"))))', 'add column derived from current column, replacing `search` regex 
with `replace` (may include \1 backrefs)') +Sheet.addCommand('*', 'addcol-regex-subst', 'addColumnAtCursor(Column(cursorCol.name + "_re", getter=regexTransform(cursorCol, **inputRegexSubst("regex transform column"))))', 'add column derived from current column, replacing `search` regex with `replace` (may include \\1 backrefs)') Sheet.addCommand('g*', 'setcol-regex-subst', 'setValuesFromRegex([cursorCol], someSelectedRows, **inputRegexSubst("regex transform column"))', 'modify selected rows in current column, replacing `search` regex with `replace`, (may include backreferences \\1 etc)') Sheet.addCommand('gz*', 'setcol-regex-subst-all', 'setValuesFromRegex(visibleCols, someSelectedRows, **inputRegexSubst(f"regex transform {nVisibleCols} columns"))', 'modify selected rows in all visible columns, replacing `search` regex with `replace` (may include \\1 backrefs)')