From d44df91a4be229dad5090186c21daed9cc6f5294 Mon Sep 17 00:00:00 2001 From: jburnett Date: Mon, 25 Sep 2023 13:18:41 -0400 Subject: [PATCH 01/23] Add pta link to readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d2611e6..f44d65f 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # pta-parser -A Plain Text Accounting parser built in [Rust](https://www.rust-lang.org/) with [Pest](https://pest.rs/) +A [Plain Text Accounting](https://plaintextaccounting.org/) parser built in [Rust](https://www.rust-lang.org/) with [Pest](https://pest.rs/) ## Copyright Notice From 379c63372027300af57ae0ccb0f48444dcf04b73 Mon Sep 17 00:00:00 2001 From: jburnett Date: Mon, 25 Sep 2023 13:18:55 -0400 Subject: [PATCH 02/23] create CI workflow --- .github/workflows/ci.yml | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 .github/workflows/ci.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..66de1e3 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,22 @@ +name: CI + +on: + push: + branches: [ "main", "dev" ] + pull_request: + branches: [ "main" ] + +env: + CARGO_TERM_COLOR: always + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - name: Build + run: cargo build --verbose + - name: Run tests + run: cargo test --verbose \ No newline at end of file From a4d23c442f11b8588748586855d5fd83c25d0aab Mon Sep 17 00:00:00 2001 From: jburnett Date: Mon, 25 Sep 2023 13:38:57 -0400 Subject: [PATCH 03/23] Initial code migrated from other project --- Cargo.toml | 5 + pta-parser/Cargo.toml | 15 + pta-parser/src/grammars/ledger.pest | 113 ++++++++ pta-parser/src/lib.rs | 407 ++++++++++++++++++++++++++++ 4 files changed, 540 insertions(+) create mode 100644 Cargo.toml create mode 100644 pta-parser/Cargo.toml create mode 100644 pta-parser/src/grammars/ledger.pest create mode 100644 pta-parser/src/lib.rs diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..8fb0c02 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,5 @@ +[workspace] +resolver = "2" +members = [ + "pta-parser" +] diff --git a/pta-parser/Cargo.toml b/pta-parser/Cargo.toml new file mode 100644 index 0000000..9ba78a5 --- /dev/null +++ b/pta-parser/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "pta-parser" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[lib] +bench = false + +[dependencies] +pest = "2.7.3" +pest_derive = "2.7.3" + +[dev-dependencies] +rstest = "0.18.2" diff --git a/pta-parser/src/grammars/ledger.pest b/pta-parser/src/grammars/ledger.pest new file mode 100644 index 0000000..e2994d1 --- /dev/null +++ b/pta-parser/src/grammars/ledger.pest @@ -0,0 +1,113 @@ +// Copyright (C) 2023, AltaModa Technologies, LLC. All rights reserved. +// +// This project is licensed under +// Pest's built-in rules: +// ASCII_ALPHA_LOWER = { 'a'..'z' } +// ASCII_ALPHA_UPPER = { 'A'..'Z' } +// ASCII_ALPHA = { ASCII_ALPHA_LOWER | ASCII_ALPHA_UPPER } +// ASCII_DIGIT = { '0'..'9' } +// ASCII_ALPHANUMERIC = { ASCII_ALPHA | ASCII-DIGIT } +// +// Avoid using WHITE_SPACE which targets [unicode](https://www.unicode.org/reports/tr31/#R3a) +// + +WHITESPACE = _{ " " | "\t" } + +// constants +acct_separator = { ":" } +comment_token = { ";" } + +// TODO: need to handle escaped semi-colon? +// TODO: consider whether comment must be preceded by whitespace (except at beginning of line) +// a comment +comment = { comment_token ~ (!NEWLINE ~ ANY)* ~ NEWLINE } +comment_or_newline = { (WHITESPACE+ ~ comment) | (WHITESPACE* ~ NEWLINE) } + +// Each acct token must begin with alpha and may be followed by any number of alpha or number +// Full account descriptors are comprised of colon-separated account names. The top-level +// account name must begin with an alpha char, but subaccounts may begin with alphanumeric. +top_level_acct = @{ ASCII_ALPHA ~ ASCII_ALPHANUMERIC* } +sub_acct = @{ acct_separator ~ ASCII_ALPHANUMERIC+ } + +// The full acct descriptor must be one or more acct tokens, each separated by a colon +acct_descriptor = @{ top_level_acct ~ (sub_acct)* } + +decimal_value = @{ (("-" ~ NUMBER+) | NUMBER+) ~ "." ~ NUMBER+ } + +iso8601_year = { ASCII_DIGIT{4} } +iso8601_month = @{ ( "0" ~ ASCII_NONZERO_DIGIT) | ("1" ~ '0'..'2') } +iso8601_day = @{ ("30" | "31") | ("0" ~ ASCII_NONZERO_DIGIT) | ('1'..'2' ~ ASCII_DIGIT) } +iso8601_date_extended = @{ iso8601_year ~ "-" ~ iso8601_month ~ "-" ~ iso8601_day } // YYYY-MM-DD + + +// TODO: consider more lax indent rules +posting_indent = { "\t" | " "{2} } +posting_basic = @{ + posting_indent + ~ acct_descriptor + ~ WHITESPACE+ ~ decimal_value + ~ comment_or_newline +} + +// TODO: improve on 'text' to allow more in description +trans_description_text = { (ASCII_ALPHANUMERIC | WHITESPACE)+ } +// TODO: full set of annotation options +trans_annotation = { "*" | "!" } +trans_description = { "\"" ~ (ASCII_ALPHANUMERIC+ | WHITESPACE)+ ~ "\"" } + +// TODO: how to ensure col 0 / no ws for header row +trans_header = @{ + iso8601_date_extended + ~ WHITESPACE+ + ~ trans_annotation + ~ WHITESPACE+ + ~ trans_description +} + +transaction_block = { trans_header ~ posting_basic+ } + + +currency = { ASCII_ALPHA_UPPER{3} } +commodity = { ASCII_ALPHA+ } // TODO: should commodity allow numbers? +options = { "operating_currency" } + +// TODO: open works but is incomplete +// YYYY-MM-DD open Account [ConstraintCurrency,...] ["BookingMethod"] +directive_open = @{ + iso8601_date_extended + ~ WHITESPACE+ ~ "open" + ~ WHITESPACE+ ~ acct_descriptor + ~ comment_or_newline +} +// YYYY-MM-DD close Account +directive_close = @{ + iso8601_date_extended + ~ WHITESPACE+ ~ "close" + ~ WHITESPACE+ ~ acct_descriptor + ~ comment_or_newline +} +// YYYY-MM-DD commodity Currency +directive_commodity = @{ + iso8601_date_extended + ~ WHITESPACE+ ~ commodity + ~ WHITESPACE+ ~ currency + ~ comment_or_newline +} +// YYYY-MM-DD balance Account Amount +balance_directive = @{ + iso8601_date_extended + ~ WHITESPACE+ ~ "balance" + ~ WHITESPACE+ ~ acct_descriptor + ~ WHITESPACE+ ~ decimal_value + ~ WHITESPACE+ ~ currency + ~ comment_or_newline +} + +// YYYY-MM-DD pad Account AccountPad +// YYYY-MM-DD note Account Description +// YYYY-MM-DD document Account PathToDocument +// YYYY-MM-DD price Commodity Price +// YYYY-MM-DD event Name Value +// option Name Value +// plugin ModuleName StringConfig +// include Filename diff --git a/pta-parser/src/lib.rs b/pta-parser/src/lib.rs new file mode 100644 index 0000000..425c0e1 --- /dev/null +++ b/pta-parser/src/lib.rs @@ -0,0 +1,407 @@ +extern crate pest; +#[macro_use] +extern crate pest_derive; + + +mod ledger { + + #[derive(Parser)] + #[grammar = "./grammars/ledger.pest"] + pub struct LedgerParser; + +} + + +#[cfg(test)] +mod parser_tests { + use super::*; + + use pest::{Parser, iterators::Pairs}; + use ledger::{LedgerParser, Rule}; + + use rstest::rstest; + + + mod acct_desc { + use super::*; + + #[rstest] + #[case ("a")] + #[case ("a1")] + #[case ("a:a")] + #[case ("a1:a")] + #[case ("a1:a1")] + #[case ("a:123")] // subaccts beginning w/number + #[case ("a1:sub:123")] + #[case ("asset")] + #[case ("asset:property")] + #[case ("asset:property:real")] + fn can_parse_acct_descriptor(#[case] acct_desc: &str) { + + let pairs = LedgerParser::parse( + Rule::acct_descriptor, acct_desc) + .unwrap_or_else(|e| panic!("{}", e)); + + // Parsing succeeded; ensure at least 1 pair was returned + assert!(pairs.len() > 0); + } + + + #[rstest] + // NOTE: invalid lead char in first acct segment ("1bad") fails top_level_acct rule & is included in verify_top_level_acct_parsing_error cases. + #[case ("a1:b@d")] + #[case ("bad1:")] // invalid: ends with acct descriptor separator (colon) + #[should_panic(expected = "expected acct_descriptor")] + #[ignore = "unexpectedly parses without error"] + fn verify_acct_descriptor_parsing_error(#[case] bad_acct_desc: &str) { + + LedgerParser::parse( + Rule::acct_descriptor, bad_acct_desc) + .unwrap_or_else(|e| panic!("{}", e)); + + // should never reach this code since all cases should result in panic + println!("Test case '{}' should fail to parse!", bad_acct_desc); + assert!(false); + } + + + + #[rstest] + #[case ("1")] // invalid: number as lead char + #[case ("1b")] + #[case ("1-b")] // invalid: non-alphanumeric char + #[case ("1b-")] + #[should_panic(expected = "expected top_level_acct")] + fn verify_top_level_acct_parsing_error(#[case] bad_top_level_acct: &str) { + + LedgerParser::parse( + Rule::top_level_acct, bad_top_level_acct) + .unwrap_or_else(|e| panic!("{}", e)); + + // should never reach this code since all cases should result in panic + println!("Test case '{}' should fail to parse!", bad_top_level_acct); + assert!(false); + + } + } + + + + mod decimal { + use super::*; + + + #[rstest] + #[case ("0.00000001")] + #[case ("1.23")] + #[case ("123.456")] + #[case ("-123.456789012")] // negative values + #[case ("-0.00000001")] + fn can_parse_decimal_value(#[case] dec: &str) { + + let pairs = LedgerParser::parse( + Rule::decimal_value, dec) + .unwrap_or_else(|e| panic!("{}", e)); + + // Parsing succeeded; ensure at least 1 pair was returned + assert!(pairs.len() > 0); + } + + + #[rstest] + #[case ("0.")] // fractional segment missing + #[case ("-0.")] + #[case ("123")] + #[case ("-123")] + #[case (".12")] // whole segment missing + #[case ("-.12")] + + #[should_panic(expected = "expected decimal_value")] + fn verify_decimal_value_error(#[case] bad_dec: &str) { + + LedgerParser::parse( + Rule::decimal_value, bad_dec) + .unwrap_or_else(|e| panic!("{}", e)); + + // should never reach this code since all cases should result in panic + println!("Test case '{}' should fail to parse!", bad_dec); + assert!(false); + } + + } + + + + mod iso8601 { + use super::*; + + #[rstest] + #[case ("1900-01-01")] + #[case ("2015-12-31")] + fn can_parse_iso8601_date_extended(#[case] year: &str) { + + let pairs = LedgerParser::parse( + Rule::iso8601_date_extended, year) + .unwrap_or_else(|e| panic!("{}", e)); + + // Parsing succeeded; ensure at least 1 pair was returned + assert!(pairs.len() > 0); + } + + + #[rstest] + #[case ("000-01-01")] // Year out of range + #[case ("99990-01-01")] + #[case ("01-01")] // year segment missing + + #[case ("1999")] // month segment missing + #[case ("1999-")] + #[case ("0000-00-01")] // Month out of range + #[case ("0000-13-01")] + + #[case ("1999-12")] // day segment missing + #[case ("1999-12-")] + #[case ("0000-01-00")] // Day out of range + #[case ("0000-01-32")] + + #[case ("000o-01-01")] // Invalid chars + #[case ("1999-0x-12")] + #[case ("1999-12-0x")] + + #[case ("1999 12-01")] // whitespace (ensure atomic rule modifier is used) + #[case ("1999-12 01")] + #[case (" 1999-12-01")] // leading space (reqs additional rule) + #[should_panic(expected = "expected iso8601_")] // matches errors from multiple iso8601 rules + fn verify_iso8601_date_extended_error(#[case] bad_date: &str) { + + LedgerParser::parse( + Rule::iso8601_date_extended, bad_date) + .unwrap_or_else(|e| panic!("{}", e)); + + // should never reach this code since all cases should result in panic + println!("Test case '{}' should fail to parse!", bad_date); + assert!(false); + } + } + + + mod posting { + use super::*; + + #[rstest] + #[case (" Assets:subacct1 1.0000")] + #[case ("\tEquity \t -1.0000")] + fn can_parse_posting_basic(#[case] base: &str) { + + // NOTE: addons must end in \n to match rules + let addons = [ + "\n" + ," \n" + ,"\t\n" + ," ; comment 123 ; \n" + ,"\t;\tcomment 123 ;\t\n" + ]; + + for suffix in addons.iter() { + + let tc = format!("{}{}", base, suffix); + println!("Test case: {}", tc); + + assert!(get_pairs(Rule::posting_basic, &tc).len() > 0); + } + } + + + #[rstest] + #[case (" Assets:subacct1 1.0000")] // Too many leading spaces or tabs + #[case (" \tEquity \t -1.0000")] + #[case ("\t Equity \t -1.0000")] + + #[should_panic(expected = "expected posting_basic")] // matches errors from multiple iso8601 rules + fn verify_posting_basic_error(#[case] bad_date: &str) { + + LedgerParser::parse( + Rule::posting_basic, bad_date) + .unwrap_or_else(|e| panic!("{}", e)); + + // should never reach this code since all cases should result in panic + println!("Test case '{}' should fail to parse!", bad_date); + assert!(false); + } + } + + + mod trans_header { + use super::*; + + + #[rstest] + // NOTE: use simple text in case; test function wraps in dbl quotes + #[case ("a")] + #[case ("description")] + #[case (" a description ")] + #[case ("\ta description\twith tabs ")] + fn can_parse_trans_descr(#[case] descr: &str) { + + let quoted_descr = format!("\"{}\"", descr); + let pairs = LedgerParser::parse( + Rule::trans_description, "ed_descr) + .unwrap_or_else(|e| panic!("{}", e)); + + // Parsing succeeded; ensure at least 1 pair was returned + assert!(pairs.len() > 0); + } + + + #[rstest] + // NOTE: use simple text in case; test function wraps in dbl quotes + #[case ("")] // empty - no text + #[case (" ")] // empty - only ws + #[case ("\ta description\twith tabs and\n a newline")] // newline is invalid + #[should_panic(expected = "expected trans_")] + fn verify_trans_descr_error(#[case] bad_descr: &str) { + + let quoted_bad_descr = format!("\"{}\"", bad_descr); + LedgerParser::parse( + Rule::trans_description, "ed_bad_descr) + .unwrap_or_else(|e| panic!("{}", e)); + + // should never reach this code since all cases should result in panic + println!("Test case '{}' should fail to parse!", quoted_bad_descr); + assert!(false); + } + + + + #[rstest] + #[case ("2009-01-09 ! \"Bitcoin launch date\"")] + #[case ("2010-01-09 * \"multi whitespace test\"")] + #[case ("2011-01-09\t! \"tab test\"")] + #[case ("2012-01-09 * \"trailing tab test\"\t")] + #[case ("2013-01-09 ! \"trailing spaces test\" ")] + #[case ("2014-01-09 ! \"trailing tabs and spaces test\" \t \t\t ")] + // #[ignore = "TBD: handle special chars in transaction description"] + // #[case ("2009-01-09 ! \"Special chars in description: !@#$%^&*()-_=+\"")] + fn can_parse_trans_header(#[case] base: &str) { + + // NOTE: addons must end in \n to match rules + let addons = [ + "\n" + ," \n" + ,"\t\n" + ," ; comment 123 ; \n" + ,"\t;\tcomment 123 ;\t\n" + ]; + + for suffix in addons.iter() { + + let tc = format!("{}{}", base, suffix); + println!("Test case: {}", tc); + + assert!(get_pairs(Rule::trans_header, &tc).len() > 0); + } + + } + + #[rstest] + #[case ("2016-01-28 * \"comment after description w/o whitespace\"; 10:01 am, xfer id 56aa57787199a73d29000650\n")] + #[should_panic(expected = "expected trans_")] + fn verify_trans_header_error(#[case] bad_hdr: &str) { + + let quoted_bad_descr = format!("\"{}\"", bad_hdr); + LedgerParser::parse( + Rule::trans_header, "ed_bad_descr) + .unwrap_or_else(|e| panic!("{}", e)); + + // should never reach this code since all cases should result in panic + println!("Test case '{}' should fail to parse!", quoted_bad_descr); + assert!(false); + } + + + } + + + mod trans_block { + use super::*; + + + // An example beancount transaction + // 2016-01-28 * " Buy BTC" ; 10:01 am, xfer id 56aa57787199a73d29000650 + // Assets:Exchanges:Coinbase 1.03683606 BTC { 381.9697397 USD, 2016-01-28 } + // Assets:Bank:AllyChk -400.00 USD ; verified w/register + // Liabilities:Fees:Coinbase 3.96 USD + // Liabilities:Fees:Adjustment 0.00000005 USD + + #[rstest] + #[ignore = "wip"] + #[case ("2009-01-09 ! \"Bitcoin launch date\" + assets:subacct1 1.0000 + equity -1.0000 + ")] + fn can_parse_trans_block(#[case] tblock: &str) { + + let quoted_descr = format!("\"{}\"", tblock); + let pairs = LedgerParser::parse( + Rule::trans_description, "ed_descr) + .unwrap_or_else(|e| panic!("{}", e)); + + // Parsing succeeded; ensure at least 1 pair was returned + assert!(pairs.len() > 0); + } + + + + } + + + + mod directives { + use super::*; + + // YYYY-MM-DD open Account [ConstraintCurrency,...] ["BookingMethod"] + + #[rstest] + #[case (Rule::directive_open, "2001-09-11 open assets")] + #[case (Rule::directive_open, "2001-09-11 open assets:cash")] + #[case (Rule::directive_open, "2001-09-11 open Assets1:cash2:3petty")] + #[case (Rule::directive_close, "2001-09-11 close assets")] + #[case (Rule::directive_close, "2001-09-11 close assets1:2cash:3petty")] + #[case (Rule::directive_commodity, "2001-09-11 thing USD")] + #[case (Rule::balance_directive, "2001-09-11 balance assets 123.456 USD")] + #[case (Rule::balance_directive, "2001-09-11 balance assets1:2cash -0.456 USD")] + fn can_parse_misc_directive(#[case] r: Rule, #[case] base: &str) { + + // NOTE: addons must end in \n to match rules + let addons = [ + "\n" + ," \n" + ,"\t\n" + ," ; comment 123 ; \n" + ,"\t;\tcomment 123 ;\t\n" + ]; + + for suffix in addons.iter() { + + let tc = format!("{}{}", base, suffix); + println!("Test case: {}", tc); + + assert!(get_pairs(r, &tc).len() > 0); + } + } + + } + + + fn get_pairs(r: Rule, content: &str) -> Pairs<'_, Rule> { + let x = LedgerParser::parse( + r, + + content) + .unwrap_or_else(|e| panic!("{}", e)); + + return x; + } + + +} From 54b9b652b4fa80829c951c94e7860a4035777271 Mon Sep 17 00:00:00 2001 From: jburnett Date: Mon, 25 Sep 2023 14:26:15 -0400 Subject: [PATCH 04/23] Rm'd extraneous ledger module; minor edits --- pta-parser/src/grammars/ledger.pest | 14 +- pta-parser/src/ledger_parser.rs | 402 +++++++++++++++++++++++++++ pta-parser/src/lib.rs | 409 +--------------------------- 3 files changed, 419 insertions(+), 406 deletions(-) create mode 100644 pta-parser/src/ledger_parser.rs diff --git a/pta-parser/src/grammars/ledger.pest b/pta-parser/src/grammars/ledger.pest index e2994d1..0eb3ead 100644 --- a/pta-parser/src/grammars/ledger.pest +++ b/pta-parser/src/grammars/ledger.pest @@ -1,6 +1,7 @@ // Copyright (C) 2023, AltaModa Technologies, LLC. All rights reserved. // -// This project is licensed under +// This project is licensed under the terms of the MIT license (cf. LICENSE file in root). +// // Pest's built-in rules: // ASCII_ALPHA_LOWER = { 'a'..'z' } // ASCII_ALPHA_UPPER = { 'A'..'Z' } @@ -50,10 +51,10 @@ posting_basic = @{ } // TODO: improve on 'text' to allow more in description -trans_description_text = { (ASCII_ALPHANUMERIC | WHITESPACE)+ } -// TODO: full set of annotation options +trans_description_text = { (ASCII_ALPHANUMERIC+ | WHITESPACE)+ } +// TODO: is this the full set of annotation options? trans_annotation = { "*" | "!" } -trans_description = { "\"" ~ (ASCII_ALPHANUMERIC+ | WHITESPACE)+ ~ "\"" } +trans_description = { "\"" ~ trans_description_text ~ "\"" } // TODO: how to ensure col 0 / no ws for header row trans_header = @{ @@ -103,6 +104,7 @@ balance_directive = @{ ~ comment_or_newline } +// TODO: other directives to implement // YYYY-MM-DD pad Account AccountPad // YYYY-MM-DD note Account Description // YYYY-MM-DD document Account PathToDocument @@ -111,3 +113,7 @@ balance_directive = @{ // option Name Value // plugin ModuleName StringConfig // include Filename + +directives = { balance_directive | directive_close | directive_commodity | directive_open } + +ledger = { SOI ~ (options | directives | transaction_block)+ ~ EOI } \ No newline at end of file diff --git a/pta-parser/src/ledger_parser.rs b/pta-parser/src/ledger_parser.rs new file mode 100644 index 0000000..f4275f4 --- /dev/null +++ b/pta-parser/src/ledger_parser.rs @@ -0,0 +1,402 @@ +// Copyright (C) 2023, AltaModa Technologies, LLC. All rights reserved. +// +// This project is licensed under the terms of the MIT license (cf. LICENSE file in root). +// + + +#[derive(Parser)] +#[grammar = "./grammars/ledger.pest"] +pub struct LedgerParser; + + +#[cfg(test)] +mod parser_tests { + + use super::*; + use pest::{Parser, iterators::Pairs}; + use rstest::rstest; + + + mod acct_desc { + use super::*; + + #[rstest] + #[case ("a")] + #[case ("a1")] + #[case ("a:a")] + #[case ("a1:a")] + #[case ("a1:a1")] + #[case ("a:123")] // subaccts beginning w/number + #[case ("a1:sub:123")] + #[case ("asset")] + #[case ("asset:property")] + #[case ("asset:property:real")] + fn can_parse_acct_descriptor(#[case] acct_desc: &str) { + + let pairs = LedgerParser::parse( + Rule::acct_descriptor, acct_desc) + .unwrap_or_else(|e| panic!("{}", e)); + + // Parsing succeeded; ensure at least 1 pair was returned + assert!(pairs.len() > 0); + } + + + #[rstest] + // NOTE: invalid lead char in first acct segment ("1bad") fails top_level_acct rule & is included in verify_top_level_acct_parsing_error cases. + #[case ("a1:b@d")] + #[case ("bad1:")] // invalid: ends with acct descriptor separator (colon) + #[should_panic(expected = "expected acct_descriptor")] + #[ignore = "unexpectedly parses without error"] + fn verify_acct_descriptor_parsing_error(#[case] bad_acct_desc: &str) { + + LedgerParser::parse( + Rule::acct_descriptor, bad_acct_desc) + .unwrap_or_else(|e| panic!("{}", e)); + + // should never reach this code since all cases should result in panic + println!("Test case '{}' should fail to parse!", bad_acct_desc); + assert!(false); + } + + + + #[rstest] + #[case ("1")] // invalid: number as lead char + #[case ("1b")] + #[case ("1-b")] // invalid: non-alphanumeric char + #[case ("1b-")] + #[should_panic(expected = "expected top_level_acct")] + fn verify_top_level_acct_parsing_error(#[case] bad_top_level_acct: &str) { + + LedgerParser::parse( + Rule::top_level_acct, bad_top_level_acct) + .unwrap_or_else(|e| panic!("{}", e)); + + // should never reach this code since all cases should result in panic + println!("Test case '{}' should fail to parse!", bad_top_level_acct); + assert!(false); + + } + } + + + + mod decimal { + use super::*; + + + #[rstest] + #[case ("0.00000001")] + #[case ("1.23")] + #[case ("123.456")] + #[case ("-123.456789012")] // negative values + #[case ("-0.00000001")] + fn can_parse_decimal_value(#[case] dec: &str) { + + let pairs = LedgerParser::parse( + Rule::decimal_value, dec) + .unwrap_or_else(|e| panic!("{}", e)); + + // Parsing succeeded; ensure at least 1 pair was returned + assert!(pairs.len() > 0); + } + + + #[rstest] + #[case ("0.")] // fractional segment missing + #[case ("-0.")] + #[case ("123")] + #[case ("-123")] + #[case (".12")] // whole segment missing + #[case ("-.12")] + + #[should_panic(expected = "expected decimal_value")] + fn verify_decimal_value_error(#[case] bad_dec: &str) { + + LedgerParser::parse( + Rule::decimal_value, bad_dec) + .unwrap_or_else(|e| panic!("{}", e)); + + // should never reach this code since all cases should result in panic + println!("Test case '{}' should fail to parse!", bad_dec); + assert!(false); + } + + } + + + + mod iso8601 { + use super::*; + + #[rstest] + #[case ("1900-01-01")] + #[case ("2015-12-31")] + fn can_parse_iso8601_date_extended(#[case] year: &str) { + + let pairs = LedgerParser::parse( + Rule::iso8601_date_extended, year) + .unwrap_or_else(|e| panic!("{}", e)); + + // Parsing succeeded; ensure at least 1 pair was returned + assert!(pairs.len() > 0); + } + + + #[rstest] + #[case ("000-01-01")] // Year out of range + #[case ("99990-01-01")] + #[case ("01-01")] // year segment missing + + #[case ("1999")] // month segment missing + #[case ("1999-")] + #[case ("0000-00-01")] // Month out of range + #[case ("0000-13-01")] + + #[case ("1999-12")] // day segment missing + #[case ("1999-12-")] + #[case ("0000-01-00")] // Day out of range + #[case ("0000-01-32")] + + #[case ("000o-01-01")] // Invalid chars + #[case ("1999-0x-12")] + #[case ("1999-12-0x")] + + #[case ("1999 12-01")] // whitespace (ensure atomic rule modifier is used) + #[case ("1999-12 01")] + #[case (" 1999-12-01")] // leading space (reqs additional rule) + #[should_panic(expected = "expected iso8601_")] // matches errors from multiple iso8601 rules + fn verify_iso8601_date_extended_error(#[case] bad_date: &str) { + + LedgerParser::parse( + Rule::iso8601_date_extended, bad_date) + .unwrap_or_else(|e| panic!("{}", e)); + + // should never reach this code since all cases should result in panic + println!("Test case '{}' should fail to parse!", bad_date); + assert!(false); + } + } + + + mod posting { + use super::*; + + #[rstest] + #[case (" Assets:subacct1 1.0000")] + #[case ("\tEquity \t -1.0000")] + fn can_parse_posting_basic(#[case] base: &str) { + + // NOTE: addons must end in \n to match rules + let addons = [ + "\n" + ," \n" + ,"\t\n" + ," ; comment 123 ; \n" + ,"\t;\tcomment 123 ;\t\n" + ]; + + for suffix in addons.iter() { + + let tc = format!("{}{}", base, suffix); + println!("Test case: {}", tc); + + assert!(get_pairs(Rule::posting_basic, &tc).len() > 0); + } + } + + + #[rstest] + #[case (" Assets:subacct1 1.0000")] // Too many leading spaces or tabs + #[case (" \tEquity \t -1.0000")] + #[case ("\t Equity \t -1.0000")] + + #[should_panic(expected = "expected posting_basic")] // matches errors from multiple iso8601 rules + fn verify_posting_basic_error(#[case] bad_date: &str) { + + LedgerParser::parse( + Rule::posting_basic, bad_date) + .unwrap_or_else(|e| panic!("{}", e)); + + // should never reach this code since all cases should result in panic + println!("Test case '{}' should fail to parse!", bad_date); + assert!(false); + } + } + + + mod trans_header { + use super::*; + + + #[rstest] + // NOTE: use simple text in case; test function wraps in dbl quotes + #[case ("a")] + #[case ("description")] + #[case (" a description ")] + #[case ("\ta description\twith tabs ")] + fn can_parse_trans_descr(#[case] descr: &str) { + + let quoted_descr = format!("\"{}\"", descr); + let pairs = LedgerParser::parse( + Rule::trans_description, "ed_descr) + .unwrap_or_else(|e| panic!("{}", e)); + + // Parsing succeeded; ensure at least 1 pair was returned + assert!(pairs.len() > 0); + } + + + #[rstest] + // NOTE: use simple text in case; test function wraps in dbl quotes + #[case ("")] // empty - no text + #[case (" ")] // empty - only ws + #[case ("\ta description\twith tabs and\n a newline")] // newline is invalid + #[should_panic(expected = "expected trans_")] + fn verify_trans_descr_error(#[case] bad_descr: &str) { + + let quoted_bad_descr = format!("\"{}\"", bad_descr); + LedgerParser::parse( + Rule::trans_description, "ed_bad_descr) + .unwrap_or_else(|e| panic!("{}", e)); + + // should never reach this code since all cases should result in panic + println!("Test case '{}' should fail to parse!", quoted_bad_descr); + assert!(false); + } + + + + #[rstest] + #[case ("2009-01-09 ! \"Bitcoin launch date\"")] + #[case ("2010-01-09 * \"multi whitespace test\"")] + #[case ("2011-01-09\t! \"tab test\"")] + #[case ("2012-01-09 * \"trailing tab test\"\t")] + #[case ("2013-01-09 ! \"trailing spaces test\" ")] + #[case ("2014-01-09 ! \"trailing tabs and spaces test\" \t \t\t ")] + // #[ignore = "TBD: handle special chars in transaction description"] + // #[case ("2009-01-09 ! \"Special chars in description: !@#$%^&*()-_=+\"")] + fn can_parse_trans_header(#[case] base: &str) { + + // NOTE: addons must end in \n to match rules + let addons = [ + "\n" + ," \n" + ,"\t\n" + ," ; comment 123 ; \n" + ,"\t;\tcomment 123 ;\t\n" + ]; + + for suffix in addons.iter() { + + let tc = format!("{}{}", base, suffix); + println!("Test case: {}", tc); + + assert!(get_pairs(Rule::trans_header, &tc).len() > 0); + } + + } + + #[rstest] + #[case ("2016-01-28 * \"comment after description w/o whitespace\"; 10:01 am, xfer id 56aa57787199a73d29000650\n")] + #[should_panic(expected = "expected trans_")] + fn verify_trans_header_error(#[case] bad_hdr: &str) { + + let quoted_bad_descr = format!("\"{}\"", bad_hdr); + LedgerParser::parse( + Rule::trans_header, "ed_bad_descr) + .unwrap_or_else(|e| panic!("{}", e)); + + // should never reach this code since all cases should result in panic + println!("Test case '{}' should fail to parse!", quoted_bad_descr); + assert!(false); + } + + + } + + + mod trans_block { + use super::*; + + + // An example beancount transaction + // 2016-01-28 * " Buy BTC" ; 10:01 am, xfer id 56aa57787199a73d29000650 + // Assets:Exchanges:Coinbase 1.03683606 BTC { 381.9697397 USD, 2016-01-28 } + // Assets:Bank:AllyChk -400.00 USD ; verified w/register + // Liabilities:Fees:Coinbase 3.96 USD + // Liabilities:Fees:Adjustment 0.00000005 USD + + #[rstest] + #[ignore = "wip"] + #[case ("2009-01-09 ! \"Bitcoin launch date\" + assets:subacct1 1.0000 + equity -1.0000 + ")] + fn can_parse_trans_block(#[case] tblock: &str) { + + let quoted_descr = format!("\"{}\"", tblock); + let pairs = LedgerParser::parse( + Rule::trans_description, "ed_descr) + .unwrap_or_else(|e| panic!("{}", e)); + + // Parsing succeeded; ensure at least 1 pair was returned + assert!(pairs.len() > 0); + } + + + + } + + + + mod directives { + use super::*; + + // YYYY-MM-DD open Account [ConstraintCurrency,...] ["BookingMethod"] + + #[rstest] + #[case (Rule::directive_open, "2001-09-11 open assets")] + #[case (Rule::directive_open, "2001-09-11 open assets:cash")] + #[case (Rule::directive_open, "2001-09-11 open Assets1:cash2:3petty")] + #[case (Rule::directive_close, "2001-09-11 close assets")] + #[case (Rule::directive_close, "2001-09-11 close assets1:2cash:3petty")] + #[case (Rule::directive_commodity, "2001-09-11 thing USD")] + #[case (Rule::balance_directive, "2001-09-11 balance assets 123.456 USD")] + #[case (Rule::balance_directive, "2001-09-11 balance assets1:2cash -0.456 USD")] + fn can_parse_misc_directive(#[case] r: Rule, #[case] base: &str) { + + // NOTE: addons must end in \n to match rules + let addons = [ + "\n" + ," \n" + ,"\t\n" + ," ; comment 123 ; \n" + ,"\t;\tcomment 123 ;\t\n" + ]; + + for suffix in addons.iter() { + + let tc = format!("{}{}", base, suffix); + println!("Test case: {}", tc); + + assert!(get_pairs(r, &tc).len() > 0); + } + } + + } + + + fn get_pairs(r: Rule, content: &str) -> Pairs<'_, Rule> { + let x = LedgerParser::parse( + r, + + content) + .unwrap_or_else(|e| panic!("{}", e)); + + return x; + } + + +} diff --git a/pta-parser/src/lib.rs b/pta-parser/src/lib.rs index 425c0e1..f226d2d 100644 --- a/pta-parser/src/lib.rs +++ b/pta-parser/src/lib.rs @@ -1,407 +1,12 @@ +// Copyright (C) 2023, AltaModa Technologies, LLC. All rights reserved. +// +// This project is licensed under the terms of the MIT license (cf. LICENSE file in root). +// + extern crate pest; #[macro_use] extern crate pest_derive; -mod ledger { - - #[derive(Parser)] - #[grammar = "./grammars/ledger.pest"] - pub struct LedgerParser; - -} - - -#[cfg(test)] -mod parser_tests { - use super::*; - - use pest::{Parser, iterators::Pairs}; - use ledger::{LedgerParser, Rule}; - - use rstest::rstest; - - - mod acct_desc { - use super::*; - - #[rstest] - #[case ("a")] - #[case ("a1")] - #[case ("a:a")] - #[case ("a1:a")] - #[case ("a1:a1")] - #[case ("a:123")] // subaccts beginning w/number - #[case ("a1:sub:123")] - #[case ("asset")] - #[case ("asset:property")] - #[case ("asset:property:real")] - fn can_parse_acct_descriptor(#[case] acct_desc: &str) { - - let pairs = LedgerParser::parse( - Rule::acct_descriptor, acct_desc) - .unwrap_or_else(|e| panic!("{}", e)); - - // Parsing succeeded; ensure at least 1 pair was returned - assert!(pairs.len() > 0); - } - - - #[rstest] - // NOTE: invalid lead char in first acct segment ("1bad") fails top_level_acct rule & is included in verify_top_level_acct_parsing_error cases. - #[case ("a1:b@d")] - #[case ("bad1:")] // invalid: ends with acct descriptor separator (colon) - #[should_panic(expected = "expected acct_descriptor")] - #[ignore = "unexpectedly parses without error"] - fn verify_acct_descriptor_parsing_error(#[case] bad_acct_desc: &str) { - - LedgerParser::parse( - Rule::acct_descriptor, bad_acct_desc) - .unwrap_or_else(|e| panic!("{}", e)); - - // should never reach this code since all cases should result in panic - println!("Test case '{}' should fail to parse!", bad_acct_desc); - assert!(false); - } - - - - #[rstest] - #[case ("1")] // invalid: number as lead char - #[case ("1b")] - #[case ("1-b")] // invalid: non-alphanumeric char - #[case ("1b-")] - #[should_panic(expected = "expected top_level_acct")] - fn verify_top_level_acct_parsing_error(#[case] bad_top_level_acct: &str) { - - LedgerParser::parse( - Rule::top_level_acct, bad_top_level_acct) - .unwrap_or_else(|e| panic!("{}", e)); - - // should never reach this code since all cases should result in panic - println!("Test case '{}' should fail to parse!", bad_top_level_acct); - assert!(false); - - } - } - - - - mod decimal { - use super::*; - - - #[rstest] - #[case ("0.00000001")] - #[case ("1.23")] - #[case ("123.456")] - #[case ("-123.456789012")] // negative values - #[case ("-0.00000001")] - fn can_parse_decimal_value(#[case] dec: &str) { - - let pairs = LedgerParser::parse( - Rule::decimal_value, dec) - .unwrap_or_else(|e| panic!("{}", e)); - - // Parsing succeeded; ensure at least 1 pair was returned - assert!(pairs.len() > 0); - } - - - #[rstest] - #[case ("0.")] // fractional segment missing - #[case ("-0.")] - #[case ("123")] - #[case ("-123")] - #[case (".12")] // whole segment missing - #[case ("-.12")] - - #[should_panic(expected = "expected decimal_value")] - fn verify_decimal_value_error(#[case] bad_dec: &str) { - - LedgerParser::parse( - Rule::decimal_value, bad_dec) - .unwrap_or_else(|e| panic!("{}", e)); - - // should never reach this code since all cases should result in panic - println!("Test case '{}' should fail to parse!", bad_dec); - assert!(false); - } - - } - - - - mod iso8601 { - use super::*; - - #[rstest] - #[case ("1900-01-01")] - #[case ("2015-12-31")] - fn can_parse_iso8601_date_extended(#[case] year: &str) { - - let pairs = LedgerParser::parse( - Rule::iso8601_date_extended, year) - .unwrap_or_else(|e| panic!("{}", e)); - - // Parsing succeeded; ensure at least 1 pair was returned - assert!(pairs.len() > 0); - } - - - #[rstest] - #[case ("000-01-01")] // Year out of range - #[case ("99990-01-01")] - #[case ("01-01")] // year segment missing - - #[case ("1999")] // month segment missing - #[case ("1999-")] - #[case ("0000-00-01")] // Month out of range - #[case ("0000-13-01")] - - #[case ("1999-12")] // day segment missing - #[case ("1999-12-")] - #[case ("0000-01-00")] // Day out of range - #[case ("0000-01-32")] - - #[case ("000o-01-01")] // Invalid chars - #[case ("1999-0x-12")] - #[case ("1999-12-0x")] - - #[case ("1999 12-01")] // whitespace (ensure atomic rule modifier is used) - #[case ("1999-12 01")] - #[case (" 1999-12-01")] // leading space (reqs additional rule) - #[should_panic(expected = "expected iso8601_")] // matches errors from multiple iso8601 rules - fn verify_iso8601_date_extended_error(#[case] bad_date: &str) { - - LedgerParser::parse( - Rule::iso8601_date_extended, bad_date) - .unwrap_or_else(|e| panic!("{}", e)); - - // should never reach this code since all cases should result in panic - println!("Test case '{}' should fail to parse!", bad_date); - assert!(false); - } - } - - - mod posting { - use super::*; - - #[rstest] - #[case (" Assets:subacct1 1.0000")] - #[case ("\tEquity \t -1.0000")] - fn can_parse_posting_basic(#[case] base: &str) { - - // NOTE: addons must end in \n to match rules - let addons = [ - "\n" - ," \n" - ,"\t\n" - ," ; comment 123 ; \n" - ,"\t;\tcomment 123 ;\t\n" - ]; - - for suffix in addons.iter() { - - let tc = format!("{}{}", base, suffix); - println!("Test case: {}", tc); - - assert!(get_pairs(Rule::posting_basic, &tc).len() > 0); - } - } - - - #[rstest] - #[case (" Assets:subacct1 1.0000")] // Too many leading spaces or tabs - #[case (" \tEquity \t -1.0000")] - #[case ("\t Equity \t -1.0000")] - - #[should_panic(expected = "expected posting_basic")] // matches errors from multiple iso8601 rules - fn verify_posting_basic_error(#[case] bad_date: &str) { - - LedgerParser::parse( - Rule::posting_basic, bad_date) - .unwrap_or_else(|e| panic!("{}", e)); - - // should never reach this code since all cases should result in panic - println!("Test case '{}' should fail to parse!", bad_date); - assert!(false); - } - } - - - mod trans_header { - use super::*; - - - #[rstest] - // NOTE: use simple text in case; test function wraps in dbl quotes - #[case ("a")] - #[case ("description")] - #[case (" a description ")] - #[case ("\ta description\twith tabs ")] - fn can_parse_trans_descr(#[case] descr: &str) { - - let quoted_descr = format!("\"{}\"", descr); - let pairs = LedgerParser::parse( - Rule::trans_description, "ed_descr) - .unwrap_or_else(|e| panic!("{}", e)); - - // Parsing succeeded; ensure at least 1 pair was returned - assert!(pairs.len() > 0); - } - - - #[rstest] - // NOTE: use simple text in case; test function wraps in dbl quotes - #[case ("")] // empty - no text - #[case (" ")] // empty - only ws - #[case ("\ta description\twith tabs and\n a newline")] // newline is invalid - #[should_panic(expected = "expected trans_")] - fn verify_trans_descr_error(#[case] bad_descr: &str) { - - let quoted_bad_descr = format!("\"{}\"", bad_descr); - LedgerParser::parse( - Rule::trans_description, "ed_bad_descr) - .unwrap_or_else(|e| panic!("{}", e)); - - // should never reach this code since all cases should result in panic - println!("Test case '{}' should fail to parse!", quoted_bad_descr); - assert!(false); - } - - - - #[rstest] - #[case ("2009-01-09 ! \"Bitcoin launch date\"")] - #[case ("2010-01-09 * \"multi whitespace test\"")] - #[case ("2011-01-09\t! \"tab test\"")] - #[case ("2012-01-09 * \"trailing tab test\"\t")] - #[case ("2013-01-09 ! \"trailing spaces test\" ")] - #[case ("2014-01-09 ! \"trailing tabs and spaces test\" \t \t\t ")] - // #[ignore = "TBD: handle special chars in transaction description"] - // #[case ("2009-01-09 ! \"Special chars in description: !@#$%^&*()-_=+\"")] - fn can_parse_trans_header(#[case] base: &str) { - - // NOTE: addons must end in \n to match rules - let addons = [ - "\n" - ," \n" - ,"\t\n" - ," ; comment 123 ; \n" - ,"\t;\tcomment 123 ;\t\n" - ]; - - for suffix in addons.iter() { - - let tc = format!("{}{}", base, suffix); - println!("Test case: {}", tc); - - assert!(get_pairs(Rule::trans_header, &tc).len() > 0); - } - - } - - #[rstest] - #[case ("2016-01-28 * \"comment after description w/o whitespace\"; 10:01 am, xfer id 56aa57787199a73d29000650\n")] - #[should_panic(expected = "expected trans_")] - fn verify_trans_header_error(#[case] bad_hdr: &str) { - - let quoted_bad_descr = format!("\"{}\"", bad_hdr); - LedgerParser::parse( - Rule::trans_header, "ed_bad_descr) - .unwrap_or_else(|e| panic!("{}", e)); - - // should never reach this code since all cases should result in panic - println!("Test case '{}' should fail to parse!", quoted_bad_descr); - assert!(false); - } - - - } - - - mod trans_block { - use super::*; - - - // An example beancount transaction - // 2016-01-28 * " Buy BTC" ; 10:01 am, xfer id 56aa57787199a73d29000650 - // Assets:Exchanges:Coinbase 1.03683606 BTC { 381.9697397 USD, 2016-01-28 } - // Assets:Bank:AllyChk -400.00 USD ; verified w/register - // Liabilities:Fees:Coinbase 3.96 USD - // Liabilities:Fees:Adjustment 0.00000005 USD - - #[rstest] - #[ignore = "wip"] - #[case ("2009-01-09 ! \"Bitcoin launch date\" - assets:subacct1 1.0000 - equity -1.0000 - ")] - fn can_parse_trans_block(#[case] tblock: &str) { - - let quoted_descr = format!("\"{}\"", tblock); - let pairs = LedgerParser::parse( - Rule::trans_description, "ed_descr) - .unwrap_or_else(|e| panic!("{}", e)); - - // Parsing succeeded; ensure at least 1 pair was returned - assert!(pairs.len() > 0); - } - - - - } - - - - mod directives { - use super::*; - - // YYYY-MM-DD open Account [ConstraintCurrency,...] ["BookingMethod"] - - #[rstest] - #[case (Rule::directive_open, "2001-09-11 open assets")] - #[case (Rule::directive_open, "2001-09-11 open assets:cash")] - #[case (Rule::directive_open, "2001-09-11 open Assets1:cash2:3petty")] - #[case (Rule::directive_close, "2001-09-11 close assets")] - #[case (Rule::directive_close, "2001-09-11 close assets1:2cash:3petty")] - #[case (Rule::directive_commodity, "2001-09-11 thing USD")] - #[case (Rule::balance_directive, "2001-09-11 balance assets 123.456 USD")] - #[case (Rule::balance_directive, "2001-09-11 balance assets1:2cash -0.456 USD")] - fn can_parse_misc_directive(#[case] r: Rule, #[case] base: &str) { - - // NOTE: addons must end in \n to match rules - let addons = [ - "\n" - ," \n" - ,"\t\n" - ," ; comment 123 ; \n" - ,"\t;\tcomment 123 ;\t\n" - ]; - - for suffix in addons.iter() { - - let tc = format!("{}{}", base, suffix); - println!("Test case: {}", tc); - - assert!(get_pairs(r, &tc).len() > 0); - } - } - - } - - - fn get_pairs(r: Rule, content: &str) -> Pairs<'_, Rule> { - let x = LedgerParser::parse( - r, - - content) - .unwrap_or_else(|e| panic!("{}", e)); - - return x; - } - - -} +pub mod ledger_parser; +pub use ledger_parser::LedgerParser; From 233de329c3960eff1bbf189a41bfed77c1639d59 Mon Sep 17 00:00:00 2001 From: jburnett Date: Mon, 25 Sep 2023 14:26:48 -0400 Subject: [PATCH 05/23] Add CLI to verify accessibility and for future impl --- Cargo.toml | 3 ++- cli/Cargo.toml | 8 ++++++++ cli/src/main.rs | 28 ++++++++++++++++++++++++++++ 3 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 cli/Cargo.toml create mode 100644 cli/src/main.rs diff --git a/Cargo.toml b/Cargo.toml index 8fb0c02..663746e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,6 @@ [workspace] resolver = "2" members = [ - "pta-parser" + "cli" + ,"pta-parser" ] diff --git a/cli/Cargo.toml b/cli/Cargo.toml new file mode 100644 index 0000000..faafcaf --- /dev/null +++ b/cli/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "cli" +version = "0.1.0" +edition = "2021" + + +[dependencies] +pta-parser = { path = "../pta-parser" } \ No newline at end of file diff --git a/cli/src/main.rs b/cli/src/main.rs new file mode 100644 index 0000000..44731f4 --- /dev/null +++ b/cli/src/main.rs @@ -0,0 +1,28 @@ +extern crate pta_parser; + +use pta_parser::LedgerParser; + +fn main() { + // TODO: implement useful CLI, e.g., + // - exec with path of file to parse + // - optionally output parse results (should be equivalent to input file) + + println!("\nNOTICE: This CLI is under development...\n"); + + // instantiate parser to ensure expected accessibility + let _ = LedgerParser {}; +} + + + +#[cfg(test)] +mod cli_tests { + + use pta_parser::LedgerParser; + + #[test] + fn can_create_parser() { + // simply verifies that the parser can be instantiated, ensuring accessibility + let _ = LedgerParser{}; + } +} \ No newline at end of file From 74f04e2825dd200abdb9838bb8bd287c8b9452a2 Mon Sep 17 00:00:00 2001 From: jburnett Date: Mon, 25 Sep 2023 21:37:35 -0400 Subject: [PATCH 06/23] WIP: Cli parses basic ledger --- cli/Cargo.toml | 1 + cli/src/main.rs | 50 +++++++++++-- pta-parser/src/grammars/ledger.pest | 3 +- pta-parser/src/ledger_parser.rs | 106 ++++++++++++++++++---------- pta-parser/src/lib.rs | 1 + testdata/basic-ledger | 13 ++++ 6 files changed, 130 insertions(+), 44 deletions(-) create mode 100644 testdata/basic-ledger diff --git a/cli/Cargo.toml b/cli/Cargo.toml index faafcaf..36fec0c 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -5,4 +5,5 @@ edition = "2021" [dependencies] +pest = "2.7.3" pta-parser = { path = "../pta-parser" } \ No newline at end of file diff --git a/cli/src/main.rs b/cli/src/main.rs index 44731f4..3fb1d06 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -1,16 +1,54 @@ extern crate pta_parser; -use pta_parser::LedgerParser; +// TODO: how to isolate pest so clients can just use lib (w/o requiring pest as here) +use pest::*; +use pta_parser::{LedgerParser, Rule}; -fn main() { - // TODO: implement useful CLI, e.g., +fn main() -> Result<(), Box> { + // TODO: CLI improvements // - exec with path of file to parse // - optionally output parse results (should be equivalent to input file) - println!("\nNOTICE: This CLI is under development...\n"); + let pb = std::env::current_dir()?; + println!("Curr dir: {:?}", pb.as_path()); - // instantiate parser to ensure expected accessibility - let _ = LedgerParser {}; + let p = pb.join("testdata/basic-ledger"); + + println!("Reading {:?}", p); + + match std::fs::read_to_string(p) { + Ok(ledger) => { + println!("Read string length: {}", ledger.len()); + + match LedgerParser::parse(Rule::ledger, &ledger) { + Ok(pairs) => { + println!("LedgerParser produced {} pairs", pairs.len()); + let mut t = pairs.tokens(); + while let val = t.next() { + match val { + Some(val) => { + println!("Token: {:?}", val); + } + + None => { break; } + } + } + } + + Err(e) => { + println!("ERR: {}", e); + return Err(Box::new(e)); + } + } + } + + Err(e) => { + println!("ERR: {}", e); + return Err(Box::new(e)); + } + } + + return Ok(()); } diff --git a/pta-parser/src/grammars/ledger.pest b/pta-parser/src/grammars/ledger.pest index 0eb3ead..eabee1e 100644 --- a/pta-parser/src/grammars/ledger.pest +++ b/pta-parser/src/grammars/ledger.pest @@ -23,6 +23,7 @@ comment_token = { ";" } // a comment comment = { comment_token ~ (!NEWLINE ~ ANY)* ~ NEWLINE } comment_or_newline = { (WHITESPACE+ ~ comment) | (WHITESPACE* ~ NEWLINE) } +empty_line = { WHITESPACE* ~ NEWLINE } // Each acct token must begin with alpha and may be followed by any number of alpha or number // Full account descriptors are comprised of colon-separated account names. The top-level @@ -116,4 +117,4 @@ balance_directive = @{ directives = { balance_directive | directive_close | directive_commodity | directive_open } -ledger = { SOI ~ (options | directives | transaction_block)+ ~ EOI } \ No newline at end of file +ledger = { SOI ~ (options | directives | transaction_block | comment | empty_line)+ ~ EOI } \ No newline at end of file diff --git a/pta-parser/src/ledger_parser.rs b/pta-parser/src/ledger_parser.rs index f4275f4..96a51b4 100644 --- a/pta-parser/src/ledger_parser.rs +++ b/pta-parser/src/ledger_parser.rs @@ -127,6 +127,44 @@ mod parser_tests { + mod directives { + use super::*; + + // YYYY-MM-DD open Account [ConstraintCurrency,...] ["BookingMethod"] + + #[rstest] + #[case (Rule::directive_open, "2001-09-11 open assets")] + #[case (Rule::directive_open, "2001-09-11 open assets:cash")] + #[case (Rule::directive_open, "2001-09-11 open Assets1:cash2:3petty")] + #[case (Rule::directive_close, "2001-09-11 close assets")] + #[case (Rule::directive_close, "2001-09-11 close assets1:2cash:3petty")] + #[case (Rule::directive_commodity, "2001-09-11 thing USD")] + #[case (Rule::balance_directive, "2001-09-11 balance assets 123.456 USD")] + #[case (Rule::balance_directive, "2001-09-11 balance assets1:2cash -0.456 USD")] + fn can_parse_misc_directive(#[case] r: Rule, #[case] base: &str) { + + // NOTE: addons must end in \n to match rules + let addons = [ + "\n" + ," \n" + ,"\t\n" + ," ; comment 123 ; \n" + ,"\t;\tcomment 123 ;\t\n" + ]; + + for suffix in addons.iter() { + + let tc = format!("{}{}", base, suffix); + println!("Test case: {}", tc); + + assert!(get_pairs(r, &tc).len() > 0); + } + } + + } + + + mod iso8601 { use super::*; @@ -180,6 +218,37 @@ mod parser_tests { } + mod ledger_file { + use super::*; + + #[rstest] + #[case ( + ";; Accounts + 2001-09-11 open assets + 2001-09-11 open assets:cash\t;comment + 2001-09-12 close assets + + ;; Balance assertions + 2001-09-11 balance assets 123.456 USD + + + ;; Misc + 2001-09-11 thing USD ; a comment + + ")] + fn can_parse_ledger(#[case] year: &str) { + + let pairs = LedgerParser::parse( + Rule::ledger, year) + .unwrap_or_else(|e| panic!("{}", e)); + + // Parsing succeeded; ensure at least 1 pair was returned + assert!(pairs.len() > 0); + } + } + + + mod posting { use super::*; @@ -351,43 +420,6 @@ mod parser_tests { - mod directives { - use super::*; - - // YYYY-MM-DD open Account [ConstraintCurrency,...] ["BookingMethod"] - - #[rstest] - #[case (Rule::directive_open, "2001-09-11 open assets")] - #[case (Rule::directive_open, "2001-09-11 open assets:cash")] - #[case (Rule::directive_open, "2001-09-11 open Assets1:cash2:3petty")] - #[case (Rule::directive_close, "2001-09-11 close assets")] - #[case (Rule::directive_close, "2001-09-11 close assets1:2cash:3petty")] - #[case (Rule::directive_commodity, "2001-09-11 thing USD")] - #[case (Rule::balance_directive, "2001-09-11 balance assets 123.456 USD")] - #[case (Rule::balance_directive, "2001-09-11 balance assets1:2cash -0.456 USD")] - fn can_parse_misc_directive(#[case] r: Rule, #[case] base: &str) { - - // NOTE: addons must end in \n to match rules - let addons = [ - "\n" - ," \n" - ,"\t\n" - ," ; comment 123 ; \n" - ,"\t;\tcomment 123 ;\t\n" - ]; - - for suffix in addons.iter() { - - let tc = format!("{}{}", base, suffix); - println!("Test case: {}", tc); - - assert!(get_pairs(r, &tc).len() > 0); - } - } - - } - - fn get_pairs(r: Rule, content: &str) -> Pairs<'_, Rule> { let x = LedgerParser::parse( r, diff --git a/pta-parser/src/lib.rs b/pta-parser/src/lib.rs index f226d2d..4d92d7f 100644 --- a/pta-parser/src/lib.rs +++ b/pta-parser/src/lib.rs @@ -10,3 +10,4 @@ extern crate pest_derive; pub mod ledger_parser; pub use ledger_parser::LedgerParser; +pub use ledger_parser::Rule; diff --git a/testdata/basic-ledger b/testdata/basic-ledger new file mode 100644 index 0000000..2f69d59 --- /dev/null +++ b/testdata/basic-ledger @@ -0,0 +1,13 @@ +;; Accounts +2001-09-11 open Assets +2001-09-11 open Assets:Cash +2001-09-12 close Assets + + +;; Balance assertions +2001-09-11 balance Assets:Cash 123.456 USD +2001-09-11 balance Liabilities -123.456 USD + + +;; Misc +2001-09-11 thing USD ; a comment From 5c16d571752885727a5a1c7c6ed665b390cab11d Mon Sep 17 00:00:00 2001 From: jburnett Date: Tue, 26 Sep 2023 11:01:59 -0400 Subject: [PATCH 07/23] Fix commodity directive --- pta-parser/src/grammars/ledger.pest | 2 +- pta-parser/src/ledger_parser.rs | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pta-parser/src/grammars/ledger.pest b/pta-parser/src/grammars/ledger.pest index eabee1e..7aeb9c4 100644 --- a/pta-parser/src/grammars/ledger.pest +++ b/pta-parser/src/grammars/ledger.pest @@ -91,7 +91,7 @@ directive_close = @{ // YYYY-MM-DD commodity Currency directive_commodity = @{ iso8601_date_extended - ~ WHITESPACE+ ~ commodity + ~ WHITESPACE+ ~ "commodity" ~ WHITESPACE+ ~ currency ~ comment_or_newline } diff --git a/pta-parser/src/ledger_parser.rs b/pta-parser/src/ledger_parser.rs index 96a51b4..a899020 100644 --- a/pta-parser/src/ledger_parser.rs +++ b/pta-parser/src/ledger_parser.rs @@ -138,7 +138,7 @@ mod parser_tests { #[case (Rule::directive_open, "2001-09-11 open Assets1:cash2:3petty")] #[case (Rule::directive_close, "2001-09-11 close assets")] #[case (Rule::directive_close, "2001-09-11 close assets1:2cash:3petty")] - #[case (Rule::directive_commodity, "2001-09-11 thing USD")] + #[case (Rule::directive_commodity, "2001-09-11 commodity USD")] #[case (Rule::balance_directive, "2001-09-11 balance assets 123.456 USD")] #[case (Rule::balance_directive, "2001-09-11 balance assets1:2cash -0.456 USD")] fn can_parse_misc_directive(#[case] r: Rule, #[case] base: &str) { @@ -233,7 +233,8 @@ mod parser_tests { ;; Misc - 2001-09-11 thing USD ; a comment + 1792-01-01 commodity USD ; US Dollar + 2001-09-11 commodity BTC ; Bitcoin launch date ")] fn can_parse_ledger(#[case] year: &str) { From ef7db7339b46abad0adb243dc6b18e0b6bb0dbc3 Mon Sep 17 00:00:00 2001 From: jburnett Date: Thu, 28 Sep 2023 00:24:34 -0400 Subject: [PATCH 08/23] WIP: multiple comment tokens; CLI output --- cli/src/main.rs | 63 ++++++++++++++++++++++++----- pta-parser/src/grammars/ledger.pest | 13 +++--- pta-parser/src/ledger_parser.rs | 3 +- testdata/basic-ledger | 19 ++++++++- 4 files changed, 78 insertions(+), 20 deletions(-) diff --git a/cli/src/main.rs b/cli/src/main.rs index 3fb1d06..e279574 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -1,7 +1,7 @@ extern crate pta_parser; // TODO: how to isolate pest so clients can just use lib (w/o requiring pest as here) -use pest::*; +use pest::{*, iterators::Pair}; use pta_parser::{LedgerParser, Rule}; fn main() -> Result<(), Box> { @@ -21,18 +21,53 @@ fn main() -> Result<(), Box> { println!("Read string length: {}", ledger.len()); match LedgerParser::parse(Rule::ledger, &ledger) { - Ok(pairs) => { - println!("LedgerParser produced {} pairs", pairs.len()); - let mut t = pairs.tokens(); - while let val = t.next() { - match val { - Some(val) => { - println!("Token: {:?}", val); + Ok(root) => { + for pair in root.into_iter() { + // println!("\n{:?}", pair.as_span()); + // println!("\n{:?}", pair.as_rule()); + + match pair.as_rule() { + Rule::comment => { + dump_pair(&pair); + + // println!("Comment: {:?}", pair.as_span()); + + // for comment in pair.into_iter() { + // println!("{:?}", comment); + // } } - None => { break; } + Rule::EOI => { dump_pair(&pair); } + Rule::WHITESPACE => { dump_pair(&pair); } + Rule::acct_descriptor => { dump_pair(&pair); } + Rule::acct_separator => { dump_pair(&pair); } + Rule::balance_directive => { dump_pair(&pair); } + Rule::comment_or_newline => { dump_pair(&pair); } + Rule::comment_token => { dump_pair(&pair); } + Rule::currency => { dump_pair(&pair); } + Rule::decimal_value => { dump_pair(&pair); } + Rule::directive_close => { dump_pair(&pair); } + Rule::directive_commodity => { dump_pair(&pair); } + Rule::directive_open => { dump_pair(&pair); } + Rule::directives => { dump_pair(&pair); } + Rule::empty_line => { dump_pair(&pair); } + Rule::iso8601_date_extended => { dump_pair(&pair); } + Rule::iso8601_day => { dump_pair(&pair); } + Rule::iso8601_month => { dump_pair(&pair); } + Rule::iso8601_year => { dump_pair(&pair); } + Rule::ledger => { dump_pair(&pair); } + Rule::options => { dump_pair(&pair); } + Rule::posting_basic => { dump_pair(&pair); } + Rule::posting_indent => { dump_pair(&pair); } + Rule::sub_acct => { dump_pair(&pair); } + Rule::top_level_acct => { dump_pair(&pair); } + Rule::trans_annotation => { dump_pair(&pair); } + Rule::trans_description => { dump_pair(&pair); } + Rule::trans_description_text => { dump_pair(&pair); } + Rule::trans_header => { dump_pair(&pair); } + Rule::transaction_block => { dump_pair(&pair); } } - } + } } Err(e) => { @@ -51,7 +86,13 @@ fn main() -> Result<(), Box> { return Ok(()); } - +fn dump_rule(r:&Rule, s:&Span) { + println!("\nRULE: {:?}", &r); + println!("\n{:?}", &s); +} +fn dump_pair(p:&Pair) { + dump_rule(&p.as_rule(), &p.as_span()); +} #[cfg(test)] mod cli_tests { diff --git a/pta-parser/src/grammars/ledger.pest b/pta-parser/src/grammars/ledger.pest index 7aeb9c4..e9db904 100644 --- a/pta-parser/src/grammars/ledger.pest +++ b/pta-parser/src/grammars/ledger.pest @@ -16,7 +16,7 @@ WHITESPACE = _{ " " | "\t" } // constants acct_separator = { ":" } -comment_token = { ";" } +comment_token = { ";" | "*" } // TODO: need to handle escaped semi-colon? // TODO: consider whether comment must be preceded by whitespace (except at beginning of line) @@ -70,7 +70,6 @@ transaction_block = { trans_header ~ posting_basic+ } currency = { ASCII_ALPHA_UPPER{3} } -commodity = { ASCII_ALPHA+ } // TODO: should commodity allow numbers? options = { "operating_currency" } // TODO: open works but is incomplete @@ -106,14 +105,16 @@ balance_directive = @{ } // TODO: other directives to implement -// YYYY-MM-DD pad Account AccountPad -// YYYY-MM-DD note Account Description // YYYY-MM-DD document Account PathToDocument -// YYYY-MM-DD price Commodity Price // YYYY-MM-DD event Name Value +// YYYY-MM-DD note Account Description +// YYYY-MM-DD pad Account AccountPad +// YYYY-MM-DD price Commodity Price +// include Filename // option Name Value // plugin ModuleName StringConfig -// include Filename +// poptag +// pushtag directives = { balance_directive | directive_close | directive_commodity | directive_open } diff --git a/pta-parser/src/ledger_parser.rs b/pta-parser/src/ledger_parser.rs index a899020..73370be 100644 --- a/pta-parser/src/ledger_parser.rs +++ b/pta-parser/src/ledger_parser.rs @@ -223,7 +223,8 @@ mod parser_tests { #[rstest] #[case ( - ";; Accounts + "; an asterisk-based comment + * Accounts 2001-09-11 open assets 2001-09-11 open assets:cash\t;comment 2001-09-12 close assets diff --git a/testdata/basic-ledger b/testdata/basic-ledger index 2f69d59..283d068 100644 --- a/testdata/basic-ledger +++ b/testdata/basic-ledger @@ -3,11 +3,26 @@ 2001-09-11 open Assets:Cash 2001-09-12 close Assets +* Commodities +1792-01-01 commodity USD +; name: "US Dollar" +; export: "CASH" +2009-01-09 commodity BTC +; name: "Bitcoin" +; export: "CryptoCurrency" +; price: "USD:coinbase/BTC-USD" ;; Balance assertions 2001-09-11 balance Assets:Cash 123.456 USD 2001-09-11 balance Liabilities -123.456 USD -;; Misc -2001-09-11 thing USD ; a comment +* Transactions +;2009-01-09 ! "Bitcoin launch date" +; assets:subacct1 1.0000 +; equity -1.0000 + +;2001-09-12 * "some transaction" +; assets 1234.5678 +; equity -1234.5678 + From 8d5e3b47bf9d1405d3522abf5cb6f5a3ecf38439 Mon Sep 17 00:00:00 2001 From: jburnett Date: Fri, 6 Oct 2023 21:09:24 -0400 Subject: [PATCH 09/23] trans block parses --- pta-parser/src/grammars/ledger.pest | 5 +- pta-parser/src/ledger_parser.rs | 103 ++++++++++++++++++---------- pta-parser/src/lib.rs | 4 +- 3 files changed, 72 insertions(+), 40 deletions(-) diff --git a/pta-parser/src/grammars/ledger.pest b/pta-parser/src/grammars/ledger.pest index e9db904..6a23881 100644 --- a/pta-parser/src/grammars/ledger.pest +++ b/pta-parser/src/grammars/ledger.pest @@ -54,7 +54,7 @@ posting_basic = @{ // TODO: improve on 'text' to allow more in description trans_description_text = { (ASCII_ALPHANUMERIC+ | WHITESPACE)+ } // TODO: is this the full set of annotation options? -trans_annotation = { "*" | "!" } +trans_annotation = { "txn" | "*" | "!" } trans_description = { "\"" ~ trans_description_text ~ "\"" } // TODO: how to ensure col 0 / no ws for header row @@ -64,9 +64,10 @@ trans_header = @{ ~ trans_annotation ~ WHITESPACE+ ~ trans_description + ~ comment_or_newline } -transaction_block = { trans_header ~ posting_basic+ } +transaction_block = @{ trans_header ~ posting_basic+ } currency = { ASCII_ALPHA_UPPER{3} } diff --git a/pta-parser/src/ledger_parser.rs b/pta-parser/src/ledger_parser.rs index 73370be..b8a644b 100644 --- a/pta-parser/src/ledger_parser.rs +++ b/pta-parser/src/ledger_parser.rs @@ -297,6 +297,65 @@ mod parser_tests { } + mod trans_block { + use super::*; + + + // An example beancount transaction + // 2016-01-28 * " Buy BTC" ; 10:01 am, xfer id 56aa57787199a73d29000650 + // Assets:Exchanges:Coinbase 1.03683606 BTC { 381.9697397 USD, 2016-01-28 } + // Assets:Bank:AllyChk -400.00 USD ; verified w/register + // Liabilities:Fees:Coinbase 3.96 USD + // Liabilities:Fees:Adjustment 0.00000005 USD + + #[rstest] + // #[ignore = "wip"] + #[case ("2009-01-09 ! \"Bitcoin launch date\" ;comment \n\tAssets 1.0000 ;posting comment\n\tEquity -1.0000 \n")] + #[case ("2009-01-09 ! \"Bitcoin launch date\"\n\tassets 1.0000\n equity -1.0000\n")] + fn can_parse_trans_block(#[case] tblock: &str) { + + let pairs = LedgerParser::parse( + Rule::transaction_block, &tblock) + .unwrap_or_else(|e| panic!("{}", e)); + + // Parsing succeeded; ensure at least 1 pair was returned + assert!(pairs.len() > 0); + } + + #[rstest] + // #[ignore = "wip"] + #[case ("2009-01-09 ! \"Bitcoin launch date\" + ")] + #[should_panic(expected = "expected transaction_block")] + fn verify_trans_block_posting_error(#[case] bad_block: &str) { + LedgerParser::parse( + Rule::transaction_block, &bad_block) + .unwrap_or_else(|e| panic!("{}", e)); + + // should never reach this code since all cases should result in panic + println!("Test case '{}' should fail to parse!", bad_block); + assert!(false); + } + + // REVIEW: Are these cases duplicative of trans_header tests? + #[rstest] + #[ignore = "wip"] + #[case ("2009-01-09 ! \"Bitcoin launch date\"")] + #[should_panic(expected = "expected trans_header")] + fn verify_trans_block_trans_header_error(#[case] bad_block: &str) { + LedgerParser::parse( + Rule::transaction_block, &bad_block) + .unwrap_or_else(|e| panic!("{}", e)); + + // should never reach this code since all cases should result in panic + println!("Test case '{}' should fail to parse!", bad_block); + assert!(false); + } + + } + + + mod trans_header { use super::*; @@ -340,9 +399,14 @@ mod parser_tests { #[rstest] + // Verify transaction annotations: !, *, txn #[case ("2009-01-09 ! \"Bitcoin launch date\"")] + #[case ("2009-01-09 * \"Bitcoin launch date\"")] + #[case ("2009-01-09 txn \"Bitcoin launch date\"")] + // whitespace variations #[case ("2010-01-09 * \"multi whitespace test\"")] - #[case ("2011-01-09\t! \"tab test\"")] + #[case ("2011-01-09\t!\t\"tab test\"")] + #[case ("2011-01-09\ttxn\t\"tab test\"")] #[case ("2012-01-09 * \"trailing tab test\"\t")] #[case ("2013-01-09 ! \"trailing spaces test\" ")] #[case ("2014-01-09 ! \"trailing tabs and spaces test\" \t \t\t ")] @@ -371,7 +435,7 @@ mod parser_tests { #[rstest] #[case ("2016-01-28 * \"comment after description w/o whitespace\"; 10:01 am, xfer id 56aa57787199a73d29000650\n")] - #[should_panic(expected = "expected trans_")] + #[should_panic(expected = "expected trans_header")] fn verify_trans_header_error(#[case] bad_hdr: &str) { let quoted_bad_descr = format!("\"{}\"", bad_hdr); @@ -388,40 +452,7 @@ mod parser_tests { } - mod trans_block { - use super::*; - - - // An example beancount transaction - // 2016-01-28 * " Buy BTC" ; 10:01 am, xfer id 56aa57787199a73d29000650 - // Assets:Exchanges:Coinbase 1.03683606 BTC { 381.9697397 USD, 2016-01-28 } - // Assets:Bank:AllyChk -400.00 USD ; verified w/register - // Liabilities:Fees:Coinbase 3.96 USD - // Liabilities:Fees:Adjustment 0.00000005 USD - - #[rstest] - #[ignore = "wip"] - #[case ("2009-01-09 ! \"Bitcoin launch date\" - assets:subacct1 1.0000 - equity -1.0000 - ")] - fn can_parse_trans_block(#[case] tblock: &str) { - - let quoted_descr = format!("\"{}\"", tblock); - let pairs = LedgerParser::parse( - Rule::trans_description, "ed_descr) - .unwrap_or_else(|e| panic!("{}", e)); - - // Parsing succeeded; ensure at least 1 pair was returned - assert!(pairs.len() > 0); - } - - - - } - - - + fn get_pairs(r: Rule, content: &str) -> Pairs<'_, Rule> { let x = LedgerParser::parse( r, diff --git a/pta-parser/src/lib.rs b/pta-parser/src/lib.rs index 4d92d7f..5d15867 100644 --- a/pta-parser/src/lib.rs +++ b/pta-parser/src/lib.rs @@ -8,6 +8,6 @@ extern crate pest; extern crate pest_derive; +// Export ledger parser pub mod ledger_parser; -pub use ledger_parser::LedgerParser; -pub use ledger_parser::Rule; +pub use ledger_parser::*; From bc7f9fa6c5e82f0664c2cb5370feb733bc5641bd Mon Sep 17 00:00:00 2001 From: jburnett Date: Fri, 6 Oct 2023 23:24:20 -0400 Subject: [PATCH 10/23] Refactor parser tests --- pta-parser/src/ledger_parser.rs | 467 --------------------- pta-parser/src/ledger_parser/mod.rs | 11 + pta-parser/src/lib.rs | 12 +- pta-parser/src/parser_tests/basics.rs | 167 ++++++++ pta-parser/src/parser_tests/mod.rs | 96 +++++ pta-parser/src/parser_tests/transaction.rs | 207 +++++++++ 6 files changed, 489 insertions(+), 471 deletions(-) delete mode 100644 pta-parser/src/ledger_parser.rs create mode 100644 pta-parser/src/ledger_parser/mod.rs create mode 100644 pta-parser/src/parser_tests/basics.rs create mode 100644 pta-parser/src/parser_tests/mod.rs create mode 100644 pta-parser/src/parser_tests/transaction.rs diff --git a/pta-parser/src/ledger_parser.rs b/pta-parser/src/ledger_parser.rs deleted file mode 100644 index b8a644b..0000000 --- a/pta-parser/src/ledger_parser.rs +++ /dev/null @@ -1,467 +0,0 @@ -// Copyright (C) 2023, AltaModa Technologies, LLC. All rights reserved. -// -// This project is licensed under the terms of the MIT license (cf. LICENSE file in root). -// - - -#[derive(Parser)] -#[grammar = "./grammars/ledger.pest"] -pub struct LedgerParser; - - -#[cfg(test)] -mod parser_tests { - - use super::*; - use pest::{Parser, iterators::Pairs}; - use rstest::rstest; - - - mod acct_desc { - use super::*; - - #[rstest] - #[case ("a")] - #[case ("a1")] - #[case ("a:a")] - #[case ("a1:a")] - #[case ("a1:a1")] - #[case ("a:123")] // subaccts beginning w/number - #[case ("a1:sub:123")] - #[case ("asset")] - #[case ("asset:property")] - #[case ("asset:property:real")] - fn can_parse_acct_descriptor(#[case] acct_desc: &str) { - - let pairs = LedgerParser::parse( - Rule::acct_descriptor, acct_desc) - .unwrap_or_else(|e| panic!("{}", e)); - - // Parsing succeeded; ensure at least 1 pair was returned - assert!(pairs.len() > 0); - } - - - #[rstest] - // NOTE: invalid lead char in first acct segment ("1bad") fails top_level_acct rule & is included in verify_top_level_acct_parsing_error cases. - #[case ("a1:b@d")] - #[case ("bad1:")] // invalid: ends with acct descriptor separator (colon) - #[should_panic(expected = "expected acct_descriptor")] - #[ignore = "unexpectedly parses without error"] - fn verify_acct_descriptor_parsing_error(#[case] bad_acct_desc: &str) { - - LedgerParser::parse( - Rule::acct_descriptor, bad_acct_desc) - .unwrap_or_else(|e| panic!("{}", e)); - - // should never reach this code since all cases should result in panic - println!("Test case '{}' should fail to parse!", bad_acct_desc); - assert!(false); - } - - - - #[rstest] - #[case ("1")] // invalid: number as lead char - #[case ("1b")] - #[case ("1-b")] // invalid: non-alphanumeric char - #[case ("1b-")] - #[should_panic(expected = "expected top_level_acct")] - fn verify_top_level_acct_parsing_error(#[case] bad_top_level_acct: &str) { - - LedgerParser::parse( - Rule::top_level_acct, bad_top_level_acct) - .unwrap_or_else(|e| panic!("{}", e)); - - // should never reach this code since all cases should result in panic - println!("Test case '{}' should fail to parse!", bad_top_level_acct); - assert!(false); - - } - } - - - - mod decimal { - use super::*; - - - #[rstest] - #[case ("0.00000001")] - #[case ("1.23")] - #[case ("123.456")] - #[case ("-123.456789012")] // negative values - #[case ("-0.00000001")] - fn can_parse_decimal_value(#[case] dec: &str) { - - let pairs = LedgerParser::parse( - Rule::decimal_value, dec) - .unwrap_or_else(|e| panic!("{}", e)); - - // Parsing succeeded; ensure at least 1 pair was returned - assert!(pairs.len() > 0); - } - - - #[rstest] - #[case ("0.")] // fractional segment missing - #[case ("-0.")] - #[case ("123")] - #[case ("-123")] - #[case (".12")] // whole segment missing - #[case ("-.12")] - - #[should_panic(expected = "expected decimal_value")] - fn verify_decimal_value_error(#[case] bad_dec: &str) { - - LedgerParser::parse( - Rule::decimal_value, bad_dec) - .unwrap_or_else(|e| panic!("{}", e)); - - // should never reach this code since all cases should result in panic - println!("Test case '{}' should fail to parse!", bad_dec); - assert!(false); - } - - } - - - - mod directives { - use super::*; - - // YYYY-MM-DD open Account [ConstraintCurrency,...] ["BookingMethod"] - - #[rstest] - #[case (Rule::directive_open, "2001-09-11 open assets")] - #[case (Rule::directive_open, "2001-09-11 open assets:cash")] - #[case (Rule::directive_open, "2001-09-11 open Assets1:cash2:3petty")] - #[case (Rule::directive_close, "2001-09-11 close assets")] - #[case (Rule::directive_close, "2001-09-11 close assets1:2cash:3petty")] - #[case (Rule::directive_commodity, "2001-09-11 commodity USD")] - #[case (Rule::balance_directive, "2001-09-11 balance assets 123.456 USD")] - #[case (Rule::balance_directive, "2001-09-11 balance assets1:2cash -0.456 USD")] - fn can_parse_misc_directive(#[case] r: Rule, #[case] base: &str) { - - // NOTE: addons must end in \n to match rules - let addons = [ - "\n" - ," \n" - ,"\t\n" - ," ; comment 123 ; \n" - ,"\t;\tcomment 123 ;\t\n" - ]; - - for suffix in addons.iter() { - - let tc = format!("{}{}", base, suffix); - println!("Test case: {}", tc); - - assert!(get_pairs(r, &tc).len() > 0); - } - } - - } - - - - mod iso8601 { - use super::*; - - #[rstest] - #[case ("1900-01-01")] - #[case ("2015-12-31")] - fn can_parse_iso8601_date_extended(#[case] year: &str) { - - let pairs = LedgerParser::parse( - Rule::iso8601_date_extended, year) - .unwrap_or_else(|e| panic!("{}", e)); - - // Parsing succeeded; ensure at least 1 pair was returned - assert!(pairs.len() > 0); - } - - - #[rstest] - #[case ("000-01-01")] // Year out of range - #[case ("99990-01-01")] - #[case ("01-01")] // year segment missing - - #[case ("1999")] // month segment missing - #[case ("1999-")] - #[case ("0000-00-01")] // Month out of range - #[case ("0000-13-01")] - - #[case ("1999-12")] // day segment missing - #[case ("1999-12-")] - #[case ("0000-01-00")] // Day out of range - #[case ("0000-01-32")] - - #[case ("000o-01-01")] // Invalid chars - #[case ("1999-0x-12")] - #[case ("1999-12-0x")] - - #[case ("1999 12-01")] // whitespace (ensure atomic rule modifier is used) - #[case ("1999-12 01")] - #[case (" 1999-12-01")] // leading space (reqs additional rule) - #[should_panic(expected = "expected iso8601_")] // matches errors from multiple iso8601 rules - fn verify_iso8601_date_extended_error(#[case] bad_date: &str) { - - LedgerParser::parse( - Rule::iso8601_date_extended, bad_date) - .unwrap_or_else(|e| panic!("{}", e)); - - // should never reach this code since all cases should result in panic - println!("Test case '{}' should fail to parse!", bad_date); - assert!(false); - } - } - - - mod ledger_file { - use super::*; - - #[rstest] - #[case ( - "; an asterisk-based comment - * Accounts - 2001-09-11 open assets - 2001-09-11 open assets:cash\t;comment - 2001-09-12 close assets - - ;; Balance assertions - 2001-09-11 balance assets 123.456 USD - - - ;; Misc - 1792-01-01 commodity USD ; US Dollar - 2001-09-11 commodity BTC ; Bitcoin launch date - - ")] - fn can_parse_ledger(#[case] year: &str) { - - let pairs = LedgerParser::parse( - Rule::ledger, year) - .unwrap_or_else(|e| panic!("{}", e)); - - // Parsing succeeded; ensure at least 1 pair was returned - assert!(pairs.len() > 0); - } - } - - - - mod posting { - use super::*; - - #[rstest] - #[case (" Assets:subacct1 1.0000")] - #[case ("\tEquity \t -1.0000")] - fn can_parse_posting_basic(#[case] base: &str) { - - // NOTE: addons must end in \n to match rules - let addons = [ - "\n" - ," \n" - ,"\t\n" - ," ; comment 123 ; \n" - ,"\t;\tcomment 123 ;\t\n" - ]; - - for suffix in addons.iter() { - - let tc = format!("{}{}", base, suffix); - println!("Test case: {}", tc); - - assert!(get_pairs(Rule::posting_basic, &tc).len() > 0); - } - } - - - #[rstest] - #[case (" Assets:subacct1 1.0000")] // Too many leading spaces or tabs - #[case (" \tEquity \t -1.0000")] - #[case ("\t Equity \t -1.0000")] - - #[should_panic(expected = "expected posting_basic")] // matches errors from multiple iso8601 rules - fn verify_posting_basic_error(#[case] bad_date: &str) { - - LedgerParser::parse( - Rule::posting_basic, bad_date) - .unwrap_or_else(|e| panic!("{}", e)); - - // should never reach this code since all cases should result in panic - println!("Test case '{}' should fail to parse!", bad_date); - assert!(false); - } - } - - - mod trans_block { - use super::*; - - - // An example beancount transaction - // 2016-01-28 * " Buy BTC" ; 10:01 am, xfer id 56aa57787199a73d29000650 - // Assets:Exchanges:Coinbase 1.03683606 BTC { 381.9697397 USD, 2016-01-28 } - // Assets:Bank:AllyChk -400.00 USD ; verified w/register - // Liabilities:Fees:Coinbase 3.96 USD - // Liabilities:Fees:Adjustment 0.00000005 USD - - #[rstest] - // #[ignore = "wip"] - #[case ("2009-01-09 ! \"Bitcoin launch date\" ;comment \n\tAssets 1.0000 ;posting comment\n\tEquity -1.0000 \n")] - #[case ("2009-01-09 ! \"Bitcoin launch date\"\n\tassets 1.0000\n equity -1.0000\n")] - fn can_parse_trans_block(#[case] tblock: &str) { - - let pairs = LedgerParser::parse( - Rule::transaction_block, &tblock) - .unwrap_or_else(|e| panic!("{}", e)); - - // Parsing succeeded; ensure at least 1 pair was returned - assert!(pairs.len() > 0); - } - - #[rstest] - // #[ignore = "wip"] - #[case ("2009-01-09 ! \"Bitcoin launch date\" - ")] - #[should_panic(expected = "expected transaction_block")] - fn verify_trans_block_posting_error(#[case] bad_block: &str) { - LedgerParser::parse( - Rule::transaction_block, &bad_block) - .unwrap_or_else(|e| panic!("{}", e)); - - // should never reach this code since all cases should result in panic - println!("Test case '{}' should fail to parse!", bad_block); - assert!(false); - } - - // REVIEW: Are these cases duplicative of trans_header tests? - #[rstest] - #[ignore = "wip"] - #[case ("2009-01-09 ! \"Bitcoin launch date\"")] - #[should_panic(expected = "expected trans_header")] - fn verify_trans_block_trans_header_error(#[case] bad_block: &str) { - LedgerParser::parse( - Rule::transaction_block, &bad_block) - .unwrap_or_else(|e| panic!("{}", e)); - - // should never reach this code since all cases should result in panic - println!("Test case '{}' should fail to parse!", bad_block); - assert!(false); - } - - } - - - - mod trans_header { - use super::*; - - - #[rstest] - // NOTE: use simple text in case; test function wraps in dbl quotes - #[case ("a")] - #[case ("description")] - #[case (" a description ")] - #[case ("\ta description\twith tabs ")] - fn can_parse_trans_descr(#[case] descr: &str) { - - let quoted_descr = format!("\"{}\"", descr); - let pairs = LedgerParser::parse( - Rule::trans_description, "ed_descr) - .unwrap_or_else(|e| panic!("{}", e)); - - // Parsing succeeded; ensure at least 1 pair was returned - assert!(pairs.len() > 0); - } - - - #[rstest] - // NOTE: use simple text in case; test function wraps in dbl quotes - #[case ("")] // empty - no text - #[case (" ")] // empty - only ws - #[case ("\ta description\twith tabs and\n a newline")] // newline is invalid - #[should_panic(expected = "expected trans_")] - fn verify_trans_descr_error(#[case] bad_descr: &str) { - - let quoted_bad_descr = format!("\"{}\"", bad_descr); - LedgerParser::parse( - Rule::trans_description, "ed_bad_descr) - .unwrap_or_else(|e| panic!("{}", e)); - - // should never reach this code since all cases should result in panic - println!("Test case '{}' should fail to parse!", quoted_bad_descr); - assert!(false); - } - - - - #[rstest] - // Verify transaction annotations: !, *, txn - #[case ("2009-01-09 ! \"Bitcoin launch date\"")] - #[case ("2009-01-09 * \"Bitcoin launch date\"")] - #[case ("2009-01-09 txn \"Bitcoin launch date\"")] - // whitespace variations - #[case ("2010-01-09 * \"multi whitespace test\"")] - #[case ("2011-01-09\t!\t\"tab test\"")] - #[case ("2011-01-09\ttxn\t\"tab test\"")] - #[case ("2012-01-09 * \"trailing tab test\"\t")] - #[case ("2013-01-09 ! \"trailing spaces test\" ")] - #[case ("2014-01-09 ! \"trailing tabs and spaces test\" \t \t\t ")] - // #[ignore = "TBD: handle special chars in transaction description"] - // #[case ("2009-01-09 ! \"Special chars in description: !@#$%^&*()-_=+\"")] - fn can_parse_trans_header(#[case] base: &str) { - - // NOTE: addons must end in \n to match rules - let addons = [ - "\n" - ," \n" - ,"\t\n" - ," ; comment 123 ; \n" - ,"\t;\tcomment 123 ;\t\n" - ]; - - for suffix in addons.iter() { - - let tc = format!("{}{}", base, suffix); - println!("Test case: {}", tc); - - assert!(get_pairs(Rule::trans_header, &tc).len() > 0); - } - - } - - #[rstest] - #[case ("2016-01-28 * \"comment after description w/o whitespace\"; 10:01 am, xfer id 56aa57787199a73d29000650\n")] - #[should_panic(expected = "expected trans_header")] - fn verify_trans_header_error(#[case] bad_hdr: &str) { - - let quoted_bad_descr = format!("\"{}\"", bad_hdr); - LedgerParser::parse( - Rule::trans_header, "ed_bad_descr) - .unwrap_or_else(|e| panic!("{}", e)); - - // should never reach this code since all cases should result in panic - println!("Test case '{}' should fail to parse!", quoted_bad_descr); - assert!(false); - } - - - } - - - - fn get_pairs(r: Rule, content: &str) -> Pairs<'_, Rule> { - let x = LedgerParser::parse( - r, - - content) - .unwrap_or_else(|e| panic!("{}", e)); - - return x; - } - - -} diff --git a/pta-parser/src/ledger_parser/mod.rs b/pta-parser/src/ledger_parser/mod.rs new file mode 100644 index 0000000..82df59a --- /dev/null +++ b/pta-parser/src/ledger_parser/mod.rs @@ -0,0 +1,11 @@ +// Copyright (C) 2023, AltaModa Technologies, LLC. All rights reserved. +// +// This project is licensed under the terms of the MIT license (cf. LICENSE file in root). +// + + +use pest_derive::*; + +#[derive(Parser)] +#[grammar = "./grammars/ledger.pest"] +pub struct LedgerParser; diff --git a/pta-parser/src/lib.rs b/pta-parser/src/lib.rs index 5d15867..b39a570 100644 --- a/pta-parser/src/lib.rs +++ b/pta-parser/src/lib.rs @@ -3,11 +3,15 @@ // This project is licensed under the terms of the MIT license (cf. LICENSE file in root). // -extern crate pest; -#[macro_use] -extern crate pest_derive; - +pub extern crate pest; +pub extern crate pest_derive; +#[cfg(test)] +pub extern crate rstest; // Export ledger parser pub mod ledger_parser; pub use ledger_parser::*; + + +pub mod parser_tests; +pub use parser_tests::*; \ No newline at end of file diff --git a/pta-parser/src/parser_tests/basics.rs b/pta-parser/src/parser_tests/basics.rs new file mode 100644 index 0000000..4f9d4d8 --- /dev/null +++ b/pta-parser/src/parser_tests/basics.rs @@ -0,0 +1,167 @@ +#[cfg(test)] use super::*; +#[cfg(test)] use rstest::rstest; + + +#[cfg(test)] +mod acct_desc { + use super::*; + + #[rstest] + #[case ("a")] + #[case ("a1")] + #[case ("a:a")] + #[case ("a1:a")] + #[case ("a1:a1")] + #[case ("a:123")] // subaccts beginning w/number + #[case ("a1:sub:123")] + #[case ("asset")] + #[case ("asset:property")] + #[case ("asset:property:real")] + fn can_parse_acct_descriptor(#[case] acct_desc: &str) { + + let pairs = LedgerParser::parse( + Rule::acct_descriptor, acct_desc) + .unwrap_or_else(|e| panic!("{}", e)); + + // Parsing succeeded; ensure at least 1 pair was returned + assert!(pairs.len() > 0); + } + + + #[rstest] + // NOTE: invalid lead char in first acct segment ("1bad") fails top_level_acct rule & is included in verify_top_level_acct_parsing_error cases. + #[case ("a1:b@d")] + #[case ("bad1:")] // invalid: ends with acct descriptor separator (colon) + #[should_panic(expected = "expected acct_descriptor")] + #[ignore = "unexpectedly parses without error"] + fn verify_acct_descriptor_parsing_error(#[case] bad_acct_desc: &str) { + + LedgerParser::parse( + Rule::acct_descriptor, bad_acct_desc) + .unwrap_or_else(|e| panic!("{}", e)); + + // should never reach this code since all cases should result in panic + println!("Test case '{}' should fail to parse!", bad_acct_desc); + assert!(false); + } + + + + #[rstest] + #[case ("1")] // invalid: number as lead char + #[case ("1b")] + #[case ("1-b")] // invalid: non-alphanumeric char + #[case ("1b-")] + #[should_panic(expected = "expected top_level_acct")] + fn verify_top_level_acct_parsing_error(#[case] bad_top_level_acct: &str) { + + LedgerParser::parse( + Rule::top_level_acct, bad_top_level_acct) + .unwrap_or_else(|e| panic!("{}", e)); + + // should never reach this code since all cases should result in panic + println!("Test case '{}' should fail to parse!", bad_top_level_acct); + assert!(false); + + } +} + + +#[cfg(test)] +mod decimal { + use super::*; + + #[rstest] + #[case ("0.00000001")] + #[case ("1.23")] + #[case ("123.456")] + #[case ("-123.456789012")] // negative values + #[case ("-0.00000001")] + fn can_parse_decimal_value(#[case] dec: &str) { + + let pairs = LedgerParser::parse( + Rule::decimal_value, dec) + .unwrap_or_else(|e| panic!("{}", e)); + + // Parsing succeeded; ensure at least 1 pair was returned + assert!(pairs.len() > 0); + } + + + #[cfg(test)] + #[rstest] + #[case ("0.")] // fractional segment missing + #[case ("-0.")] + #[case ("123")] + #[case ("-123")] + #[case (".12")] // whole segment missing + #[case ("-.12")] + + #[should_panic(expected = "expected decimal_value")] + fn verify_decimal_value_error(#[case] bad_dec: &str) { + + LedgerParser::parse( + Rule::decimal_value, bad_dec) + .unwrap_or_else(|e| panic!("{}", e)); + + // should never reach this code since all cases should result in panic + println!("Test case '{}' should fail to parse!", bad_dec); + assert!(false); + } + +} + + + +#[cfg(test)] +mod iso8601 { + use super::*; + + #[rstest] + #[case ("1900-01-01")] + #[case ("2015-12-31")] + fn can_parse_iso8601_date_extended(#[case] year: &str) { + + let pairs = LedgerParser::parse( + Rule::iso8601_date_extended, year) + .unwrap_or_else(|e| panic!("{}", e)); + + // Parsing succeeded; ensure at least 1 pair was returned + assert!(pairs.len() > 0); + } + + + #[rstest] + #[case ("000-01-01")] // Year out of range + #[case ("99990-01-01")] + #[case ("01-01")] // year segment missing + + #[case ("1999")] // month segment missing + #[case ("1999-")] + #[case ("0000-00-01")] // Month out of range + #[case ("0000-13-01")] + + #[case ("1999-12")] // day segment missing + #[case ("1999-12-")] + #[case ("0000-01-00")] // Day out of range + #[case ("0000-01-32")] + + #[case ("000o-01-01")] // Invalid chars + #[case ("1999-0x-12")] + #[case ("1999-12-0x")] + + #[case ("1999 12-01")] // whitespace (ensure atomic rule modifier is used) + #[case ("1999-12 01")] + #[case (" 1999-12-01")] // leading space (reqs additional rule) + #[should_panic(expected = "expected iso8601_")] // matches errors from multiple iso8601 rules + fn verify_iso8601_date_extended_error(#[case] bad_date: &str) { + + LedgerParser::parse( + Rule::iso8601_date_extended, bad_date) + .unwrap_or_else(|e| panic!("{}", e)); + + // should never reach this code since all cases should result in panic + println!("Test case '{}' should fail to parse!", bad_date); + assert!(false); + } +} diff --git a/pta-parser/src/parser_tests/mod.rs b/pta-parser/src/parser_tests/mod.rs new file mode 100644 index 0000000..5c366f4 --- /dev/null +++ b/pta-parser/src/parser_tests/mod.rs @@ -0,0 +1,96 @@ + +pub use super::*; +pub use pest::{Parser, iterators::Pairs}; +#[cfg(test)] +pub use rstest::rstest; + + +mod basics; +mod transaction; + + +#[cfg(test)] +mod directives { + use super::*; + + // YYYY-MM-DD open Account [ConstraintCurrency,...] ["BookingMethod"] + + #[rstest] + #[case (Rule::directive_open, "2001-09-11 open assets")] + #[case (Rule::directive_open, "2001-09-11 open assets:cash")] + #[case (Rule::directive_open, "2001-09-11 open Assets1:cash2:3petty")] + #[case (Rule::directive_close, "2001-09-11 close assets")] + #[case (Rule::directive_close, "2001-09-11 close assets1:2cash:3petty")] + #[case (Rule::directive_commodity, "2001-09-11 commodity USD")] + #[case (Rule::balance_directive, "2001-09-11 balance assets 123.456 USD")] + #[case (Rule::balance_directive, "2001-09-11 balance assets1:2cash -0.456 USD")] + fn can_parse_misc_directive(#[case] r: Rule, #[case] base: &str) { + + // NOTE: addons must end in \n to match rules + let addons = [ + "\n" + ," \n" + ,"\t\n" + ," ; comment 123 ; \n" + ,"\t;\tcomment 123 ;\t\n" + ]; + + for suffix in addons.iter() { + + let tc = format!("{}{}", base, suffix); + println!("Test case: {}", tc); + + assert!(get_pairs(r, &tc).len() > 0); + } + } + +} + + + +#[cfg(test)] +mod ledger_file { + use super::*; + + #[rstest] + #[case ( + "; an asterisk-based comment + * Accounts + 2001-09-11 open assets + 2001-09-11 open assets:cash\t;comment + 2001-09-12 close assets + + ;; Balance assertions + 2001-09-11 balance assets 123.456 USD + + + ;; Misc + 1792-01-01 commodity USD ; US Dollar + 2001-09-11 commodity BTC ; Bitcoin launch date + + ")] + fn can_parse_ledger(#[case] year: &str) { + + let pairs = LedgerParser::parse( + Rule::ledger, year) + .unwrap_or_else(|e| panic!("{}", e)); + + // Parsing succeeded; ensure at least 1 pair was returned + assert!(pairs.len() > 0); + } +} + + + + + +pub fn get_pairs(r: Rule, content: &str) -> Pairs<'_, Rule> { + let x = LedgerParser::parse( + r, + + content) + .unwrap_or_else(|e| panic!("{}", e)); + + return x; +} + diff --git a/pta-parser/src/parser_tests/transaction.rs b/pta-parser/src/parser_tests/transaction.rs new file mode 100644 index 0000000..4375dc9 --- /dev/null +++ b/pta-parser/src/parser_tests/transaction.rs @@ -0,0 +1,207 @@ +#[cfg(test)] use super::*; +#[cfg(test)] use rstest::rstest; + + + +#[cfg(test)] +mod posting { + use super::*; + + #[rstest] + #[case (" Assets:subacct1 1.0000")] + #[case ("\tEquity \t -1.0000")] + fn can_parse_posting_basic(#[case] base: &str) { + + // NOTE: addons must end in \n to match rules + let addons = [ + "\n" + ," \n" + ,"\t\n" + ," ; comment 123 ; \n" + ,"\t;\tcomment 123 ;\t\n" + ]; + + for suffix in addons.iter() { + + let tc = format!("{}{}", base, suffix); + println!("Test case: {}", tc); + + assert!(get_pairs(Rule::posting_basic, &tc).len() > 0); + } + } + + + #[rstest] + #[case (" Assets:subacct1 1.0000")] // Too many leading spaces or tabs + #[case (" \tEquity \t -1.0000")] + #[case ("\t Equity \t -1.0000")] + + #[should_panic(expected = "expected posting_basic")] // matches errors from multiple iso8601 rules + fn verify_posting_basic_error(#[case] bad_date: &str) { + + LedgerParser::parse( + Rule::posting_basic, bad_date) + .unwrap_or_else(|e| panic!("{}", e)); + + // should never reach this code since all cases should result in panic + println!("Test case '{}' should fail to parse!", bad_date); + assert!(false); + } +} + + +#[cfg(test)] +mod trans_block { + use super::*; + + + // An example beancount transaction + // 2016-01-28 * " Buy BTC" ; 10:01 am, xfer id 56aa57787199a73d29000650 + // Assets:Exchanges:Coinbase 1.03683606 BTC { 381.9697397 USD, 2016-01-28 } + // Assets:Bank:AllyChk -400.00 USD ; verified w/register + // Liabilities:Fees:Coinbase 3.96 USD + // Liabilities:Fees:Adjustment 0.00000005 USD + + #[rstest] + // #[ignore = "wip"] + #[case ("2009-01-09 ! \"Bitcoin launch date\" ;comment \n\tAssets 1.0000 ;posting comment\n\tEquity -1.0000 \n")] + #[case ("2009-01-09 ! \"Bitcoin launch date\"\n\tassets 1.0000\n equity -1.0000\n")] + fn can_parse_trans_block(#[case] tblock: &str) { + + let pairs = LedgerParser::parse( + Rule::transaction_block, &tblock) + .unwrap_or_else(|e| panic!("{}", e)); + + // Parsing succeeded; ensure at least 1 pair was returned + assert!(pairs.len() > 0); + } + + #[rstest] + // #[ignore = "wip"] + #[case ("2009-01-09 ! \"Bitcoin launch date\" + ")] + #[should_panic(expected = "expected transaction_block")] + fn verify_trans_block_posting_error(#[case] bad_block: &str) { + LedgerParser::parse( + Rule::transaction_block, &bad_block) + .unwrap_or_else(|e| panic!("{}", e)); + + // should never reach this code since all cases should result in panic + println!("Test case '{}' should fail to parse!", bad_block); + assert!(false); + } + + // REVIEW: Are these cases duplicative of trans_header tests? + #[rstest] + #[ignore = "wip"] + #[case ("2009-01-09 ! \"Bitcoin launch date\"")] + #[should_panic(expected = "expected trans_header")] + fn verify_trans_block_trans_header_error(#[case] bad_block: &str) { + LedgerParser::parse( + Rule::transaction_block, &bad_block) + .unwrap_or_else(|e| panic!("{}", e)); + + // should never reach this code since all cases should result in panic + println!("Test case '{}' should fail to parse!", bad_block); + assert!(false); + } + +} + + + +#[cfg(test)] +mod trans_header { + use super::*; + + + #[rstest] + // NOTE: use simple text in case; test function wraps in dbl quotes + #[case ("a")] + #[case ("description")] + #[case (" a description ")] + #[case ("\ta description\twith tabs ")] + fn can_parse_trans_descr(#[case] descr: &str) { + + let quoted_descr = format!("\"{}\"", descr); + let pairs = LedgerParser::parse( + Rule::trans_description, "ed_descr) + .unwrap_or_else(|e| panic!("{}", e)); + + // Parsing succeeded; ensure at least 1 pair was returned + assert!(pairs.len() > 0); + } + + + #[rstest] + // NOTE: use simple text in case; test function wraps in dbl quotes + #[case ("")] // empty - no text + #[case (" ")] // empty - only ws + #[case ("\ta description\twith tabs and\n a newline")] // newline is invalid + #[should_panic(expected = "expected trans_")] + fn verify_trans_descr_error(#[case] bad_descr: &str) { + + let quoted_bad_descr = format!("\"{}\"", bad_descr); + LedgerParser::parse( + Rule::trans_description, "ed_bad_descr) + .unwrap_or_else(|e| panic!("{}", e)); + + // should never reach this code since all cases should result in panic + println!("Test case '{}' should fail to parse!", quoted_bad_descr); + assert!(false); + } + + + + #[rstest] + // Verify transaction annotations: !, *, txn + #[case ("2009-01-09 ! \"Bitcoin launch date\"")] + #[case ("2009-01-09 * \"Bitcoin launch date\"")] + #[case ("2009-01-09 txn \"Bitcoin launch date\"")] + // whitespace variations + #[case ("2010-01-09 * \"multi whitespace test\"")] + #[case ("2011-01-09\t!\t\"tab test\"")] + #[case ("2011-01-09\ttxn\t\"tab test\"")] + #[case ("2012-01-09 * \"trailing tab test\"\t")] + #[case ("2013-01-09 ! \"trailing spaces test\" ")] + #[case ("2014-01-09 ! \"trailing tabs and spaces test\" \t \t\t ")] + // #[ignore = "TBD: handle special chars in transaction description"] + // #[case ("2009-01-09 ! \"Special chars in description: !@#$%^&*()-_=+\"")] + fn can_parse_trans_header(#[case] base: &str) { + + // NOTE: addons must end in \n to match rules + let addons = [ + "\n" + ," \n" + ,"\t\n" + ," ; comment 123 ; \n" + ,"\t;\tcomment 123 ;\t\n" + ]; + + for suffix in addons.iter() { + + let tc = format!("{}{}", base, suffix); + println!("Test case: {}", tc); + + assert!(get_pairs(Rule::trans_header, &tc).len() > 0); + } + + } + + #[rstest] + #[case ("2016-01-28 * \"comment after description w/o whitespace\"; 10:01 am, xfer id 56aa57787199a73d29000650\n")] + #[should_panic(expected = "expected trans_header")] + fn verify_trans_header_error(#[case] bad_hdr: &str) { + + let quoted_bad_descr = format!("\"{}\"", bad_hdr); + LedgerParser::parse( + Rule::trans_header, "ed_bad_descr) + .unwrap_or_else(|e| panic!("{}", e)); + + // should never reach this code since all cases should result in panic + println!("Test case '{}' should fail to parse!", quoted_bad_descr); + assert!(false); + } + + +} From d53688446cf1619ff35b5349a8a99d59f80c4121 Mon Sep 17 00:00:00 2001 From: jburnett Date: Mon, 9 Oct 2023 17:25:32 -0400 Subject: [PATCH 11/23] add simple transactions to ledger file --- testdata/basic-ledger | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/testdata/basic-ledger b/testdata/basic-ledger index 283d068..87cbe0c 100644 --- a/testdata/basic-ledger +++ b/testdata/basic-ledger @@ -18,11 +18,14 @@ * Transactions -;2009-01-09 ! "Bitcoin launch date" -; assets:subacct1 1.0000 -; equity -1.0000 +2009-01-09 ! "Bitcoin launch date" + assets:subacct1 1.0000 + equity -1.0000 -;2001-09-12 * "some transaction" -; assets 1234.5678 -; equity -1234.5678 +2001-09-12 * "some transaction" + assets 1234.5678 + equity -1234.5678 +; Transaction with a single posting - not a valid transaction, but should parse. +2001-12-31 txn "txn" + assets 0.00000000 From 2f9d33b94f50e85aa3d43a9ad9e8faeb645dacbb Mon Sep 17 00:00:00 2001 From: jburnett Date: Mon, 9 Oct 2023 17:37:41 -0400 Subject: [PATCH 12/23] Consolidate pest rules to simplify --- cli/src/main.rs | 3 --- pta-parser/src/grammars/ledger.pest | 10 ++++++---- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/cli/src/main.rs b/cli/src/main.rs index e279574..56b91ec 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -52,9 +52,6 @@ fn main() -> Result<(), Box> { Rule::directives => { dump_pair(&pair); } Rule::empty_line => { dump_pair(&pair); } Rule::iso8601_date_extended => { dump_pair(&pair); } - Rule::iso8601_day => { dump_pair(&pair); } - Rule::iso8601_month => { dump_pair(&pair); } - Rule::iso8601_year => { dump_pair(&pair); } Rule::ledger => { dump_pair(&pair); } Rule::options => { dump_pair(&pair); } Rule::posting_basic => { dump_pair(&pair); } diff --git a/pta-parser/src/grammars/ledger.pest b/pta-parser/src/grammars/ledger.pest index 6a23881..91f9834 100644 --- a/pta-parser/src/grammars/ledger.pest +++ b/pta-parser/src/grammars/ledger.pest @@ -36,10 +36,12 @@ acct_descriptor = @{ top_level_acct ~ (sub_acct)* } decimal_value = @{ (("-" ~ NUMBER+) | NUMBER+) ~ "." ~ NUMBER+ } -iso8601_year = { ASCII_DIGIT{4} } -iso8601_month = @{ ( "0" ~ ASCII_NONZERO_DIGIT) | ("1" ~ '0'..'2') } -iso8601_day = @{ ("30" | "31") | ("0" ~ ASCII_NONZERO_DIGIT) | ('1'..'2' ~ ASCII_DIGIT) } -iso8601_date_extended = @{ iso8601_year ~ "-" ~ iso8601_month ~ "-" ~ iso8601_day } // YYYY-MM-DD + +iso8601_date_extended = @{ + ASCII_DIGIT{4} + ~ "-" ~ (( "0" ~ ASCII_NONZERO_DIGIT) | ("1" ~ '0'..'2')) + ~ "-" ~ (("30" | "31") | ("0" ~ ASCII_NONZERO_DIGIT) | ('1'..'2' ~ ASCII_DIGIT)) +} // YYYY-MM-DD // TODO: consider more lax indent rules From cbec9e308fde95ae27973107ae6636807e9dfbc2 Mon Sep 17 00:00:00 2001 From: jburnett Date: Tue, 10 Oct 2023 22:37:18 -0400 Subject: [PATCH 13/23] WIP: consuming parser --- cli/Cargo.toml | 2 +- cli/src/main.rs | 162 ++++++++++++------ pta-parser/Cargo.toml | 1 + pta-parser/src/grammars/ledger.pest | 54 +++--- pta-parser/src/ledger_parser/mod.rs | 146 ++++++++++++++++ pta-parser/src/parser_tests/transaction.rs | 184 +++++++++++---------- 6 files changed, 386 insertions(+), 163 deletions(-) diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 36fec0c..85347c5 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -6,4 +6,4 @@ edition = "2021" [dependencies] pest = "2.7.3" -pta-parser = { path = "../pta-parser" } \ No newline at end of file +pta-parser = { path = "../pta-parser" } diff --git a/cli/src/main.rs b/cli/src/main.rs index 56b91ec..938becb 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -5,7 +5,7 @@ use pest::{*, iterators::Pair}; use pta_parser::{LedgerParser, Rule}; fn main() -> Result<(), Box> { - // TODO: CLI improvements + // TODO: CLI improvements // - exec with path of file to parse // - optionally output parse results (should be equivalent to input file) @@ -20,58 +20,80 @@ fn main() -> Result<(), Box> { Ok(ledger) => { println!("Read string length: {}", ledger.len()); - match LedgerParser::parse(Rule::ledger, &ledger) { - Ok(root) => { - for pair in root.into_iter() { - // println!("\n{:?}", pair.as_span()); - // println!("\n{:?}", pair.as_rule()); - - match pair.as_rule() { - Rule::comment => { - dump_pair(&pair); - - // println!("Comment: {:?}", pair.as_span()); - - // for comment in pair.into_iter() { - // println!("{:?}", comment); - // } - } - - Rule::EOI => { dump_pair(&pair); } - Rule::WHITESPACE => { dump_pair(&pair); } - Rule::acct_descriptor => { dump_pair(&pair); } - Rule::acct_separator => { dump_pair(&pair); } - Rule::balance_directive => { dump_pair(&pair); } - Rule::comment_or_newline => { dump_pair(&pair); } - Rule::comment_token => { dump_pair(&pair); } - Rule::currency => { dump_pair(&pair); } - Rule::decimal_value => { dump_pair(&pair); } - Rule::directive_close => { dump_pair(&pair); } - Rule::directive_commodity => { dump_pair(&pair); } - Rule::directive_open => { dump_pair(&pair); } - Rule::directives => { dump_pair(&pair); } - Rule::empty_line => { dump_pair(&pair); } - Rule::iso8601_date_extended => { dump_pair(&pair); } - Rule::ledger => { dump_pair(&pair); } - Rule::options => { dump_pair(&pair); } - Rule::posting_basic => { dump_pair(&pair); } - Rule::posting_indent => { dump_pair(&pair); } - Rule::sub_acct => { dump_pair(&pair); } - Rule::top_level_acct => { dump_pair(&pair); } - Rule::trans_annotation => { dump_pair(&pair); } - Rule::trans_description => { dump_pair(&pair); } - Rule::trans_description_text => { dump_pair(&pair); } - Rule::trans_header => { dump_pair(&pair); } - Rule::transaction_block => { dump_pair(&pair); } - } - } - } + // return main_consume(&ledger); + + return main_parse(&ledger); + } + + Err(e) => { + println!("ERR: {}", e); + return Err(Box::new(e)); + } + } + +} + + +#[allow(dead_code)] +fn main_consume(ledger: &String) -> Result<(), Box> { + + match LedgerParser::parse(Rule::ledger, &ledger) { + Ok(root) => { + return Ok(()); + } + + Err(e) => { + println!("ERR: {}", e); + return Err(Box::new(e)); + } + } + // return Ok(()); - Err(e) => { - println!("ERR: {}", e); - return Err(Box::new(e)); +} + + +#[allow(dead_code)] +fn main_parse(ledger: &String) -> Result<(), Box> { + + match LedgerParser::parse(Rule::ledger, &ledger) { + Ok(root) => { + for pair in root.into_iter() { + // println!("\n{:?}", pair.as_span()); + // println!("\n{:?}", pair.as_rule()); + + match pair.as_rule() { + Rule::comment => { + dump_pair(&pair); + } + + Rule::EOI => { dump_pair(&pair); } + Rule::WHITESPACE => { dump_pair(&pair); } + Rule::acct_descriptor => { dump_pair(&pair); } + Rule::acct_separator => { dump_pair(&pair); } + Rule::balance_directive => { dump_pair(&pair); } + Rule::comment_or_newline => { dump_pair(&pair); } + Rule::comment_token => { dump_pair(&pair); } + Rule::currency => { dump_pair(&pair); } + Rule::decimal_value => { dump_pair(&pair); } + Rule::directive_close => { dump_pair(&pair); } + Rule::directive_commodity => { dump_pair(&pair); } + Rule::directive_open => { dump_pair(&pair); } + Rule::directives => { dump_pair(&pair); } + Rule::empty_line => { dump_pair(&pair); } + Rule::iso8601_date_extended => { dump_pair(&pair); } + Rule::ledger => { dump_pair(&pair); } + Rule::options => { dump_pair(&pair); } + Rule::posting_basic => { dump_pair(&pair); } + Rule::posting_indent => { dump_pair(&pair); } + Rule::sub_acct => { dump_pair(&pair); } + Rule::top_level_acct => { dump_pair(&pair); } + Rule::trans_annotation => { dump_pair(&pair); } + Rule::trans_description => { dump_pair(&pair); } + Rule::trans_description_text => { dump_pair(&pair); } + Rule::trans_header => { dump_pair(&pair); } + Rule::transaction_block => { dump_pair(&pair); } } - } + } } Err(e) => { @@ -83,14 +105,52 @@ fn main() -> Result<(), Box> { return Ok(()); } + fn dump_rule(r:&Rule, s:&Span) { println!("\nRULE: {:?}", &r); println!("\n{:?}", &s); } + fn dump_pair(p:&Pair) { dump_rule(&p.as_rule(), &p.as_span()); } + + +fn handle_pair(pair: Pair<'_, Rule>) { + match pair.as_rule() { + Rule::comment => { dump_pair(&pair); } + Rule::EOI => { dump_pair(&pair); } + Rule::WHITESPACE => { dump_pair(&pair); } + Rule::acct_descriptor => { dump_pair(&pair); } + Rule::acct_separator => { dump_pair(&pair); } + Rule::balance_directive => { dump_pair(&pair); } + Rule::comment_or_newline => { dump_pair(&pair); } + Rule::comment_token => { dump_pair(&pair); } + Rule::currency => { dump_pair(&pair); } + Rule::decimal_value => { dump_pair(&pair); } + Rule::directive_close => { dump_pair(&pair); } + Rule::directive_commodity => { dump_pair(&pair); } + Rule::directive_open => { dump_pair(&pair); } + Rule::directives => { dump_pair(&pair); } + Rule::empty_line => { dump_pair(&pair); } + Rule::iso8601_date_extended => { dump_pair(&pair); } + Rule::ledger => { dump_pair(&pair); } + Rule::options => { dump_pair(&pair); } + Rule::posting_basic => { dump_pair(&pair); } + Rule::posting_indent => { dump_pair(&pair); } + Rule::sub_acct => { dump_pair(&pair); } + Rule::top_level_acct => { dump_pair(&pair); } + Rule::trans_annotation => { dump_pair(&pair); } + Rule::trans_description => { dump_pair(&pair); } + Rule::trans_description_text => { dump_pair(&pair); } + Rule::trans_header => { dump_pair(&pair); } + Rule::transaction_block => { dump_pair(&pair); } + } +} + + + #[cfg(test)] mod cli_tests { diff --git a/pta-parser/Cargo.toml b/pta-parser/Cargo.toml index 9ba78a5..473051d 100644 --- a/pta-parser/Cargo.toml +++ b/pta-parser/Cargo.toml @@ -9,6 +9,7 @@ bench = false [dependencies] pest = "2.7.3" +pest_consume = "1.1.3" pest_derive = "2.7.3" [dev-dependencies] diff --git a/pta-parser/src/grammars/ledger.pest b/pta-parser/src/grammars/ledger.pest index 91f9834..6bd600d 100644 --- a/pta-parser/src/grammars/ledger.pest +++ b/pta-parser/src/grammars/ledger.pest @@ -2,28 +2,19 @@ // // This project is licensed under the terms of the MIT license (cf. LICENSE file in root). // -// Pest's built-in rules: -// ASCII_ALPHA_LOWER = { 'a'..'z' } -// ASCII_ALPHA_UPPER = { 'A'..'Z' } -// ASCII_ALPHA = { ASCII_ALPHA_LOWER | ASCII_ALPHA_UPPER } -// ASCII_DIGIT = { '0'..'9' } -// ASCII_ALPHANUMERIC = { ASCII_ALPHA | ASCII-DIGIT } -// -// Avoid using WHITE_SPACE which targets [unicode](https://www.unicode.org/reports/tr31/#R3a) -// WHITESPACE = _{ " " | "\t" } // constants -acct_separator = { ":" } -comment_token = { ";" | "*" } +acct_separator = _{ ":" } +comment_token = _{ ";" | "*" } // TODO: need to handle escaped semi-colon? // TODO: consider whether comment must be preceded by whitespace (except at beginning of line) // a comment -comment = { comment_token ~ (!NEWLINE ~ ANY)* ~ NEWLINE } -comment_or_newline = { (WHITESPACE+ ~ comment) | (WHITESPACE* ~ NEWLINE) } -empty_line = { WHITESPACE* ~ NEWLINE } +comment = _{ comment_token ~ (!NEWLINE ~ ANY)* ~ NEWLINE } +comment_or_newline = _{ (WHITESPACE+ ~ comment) | (WHITESPACE* ~ NEWLINE) } +empty_line = _{ WHITESPACE* ~ NEWLINE } // Each acct token must begin with alpha and may be followed by any number of alpha or number // Full account descriptors are comprised of colon-separated account names. The top-level @@ -36,16 +27,21 @@ acct_descriptor = @{ top_level_acct ~ (sub_acct)* } decimal_value = @{ (("-" ~ NUMBER+) | NUMBER+) ~ "." ~ NUMBER+ } - +// ISO8601 Date Extended format is YYYY-MM-DD where +// YYYY is 4 digits; 0000-9999 +// MM is 2 digits; 01-09, 10-12 +// DD is 2 digits; 01-09, 10-29, 30, 31 iso8601_date_extended = @{ ASCII_DIGIT{4} ~ "-" ~ (( "0" ~ ASCII_NONZERO_DIGIT) | ("1" ~ '0'..'2')) ~ "-" ~ (("30" | "31") | ("0" ~ ASCII_NONZERO_DIGIT) | ('1'..'2' ~ ASCII_DIGIT)) -} // YYYY-MM-DD +} // TODO: consider more lax indent rules -posting_indent = { "\t" | " "{2} } +// Posting lines of a transaction must begin with 1 tab or 2 spaces +posting_indent = _{ "\t" | " "{2} } +// A basic posting must specify an account and a value, ending with a comment or newline posting_basic = @{ posting_indent ~ acct_descriptor @@ -54,12 +50,13 @@ posting_basic = @{ } // TODO: improve on 'text' to allow more in description -trans_description_text = { (ASCII_ALPHANUMERIC+ | WHITESPACE)+ } +trans_description_text = _{ (ASCII_ALPHANUMERIC+ | WHITESPACE)+ } // TODO: is this the full set of annotation options? -trans_annotation = { "txn" | "*" | "!" } -trans_description = { "\"" ~ trans_description_text ~ "\"" } +trans_annotation = _{ "txn" | "*" | "!" } +trans_description = _{ "\"" ~ trans_description_text ~ "\"" } // TODO: how to ensure col 0 / no ws for header row +// The header of a transaction specifies the date, an annotation, a description, and ends with a comment or newline trans_header = @{ iso8601_date_extended ~ WHITESPACE+ @@ -69,6 +66,8 @@ trans_header = @{ ~ comment_or_newline } +// A transaction begins with a single header followed by one or more postings. Whether the transaction balances is +// outside the scope of parsing. transaction_block = @{ trans_header ~ posting_basic+ } @@ -121,4 +120,17 @@ balance_directive = @{ directives = { balance_directive | directive_close | directive_commodity | directive_open } -ledger = { SOI ~ (options | directives | transaction_block | comment | empty_line)+ ~ EOI } \ No newline at end of file +// The rule for a complete ledger +ledger = { SOI ~ (options | directives | transaction_block | comment | empty_line)+ ~ EOI } + + +// +// Pest's built-in rules: +// ASCII_ALPHA_LOWER = { 'a'..'z' } +// ASCII_ALPHA_UPPER = { 'A'..'Z' } +// ASCII_ALPHA = { ASCII_ALPHA_LOWER | ASCII_ALPHA_UPPER } +// ASCII_DIGIT = { '0'..'9' } +// ASCII_ALPHANUMERIC = { ASCII_ALPHA | ASCII-DIGIT } +// +// Avoid using WHITE_SPACE which targets [unicode](https://www.unicode.org/reports/tr31/#R3a) +// diff --git a/pta-parser/src/ledger_parser/mod.rs b/pta-parser/src/ledger_parser/mod.rs index 82df59a..efb2acb 100644 --- a/pta-parser/src/ledger_parser/mod.rs +++ b/pta-parser/src/ledger_parser/mod.rs @@ -9,3 +9,149 @@ use pest_derive::*; #[derive(Parser)] #[grammar = "./grammars/ledger.pest"] pub struct LedgerParser; + + +#[derive(Default)] +struct RawAccountDescriptor { + path: String +} + + +#[derive(Default)] +struct RawTransaction { + date: String, + anno: String, + desc: String, + postings: Vec, + comment: String +} + +#[derive(Default)] +struct RawPosting { + acct: RawAccountDescriptor, + value: f64, + comment: String +} + + +use pest_consume::{Error, match_nodes}; +type Result = std::result::Result>; +type Node<'i> = pest_consume::Node<'i, Rule, ()>; + +#[pest_consume::parser] +impl LedgerParser { + fn EOI(_input: Node) -> Result<()> { + print!("EOI found"); + Ok(()) + } + + fn acct_descriptor(input: Node) -> Result { + Ok( RawAccountDescriptor { path: input.to_string() }) + } + + fn comment(input: Node) -> Result { + print!("comment found; {}", input.as_str()); + Ok(input.to_string()) + } + + fn decimal_value(input: Node) -> Result { + // Get the string captured by this node + input.as_str() + // Convert it into the type we want + .parse::() + // In case of an error, we use `Node::error` to link the error + // with the part of the input that caused it + .map_err(|e| input.error(e)) + } + + + fn iso8601_date_extended(input: Node) -> Result { + Ok(input.to_string()) + } + + fn trans_annotation(input: Node) -> Result { + Ok( input.to_string() ) + } + fn trans_description(input: Node) -> Result { + Ok( input.to_string() ) + } + + fn transaction_block(input: Node) -> Result { + let mut rt = RawTransaction::default(); + + match_nodes!(input.into_children(); + [trans_header(hdr)] => { + // TODO: copy from returned + rt = hdr + }, + [posting_basic(pb)] => rt.postings.push(pb), + [comment(c)] => rt.comment = c, + ); + + Ok(rt) + } + + // trans_header = @{ + // iso8601_date_extended + // ~ WHITESPACE+ + // ~ trans_annotation + // ~ WHITESPACE+ + // ~ trans_description + // ~ comment_or_newline + // } + fn trans_header(input: Node) -> Result { + let mut rt = RawTransaction::default(); + + match_nodes!(input.into_children(); + [iso8601_date_extended(ide)] => rt.date = ide, + [trans_annotation(a)] => rt.anno = a, + [trans_description(d)] => rt.desc = d, + [comment(c)] => rt.comment = c, + ); + + Ok(rt) + } + + // posting_basic = @{ + // posting_indent + // ~ acct_descriptor + // ~ WHITESPACE+ ~ decimal_value + // ~ comment_or_newline + // } + fn posting_basic(input: Node) -> Result { + let mut p = RawPosting::default(); + + match_nodes!(input.into_children(); + [acct_descriptor(ad)] => p.acct = ad, + [decimal_value(v)] => p.value = v, + [comment(c)] => p.comment = c, + ); + + Ok(p) + } + + // directives = { balance_directive | directive_close | directive_commodity | directive_open } + + // fn directives(input: Node) -> Result { + // match_nodes!(input.into_children(); + // [balance_directive(dir)] => dir, + // [directive_close(dir)] => dir, + + // ); + + // Ok("WIP".to_string()) + // } + + fn balance_directive(input: Node) -> Result { + match_nodes!(input.into_children(); + [iso8601_date_extended(d)] => Ok(d), + ) + } + + fn directive_close(input: Node) -> Result { + match_nodes!(input.into_children(); + [iso8601_date_extended(d)] => Ok(d), + ) + } + +} \ No newline at end of file diff --git a/pta-parser/src/parser_tests/transaction.rs b/pta-parser/src/parser_tests/transaction.rs index 4375dc9..2d8f618 100644 --- a/pta-parser/src/parser_tests/transaction.rs +++ b/pta-parser/src/parser_tests/transaction.rs @@ -109,99 +109,103 @@ mod trans_block { } - -#[cfg(test)] -mod trans_header { - use super::*; - - - #[rstest] - // NOTE: use simple text in case; test function wraps in dbl quotes - #[case ("a")] - #[case ("description")] - #[case (" a description ")] - #[case ("\ta description\twith tabs ")] - fn can_parse_trans_descr(#[case] descr: &str) { - - let quoted_descr = format!("\"{}\"", descr); - let pairs = LedgerParser::parse( - Rule::trans_description, "ed_descr) - .unwrap_or_else(|e| panic!("{}", e)); - - // Parsing succeeded; ensure at least 1 pair was returned - assert!(pairs.len() > 0); - } - - - #[rstest] - // NOTE: use simple text in case; test function wraps in dbl quotes - #[case ("")] // empty - no text - #[case (" ")] // empty - only ws - #[case ("\ta description\twith tabs and\n a newline")] // newline is invalid - #[should_panic(expected = "expected trans_")] - fn verify_trans_descr_error(#[case] bad_descr: &str) { - - let quoted_bad_descr = format!("\"{}\"", bad_descr); - LedgerParser::parse( - Rule::trans_description, "ed_bad_descr) - .unwrap_or_else(|e| panic!("{}", e)); - - // should never reach this code since all cases should result in panic - println!("Test case '{}' should fail to parse!", quoted_bad_descr); - assert!(false); - } - - - - #[rstest] - // Verify transaction annotations: !, *, txn - #[case ("2009-01-09 ! \"Bitcoin launch date\"")] - #[case ("2009-01-09 * \"Bitcoin launch date\"")] - #[case ("2009-01-09 txn \"Bitcoin launch date\"")] - // whitespace variations - #[case ("2010-01-09 * \"multi whitespace test\"")] - #[case ("2011-01-09\t!\t\"tab test\"")] - #[case ("2011-01-09\ttxn\t\"tab test\"")] - #[case ("2012-01-09 * \"trailing tab test\"\t")] - #[case ("2013-01-09 ! \"trailing spaces test\" ")] - #[case ("2014-01-09 ! \"trailing tabs and spaces test\" \t \t\t ")] - // #[ignore = "TBD: handle special chars in transaction description"] - // #[case ("2009-01-09 ! \"Special chars in description: !@#$%^&*()-_=+\"")] - fn can_parse_trans_header(#[case] base: &str) { - - // NOTE: addons must end in \n to match rules - let addons = [ - "\n" - ," \n" - ,"\t\n" - ," ; comment 123 ; \n" - ,"\t;\tcomment 123 ;\t\n" - ]; - - for suffix in addons.iter() { - - let tc = format!("{}{}", base, suffix); - println!("Test case: {}", tc); - - assert!(get_pairs(Rule::trans_header, &tc).len() > 0); - } +//=========== +// NOTE: The tests in trans_header can be used by removing the silent indicator ('_') from the relevant pest rules. +// These rules were silenced to simplify processing in code (matching, etc.), but can be un-silenced for debugging, etc. +//=========== + +// #[cfg(test)] +// mod trans_header { +// use super::*; + + +// #[rstest] +// // NOTE: use simple text in case; test function wraps in dbl quotes +// #[case ("a")] +// #[case ("description")] +// #[case (" a description ")] +// #[case ("\ta description\twith tabs ")] +// fn can_parse_trans_descr(#[case] descr: &str) { + +// let quoted_descr = format!("\"{}\"", descr); +// let pairs = LedgerParser::parse( +// Rule::trans_description, "ed_descr) +// .unwrap_or_else(|e| panic!("{}", e)); + +// // Parsing succeeded; ensure at least 1 pair was returned +// assert!(pairs.len() > 0); +// } + + +// #[rstest] +// // NOTE: use simple text in case; test function wraps in dbl quotes +// #[case ("")] // empty - no text +// #[case (" ")] // empty - only ws +// #[case ("\ta description\twith tabs and\n a newline")] // newline is invalid +// #[should_panic(expected = "expected trans_")] +// fn verify_trans_descr_error(#[case] bad_descr: &str) { + +// let quoted_bad_descr = format!("\"{}\"", bad_descr); +// LedgerParser::parse( +// Rule::trans_description, "ed_bad_descr) +// .unwrap_or_else(|e| panic!("{}", e)); + +// // should never reach this code since all cases should result in panic +// println!("Test case '{}' should fail to parse!", quoted_bad_descr); +// assert!(false); +// } + + + +// #[rstest] +// // Verify transaction annotations: !, *, txn +// #[case ("2009-01-09 ! \"Bitcoin launch date\"")] +// #[case ("2009-01-09 * \"Bitcoin launch date\"")] +// #[case ("2009-01-09 txn \"Bitcoin launch date\"")] +// // whitespace variations +// #[case ("2010-01-09 * \"multi whitespace test\"")] +// #[case ("2011-01-09\t!\t\"tab test\"")] +// #[case ("2011-01-09\ttxn\t\"tab test\"")] +// #[case ("2012-01-09 * \"trailing tab test\"\t")] +// #[case ("2013-01-09 ! \"trailing spaces test\" ")] +// #[case ("2014-01-09 ! \"trailing tabs and spaces test\" \t \t\t ")] +// // #[ignore = "TBD: handle special chars in transaction description"] +// // #[case ("2009-01-09 ! \"Special chars in description: !@#$%^&*()-_=+\"")] +// fn can_parse_trans_header(#[case] base: &str) { + +// // NOTE: addons must end in \n to match rules +// let addons = [ +// "\n" +// ," \n" +// ,"\t\n" +// ," ; comment 123 ; \n" +// ,"\t;\tcomment 123 ;\t\n" +// ]; + +// for suffix in addons.iter() { + +// let tc = format!("{}{}", base, suffix); +// println!("Test case: {}", tc); + +// assert!(get_pairs(Rule::trans_header, &tc).len() > 0); +// } - } +// } - #[rstest] - #[case ("2016-01-28 * \"comment after description w/o whitespace\"; 10:01 am, xfer id 56aa57787199a73d29000650\n")] - #[should_panic(expected = "expected trans_header")] - fn verify_trans_header_error(#[case] bad_hdr: &str) { +// #[rstest] +// #[case ("2016-01-28 * \"comment after description w/o whitespace\"; 10:01 am, xfer id 56aa57787199a73d29000650\n")] +// #[should_panic(expected = "expected trans_header")] +// fn verify_trans_header_error(#[case] bad_hdr: &str) { - let quoted_bad_descr = format!("\"{}\"", bad_hdr); - LedgerParser::parse( - Rule::trans_header, "ed_bad_descr) - .unwrap_or_else(|e| panic!("{}", e)); +// let quoted_bad_descr = format!("\"{}\"", bad_hdr); +// LedgerParser::parse( +// Rule::trans_header, "ed_bad_descr) +// .unwrap_or_else(|e| panic!("{}", e)); - // should never reach this code since all cases should result in panic - println!("Test case '{}' should fail to parse!", quoted_bad_descr); - assert!(false); - } +// // should never reach this code since all cases should result in panic +// println!("Test case '{}' should fail to parse!", quoted_bad_descr); +// assert!(false); +// } -} +// } From 777117230fbc3d33779b4e7261355d9dcf9c1069 Mon Sep 17 00:00:00 2001 From: jburnett Date: Wed, 18 Oct 2023 19:05:13 -0400 Subject: [PATCH 14/23] Removed pest_consume --- cli/src/main.rs | 22 +---- journal.md | 7 ++ pta-parser/Cargo.toml | 1 - pta-parser/src/ledger_parser/mod.rs | 123 ---------------------------- 4 files changed, 8 insertions(+), 145 deletions(-) create mode 100644 journal.md diff --git a/cli/src/main.rs b/cli/src/main.rs index 938becb..fe8e6e7 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -19,9 +19,6 @@ fn main() -> Result<(), Box> { match std::fs::read_to_string(p) { Ok(ledger) => { println!("Read string length: {}", ledger.len()); - - // return main_consume(&ledger); - return main_parse(&ledger); } @@ -34,25 +31,8 @@ fn main() -> Result<(), Box> { } -#[allow(dead_code)] -fn main_consume(ledger: &String) -> Result<(), Box> { - - match LedgerParser::parse(Rule::ledger, &ledger) { - Ok(root) => { - return Ok(()); - } - - Err(e) => { - println!("ERR: {}", e); - return Err(Box::new(e)); - } - } - // return Ok(()); - -} - -#[allow(dead_code)] +#[allow(dead_code)] // allows switching b/t mains in primary main above fn main_parse(ledger: &String) -> Result<(), Box> { match LedgerParser::parse(Rule::ledger, &ledger) { diff --git a/journal.md b/journal.md new file mode 100644 index 0000000..2f350e7 --- /dev/null +++ b/journal.md @@ -0,0 +1,7 @@ +# Plain-Text Accounting Parser + +## History + +### 10/18/2023 + +Abandonded effort to integrate [pest_consume](https://lib.rs/crates/pest_consume) since the author is no longer maintaining it. diff --git a/pta-parser/Cargo.toml b/pta-parser/Cargo.toml index 473051d..9ba78a5 100644 --- a/pta-parser/Cargo.toml +++ b/pta-parser/Cargo.toml @@ -9,7 +9,6 @@ bench = false [dependencies] pest = "2.7.3" -pest_consume = "1.1.3" pest_derive = "2.7.3" [dev-dependencies] diff --git a/pta-parser/src/ledger_parser/mod.rs b/pta-parser/src/ledger_parser/mod.rs index efb2acb..ffd786c 100644 --- a/pta-parser/src/ledger_parser/mod.rs +++ b/pta-parser/src/ledger_parser/mod.rs @@ -32,126 +32,3 @@ struct RawPosting { value: f64, comment: String } - - -use pest_consume::{Error, match_nodes}; -type Result = std::result::Result>; -type Node<'i> = pest_consume::Node<'i, Rule, ()>; - -#[pest_consume::parser] -impl LedgerParser { - fn EOI(_input: Node) -> Result<()> { - print!("EOI found"); - Ok(()) - } - - fn acct_descriptor(input: Node) -> Result { - Ok( RawAccountDescriptor { path: input.to_string() }) - } - - fn comment(input: Node) -> Result { - print!("comment found; {}", input.as_str()); - Ok(input.to_string()) - } - - fn decimal_value(input: Node) -> Result { - // Get the string captured by this node - input.as_str() - // Convert it into the type we want - .parse::() - // In case of an error, we use `Node::error` to link the error - // with the part of the input that caused it - .map_err(|e| input.error(e)) - } - - - fn iso8601_date_extended(input: Node) -> Result { - Ok(input.to_string()) - } - - fn trans_annotation(input: Node) -> Result { - Ok( input.to_string() ) - } - fn trans_description(input: Node) -> Result { - Ok( input.to_string() ) - } - - fn transaction_block(input: Node) -> Result { - let mut rt = RawTransaction::default(); - - match_nodes!(input.into_children(); - [trans_header(hdr)] => { - // TODO: copy from returned - rt = hdr - }, - [posting_basic(pb)] => rt.postings.push(pb), - [comment(c)] => rt.comment = c, - ); - - Ok(rt) - } - - // trans_header = @{ - // iso8601_date_extended - // ~ WHITESPACE+ - // ~ trans_annotation - // ~ WHITESPACE+ - // ~ trans_description - // ~ comment_or_newline - // } - fn trans_header(input: Node) -> Result { - let mut rt = RawTransaction::default(); - - match_nodes!(input.into_children(); - [iso8601_date_extended(ide)] => rt.date = ide, - [trans_annotation(a)] => rt.anno = a, - [trans_description(d)] => rt.desc = d, - [comment(c)] => rt.comment = c, - ); - - Ok(rt) - } - - // posting_basic = @{ - // posting_indent - // ~ acct_descriptor - // ~ WHITESPACE+ ~ decimal_value - // ~ comment_or_newline - // } - fn posting_basic(input: Node) -> Result { - let mut p = RawPosting::default(); - - match_nodes!(input.into_children(); - [acct_descriptor(ad)] => p.acct = ad, - [decimal_value(v)] => p.value = v, - [comment(c)] => p.comment = c, - ); - - Ok(p) - } - - // directives = { balance_directive | directive_close | directive_commodity | directive_open } - - // fn directives(input: Node) -> Result { - // match_nodes!(input.into_children(); - // [balance_directive(dir)] => dir, - // [directive_close(dir)] => dir, - - // ); - - // Ok("WIP".to_string()) - // } - - fn balance_directive(input: Node) -> Result { - match_nodes!(input.into_children(); - [iso8601_date_extended(d)] => Ok(d), - ) - } - - fn directive_close(input: Node) -> Result { - match_nodes!(input.into_children(); - [iso8601_date_extended(d)] => Ok(d), - ) - } - -} \ No newline at end of file From c96a3af11449af2242ed62a2acf7b9d63b4cca9c Mon Sep 17 00:00:00 2001 From: jburnett Date: Wed, 18 Oct 2023 19:05:28 -0400 Subject: [PATCH 15/23] vscode settings --- .vscode/settings.json | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..08065e9 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,5 @@ +{ + "rust-analyzer.linkedProjects": [ + "./pta-parser/Cargo.toml" + ] +} \ No newline at end of file From 1beb7fd7a79289d5d9eb9ec16f258bc4989d970e Mon Sep 17 00:00:00 2001 From: jburnett Date: Thu, 19 Oct 2023 15:11:11 -0400 Subject: [PATCH 16/23] moving parse handling to builder; refactor cli to use it --- Cargo.toml | 2 + cli/Cargo.toml | 3 + cli/src/main.rs | 118 ++---------------- pta-ledger/Cargo.toml | 19 +++ pta-ledger/src/ledger_builder.rs | 184 ++++++++++++++++++++++++++++ pta-ledger/src/lib.rs | 9 ++ pta-parser/Cargo.toml | 2 + pta-parser/src/ledger_parser/mod.rs | 22 +--- pta-types/Cargo.toml | 16 +++ pta-types/src/lib.rs | 36 ++++++ 10 files changed, 284 insertions(+), 127 deletions(-) create mode 100644 pta-ledger/Cargo.toml create mode 100644 pta-ledger/src/ledger_builder.rs create mode 100644 pta-ledger/src/lib.rs create mode 100644 pta-types/Cargo.toml create mode 100644 pta-types/src/lib.rs diff --git a/Cargo.toml b/Cargo.toml index 663746e..cc25b42 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,5 +2,7 @@ resolver = "2" members = [ "cli" + ,"pta-ledger" ,"pta-parser" + ,'pta-types', ] diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 85347c5..a3213f4 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -5,5 +5,8 @@ edition = "2021" [dependencies] +log = "0.4.20" pest = "2.7.3" +pta-ledger = { path = "../pta-ledger" } pta-parser = { path = "../pta-parser" } +pta-types = { path = "../pta-types" } diff --git a/cli/src/main.rs b/cli/src/main.rs index fe8e6e7..68a304d 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -1,132 +1,38 @@ +extern crate pta_ledger; extern crate pta_parser; + +use log::{info, warn, as_error}; + // TODO: how to isolate pest so clients can just use lib (w/o requiring pest as here) use pest::{*, iterators::Pair}; use pta_parser::{LedgerParser, Rule}; +use pta_types::{FilePosition, RawTransaction, ParserInfo }; + + fn main() -> Result<(), Box> { - // TODO: CLI improvements + // TODO: CLI improvements // - exec with path of file to parse // - optionally output parse results (should be equivalent to input file) let pb = std::env::current_dir()?; - println!("Curr dir: {:?}", pb.as_path()); - let p = pb.join("testdata/basic-ledger"); - println!("Reading {:?}", p); + info!("Reading {:?}", p); match std::fs::read_to_string(p) { Ok(ledger) => { - println!("Read string length: {}", ledger.len()); - return main_parse(&ledger); - } - - Err(e) => { - println!("ERR: {}", e); - return Err(Box::new(e)); - } - } - -} - - - -#[allow(dead_code)] // allows switching b/t mains in primary main above -fn main_parse(ledger: &String) -> Result<(), Box> { - - match LedgerParser::parse(Rule::ledger, &ledger) { - Ok(root) => { - for pair in root.into_iter() { - // println!("\n{:?}", pair.as_span()); - // println!("\n{:?}", pair.as_rule()); - - match pair.as_rule() { - Rule::comment => { - dump_pair(&pair); - } - - Rule::EOI => { dump_pair(&pair); } - Rule::WHITESPACE => { dump_pair(&pair); } - Rule::acct_descriptor => { dump_pair(&pair); } - Rule::acct_separator => { dump_pair(&pair); } - Rule::balance_directive => { dump_pair(&pair); } - Rule::comment_or_newline => { dump_pair(&pair); } - Rule::comment_token => { dump_pair(&pair); } - Rule::currency => { dump_pair(&pair); } - Rule::decimal_value => { dump_pair(&pair); } - Rule::directive_close => { dump_pair(&pair); } - Rule::directive_commodity => { dump_pair(&pair); } - Rule::directive_open => { dump_pair(&pair); } - Rule::directives => { dump_pair(&pair); } - Rule::empty_line => { dump_pair(&pair); } - Rule::iso8601_date_extended => { dump_pair(&pair); } - Rule::ledger => { dump_pair(&pair); } - Rule::options => { dump_pair(&pair); } - Rule::posting_basic => { dump_pair(&pair); } - Rule::posting_indent => { dump_pair(&pair); } - Rule::sub_acct => { dump_pair(&pair); } - Rule::top_level_acct => { dump_pair(&pair); } - Rule::trans_annotation => { dump_pair(&pair); } - Rule::trans_description => { dump_pair(&pair); } - Rule::trans_description_text => { dump_pair(&pair); } - Rule::trans_header => { dump_pair(&pair); } - Rule::transaction_block => { dump_pair(&pair); } - } - } + info!("Read string length: {}", ledger.len()); + return pta_ledger::ledger_builder::parse_string(&ledger); } Err(e) => { - println!("ERR: {}", e); + warn!(err = as_error!(e); "failed to read file as string"); return Err(Box::new(e)); } } - return Ok(()); -} - - -fn dump_rule(r:&Rule, s:&Span) { - println!("\nRULE: {:?}", &r); - println!("\n{:?}", &s); -} - -fn dump_pair(p:&Pair) { - dump_rule(&p.as_rule(), &p.as_span()); -} - - - -fn handle_pair(pair: Pair<'_, Rule>) { - match pair.as_rule() { - Rule::comment => { dump_pair(&pair); } - Rule::EOI => { dump_pair(&pair); } - Rule::WHITESPACE => { dump_pair(&pair); } - Rule::acct_descriptor => { dump_pair(&pair); } - Rule::acct_separator => { dump_pair(&pair); } - Rule::balance_directive => { dump_pair(&pair); } - Rule::comment_or_newline => { dump_pair(&pair); } - Rule::comment_token => { dump_pair(&pair); } - Rule::currency => { dump_pair(&pair); } - Rule::decimal_value => { dump_pair(&pair); } - Rule::directive_close => { dump_pair(&pair); } - Rule::directive_commodity => { dump_pair(&pair); } - Rule::directive_open => { dump_pair(&pair); } - Rule::directives => { dump_pair(&pair); } - Rule::empty_line => { dump_pair(&pair); } - Rule::iso8601_date_extended => { dump_pair(&pair); } - Rule::ledger => { dump_pair(&pair); } - Rule::options => { dump_pair(&pair); } - Rule::posting_basic => { dump_pair(&pair); } - Rule::posting_indent => { dump_pair(&pair); } - Rule::sub_acct => { dump_pair(&pair); } - Rule::top_level_acct => { dump_pair(&pair); } - Rule::trans_annotation => { dump_pair(&pair); } - Rule::trans_description => { dump_pair(&pair); } - Rule::trans_description_text => { dump_pair(&pair); } - Rule::trans_header => { dump_pair(&pair); } - Rule::transaction_block => { dump_pair(&pair); } - } } diff --git a/pta-ledger/Cargo.toml b/pta-ledger/Cargo.toml new file mode 100644 index 0000000..3e6ab08 --- /dev/null +++ b/pta-ledger/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "pta-ledger" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[lib] +bench = false + +[dependencies] +log = { version = "0.4.20", features = ["kv_unstable", "kv_unstable_serde"] } +pest = "2.7.3" +pest_derive = "2.7.3" +pretty_env_logger = "0.5.0" +pta-parser = { path = "../pta-parser" } +pta-types ={ path = "../pta-types" } + +[dev-dependencies] +rstest = "0.18.2" diff --git a/pta-ledger/src/ledger_builder.rs b/pta-ledger/src/ledger_builder.rs new file mode 100644 index 0000000..2789d92 --- /dev/null +++ b/pta-ledger/src/ledger_builder.rs @@ -0,0 +1,184 @@ +use log::{info, warn, as_error}; +// use pretty_env_logger::*; + +use pta_types::*; + + + +// TODO: how to isolate pest so clients can just use lib (w/o requiring pest as here) +use pest::{*, iterators::Pair}; +use pta_parser::{LedgerParser, Rule}; +// use pta_types::{FilePosition, RawTransaction, ParserInfo }; + + + +#[allow(dead_code)] // allows switching b/t mains in primary main above +pub fn parse_string(ledger: &String) -> Result<(), Box> { + + pretty_env_logger::init(); + + match LedgerParser::parse(Rule::ledger, &ledger) { + Ok(root) => { + info!("Successfully parsed with Rule::ledger"); + for pair in root.into_iter() { + return handle_pair(pair); + } + } + + Err(err) => { + warn!(err = as_error!(err); "failed to parse with Rule::ledger"); + return Err(Box::new(err)); + } + } + + return Ok(()); +} + + + +fn dump_rule_of_pair(p: &Pair) { + info!("RULE: {:?} at {:?}; SPAN: {:?}", &p.as_rule(), &p.line_col(), &p.as_span()); +} + +// REMOVE: +#[allow(dead_code)] +fn dump_rule(r:&Rule, s:&Span) { + info!("RULE: {:?}; SPAN: {:?}", &r, &s); +} + +fn dump_pair(p:&Pair) { + dump_rule_of_pair(p); + // println!("\nline, col: {:?}", p.line_col()); + // // dump_rule(&p.as_rule(), &p.as_span()); + // dump_rule(&p.as_rule(), &p.as_span()); +} + + + + +fn handle_ledger(pair: & Pair) -> Result<(), Box> { + for inner_pair in pair.clone().into_inner() { + + match handle_pair(inner_pair) { + Ok(_p) => { /* handle_pair does all the work */ } + + Err(err) => { + warn!(err = as_error!(*err); "handle_pair failed in handle_ledger"); + return Err(err); + } + }; + } + + return Ok(()); +} + + +fn handle_posting_basic(xn: &mut RawTransaction, pair: &Pair) -> Result<(), Box> { + + match LedgerParser::parse(Rule::posting_basic, pair.as_span().as_str()) { + Ok(posting) => { + info!("handling posting_basic"); + // handle_posting_basic(xn, posting); + } + + Err(e) => { + warn!(err = as_error!(e); "failed to parse with posting_basic"); + return Err(Box::new(e)); + } + + } + + return Ok(()); +} + +fn handle_trans_header(xn: &mut RawTransaction, pair: &Pair) -> Result<(), Box> { + info!("handling trans_header..."); + + return Ok(()); +} + +fn handle_trans_block(xn: &mut RawTransaction, pair: &Pair) -> Result<(), Box> { + info!("handling trans_block..."); + + xn.pinfo = ParserInfo { + position: FilePosition { + line: pair.line_col().0, + col: pair.line_col().1 + } + }; + + info!("parse with trans_header"); + match LedgerParser::parse(Rule::trans_header, &pair.as_span().as_str()) { + Ok(hdr) => { + for pair in hdr.into_iter() { + info!("attempt handle_trans_header on {}", pair.as_span().as_str()); + handle_trans_header(xn, &pair); + + } + // for p in &pair.into_inner() { + // handle_posting_basic(&mut xn, &p); + // } + } + + Err(e) => { + warn!(err = as_error!(e); "failed to parse with trans_header"); + return Err(Box::new(e)); + } + } + + return Ok(()); + +} + + + + +fn handle_pair(pair: Pair<'_, Rule>) -> Result<(), Box> { + match pair.as_rule() { + Rule::comment => { + info!("Rule::comment: {:?}", pair.as_span().as_str()); + } + Rule::EOI => { + info!("Rule::EOI at {:?}", pair.line_col()); + } + + Rule::WHITESPACE => {} + Rule::acct_descriptor => { dump_pair(&pair); return Ok(()); } + Rule::acct_separator => { dump_pair(&pair); return Ok(()); } + Rule::balance_directive => { dump_pair(&pair); return Ok(()); } + Rule::comment_or_newline => { dump_pair(&pair); return Ok(()); } + Rule::comment_token => { dump_pair(&pair); return Ok(()); } + Rule::currency => { dump_pair(&pair); return Ok(()); } + Rule::decimal_value => { dump_pair(&pair); return Ok(()); } + Rule::directive_close => { dump_pair(&pair); return Ok(()); } + Rule::directive_commodity => { dump_pair(&pair); return Ok(()); } + Rule::directive_open => { dump_pair(&pair); return Ok(()); } + Rule::directives => { dump_pair(&pair); return Ok(()); } + Rule::empty_line => {} + Rule::iso8601_date_extended => { dump_pair(&pair); return Ok(()); } + Rule::ledger => { + return handle_ledger(&pair); + } + Rule::options => { dump_pair(&pair); return Ok(()); } + Rule::posting_basic => { dump_pair(&pair); return Ok(()); } + Rule::posting_indent => { dump_pair(&pair); return Ok(()); } + Rule::sub_acct => { dump_pair(&pair); return Ok(()); } + Rule::top_level_acct => { dump_pair(&pair); return Ok(()); } + Rule::trans_annotation => { dump_pair(&pair); return Ok(()); } + Rule::trans_description => { dump_pair(&pair); return Ok(()); } + Rule::trans_description_text => { dump_pair(&pair); return Ok(()); } + Rule::trans_header => { + let mut xn = RawTransaction::default(); + return handle_trans_header(&mut xn, &pair); + } + Rule::transaction_block => { + let mut xn = RawTransaction::default(); + return handle_trans_block(&mut xn, &pair); + } + } + + return Ok(()); + +} + + diff --git a/pta-ledger/src/lib.rs b/pta-ledger/src/lib.rs new file mode 100644 index 0000000..649140e --- /dev/null +++ b/pta-ledger/src/lib.rs @@ -0,0 +1,9 @@ +pub extern crate pest; +pub extern crate pest_derive; +pub extern crate pta_parser; +pub extern crate pta_types; + +pub mod ledger_builder; + +pub extern crate pretty_env_logger; +#[macro_use] pub extern crate log; \ No newline at end of file diff --git a/pta-parser/Cargo.toml b/pta-parser/Cargo.toml index 9ba78a5..3adf065 100644 --- a/pta-parser/Cargo.toml +++ b/pta-parser/Cargo.toml @@ -8,8 +8,10 @@ edition = "2021" bench = false [dependencies] +log = "0.4.20" pest = "2.7.3" pest_derive = "2.7.3" +pta-types = { path = "../pta-types" } [dev-dependencies] rstest = "0.18.2" diff --git a/pta-parser/src/ledger_parser/mod.rs b/pta-parser/src/ledger_parser/mod.rs index ffd786c..e90a796 100644 --- a/pta-parser/src/ledger_parser/mod.rs +++ b/pta-parser/src/ledger_parser/mod.rs @@ -4,6 +4,7 @@ // + use pest_derive::*; #[derive(Parser)] @@ -11,24 +12,3 @@ use pest_derive::*; pub struct LedgerParser; -#[derive(Default)] -struct RawAccountDescriptor { - path: String -} - - -#[derive(Default)] -struct RawTransaction { - date: String, - anno: String, - desc: String, - postings: Vec, - comment: String -} - -#[derive(Default)] -struct RawPosting { - acct: RawAccountDescriptor, - value: f64, - comment: String -} diff --git a/pta-types/Cargo.toml b/pta-types/Cargo.toml new file mode 100644 index 0000000..b6cb1f0 --- /dev/null +++ b/pta-types/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "pta-types" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[lib] +bench = false + +[dependencies] +log = "0.4.20" +pest = "2.7.3" +pest_derive = "2.7.3" + +[dev-dependencies] +rstest = "0.18.2" diff --git a/pta-types/src/lib.rs b/pta-types/src/lib.rs new file mode 100644 index 0000000..767347d --- /dev/null +++ b/pta-types/src/lib.rs @@ -0,0 +1,36 @@ +#[derive(Default)] +pub struct FilePosition { + pub line: usize, + pub col: usize // TODO: u16? u32 is probably overkill +} + +#[derive(Default)] +pub struct ParserInfo { + pub position: FilePosition, +} + + +#[derive(Default)] +pub struct RawAccountDescriptor { + pub path: String, + pub pinfo: ParserInfo, +} + + +#[derive(Default)] +pub struct RawTransaction { + pub date: String, + pub anno: String, + pub desc: String, + pub postings: Vec, + pub comment: String, + pub pinfo: ParserInfo, +} + +#[derive(Default)] +pub struct RawPosting { + pub acct: RawAccountDescriptor, + pub value: f64, + pub comment: String, + pub pinfo: ParserInfo, +} From e71cc923f0c6832d314361a7d7d9dee2cda08dd6 Mon Sep 17 00:00:00 2001 From: jburnett Date: Tue, 24 Oct 2023 13:05:26 -0400 Subject: [PATCH 17/23] WIP: LedgerBuilder, logging --- .vscode/settings.json | 5 +- cli/Cargo.toml | 1 + cli/src/main.rs | 23 +++++-- pta-ledger/Cargo.toml | 1 - pta-ledger/src/ledger_builder.rs | 111 +++++++++++++++++++++++-------- pta-ledger/src/lib.rs | 3 +- pta-types/src/lib.rs | 31 ++------- pta-types/src/parsed_ledger.rs | 14 ++++ pta-types/src/raw_transaction.rs | 27 ++++++++ 9 files changed, 155 insertions(+), 61 deletions(-) create mode 100644 pta-types/src/parsed_ledger.rs create mode 100644 pta-types/src/raw_transaction.rs diff --git a/.vscode/settings.json b/.vscode/settings.json index 08065e9..95de9dc 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,5 +1,8 @@ { "rust-analyzer.linkedProjects": [ - "./pta-parser/Cargo.toml" + "./cli/Cargo.toml", + "./pta-ledger/Cargo.toml", + "./pta-parser/Cargo.toml", + "./pta-types/Cargo.toml" ] } \ No newline at end of file diff --git a/cli/Cargo.toml b/cli/Cargo.toml index a3213f4..128b89f 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -7,6 +7,7 @@ edition = "2021" [dependencies] log = "0.4.20" pest = "2.7.3" +pretty_env_logger = "0.5.0" pta-ledger = { path = "../pta-ledger" } pta-parser = { path = "../pta-parser" } pta-types = { path = "../pta-types" } diff --git a/cli/src/main.rs b/cli/src/main.rs index 68a304d..cc5a167 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -2,10 +2,11 @@ extern crate pta_ledger; extern crate pta_parser; -use log::{info, warn, as_error}; +use log::{info, warn, as_error, error}; // TODO: how to isolate pest so clients can just use lib (w/o requiring pest as here) use pest::{*, iterators::Pair}; +use pta_ledger::ledger_builder::LedgerBuilder; use pta_parser::{LedgerParser, Rule}; use pta_types::{FilePosition, RawTransaction, ParserInfo }; @@ -16,15 +17,29 @@ fn main() -> Result<(), Box> { // - exec with path of file to parse // - optionally output parse results (should be equivalent to input file) + // TODO: consider flag to use init_timed to include time per line + pretty_env_logger::init(); + let pb = std::env::current_dir()?; let p = pb.join("testdata/basic-ledger"); - info!("Reading {:?}", p); + info!("Input file: {:?}", p); + let mut bldr = LedgerBuilder::default(); match std::fs::read_to_string(p) { Ok(ledger) => { - info!("Read string length: {}", ledger.len()); - return pta_ledger::ledger_builder::parse_string(&ledger); + info!("String length from input: {}", ledger.len()); + match bldr.from_string(&ledger) { + Ok(parsed) => { + info!("Successfully parsed into ParsedLedger"); + return Ok(()); + }, + + Err(e) => { + error!("LedgerBuilder failed with {:}", e); + return Err(e); + } + } } Err(e) => { diff --git a/pta-ledger/Cargo.toml b/pta-ledger/Cargo.toml index 3e6ab08..78a7556 100644 --- a/pta-ledger/Cargo.toml +++ b/pta-ledger/Cargo.toml @@ -11,7 +11,6 @@ bench = false log = { version = "0.4.20", features = ["kv_unstable", "kv_unstable_serde"] } pest = "2.7.3" pest_derive = "2.7.3" -pretty_env_logger = "0.5.0" pta-parser = { path = "../pta-parser" } pta-types ={ path = "../pta-types" } diff --git a/pta-ledger/src/ledger_builder.rs b/pta-ledger/src/ledger_builder.rs index 2789d92..9dbfbec 100644 --- a/pta-ledger/src/ledger_builder.rs +++ b/pta-ledger/src/ledger_builder.rs @@ -1,5 +1,6 @@ +use std::error::Error; + use log::{info, warn, as_error}; -// use pretty_env_logger::*; use pta_types::*; @@ -8,34 +9,92 @@ use pta_types::*; // TODO: how to isolate pest so clients can just use lib (w/o requiring pest as here) use pest::{*, iterators::Pair}; use pta_parser::{LedgerParser, Rule}; -// use pta_types::{FilePosition, RawTransaction, ParserInfo }; +#[derive(Default)] +pub struct LedgerBuilder { + pl: ParsedLedger +} -#[allow(dead_code)] // allows switching b/t mains in primary main above -pub fn parse_string(ledger: &String) -> Result<(), Box> { +impl LedgerBuilder { + pub fn from_string(self: &mut Self, ledger: &String) -> Result<&mut ParsedLedger, Box> { - pretty_env_logger::init(); + self.pl = ParsedLedger::default(); - match LedgerParser::parse(Rule::ledger, &ledger) { - Ok(root) => { - info!("Successfully parsed with Rule::ledger"); - for pair in root.into_iter() { - return handle_pair(pair); - } + match LedgerParser::parse(Rule::ledger, &ledger) { + Ok(root) => { + info!("Successfully parsed with Rule::ledger"); + for pair in root.into_iter() { + info!("LedgerBuilder::from_string: root pair is {:}", pair.as_str()); + self.handle_pair(pair)?; + } + } + + Err(err) => { + warn!(err = as_error!(err); "failed to parse with Rule::ledger"); + return Err(Box::new(err)); + } } + + return Ok(&mut self.pl); + } + - Err(err) => { - warn!(err = as_error!(err); "failed to parse with Rule::ledger"); - return Err(Box::new(err)); + fn handle_pair(self: &Self, pair: Pair<'_, Rule>) -> Result<(), Box> { + let parsed = ParsedLedger::default(); + + match pair.as_rule() { + Rule::comment => { + info!("Rule::comment: {:?}", pair.as_span().as_str()); + } + Rule::EOI => { + info!("Rule::EOI at {:?}", pair.line_col()); + } + + Rule::WHITESPACE => {} + Rule::acct_descriptor => { dump_pair(&pair); return Ok(()); } + Rule::acct_separator => { dump_pair(&pair); return Ok(()); } + Rule::balance_directive => { dump_pair(&pair); return Ok(()); } + Rule::comment_or_newline => { dump_pair(&pair); return Ok(()); } + Rule::comment_token => { dump_pair(&pair); return Ok(()); } + Rule::currency => { dump_pair(&pair); return Ok(()); } + Rule::decimal_value => { dump_pair(&pair); return Ok(()); } + Rule::directive_close => { dump_pair(&pair); return Ok(()); } + Rule::directive_commodity => { dump_pair(&pair); return Ok(()); } + Rule::directive_open => { dump_pair(&pair); return Ok(()); } + Rule::directives => { dump_pair(&pair); return Ok(()); } + Rule::empty_line => {} + Rule::iso8601_date_extended => { dump_pair(&pair); return Ok(()); } + Rule::ledger => { + return handle_ledger_rule(&pair); + } + Rule::options => { dump_pair(&pair); return Ok(()); } + Rule::posting_basic => { + dump_pair(&pair); return Ok(()); + } + Rule::posting_indent => { dump_pair(&pair); return Ok(()); } + Rule::sub_acct => { dump_pair(&pair); return Ok(()); } + Rule::top_level_acct => { dump_pair(&pair); return Ok(()); } + Rule::trans_annotation => { dump_pair(&pair); return Ok(()); } + Rule::trans_description => { dump_pair(&pair); return Ok(()); } + Rule::trans_description_text => { dump_pair(&pair); return Ok(()); } + Rule::trans_header => { + let mut xn = raw_transaction::RawTransaction::default(); + return handle_trans_header(&mut xn, &pair); + } + Rule::transaction_block => { + let mut xn = raw_transaction::RawTransaction::default(); + return handle_trans_block(&mut xn, &pair); + } } + + return Ok(()); + } - - return Ok(()); + } - fn dump_rule_of_pair(p: &Pair) { info!("RULE: {:?} at {:?}; SPAN: {:?}", &p.as_rule(), &p.line_col(), &p.as_span()); } @@ -48,15 +107,11 @@ fn dump_rule(r:&Rule, s:&Span) { fn dump_pair(p:&Pair) { dump_rule_of_pair(p); - // println!("\nline, col: {:?}", p.line_col()); - // // dump_rule(&p.as_rule(), &p.as_span()); - // dump_rule(&p.as_rule(), &p.as_span()); } - -fn handle_ledger(pair: & Pair) -> Result<(), Box> { +fn handle_ledger_rule(pair: & Pair) -> Result<(), Box> { for inner_pair in pair.clone().into_inner() { match handle_pair(inner_pair) { @@ -73,7 +128,7 @@ fn handle_ledger(pair: & Pair) -> Result<(), Box> { } -fn handle_posting_basic(xn: &mut RawTransaction, pair: &Pair) -> Result<(), Box> { +fn handle_posting_basic(xn: &mut raw_transaction::RawTransaction, pair: &Pair) -> Result<(), Box> { match LedgerParser::parse(Rule::posting_basic, pair.as_span().as_str()) { Ok(posting) => { @@ -91,13 +146,13 @@ fn handle_posting_basic(xn: &mut RawTransaction, pair: &Pair) -> Result<() return Ok(()); } -fn handle_trans_header(xn: &mut RawTransaction, pair: &Pair) -> Result<(), Box> { +fn handle_trans_header(xn: &mut raw_transaction::RawTransaction, pair: &Pair) -> Result<(), Box> { info!("handling trans_header..."); return Ok(()); } -fn handle_trans_block(xn: &mut RawTransaction, pair: &Pair) -> Result<(), Box> { +fn handle_trans_block(xn: &mut raw_transaction::RawTransaction, pair: &Pair) -> Result<(), Box> { info!("handling trans_block..."); xn.pinfo = ParserInfo { @@ -157,7 +212,7 @@ fn handle_pair(pair: Pair<'_, Rule>) -> Result<(), Box> { Rule::empty_line => {} Rule::iso8601_date_extended => { dump_pair(&pair); return Ok(()); } Rule::ledger => { - return handle_ledger(&pair); + return handle_ledger_rule(&pair); } Rule::options => { dump_pair(&pair); return Ok(()); } Rule::posting_basic => { dump_pair(&pair); return Ok(()); } @@ -168,11 +223,11 @@ fn handle_pair(pair: Pair<'_, Rule>) -> Result<(), Box> { Rule::trans_description => { dump_pair(&pair); return Ok(()); } Rule::trans_description_text => { dump_pair(&pair); return Ok(()); } Rule::trans_header => { - let mut xn = RawTransaction::default(); + let mut xn = raw_transaction::RawTransaction::default(); return handle_trans_header(&mut xn, &pair); } Rule::transaction_block => { - let mut xn = RawTransaction::default(); + let mut xn = raw_transaction::RawTransaction::default(); return handle_trans_block(&mut xn, &pair); } } diff --git a/pta-ledger/src/lib.rs b/pta-ledger/src/lib.rs index 649140e..ebe2b42 100644 --- a/pta-ledger/src/lib.rs +++ b/pta-ledger/src/lib.rs @@ -5,5 +5,4 @@ pub extern crate pta_types; pub mod ledger_builder; -pub extern crate pretty_env_logger; -#[macro_use] pub extern crate log; \ No newline at end of file +pub extern crate log; \ No newline at end of file diff --git a/pta-types/src/lib.rs b/pta-types/src/lib.rs index 767347d..67eeade 100644 --- a/pta-types/src/lib.rs +++ b/pta-types/src/lib.rs @@ -1,36 +1,17 @@ -#[derive(Default)] +#[derive(Default, Clone)] pub struct FilePosition { pub line: usize, pub col: usize // TODO: u16? u32 is probably overkill } -#[derive(Default)] +#[derive(Default, Clone)] pub struct ParserInfo { pub position: FilePosition, } -#[derive(Default)] -pub struct RawAccountDescriptor { - pub path: String, - pub pinfo: ParserInfo, -} - +pub mod parsed_ledger; +pub use parsed_ledger::*; -#[derive(Default)] -pub struct RawTransaction { - pub date: String, - pub anno: String, - pub desc: String, - pub postings: Vec, - pub comment: String, - pub pinfo: ParserInfo, -} - -#[derive(Default)] -pub struct RawPosting { - pub acct: RawAccountDescriptor, - pub value: f64, - pub comment: String, - pub pinfo: ParserInfo, -} +pub mod raw_transaction; +pub use raw_transaction::*; \ No newline at end of file diff --git a/pta-types/src/parsed_ledger.rs b/pta-types/src/parsed_ledger.rs new file mode 100644 index 0000000..dcdff5d --- /dev/null +++ b/pta-types/src/parsed_ledger.rs @@ -0,0 +1,14 @@ +use super::*; +use raw_transaction; + + +#[derive(Default)] +pub struct ParsedLedger { + xns: Vec, +} + +impl ParsedLedger { + pub fn add_transaction(self: &mut Self, xn: raw_transaction::RawTransaction) { + self.xns.push(xn); + } +} \ No newline at end of file diff --git a/pta-types/src/raw_transaction.rs b/pta-types/src/raw_transaction.rs new file mode 100644 index 0000000..3c3b0df --- /dev/null +++ b/pta-types/src/raw_transaction.rs @@ -0,0 +1,27 @@ +use super::*; + + +#[derive(Default, Clone)] +pub struct RawAccountDescriptor { + pub path: String, + pub pinfo: ParserInfo, +} + + +#[derive(Default, Clone)] +pub struct RawTransaction { + pub date: String, + pub anno: String, + pub desc: String, + pub postings: Vec, + pub comment: String, + pub pinfo: ParserInfo, +} + +#[derive(Default, Clone)] +pub struct RawPosting { + pub acct: RawAccountDescriptor, + pub value: f64, + pub comment: String, + pub pinfo: ParserInfo, +} From ae844ccf24e837e9efc1052de7fd8e59da9c9ee0 Mon Sep 17 00:00:00 2001 From: jburnett Date: Fri, 15 Dec 2023 11:33:51 -0500 Subject: [PATCH 18/23] Fix warnings --- cli/src/main.rs | 5 +---- pta-ledger/src/ledger_builder.rs | 23 +++++++++++++++-------- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/cli/src/main.rs b/cli/src/main.rs index cc5a167..b56c050 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -5,10 +5,7 @@ extern crate pta_parser; use log::{info, warn, as_error, error}; // TODO: how to isolate pest so clients can just use lib (w/o requiring pest as here) -use pest::{*, iterators::Pair}; use pta_ledger::ledger_builder::LedgerBuilder; -use pta_parser::{LedgerParser, Rule}; -use pta_types::{FilePosition, RawTransaction, ParserInfo }; @@ -30,7 +27,7 @@ fn main() -> Result<(), Box> { Ok(ledger) => { info!("String length from input: {}", ledger.len()); match bldr.from_string(&ledger) { - Ok(parsed) => { + Ok(_parsed) => { info!("Successfully parsed into ParsedLedger"); return Ok(()); }, diff --git a/pta-ledger/src/ledger_builder.rs b/pta-ledger/src/ledger_builder.rs index 9dbfbec..0c56528 100644 --- a/pta-ledger/src/ledger_builder.rs +++ b/pta-ledger/src/ledger_builder.rs @@ -1,4 +1,3 @@ -use std::error::Error; use log::{info, warn, as_error}; @@ -41,7 +40,6 @@ impl LedgerBuilder { fn handle_pair(self: &Self, pair: Pair<'_, Rule>) -> Result<(), Box> { - let parsed = ParsedLedger::default(); match pair.as_rule() { Rule::comment => { @@ -127,13 +125,13 @@ fn handle_ledger_rule(pair: & Pair) -> Result<(), Box) -> Result<(), Box> { +#[allow(dead_code)] // TODO: REMOVE allow dead code +fn handle_posting_basic(_xn: &mut raw_transaction::RawTransaction, pair: &Pair) -> Result<(), Box> { match LedgerParser::parse(Rule::posting_basic, pair.as_span().as_str()) { - Ok(posting) => { + Ok(_posting) => { info!("handling posting_basic"); - // handle_posting_basic(xn, posting); + // handle_posting_basic(xn, posting); TODO: fix } Err(e) => { @@ -146,7 +144,7 @@ fn handle_posting_basic(xn: &mut raw_transaction::RawTransaction, pair: &Pair) -> Result<(), Box> { +fn handle_trans_header(_: &mut raw_transaction::RawTransaction, _: &Pair) -> Result<(), Box> { info!("handling trans_header..."); return Ok(()); @@ -167,7 +165,16 @@ fn handle_trans_block(xn: &mut raw_transaction::RawTransaction, pair: &Pair { for pair in hdr.into_iter() { info!("attempt handle_trans_header on {}", pair.as_span().as_str()); - handle_trans_header(xn, &pair); + match handle_trans_header(xn, &pair) { + Ok(()) => { + // TODO: REVIEW: should anything happen here? + } + + Err(e) => { + warn!(err = e; "handle_trans_header failed"); + return Err(e); + } + } } // for p in &pair.into_inner() { From b78b9f869e484a7571111d9e1fb45c659bff28d2 Mon Sep 17 00:00:00 2001 From: jburnett Date: Fri, 15 Dec 2023 12:38:29 -0500 Subject: [PATCH 19/23] Begin multiple grammar support --- cli/src/main.rs | 4 +- pta-ledger/src/ledger_builder.rs | 8 +- pta-parser/src/grammars/base.pest | 44 ++++++++ pta-parser/src/grammars/generic.pest | 113 +++++++++++++++++++++ pta-parser/src/ledger_parser/mod.rs | 5 +- pta-parser/src/parser_tests/basics.rs | 14 +-- pta-parser/src/parser_tests/mod.rs | 4 +- pta-parser/src/parser_tests/transaction.rs | 14 +-- 8 files changed, 182 insertions(+), 24 deletions(-) create mode 100644 pta-parser/src/grammars/base.pest create mode 100644 pta-parser/src/grammars/generic.pest diff --git a/cli/src/main.rs b/cli/src/main.rs index b56c050..c84005e 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -52,11 +52,11 @@ fn main() -> Result<(), Box> { #[cfg(test)] mod cli_tests { - use pta_parser::LedgerParser; + use pta_parser::GenericParser; #[test] fn can_create_parser() { // simply verifies that the parser can be instantiated, ensuring accessibility - let _ = LedgerParser{}; + let _ = GenericParser{}; } } \ No newline at end of file diff --git a/pta-ledger/src/ledger_builder.rs b/pta-ledger/src/ledger_builder.rs index 0c56528..2587005 100644 --- a/pta-ledger/src/ledger_builder.rs +++ b/pta-ledger/src/ledger_builder.rs @@ -7,7 +7,7 @@ use pta_types::*; // TODO: how to isolate pest so clients can just use lib (w/o requiring pest as here) use pest::{*, iterators::Pair}; -use pta_parser::{LedgerParser, Rule}; +use pta_parser::{GenericParser, Rule}; #[derive(Default)] @@ -20,7 +20,7 @@ impl LedgerBuilder { self.pl = ParsedLedger::default(); - match LedgerParser::parse(Rule::ledger, &ledger) { + match GenericParser::parse(Rule::ledger, &ledger) { Ok(root) => { info!("Successfully parsed with Rule::ledger"); for pair in root.into_iter() { @@ -128,7 +128,7 @@ fn handle_ledger_rule(pair: & Pair) -> Result<(), Box) -> Result<(), Box> { - match LedgerParser::parse(Rule::posting_basic, pair.as_span().as_str()) { + match GenericParser::parse(Rule::posting_basic, pair.as_span().as_str()) { Ok(_posting) => { info!("handling posting_basic"); // handle_posting_basic(xn, posting); TODO: fix @@ -161,7 +161,7 @@ fn handle_trans_block(xn: &mut raw_transaction::RawTransaction, pair: &Pair { for pair in hdr.into_iter() { info!("attempt handle_trans_header on {}", pair.as_span().as_str()); diff --git a/pta-parser/src/grammars/base.pest b/pta-parser/src/grammars/base.pest new file mode 100644 index 0000000..e2b2192 --- /dev/null +++ b/pta-parser/src/grammars/base.pest @@ -0,0 +1,44 @@ +// Copyright (C) 2023, AltaModa Technologies, LLC. All rights reserved. +// +// This project is licensed under the terms of the MIT license (cf. LICENSE file in root). +// + +WHITESPACE = _{ " " | "\t" } + +// constants +acct_separator = _{ ":" } +comment_token = _{ ";" | "*" } + +// TODO: need to handle escaped semi-colon? +// TODO: consider whether comment must be preceded by whitespace (except at beginning of line) +// a comment +comment = _{ comment_token ~ (!NEWLINE ~ ANY)* ~ NEWLINE } +comment_or_newline = _{ (WHITESPACE+ ~ comment) | (WHITESPACE* ~ NEWLINE) } +empty_line = _{ WHITESPACE* ~ NEWLINE } + + +// ISO8601 Date Extended format is YYYY-MM-DD where +// YYYY is 4 digits; 0000-9999 +// MM is 2 digits; 01-09, 10-12 +// DD is 2 digits; 01-09, 10-29, 30, 31 +iso8601_date_extended = @{ + ASCII_DIGIT{4} + ~ "-" ~ (( "0" ~ ASCII_NONZERO_DIGIT) | ("1" ~ '0'..'2')) + ~ "-" ~ (("30" | "31") | ("0" ~ ASCII_NONZERO_DIGIT) | ('1'..'2' ~ ASCII_DIGIT)) +} + + +currency = { ASCII_ALPHA_UPPER{3} } + + + +// +// Pest's built-in rules: +// ASCII_ALPHA_LOWER = { 'a'..'z' } +// ASCII_ALPHA_UPPER = { 'A'..'Z' } +// ASCII_ALPHA = { ASCII_ALPHA_LOWER | ASCII_ALPHA_UPPER } +// ASCII_DIGIT = { '0'..'9' } +// ASCII_ALPHANUMERIC = { ASCII_ALPHA | ASCII-DIGIT } +// +// Avoid using WHITE_SPACE which targets [unicode](https://www.unicode.org/reports/tr31/#R3a) +// diff --git a/pta-parser/src/grammars/generic.pest b/pta-parser/src/grammars/generic.pest new file mode 100644 index 0000000..7307b0d --- /dev/null +++ b/pta-parser/src/grammars/generic.pest @@ -0,0 +1,113 @@ +// Copyright (C) 2023, AltaModa Technologies, LLC. All rights reserved. +// +// This project is licensed under the terms of the MIT license (cf. LICENSE file in root). +// + + +// Each acct token must begin with alpha and may be followed by any number of alpha or number +// Full account descriptors are comprised of colon-separated account names. The top-level +// account name must begin with an alpha char, but subaccounts may begin with alphanumeric. +top_level_acct = @{ ASCII_ALPHA ~ ASCII_ALPHANUMERIC* } +sub_acct = @{ acct_separator ~ ASCII_ALPHANUMERIC+ } + +// The full acct descriptor must be one or more acct tokens, each separated by a colon +acct_descriptor = @{ top_level_acct ~ (sub_acct)* } + +decimal_value = @{ (("-" ~ NUMBER+) | NUMBER+) ~ "." ~ NUMBER+ } + + +// TODO: consider more lax indent rules +// Posting lines of a transaction must begin with 1 tab or 2 spaces +posting_indent = _{ "\t" | " "{2} } +// A basic posting must specify an account and a value, ending with a comment or newline +posting_basic = @{ + posting_indent + ~ acct_descriptor + ~ WHITESPACE+ ~ decimal_value + ~ comment_or_newline +} + +// TODO: improve on 'text' to allow more in description +trans_description_text = _{ (ASCII_ALPHANUMERIC+ | WHITESPACE)+ } +// TODO: is this the full set of annotation options? +trans_annotation = _{ "txn" | "*" | "!" } +trans_description = _{ "\"" ~ trans_description_text ~ "\"" } + +// TODO: how to ensure col 0 / no ws for header row +// The header of a transaction specifies the date, an annotation, a description, and ends with a comment or newline +trans_header = @{ + iso8601_date_extended + ~ WHITESPACE+ + ~ trans_annotation + ~ WHITESPACE+ + ~ trans_description + ~ comment_or_newline +} + +// A transaction begins with a single header followed by one or more postings. Whether the transaction balances is +// outside the scope of parsing. +transaction_block = @{ trans_header ~ posting_basic+ } + + +options = { "operating_currency" } + +// TODO: open works but is incomplete +// YYYY-MM-DD open Account [ConstraintCurrency,...] ["BookingMethod"] +directive_open = @{ + iso8601_date_extended + ~ WHITESPACE+ ~ "open" + ~ WHITESPACE+ ~ acct_descriptor + ~ comment_or_newline +} +// YYYY-MM-DD close Account +directive_close = @{ + iso8601_date_extended + ~ WHITESPACE+ ~ "close" + ~ WHITESPACE+ ~ acct_descriptor + ~ comment_or_newline +} +// YYYY-MM-DD commodity Currency +directive_commodity = @{ + iso8601_date_extended + ~ WHITESPACE+ ~ "commodity" + ~ WHITESPACE+ ~ currency + ~ comment_or_newline +} +// YYYY-MM-DD balance Account Amount +balance_directive = @{ + iso8601_date_extended + ~ WHITESPACE+ ~ "balance" + ~ WHITESPACE+ ~ acct_descriptor + ~ WHITESPACE+ ~ decimal_value + ~ WHITESPACE+ ~ currency + ~ comment_or_newline +} + +// TODO: other directives to implement +// YYYY-MM-DD document Account PathToDocument +// YYYY-MM-DD event Name Value +// YYYY-MM-DD note Account Description +// YYYY-MM-DD pad Account AccountPad +// YYYY-MM-DD price Commodity Price +// include Filename +// option Name Value +// plugin ModuleName StringConfig +// poptag +// pushtag + +directives = { balance_directive | directive_close | directive_commodity | directive_open } + +// The rule for a complete ledger +ledger = { SOI ~ (options | directives | transaction_block | comment | empty_line)+ ~ EOI } + + +// +// Pest's built-in rules: +// ASCII_ALPHA_LOWER = { 'a'..'z' } +// ASCII_ALPHA_UPPER = { 'A'..'Z' } +// ASCII_ALPHA = { ASCII_ALPHA_LOWER | ASCII_ALPHA_UPPER } +// ASCII_DIGIT = { '0'..'9' } +// ASCII_ALPHANUMERIC = { ASCII_ALPHA | ASCII-DIGIT } +// +// Avoid using WHITE_SPACE which targets [unicode](https://www.unicode.org/reports/tr31/#R3a) +// diff --git a/pta-parser/src/ledger_parser/mod.rs b/pta-parser/src/ledger_parser/mod.rs index e90a796..6d33b2b 100644 --- a/pta-parser/src/ledger_parser/mod.rs +++ b/pta-parser/src/ledger_parser/mod.rs @@ -8,7 +8,8 @@ use pest_derive::*; #[derive(Parser)] -#[grammar = "./grammars/ledger.pest"] -pub struct LedgerParser; +#[grammar = "./grammars/base.pest"] +#[grammar = "./grammars/generic.pest"] +pub struct GenericParser; diff --git a/pta-parser/src/parser_tests/basics.rs b/pta-parser/src/parser_tests/basics.rs index 4f9d4d8..84d1929 100644 --- a/pta-parser/src/parser_tests/basics.rs +++ b/pta-parser/src/parser_tests/basics.rs @@ -19,7 +19,7 @@ mod acct_desc { #[case ("asset:property:real")] fn can_parse_acct_descriptor(#[case] acct_desc: &str) { - let pairs = LedgerParser::parse( + let pairs = GenericParser::parse( Rule::acct_descriptor, acct_desc) .unwrap_or_else(|e| panic!("{}", e)); @@ -36,7 +36,7 @@ mod acct_desc { #[ignore = "unexpectedly parses without error"] fn verify_acct_descriptor_parsing_error(#[case] bad_acct_desc: &str) { - LedgerParser::parse( + GenericParser::parse( Rule::acct_descriptor, bad_acct_desc) .unwrap_or_else(|e| panic!("{}", e)); @@ -55,7 +55,7 @@ mod acct_desc { #[should_panic(expected = "expected top_level_acct")] fn verify_top_level_acct_parsing_error(#[case] bad_top_level_acct: &str) { - LedgerParser::parse( + GenericParser::parse( Rule::top_level_acct, bad_top_level_acct) .unwrap_or_else(|e| panic!("{}", e)); @@ -79,7 +79,7 @@ mod decimal { #[case ("-0.00000001")] fn can_parse_decimal_value(#[case] dec: &str) { - let pairs = LedgerParser::parse( + let pairs = GenericParser::parse( Rule::decimal_value, dec) .unwrap_or_else(|e| panic!("{}", e)); @@ -100,7 +100,7 @@ mod decimal { #[should_panic(expected = "expected decimal_value")] fn verify_decimal_value_error(#[case] bad_dec: &str) { - LedgerParser::parse( + GenericParser::parse( Rule::decimal_value, bad_dec) .unwrap_or_else(|e| panic!("{}", e)); @@ -122,7 +122,7 @@ mod iso8601 { #[case ("2015-12-31")] fn can_parse_iso8601_date_extended(#[case] year: &str) { - let pairs = LedgerParser::parse( + let pairs = GenericParser::parse( Rule::iso8601_date_extended, year) .unwrap_or_else(|e| panic!("{}", e)); @@ -156,7 +156,7 @@ mod iso8601 { #[should_panic(expected = "expected iso8601_")] // matches errors from multiple iso8601 rules fn verify_iso8601_date_extended_error(#[case] bad_date: &str) { - LedgerParser::parse( + GenericParser::parse( Rule::iso8601_date_extended, bad_date) .unwrap_or_else(|e| panic!("{}", e)); diff --git a/pta-parser/src/parser_tests/mod.rs b/pta-parser/src/parser_tests/mod.rs index 5c366f4..50afd93 100644 --- a/pta-parser/src/parser_tests/mod.rs +++ b/pta-parser/src/parser_tests/mod.rs @@ -71,7 +71,7 @@ mod ledger_file { ")] fn can_parse_ledger(#[case] year: &str) { - let pairs = LedgerParser::parse( + let pairs = GenericParser::parse( Rule::ledger, year) .unwrap_or_else(|e| panic!("{}", e)); @@ -85,7 +85,7 @@ mod ledger_file { pub fn get_pairs(r: Rule, content: &str) -> Pairs<'_, Rule> { - let x = LedgerParser::parse( + let x = GenericParser::parse( r, content) diff --git a/pta-parser/src/parser_tests/transaction.rs b/pta-parser/src/parser_tests/transaction.rs index 2d8f618..1170b8d 100644 --- a/pta-parser/src/parser_tests/transaction.rs +++ b/pta-parser/src/parser_tests/transaction.rs @@ -39,7 +39,7 @@ mod posting { #[should_panic(expected = "expected posting_basic")] // matches errors from multiple iso8601 rules fn verify_posting_basic_error(#[case] bad_date: &str) { - LedgerParser::parse( + GenericParser::parse( Rule::posting_basic, bad_date) .unwrap_or_else(|e| panic!("{}", e)); @@ -68,7 +68,7 @@ mod trans_block { #[case ("2009-01-09 ! \"Bitcoin launch date\"\n\tassets 1.0000\n equity -1.0000\n")] fn can_parse_trans_block(#[case] tblock: &str) { - let pairs = LedgerParser::parse( + let pairs = GenericParser::parse( Rule::transaction_block, &tblock) .unwrap_or_else(|e| panic!("{}", e)); @@ -82,7 +82,7 @@ mod trans_block { ")] #[should_panic(expected = "expected transaction_block")] fn verify_trans_block_posting_error(#[case] bad_block: &str) { - LedgerParser::parse( + GenericParser::parse( Rule::transaction_block, &bad_block) .unwrap_or_else(|e| panic!("{}", e)); @@ -97,7 +97,7 @@ mod trans_block { #[case ("2009-01-09 ! \"Bitcoin launch date\"")] #[should_panic(expected = "expected trans_header")] fn verify_trans_block_trans_header_error(#[case] bad_block: &str) { - LedgerParser::parse( + GenericParser::parse( Rule::transaction_block, &bad_block) .unwrap_or_else(|e| panic!("{}", e)); @@ -128,7 +128,7 @@ mod trans_block { // fn can_parse_trans_descr(#[case] descr: &str) { // let quoted_descr = format!("\"{}\"", descr); -// let pairs = LedgerParser::parse( +// let pairs = GenericParser::parse( // Rule::trans_description, "ed_descr) // .unwrap_or_else(|e| panic!("{}", e)); @@ -146,7 +146,7 @@ mod trans_block { // fn verify_trans_descr_error(#[case] bad_descr: &str) { // let quoted_bad_descr = format!("\"{}\"", bad_descr); -// LedgerParser::parse( +// GenericParser::parse( // Rule::trans_description, "ed_bad_descr) // .unwrap_or_else(|e| panic!("{}", e)); @@ -198,7 +198,7 @@ mod trans_block { // fn verify_trans_header_error(#[case] bad_hdr: &str) { // let quoted_bad_descr = format!("\"{}\"", bad_hdr); -// LedgerParser::parse( +// GenericParser::parse( // Rule::trans_header, "ed_bad_descr) // .unwrap_or_else(|e| panic!("{}", e)); From ab9e28b55f72dcf66a66efdccebf2d22248c65a3 Mon Sep 17 00:00:00 2001 From: jburnett Date: Fri, 15 Dec 2023 16:43:35 -0500 Subject: [PATCH 20/23] v0.2.0 - multi-grammar support --- Cargo.toml | 11 ++ cli/Cargo.toml | 10 +- cli/src/main.rs | 9 +- pta-ledger/Cargo.toml | 10 +- pta-ledger/src/ledger_builder.rs | 152 +++++++++--------- pta-ledger/src/lib.rs | 5 + pta-parser/Cargo.toml | 10 +- .../grammars/{ledger.pest => beancount.pest} | 30 +--- pta-parser/src/grammars/generic.pest | 2 +- pta-parser/src/ledger_parser/mod.rs | 15 -- pta-parser/src/lib.rs | 6 +- pta-parser/src/parser_tests/basics.rs | 34 ++-- pta-parser/src/parser_tests/mod.rs | 32 ++-- pta-parser/src/parser_tests/transaction.rs | 29 ++-- pta-parser/src/parsers/mod.rs | 30 ++++ pta-types/Cargo.toml | 11 +- pta-types/src/lib.rs | 5 + pta-types/src/parsed_ledger.rs | 5 + pta-types/src/raw_transaction.rs | 5 + 19 files changed, 244 insertions(+), 167 deletions(-) rename pta-parser/src/grammars/{ledger.pest => beancount.pest} (76%) delete mode 100644 pta-parser/src/ledger_parser/mod.rs create mode 100644 pta-parser/src/parsers/mod.rs diff --git a/Cargo.toml b/Cargo.toml index cc25b42..a871a99 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,3 +1,7 @@ +# Copyright (C) 2023, AltaModa Technologies, LLC. All rights reserved. +# +# This project is licensed under the terms of the MIT license (cf. LICENSE file in root). + [workspace] resolver = "2" members = [ @@ -6,3 +10,10 @@ members = [ ,"pta-parser" ,'pta-types', ] + +# Default values for workspace projects +[workspace.package] +edition = "2021" +version = "0.2.0" +authors = ["AltaModa Technologies"] +respository = "https://github.com/altamodatech/pta-parser" diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 128b89f..39d97c8 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -1,7 +1,13 @@ +# Copyright (C) 2023, AltaModa Technologies, LLC. All rights reserved. +# +# This project is licensed under the terms of the MIT license (cf. LICENSE file in root). + [package] name = "cli" -version = "0.1.0" -edition = "2021" +version.workspace = true +authors.workspace = true +respository.workspace = true +edition.workspace = true [dependencies] diff --git a/cli/src/main.rs b/cli/src/main.rs index c84005e..442cf23 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -1,3 +1,8 @@ +// Copyright (C) 2023, AltaModa Technologies, LLC. All rights reserved. +// +// This project is licensed under the terms of the MIT license (cf. LICENSE file in root). +// + extern crate pta_ledger; extern crate pta_parser; @@ -52,11 +57,11 @@ fn main() -> Result<(), Box> { #[cfg(test)] mod cli_tests { - use pta_parser::GenericParser; + use pta_parser::parsers::generic::Parser; #[test] fn can_create_parser() { // simply verifies that the parser can be instantiated, ensuring accessibility - let _ = GenericParser{}; + let _ = Parser{}; } } \ No newline at end of file diff --git a/pta-ledger/Cargo.toml b/pta-ledger/Cargo.toml index 78a7556..fc43b8c 100644 --- a/pta-ledger/Cargo.toml +++ b/pta-ledger/Cargo.toml @@ -1,7 +1,13 @@ +# Copyright (C) 2023, AltaModa Technologies, LLC. All rights reserved. +# +# This project is licensed under the terms of the MIT license (cf. LICENSE file in root). + [package] name = "pta-ledger" -version = "0.1.0" -edition = "2021" +version.workspace = true +authors.workspace = true +respository.workspace = true +edition.workspace = true # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [lib] diff --git a/pta-ledger/src/ledger_builder.rs b/pta-ledger/src/ledger_builder.rs index 2587005..64852c2 100644 --- a/pta-ledger/src/ledger_builder.rs +++ b/pta-ledger/src/ledger_builder.rs @@ -1,3 +1,8 @@ +// Copyright (C) 2023, AltaModa Technologies, LLC. All rights reserved. +// +// This project is licensed under the terms of the MIT license (cf. LICENSE file in root). +// + use log::{info, warn, as_error}; @@ -7,8 +12,7 @@ use pta_types::*; // TODO: how to isolate pest so clients can just use lib (w/o requiring pest as here) use pest::{*, iterators::Pair}; -use pta_parser::{GenericParser, Rule}; - +use pta_parser::parsers::generic; #[derive(Default)] pub struct LedgerBuilder { @@ -20,9 +24,9 @@ impl LedgerBuilder { self.pl = ParsedLedger::default(); - match GenericParser::parse(Rule::ledger, &ledger) { + match generic::Parser::parse(generic::Rule::generic_ledger, &ledger) { Ok(root) => { - info!("Successfully parsed with Rule::ledger"); + info!("Successfully parsed with generic::Rule::generic_ledger"); for pair in root.into_iter() { info!("LedgerBuilder::from_string: root pair is {:}", pair.as_str()); self.handle_pair(pair)?; @@ -30,7 +34,7 @@ impl LedgerBuilder { } Err(err) => { - warn!(err = as_error!(err); "failed to parse with Rule::ledger"); + warn!(err = as_error!(err); "failed to parse with generic::Rule::generic_ledger"); return Err(Box::new(err)); } } @@ -39,48 +43,48 @@ impl LedgerBuilder { } - fn handle_pair(self: &Self, pair: Pair<'_, Rule>) -> Result<(), Box> { + fn handle_pair(self: &Self, pair: Pair<'_, generic::Rule>) -> Result<(), Box> { match pair.as_rule() { - Rule::comment => { - info!("Rule::comment: {:?}", pair.as_span().as_str()); + generic::Rule::comment => { + info!("generic::Rule::comment: {:?}", pair.as_span().as_str()); } - Rule::EOI => { - info!("Rule::EOI at {:?}", pair.line_col()); + generic::Rule::EOI => { + info!("generic::Rule::EOI at {:?}", pair.line_col()); } - Rule::WHITESPACE => {} - Rule::acct_descriptor => { dump_pair(&pair); return Ok(()); } - Rule::acct_separator => { dump_pair(&pair); return Ok(()); } - Rule::balance_directive => { dump_pair(&pair); return Ok(()); } - Rule::comment_or_newline => { dump_pair(&pair); return Ok(()); } - Rule::comment_token => { dump_pair(&pair); return Ok(()); } - Rule::currency => { dump_pair(&pair); return Ok(()); } - Rule::decimal_value => { dump_pair(&pair); return Ok(()); } - Rule::directive_close => { dump_pair(&pair); return Ok(()); } - Rule::directive_commodity => { dump_pair(&pair); return Ok(()); } - Rule::directive_open => { dump_pair(&pair); return Ok(()); } - Rule::directives => { dump_pair(&pair); return Ok(()); } - Rule::empty_line => {} - Rule::iso8601_date_extended => { dump_pair(&pair); return Ok(()); } - Rule::ledger => { + generic::Rule::WHITESPACE => {} + generic::Rule::acct_descriptor => { dump_pair(&pair); return Ok(()); } + generic::Rule::acct_separator => { dump_pair(&pair); return Ok(()); } + generic::Rule::balance_directive => { dump_pair(&pair); return Ok(()); } + generic::Rule::comment_or_newline => { dump_pair(&pair); return Ok(()); } + generic::Rule::comment_token => { dump_pair(&pair); return Ok(()); } + generic::Rule::currency => { dump_pair(&pair); return Ok(()); } + generic::Rule::decimal_value => { dump_pair(&pair); return Ok(()); } + generic::Rule::directive_close => { dump_pair(&pair); return Ok(()); } + generic::Rule::directive_commodity => { dump_pair(&pair); return Ok(()); } + generic::Rule::directive_open => { dump_pair(&pair); return Ok(()); } + generic::Rule::directives => { dump_pair(&pair); return Ok(()); } + generic::Rule::empty_line => {} + generic::Rule::iso8601_date_extended => { dump_pair(&pair); return Ok(()); } + generic::Rule::generic_ledger => { return handle_ledger_rule(&pair); } - Rule::options => { dump_pair(&pair); return Ok(()); } - Rule::posting_basic => { + generic::Rule::options => { dump_pair(&pair); return Ok(()); } + generic::Rule::posting_basic => { dump_pair(&pair); return Ok(()); } - Rule::posting_indent => { dump_pair(&pair); return Ok(()); } - Rule::sub_acct => { dump_pair(&pair); return Ok(()); } - Rule::top_level_acct => { dump_pair(&pair); return Ok(()); } - Rule::trans_annotation => { dump_pair(&pair); return Ok(()); } - Rule::trans_description => { dump_pair(&pair); return Ok(()); } - Rule::trans_description_text => { dump_pair(&pair); return Ok(()); } - Rule::trans_header => { + generic::Rule::posting_indent => { dump_pair(&pair); return Ok(()); } + generic::Rule::sub_acct => { dump_pair(&pair); return Ok(()); } + generic::Rule::top_level_acct => { dump_pair(&pair); return Ok(()); } + generic::Rule::trans_annotation => { dump_pair(&pair); return Ok(()); } + generic::Rule::trans_description => { dump_pair(&pair); return Ok(()); } + generic::Rule::trans_description_text => { dump_pair(&pair); return Ok(()); } + generic::Rule::trans_header => { let mut xn = raw_transaction::RawTransaction::default(); return handle_trans_header(&mut xn, &pair); } - Rule::transaction_block => { + generic::Rule::transaction_block => { let mut xn = raw_transaction::RawTransaction::default(); return handle_trans_block(&mut xn, &pair); } @@ -93,23 +97,23 @@ impl LedgerBuilder { } -fn dump_rule_of_pair(p: &Pair) { +fn dump_rule_of_pair(p: &Pair) { info!("RULE: {:?} at {:?}; SPAN: {:?}", &p.as_rule(), &p.line_col(), &p.as_span()); } // REMOVE: #[allow(dead_code)] -fn dump_rule(r:&Rule, s:&Span) { +fn dump_rule(r:&generic::Rule, s:&Span) { info!("RULE: {:?}; SPAN: {:?}", &r, &s); } -fn dump_pair(p:&Pair) { +fn dump_pair(p:&Pair) { dump_rule_of_pair(p); } -fn handle_ledger_rule(pair: & Pair) -> Result<(), Box> { +fn handle_ledger_rule(pair: & Pair) -> Result<(), Box> { for inner_pair in pair.clone().into_inner() { match handle_pair(inner_pair) { @@ -126,9 +130,9 @@ fn handle_ledger_rule(pair: & Pair) -> Result<(), Box) -> Result<(), Box> { +fn handle_posting_basic(_xn: &mut raw_transaction::RawTransaction, pair: &Pair) -> Result<(), Box> { - match GenericParser::parse(Rule::posting_basic, pair.as_span().as_str()) { + match generic::Parser::parse(generic::Rule::posting_basic, pair.as_span().as_str()) { Ok(_posting) => { info!("handling posting_basic"); // handle_posting_basic(xn, posting); TODO: fix @@ -144,13 +148,13 @@ fn handle_posting_basic(_xn: &mut raw_transaction::RawTransaction, pair: &Pair) -> Result<(), Box> { +fn handle_trans_header(_: &mut raw_transaction::RawTransaction, _: &Pair) -> Result<(), Box> { info!("handling trans_header..."); return Ok(()); } -fn handle_trans_block(xn: &mut raw_transaction::RawTransaction, pair: &Pair) -> Result<(), Box> { +fn handle_trans_block(xn: &mut raw_transaction::RawTransaction, pair: &Pair) -> Result<(), Box> { info!("handling trans_block..."); xn.pinfo = ParserInfo { @@ -161,7 +165,7 @@ fn handle_trans_block(xn: &mut raw_transaction::RawTransaction, pair: &Pair { for pair in hdr.into_iter() { info!("attempt handle_trans_header on {}", pair.as_span().as_str()); @@ -195,45 +199,45 @@ fn handle_trans_block(xn: &mut raw_transaction::RawTransaction, pair: &Pair) -> Result<(), Box> { +fn handle_pair(pair: Pair<'_, generic::Rule>) -> Result<(), Box> { match pair.as_rule() { - Rule::comment => { - info!("Rule::comment: {:?}", pair.as_span().as_str()); + generic::Rule::comment => { + info!("generic::Rule::comment: {:?}", pair.as_span().as_str()); } - Rule::EOI => { - info!("Rule::EOI at {:?}", pair.line_col()); + generic::Rule::EOI => { + info!("generic::Rule::EOI at {:?}", pair.line_col()); } - Rule::WHITESPACE => {} - Rule::acct_descriptor => { dump_pair(&pair); return Ok(()); } - Rule::acct_separator => { dump_pair(&pair); return Ok(()); } - Rule::balance_directive => { dump_pair(&pair); return Ok(()); } - Rule::comment_or_newline => { dump_pair(&pair); return Ok(()); } - Rule::comment_token => { dump_pair(&pair); return Ok(()); } - Rule::currency => { dump_pair(&pair); return Ok(()); } - Rule::decimal_value => { dump_pair(&pair); return Ok(()); } - Rule::directive_close => { dump_pair(&pair); return Ok(()); } - Rule::directive_commodity => { dump_pair(&pair); return Ok(()); } - Rule::directive_open => { dump_pair(&pair); return Ok(()); } - Rule::directives => { dump_pair(&pair); return Ok(()); } - Rule::empty_line => {} - Rule::iso8601_date_extended => { dump_pair(&pair); return Ok(()); } - Rule::ledger => { + generic::Rule::WHITESPACE => {} + generic::Rule::acct_descriptor => { dump_pair(&pair); return Ok(()); } + generic::Rule::acct_separator => { dump_pair(&pair); return Ok(()); } + generic::Rule::balance_directive => { dump_pair(&pair); return Ok(()); } + generic::Rule::comment_or_newline => { dump_pair(&pair); return Ok(()); } + generic::Rule::comment_token => { dump_pair(&pair); return Ok(()); } + generic::Rule::currency => { dump_pair(&pair); return Ok(()); } + generic::Rule::decimal_value => { dump_pair(&pair); return Ok(()); } + generic::Rule::directive_close => { dump_pair(&pair); return Ok(()); } + generic::Rule::directive_commodity => { dump_pair(&pair); return Ok(()); } + generic::Rule::directive_open => { dump_pair(&pair); return Ok(()); } + generic::Rule::directives => { dump_pair(&pair); return Ok(()); } + generic::Rule::empty_line => {} + generic::Rule::iso8601_date_extended => { dump_pair(&pair); return Ok(()); } + generic::Rule::generic_ledger => { return handle_ledger_rule(&pair); } - Rule::options => { dump_pair(&pair); return Ok(()); } - Rule::posting_basic => { dump_pair(&pair); return Ok(()); } - Rule::posting_indent => { dump_pair(&pair); return Ok(()); } - Rule::sub_acct => { dump_pair(&pair); return Ok(()); } - Rule::top_level_acct => { dump_pair(&pair); return Ok(()); } - Rule::trans_annotation => { dump_pair(&pair); return Ok(()); } - Rule::trans_description => { dump_pair(&pair); return Ok(()); } - Rule::trans_description_text => { dump_pair(&pair); return Ok(()); } - Rule::trans_header => { + generic::Rule::options => { dump_pair(&pair); return Ok(()); } + generic::Rule::posting_basic => { dump_pair(&pair); return Ok(()); } + generic::Rule::posting_indent => { dump_pair(&pair); return Ok(()); } + generic::Rule::sub_acct => { dump_pair(&pair); return Ok(()); } + generic::Rule::top_level_acct => { dump_pair(&pair); return Ok(()); } + generic::Rule::trans_annotation => { dump_pair(&pair); return Ok(()); } + generic::Rule::trans_description => { dump_pair(&pair); return Ok(()); } + generic::Rule::trans_description_text => { dump_pair(&pair); return Ok(()); } + generic::Rule::trans_header => { let mut xn = raw_transaction::RawTransaction::default(); return handle_trans_header(&mut xn, &pair); } - Rule::transaction_block => { + generic::Rule::transaction_block => { let mut xn = raw_transaction::RawTransaction::default(); return handle_trans_block(&mut xn, &pair); } diff --git a/pta-ledger/src/lib.rs b/pta-ledger/src/lib.rs index ebe2b42..83b94e0 100644 --- a/pta-ledger/src/lib.rs +++ b/pta-ledger/src/lib.rs @@ -1,3 +1,8 @@ +// Copyright (C) 2023, AltaModa Technologies, LLC. All rights reserved. +// +// This project is licensed under the terms of the MIT license (cf. LICENSE file in root). +// + pub extern crate pest; pub extern crate pest_derive; pub extern crate pta_parser; diff --git a/pta-parser/Cargo.toml b/pta-parser/Cargo.toml index 3adf065..72e942e 100644 --- a/pta-parser/Cargo.toml +++ b/pta-parser/Cargo.toml @@ -1,7 +1,13 @@ +# Copyright (C) 2023, AltaModa Technologies, LLC. All rights reserved. +# +# This project is licensed under the terms of the MIT license (cf. LICENSE file in root). + [package] name = "pta-parser" -version = "0.1.0" -edition = "2021" +version.workspace = true +authors.workspace = true +respository.workspace = true +edition.workspace = true # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [lib] diff --git a/pta-parser/src/grammars/ledger.pest b/pta-parser/src/grammars/beancount.pest similarity index 76% rename from pta-parser/src/grammars/ledger.pest rename to pta-parser/src/grammars/beancount.pest index 6bd600d..d7b4abc 100644 --- a/pta-parser/src/grammars/ledger.pest +++ b/pta-parser/src/grammars/beancount.pest @@ -3,40 +3,19 @@ // This project is licensed under the terms of the MIT license (cf. LICENSE file in root). // -WHITESPACE = _{ " " | "\t" } - -// constants -acct_separator = _{ ":" } -comment_token = _{ ";" | "*" } - -// TODO: need to handle escaped semi-colon? -// TODO: consider whether comment must be preceded by whitespace (except at beginning of line) -// a comment -comment = _{ comment_token ~ (!NEWLINE ~ ANY)* ~ NEWLINE } -comment_or_newline = _{ (WHITESPACE+ ~ comment) | (WHITESPACE* ~ NEWLINE) } -empty_line = _{ WHITESPACE* ~ NEWLINE } // Each acct token must begin with alpha and may be followed by any number of alpha or number // Full account descriptors are comprised of colon-separated account names. The top-level // account name must begin with an alpha char, but subaccounts may begin with alphanumeric. -top_level_acct = @{ ASCII_ALPHA ~ ASCII_ALPHANUMERIC* } -sub_acct = @{ acct_separator ~ ASCII_ALPHANUMERIC+ } +// BEANCOUNT diffs: requires account names to being with upper case alpha +top_level_acct = @{ ASCII_ALPHA_UPPER ~ ASCII_ALPHANUMERIC* } +sub_acct = @{ acct_separator ~ ASCII_ALPHA_UPPER ~ ASCII_ALPHANUMERIC+ } // The full acct descriptor must be one or more acct tokens, each separated by a colon acct_descriptor = @{ top_level_acct ~ (sub_acct)* } decimal_value = @{ (("-" ~ NUMBER+) | NUMBER+) ~ "." ~ NUMBER+ } -// ISO8601 Date Extended format is YYYY-MM-DD where -// YYYY is 4 digits; 0000-9999 -// MM is 2 digits; 01-09, 10-12 -// DD is 2 digits; 01-09, 10-29, 30, 31 -iso8601_date_extended = @{ - ASCII_DIGIT{4} - ~ "-" ~ (( "0" ~ ASCII_NONZERO_DIGIT) | ("1" ~ '0'..'2')) - ~ "-" ~ (("30" | "31") | ("0" ~ ASCII_NONZERO_DIGIT) | ('1'..'2' ~ ASCII_DIGIT)) -} - // TODO: consider more lax indent rules // Posting lines of a transaction must begin with 1 tab or 2 spaces @@ -71,7 +50,6 @@ trans_header = @{ transaction_block = @{ trans_header ~ posting_basic+ } -currency = { ASCII_ALPHA_UPPER{3} } options = { "operating_currency" } // TODO: open works but is incomplete @@ -121,7 +99,7 @@ balance_directive = @{ directives = { balance_directive | directive_close | directive_commodity | directive_open } // The rule for a complete ledger -ledger = { SOI ~ (options | directives | transaction_block | comment | empty_line)+ ~ EOI } +beancount_ledger = { SOI ~ (options | directives | transaction_block | comment | empty_line)+ ~ EOI } // diff --git a/pta-parser/src/grammars/generic.pest b/pta-parser/src/grammars/generic.pest index 7307b0d..40768ae 100644 --- a/pta-parser/src/grammars/generic.pest +++ b/pta-parser/src/grammars/generic.pest @@ -98,7 +98,7 @@ balance_directive = @{ directives = { balance_directive | directive_close | directive_commodity | directive_open } // The rule for a complete ledger -ledger = { SOI ~ (options | directives | transaction_block | comment | empty_line)+ ~ EOI } +generic_ledger = { SOI ~ (options | directives | transaction_block | comment | empty_line)+ ~ EOI } // diff --git a/pta-parser/src/ledger_parser/mod.rs b/pta-parser/src/ledger_parser/mod.rs deleted file mode 100644 index 6d33b2b..0000000 --- a/pta-parser/src/ledger_parser/mod.rs +++ /dev/null @@ -1,15 +0,0 @@ -// Copyright (C) 2023, AltaModa Technologies, LLC. All rights reserved. -// -// This project is licensed under the terms of the MIT license (cf. LICENSE file in root). -// - - - -use pest_derive::*; - -#[derive(Parser)] -#[grammar = "./grammars/base.pest"] -#[grammar = "./grammars/generic.pest"] -pub struct GenericParser; - - diff --git a/pta-parser/src/lib.rs b/pta-parser/src/lib.rs index b39a570..734f0e3 100644 --- a/pta-parser/src/lib.rs +++ b/pta-parser/src/lib.rs @@ -9,8 +9,10 @@ pub extern crate pest_derive; pub extern crate rstest; // Export ledger parser -pub mod ledger_parser; -pub use ledger_parser::*; +pub mod parsers; +// pub use parsers::generic::*; +// pub use parsers::beancount::*; +// pub use parsers::*; pub mod parser_tests; diff --git a/pta-parser/src/parser_tests/basics.rs b/pta-parser/src/parser_tests/basics.rs index 84d1929..2ec2bfc 100644 --- a/pta-parser/src/parser_tests/basics.rs +++ b/pta-parser/src/parser_tests/basics.rs @@ -1,6 +1,12 @@ +// Copyright (C) 2023, AltaModa Technologies, LLC. All rights reserved. +// +// This project is licensed under the terms of the MIT license (cf. LICENSE file in root). +// + #[cfg(test)] use super::*; #[cfg(test)] use rstest::rstest; +// mod generic; #[cfg(test)] mod acct_desc { @@ -19,8 +25,8 @@ mod acct_desc { #[case ("asset:property:real")] fn can_parse_acct_descriptor(#[case] acct_desc: &str) { - let pairs = GenericParser::parse( - Rule::acct_descriptor, acct_desc) + let pairs = generic::Parser::parse( + generic::Rule::acct_descriptor, acct_desc) .unwrap_or_else(|e| panic!("{}", e)); // Parsing succeeded; ensure at least 1 pair was returned @@ -36,8 +42,8 @@ mod acct_desc { #[ignore = "unexpectedly parses without error"] fn verify_acct_descriptor_parsing_error(#[case] bad_acct_desc: &str) { - GenericParser::parse( - Rule::acct_descriptor, bad_acct_desc) + generic::Parser::parse( + generic::Rule::acct_descriptor, bad_acct_desc) .unwrap_or_else(|e| panic!("{}", e)); // should never reach this code since all cases should result in panic @@ -55,8 +61,8 @@ mod acct_desc { #[should_panic(expected = "expected top_level_acct")] fn verify_top_level_acct_parsing_error(#[case] bad_top_level_acct: &str) { - GenericParser::parse( - Rule::top_level_acct, bad_top_level_acct) + generic::Parser::parse( + generic::Rule::top_level_acct, bad_top_level_acct) .unwrap_or_else(|e| panic!("{}", e)); // should never reach this code since all cases should result in panic @@ -79,8 +85,8 @@ mod decimal { #[case ("-0.00000001")] fn can_parse_decimal_value(#[case] dec: &str) { - let pairs = GenericParser::parse( - Rule::decimal_value, dec) + let pairs = generic::Parser::parse( + generic::Rule::decimal_value, dec) .unwrap_or_else(|e| panic!("{}", e)); // Parsing succeeded; ensure at least 1 pair was returned @@ -100,8 +106,8 @@ mod decimal { #[should_panic(expected = "expected decimal_value")] fn verify_decimal_value_error(#[case] bad_dec: &str) { - GenericParser::parse( - Rule::decimal_value, bad_dec) + generic::Parser::parse( + generic::Rule::decimal_value, bad_dec) .unwrap_or_else(|e| panic!("{}", e)); // should never reach this code since all cases should result in panic @@ -122,8 +128,8 @@ mod iso8601 { #[case ("2015-12-31")] fn can_parse_iso8601_date_extended(#[case] year: &str) { - let pairs = GenericParser::parse( - Rule::iso8601_date_extended, year) + let pairs = generic::Parser::parse( + generic::Rule::iso8601_date_extended, year) .unwrap_or_else(|e| panic!("{}", e)); // Parsing succeeded; ensure at least 1 pair was returned @@ -156,8 +162,8 @@ mod iso8601 { #[should_panic(expected = "expected iso8601_")] // matches errors from multiple iso8601 rules fn verify_iso8601_date_extended_error(#[case] bad_date: &str) { - GenericParser::parse( - Rule::iso8601_date_extended, bad_date) + generic::Parser::parse( + generic::Rule::iso8601_date_extended, bad_date) .unwrap_or_else(|e| panic!("{}", e)); // should never reach this code since all cases should result in panic diff --git a/pta-parser/src/parser_tests/mod.rs b/pta-parser/src/parser_tests/mod.rs index 50afd93..4de4151 100644 --- a/pta-parser/src/parser_tests/mod.rs +++ b/pta-parser/src/parser_tests/mod.rs @@ -1,9 +1,15 @@ +// Copyright (C) 2023, AltaModa Technologies, LLC. All rights reserved. +// +// This project is licensed under the terms of the MIT license (cf. LICENSE file in root). +// + pub use super::*; pub use pest::{Parser, iterators::Pairs}; #[cfg(test)] pub use rstest::rstest; +use super::parsers::*; mod basics; mod transaction; @@ -16,15 +22,15 @@ mod directives { // YYYY-MM-DD open Account [ConstraintCurrency,...] ["BookingMethod"] #[rstest] - #[case (Rule::directive_open, "2001-09-11 open assets")] - #[case (Rule::directive_open, "2001-09-11 open assets:cash")] - #[case (Rule::directive_open, "2001-09-11 open Assets1:cash2:3petty")] - #[case (Rule::directive_close, "2001-09-11 close assets")] - #[case (Rule::directive_close, "2001-09-11 close assets1:2cash:3petty")] - #[case (Rule::directive_commodity, "2001-09-11 commodity USD")] - #[case (Rule::balance_directive, "2001-09-11 balance assets 123.456 USD")] - #[case (Rule::balance_directive, "2001-09-11 balance assets1:2cash -0.456 USD")] - fn can_parse_misc_directive(#[case] r: Rule, #[case] base: &str) { + #[case (generic::Rule::directive_open, "2001-09-11 open assets")] + #[case (generic::Rule::directive_open, "2001-09-11 open assets:cash")] + #[case (generic::Rule::directive_open, "2001-09-11 open Assets1:cash2:3petty")] + #[case (generic::Rule::directive_close, "2001-09-11 close assets")] + #[case (generic::Rule::directive_close, "2001-09-11 close assets1:2cash:3petty")] + #[case (generic::Rule::directive_commodity, "2001-09-11 commodity USD")] + #[case (generic::Rule::balance_directive, "2001-09-11 balance assets 123.456 USD")] + #[case (generic::Rule::balance_directive, "2001-09-11 balance assets1:2cash -0.456 USD")] + fn can_parse_misc_directive(#[case] r: generic::Rule, #[case] base: &str) { // NOTE: addons must end in \n to match rules let addons = [ @@ -71,8 +77,8 @@ mod ledger_file { ")] fn can_parse_ledger(#[case] year: &str) { - let pairs = GenericParser::parse( - Rule::ledger, year) + let pairs = generic::Parser::parse( + generic::Rule::generic_ledger, year) .unwrap_or_else(|e| panic!("{}", e)); // Parsing succeeded; ensure at least 1 pair was returned @@ -84,8 +90,8 @@ mod ledger_file { -pub fn get_pairs(r: Rule, content: &str) -> Pairs<'_, Rule> { - let x = GenericParser::parse( +pub fn get_pairs(r: generic::Rule, content: &str) -> Pairs<'_, generic::Rule> { + let x = generic::Parser::parse( r, content) diff --git a/pta-parser/src/parser_tests/transaction.rs b/pta-parser/src/parser_tests/transaction.rs index 1170b8d..b5a06e1 100644 --- a/pta-parser/src/parser_tests/transaction.rs +++ b/pta-parser/src/parser_tests/transaction.rs @@ -1,3 +1,8 @@ +// Copyright (C) 2023, AltaModa Technologies, LLC. All rights reserved. +// +// This project is licensed under the terms of the MIT license (cf. LICENSE file in root). +// + #[cfg(test)] use super::*; #[cfg(test)] use rstest::rstest; @@ -26,7 +31,7 @@ mod posting { let tc = format!("{}{}", base, suffix); println!("Test case: {}", tc); - assert!(get_pairs(Rule::posting_basic, &tc).len() > 0); + assert!(get_pairs(generic::Rule::posting_basic, &tc).len() > 0); } } @@ -39,8 +44,8 @@ mod posting { #[should_panic(expected = "expected posting_basic")] // matches errors from multiple iso8601 rules fn verify_posting_basic_error(#[case] bad_date: &str) { - GenericParser::parse( - Rule::posting_basic, bad_date) + generic::Parser::parse( + generic::Rule::posting_basic, bad_date) .unwrap_or_else(|e| panic!("{}", e)); // should never reach this code since all cases should result in panic @@ -68,8 +73,8 @@ mod trans_block { #[case ("2009-01-09 ! \"Bitcoin launch date\"\n\tassets 1.0000\n equity -1.0000\n")] fn can_parse_trans_block(#[case] tblock: &str) { - let pairs = GenericParser::parse( - Rule::transaction_block, &tblock) + let pairs = generic::Parser::parse( + generic::Rule::transaction_block, &tblock) .unwrap_or_else(|e| panic!("{}", e)); // Parsing succeeded; ensure at least 1 pair was returned @@ -82,8 +87,8 @@ mod trans_block { ")] #[should_panic(expected = "expected transaction_block")] fn verify_trans_block_posting_error(#[case] bad_block: &str) { - GenericParser::parse( - Rule::transaction_block, &bad_block) + generic::Parser::parse( + generic::Rule::transaction_block, &bad_block) .unwrap_or_else(|e| panic!("{}", e)); // should never reach this code since all cases should result in panic @@ -97,8 +102,8 @@ mod trans_block { #[case ("2009-01-09 ! \"Bitcoin launch date\"")] #[should_panic(expected = "expected trans_header")] fn verify_trans_block_trans_header_error(#[case] bad_block: &str) { - GenericParser::parse( - Rule::transaction_block, &bad_block) + generic::Parser::parse( + generic::Rule::transaction_block, &bad_block) .unwrap_or_else(|e| panic!("{}", e)); // should never reach this code since all cases should result in panic @@ -128,7 +133,7 @@ mod trans_block { // fn can_parse_trans_descr(#[case] descr: &str) { // let quoted_descr = format!("\"{}\"", descr); -// let pairs = GenericParser::parse( +// let pairs = Parser::parse( // Rule::trans_description, "ed_descr) // .unwrap_or_else(|e| panic!("{}", e)); @@ -146,7 +151,7 @@ mod trans_block { // fn verify_trans_descr_error(#[case] bad_descr: &str) { // let quoted_bad_descr = format!("\"{}\"", bad_descr); -// GenericParser::parse( +// Parser::parse( // Rule::trans_description, "ed_bad_descr) // .unwrap_or_else(|e| panic!("{}", e)); @@ -198,7 +203,7 @@ mod trans_block { // fn verify_trans_header_error(#[case] bad_hdr: &str) { // let quoted_bad_descr = format!("\"{}\"", bad_hdr); -// GenericParser::parse( +// Parser::parse( // Rule::trans_header, "ed_bad_descr) // .unwrap_or_else(|e| panic!("{}", e)); diff --git a/pta-parser/src/parsers/mod.rs b/pta-parser/src/parsers/mod.rs new file mode 100644 index 0000000..184fdf4 --- /dev/null +++ b/pta-parser/src/parsers/mod.rs @@ -0,0 +1,30 @@ +// Copyright (C) 2023, AltaModa Technologies, LLC. All rights reserved. +// +// This project is licensed under the terms of the MIT license (cf. LICENSE file in root). +// + + + +use pest_derive::*; + +pub mod generic { + + use super::*; + #[derive(Parser)] + #[grammar = "./grammars/base.pest"] + #[grammar = "./grammars/generic.pest"] + pub struct Parser; + +} + + +pub mod beancount { + + use super::*; + + #[derive(Parser)] + #[grammar = "./grammars/base.pest"] + #[grammar = "./grammars/beancount.pest"] + pub struct Parser; + +} \ No newline at end of file diff --git a/pta-types/Cargo.toml b/pta-types/Cargo.toml index b6cb1f0..208c0f3 100644 --- a/pta-types/Cargo.toml +++ b/pta-types/Cargo.toml @@ -1,7 +1,14 @@ +# Copyright (C) 2023, AltaModa Technologies, LLC. All rights reserved. +# +# This project is licensed under the terms of the MIT license (cf. LICENSE file in root). + [package] name = "pta-types" -version = "0.1.0" -edition = "2021" +version.workspace = true +authors.workspace = true +respository.workspace = true +edition.workspace = true + # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [lib] diff --git a/pta-types/src/lib.rs b/pta-types/src/lib.rs index 67eeade..6cc9d70 100644 --- a/pta-types/src/lib.rs +++ b/pta-types/src/lib.rs @@ -1,3 +1,8 @@ +// Copyright (C) 2023, AltaModa Technologies, LLC. All rights reserved. +// +// This project is licensed under the terms of the MIT license (cf. LICENSE file in root). +// + #[derive(Default, Clone)] pub struct FilePosition { pub line: usize, diff --git a/pta-types/src/parsed_ledger.rs b/pta-types/src/parsed_ledger.rs index dcdff5d..ab6d3d1 100644 --- a/pta-types/src/parsed_ledger.rs +++ b/pta-types/src/parsed_ledger.rs @@ -1,3 +1,8 @@ +// Copyright (C) 2023, AltaModa Technologies, LLC. All rights reserved. +// +// This project is licensed under the terms of the MIT license (cf. LICENSE file in root). +// + use super::*; use raw_transaction; diff --git a/pta-types/src/raw_transaction.rs b/pta-types/src/raw_transaction.rs index 3c3b0df..ed20ea9 100644 --- a/pta-types/src/raw_transaction.rs +++ b/pta-types/src/raw_transaction.rs @@ -1,3 +1,8 @@ +// Copyright (C) 2023, AltaModa Technologies, LLC. All rights reserved. +// +// This project is licensed under the terms of the MIT license (cf. LICENSE file in root). +// + use super::*; From 2a9b29d1ebd4a319e240570d9478d7afea8e113c Mon Sep 17 00:00:00 2001 From: jburnett Date: Mon, 20 May 2024 00:12:24 -0400 Subject: [PATCH 21/23] pkg upgrades --- pta-ledger/Cargo.toml | 2 +- pta-ledger/src/ledger_builder.rs | 12 ++++++------ pta-parser/Cargo.toml | 2 +- pta-types/Cargo.toml | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/pta-ledger/Cargo.toml b/pta-ledger/Cargo.toml index fc43b8c..5ae41e0 100644 --- a/pta-ledger/Cargo.toml +++ b/pta-ledger/Cargo.toml @@ -21,4 +21,4 @@ pta-parser = { path = "../pta-parser" } pta-types ={ path = "../pta-types" } [dev-dependencies] -rstest = "0.18.2" +rstest = "0.19.0" diff --git a/pta-ledger/src/ledger_builder.rs b/pta-ledger/src/ledger_builder.rs index 64852c2..fe0125b 100644 --- a/pta-ledger/src/ledger_builder.rs +++ b/pta-ledger/src/ledger_builder.rs @@ -4,7 +4,7 @@ // -use log::{info, warn, as_error}; +use log::{info, warn}; use pta_types::*; @@ -34,7 +34,7 @@ impl LedgerBuilder { } Err(err) => { - warn!(err = as_error!(err); "failed to parse with generic::Rule::generic_ledger"); + warn!("failed to parse with generic::Rule::generic_ledger. err: {err}"); return Err(Box::new(err)); } } @@ -120,7 +120,7 @@ fn handle_ledger_rule(pair: & Pair) -> Result<(), Box { /* handle_pair does all the work */ } Err(err) => { - warn!(err = as_error!(*err); "handle_pair failed in handle_ledger"); + warn!("handle_pair failed in handle_ledger. err: {err}"); return Err(err); } }; @@ -139,7 +139,7 @@ fn handle_posting_basic(_xn: &mut raw_transaction::RawTransaction, pair: &Pair { - warn!(err = as_error!(e); "failed to parse with posting_basic"); + warn!("failed to parse with posting_basic. err: {e}"); return Err(Box::new(e)); } @@ -175,7 +175,7 @@ fn handle_trans_block(xn: &mut raw_transaction::RawTransaction, pair: &Pair { - warn!(err = e; "handle_trans_header failed"); + warn!("handle_trans_header failed. err: {e}"); return Err(e); } } @@ -187,7 +187,7 @@ fn handle_trans_block(xn: &mut raw_transaction::RawTransaction, pair: &Pair { - warn!(err = as_error!(e); "failed to parse with trans_header"); + warn!("failed to parse with trans_header. err: {e}"); return Err(Box::new(e)); } } diff --git a/pta-parser/Cargo.toml b/pta-parser/Cargo.toml index 72e942e..125b3fc 100644 --- a/pta-parser/Cargo.toml +++ b/pta-parser/Cargo.toml @@ -20,4 +20,4 @@ pest_derive = "2.7.3" pta-types = { path = "../pta-types" } [dev-dependencies] -rstest = "0.18.2" +rstest = "0.19.0" diff --git a/pta-types/Cargo.toml b/pta-types/Cargo.toml index 208c0f3..c82248d 100644 --- a/pta-types/Cargo.toml +++ b/pta-types/Cargo.toml @@ -20,4 +20,4 @@ pest = "2.7.3" pest_derive = "2.7.3" [dev-dependencies] -rstest = "0.18.2" +rstest = "0.19.0" From cb00a3f2e8def1a9ec8d399a1bfdb42035a33ac0 Mon Sep 17 00:00:00 2001 From: jburnett Date: Mon, 3 Jun 2024 20:48:02 -0400 Subject: [PATCH 22/23] Upgrade log to 0.4.21 & adapt warn macro --- cli/Cargo.toml | 2 +- cli/src/main.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 39d97c8..c2ea89f 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -11,7 +11,7 @@ edition.workspace = true [dependencies] -log = "0.4.20" +log = "0.4.21" pest = "2.7.3" pretty_env_logger = "0.5.0" pta-ledger = { path = "../pta-ledger" } diff --git a/cli/src/main.rs b/cli/src/main.rs index 442cf23..18e8c28 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -7,7 +7,7 @@ extern crate pta_ledger; extern crate pta_parser; -use log::{info, warn, as_error, error}; +use log::{info, warn, error}; // TODO: how to isolate pest so clients can just use lib (w/o requiring pest as here) use pta_ledger::ledger_builder::LedgerBuilder; @@ -45,7 +45,7 @@ fn main() -> Result<(), Box> { } Err(e) => { - warn!(err = as_error!(e); "failed to read file as string"); + warn!("failed to read file as string; {e}"); return Err(Box::new(e)); } } From 6396bbb6b28d574c77b3e6547a1bbc59ed017047 Mon Sep 17 00:00:00 2001 From: jburnett Date: Mon, 3 Jun 2024 20:48:37 -0400 Subject: [PATCH 23/23] Rm warnings on unused workspace repos info --- Cargo.toml | 2 +- cli/Cargo.toml | 2 +- pta-ledger/Cargo.toml | 2 +- pta-parser/Cargo.toml | 2 +- pta-types/Cargo.toml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index a871a99..7addbb2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,4 +16,4 @@ members = [ edition = "2021" version = "0.2.0" authors = ["AltaModa Technologies"] -respository = "https://github.com/altamodatech/pta-parser" +# respository = "https://github.com/altamodatech/pta-parser" diff --git a/cli/Cargo.toml b/cli/Cargo.toml index c2ea89f..e81de08 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -6,7 +6,7 @@ name = "cli" version.workspace = true authors.workspace = true -respository.workspace = true +# respository.workspace = true edition.workspace = true diff --git a/pta-ledger/Cargo.toml b/pta-ledger/Cargo.toml index 5ae41e0..ec2aa88 100644 --- a/pta-ledger/Cargo.toml +++ b/pta-ledger/Cargo.toml @@ -6,7 +6,7 @@ name = "pta-ledger" version.workspace = true authors.workspace = true -respository.workspace = true +# respository.workspace = true edition.workspace = true # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html diff --git a/pta-parser/Cargo.toml b/pta-parser/Cargo.toml index 125b3fc..4fba65e 100644 --- a/pta-parser/Cargo.toml +++ b/pta-parser/Cargo.toml @@ -6,7 +6,7 @@ name = "pta-parser" version.workspace = true authors.workspace = true -respository.workspace = true +# respository.workspace = true edition.workspace = true # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html diff --git a/pta-types/Cargo.toml b/pta-types/Cargo.toml index c82248d..0fa4763 100644 --- a/pta-types/Cargo.toml +++ b/pta-types/Cargo.toml @@ -6,7 +6,7 @@ name = "pta-types" version.workspace = true authors.workspace = true -respository.workspace = true +# respository.workspace = true edition.workspace = true