diff --git a/cli/src/command/dump.rs b/cli/src/command/dump.rs index 6c94154..3f9320f 100644 --- a/cli/src/command/dump.rs +++ b/cli/src/command/dump.rs @@ -17,7 +17,10 @@ pub(crate) fn cli() -> Command { pub(crate) async fn exec(args: &ArgMatches) -> Result<()> { let fname = args.get_one::("PATH_OR_URI").unwrap(); - let options = DataReaderOptions::ENABLE_READING_BODY; + let options = DataReaderOptions::ALLOW_TRAILING_COMMA + | DataReaderOptions::ALLOW_EMPTY_FIELD_NAME + | DataReaderOptions::ALLOW_STR_INSTEAD_OF_NSTR + | DataReaderOptions::ENABLE_READING_BODY; let options = if args.get_flag("ignore-size") { options.union(DataReaderOptions::IGNORE_DATA_SIZE_FIELD) } else { diff --git a/cli/src/command/header.rs b/cli/src/command/header.rs index 8dc0eae..ec50d96 100644 --- a/cli/src/command/header.rs +++ b/cli/src/command/header.rs @@ -20,7 +20,9 @@ pub(crate) fn cli() -> Command { pub(crate) async fn exec(args: &ArgMatches) -> Result<()> { let fname = args.get_one::("PATH_OR_URI").unwrap(); let n_bytes = args.get_one::("N").unwrap(); - let options = DataReaderOptions::default(); + let options = DataReaderOptions::ALLOW_TRAILING_COMMA + | DataReaderOptions::ALLOW_EMPTY_FIELD_NAME + | DataReaderOptions::ALLOW_STR_INSTEAD_OF_NSTR; let (_, header, _) = read_from_source(fname, Some(n_bytes), options).await?; println!("{}", HeaderDisplay(&header)); diff --git a/cli/src/command/schema.rs b/cli/src/command/schema.rs index c9b36db..bfc3517 100644 --- a/cli/src/command/schema.rs +++ b/cli/src/command/schema.rs @@ -23,7 +23,9 @@ pub(crate) fn cli() -> Command { pub(crate) async fn exec(args: &ArgMatches) -> Result<()> { let fname = args.get_one::("PATH_OR_URI").unwrap(); let n_bytes = args.get_one::("N").unwrap(); - let options = DataReaderOptions::default(); + let options = DataReaderOptions::ALLOW_TRAILING_COMMA + | DataReaderOptions::ALLOW_EMPTY_FIELD_NAME + | DataReaderOptions::ALLOW_STR_INSTEAD_OF_NSTR; let (schema, _, _) = read_from_source(fname, Some(n_bytes), options).await?; if args.get_flag("tree") { diff --git a/cli/src/visitor.rs b/cli/src/visitor.rs index 448d980..35052b6 100644 --- a/cli/src/visitor.rs +++ b/cli/src/visitor.rs @@ -196,7 +196,7 @@ fn prettify_special_field_name(name: &str) -> &str { #[cfg(test)] mod tests { - use rrr::Schema; + use rrr::{parse, DataReaderOptions}; use super::*; @@ -205,7 +205,8 @@ mod tests { #[test] fn $name() { let input = $input; - let schema = input.parse::().unwrap(); + let options = DataReaderOptions::default(); + let schema = parse(input.as_bytes(), options).unwrap(); let actual = format!("{}", SchemaTreeDisplay(&schema.ast)); let actual = console::strip_ansi_codes(&actual); let expected = $expected; diff --git a/src/ast.rs b/src/ast.rs index 531d494..80944d4 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -1,6 +1,11 @@ -use std::str::FromStr; +use crate::{param::ParamStack, DataReaderOptions}; -use crate::param::ParamStack; +pub fn parse(bytes: &[u8], options: DataReaderOptions) -> Result { + let parser = SchemaParser::new(bytes, options); + parser + .parse() + .map_err(|e| crate::Error::Schema(e, bytes.to_vec())) +} #[derive(Debug, PartialEq, Eq)] pub struct Schema { @@ -8,25 +13,6 @@ pub struct Schema { pub params: ParamStack, } -impl TryFrom<&[u8]> for Schema { - type Error = crate::Error; - - fn try_from(bytes: &[u8]) -> Result { - let parser = SchemaParser::new(bytes); - parser - .parse() - .map_err(|e| crate::Error::Schema(e, bytes.to_vec())) - } -} - -impl FromStr for Schema { - type Err = crate::Error; - - fn from_str(s: &str) -> Result { - ::try_from(s.as_bytes()) - } -} - #[derive(Debug, PartialEq, Eq)] pub struct Ast { pub kind: AstKind, @@ -86,19 +72,35 @@ struct SchemaParser<'b> { lexer: std::iter::Peekable>, location: Location, params: ParamStack, + options: DataReaderOptions, } impl<'b> SchemaParser<'b> { - fn new(input: &'b [u8]) -> Self { + fn new(input: &'b [u8], options: DataReaderOptions) -> Self { Self { lexer: SchemaLexer::new(input).peekable(), location: Location(0, 0), params: ParamStack::new(), + options, } } fn parse(mut self) -> Result { - let kind = self.parse_field_list()?; + let kind = if self + .options + .contains(DataReaderOptions::ALLOW_EMPTY_FIELD_NAME) + && matches!( + self.lexer.peek(), + Some(Ok(Token { + kind: TokenKind::Colon, + .. + })) + ) { + self.parse_field_with_empty_name()? + } else { + self.parse_field_list()? + }; + if let Some(result) = self.lexer.next() { // should be TokenKind::RBracket let token = result.unwrap(); @@ -116,6 +118,17 @@ impl<'b> SchemaParser<'b> { Ok(schema) } + fn parse_field_with_empty_name(&mut self) -> Result { + self.consume_symbol(TokenKind::Colon)?; + + let kind = self.parse_type()?; + let name = String::new(); + let member = Ast { kind, name }; + let members = vec![member]; + let kind = AstKind::Struct(members); + Ok(kind) + } + fn parse_field_list(&mut self) -> Result { let mut members = Vec::new(); @@ -148,6 +161,20 @@ impl<'b> SchemaParser<'b> { if self.next_token()?.kind != TokenKind::Comma { return Err(self.err_unexpected_token()); } + + if self + .options + .contains(DataReaderOptions::ALLOW_TRAILING_COMMA) + && matches!( + self.lexer.peek(), + None | Some(Ok(Token { + kind: TokenKind::RBracket, + .. + })) + ) + { + break; + } } if members.is_empty() { @@ -201,7 +228,12 @@ impl<'b> SchemaParser<'b> { self.consume_symbol(TokenKind::RAngleBracket)?; if let TokenKind::Ident(s) = self.next_token()?.kind { - if s.as_str() != "NSTR" { + if !(s.as_str() == "NSTR" + || (self + .options + .contains(DataReaderOptions::ALLOW_STR_INSTEAD_OF_NSTR) + && s.as_str() == "STR")) + { return Err(self.err_unexpected_token()); } } else { @@ -460,7 +492,7 @@ mod tests { #[test] fn parse_single_field() { let input = "fld1:INT16"; - let parser = SchemaParser::new(input.as_bytes()); + let parser = SchemaParser::new(input.as_bytes(), DataReaderOptions::default()); let actual = parser.parse(); let expected_ast = Ast { name: "".to_owned(), @@ -480,7 +512,7 @@ mod tests { #[test] fn parse_single_struct() { let input = "fld1:[sfld1:<4>NSTR,sfld2:STR,sfld3:INT32]"; - let parser = SchemaParser::new(input.as_bytes()); + let parser = SchemaParser::new(input.as_bytes(), DataReaderOptions::default()); let actual = parser.parse(); let expected_ast = Ast { name: "".to_owned(), @@ -513,7 +545,7 @@ mod tests { #[test] fn parse_nested_struct() { let input = "fld1:[sfld1:[ssfld1:<4>NSTR,ssfld2:STR,ssfld3:INT32]]"; - let parser = SchemaParser::new(input.as_bytes()); + let parser = SchemaParser::new(input.as_bytes(), DataReaderOptions::default()); let actual = parser.parse(); let expected_ast = Ast { name: "".to_owned(), @@ -549,7 +581,7 @@ mod tests { #[test] fn parse_single_fixed_length_builtin_type_array() { let input = "fld1:{3}INT8"; - let parser = SchemaParser::new(input.as_bytes()); + let parser = SchemaParser::new(input.as_bytes(), DataReaderOptions::default()); let actual = parser.parse(); let expected_ast = Ast { name: "".to_owned(), @@ -575,7 +607,7 @@ mod tests { #[test] fn parse_single_fixed_length_struct_array() { let input = "fld1:{3}[sfld1:<4>NSTR,sfld2:STR,sfld3:INT32]"; - let parser = SchemaParser::new(input.as_bytes()); + let parser = SchemaParser::new(input.as_bytes(), DataReaderOptions::default()); let actual = parser.parse(); let expected_ast = Ast { name: "".to_owned(), @@ -614,7 +646,7 @@ mod tests { #[test] fn parse_single_variable_length_struct_array() { let input = "fld1:INT8,fld2:{fld1}[sfld1:<4>NSTR,sfld2:STR,sfld3:INT32]"; - let parser = SchemaParser::new(input.as_bytes()); + let parser = SchemaParser::new(input.as_bytes(), DataReaderOptions::default()); let actual = parser.parse(); let expected_ast = Ast { name: "".to_owned(), @@ -662,7 +694,7 @@ mod tests { #[test] fn parse_single_unlimited_length_struct_array() { let input = "fld1:+[sfld1:<4>NSTR,sfld2:STR,sfld3:INT32]"; - let parser = SchemaParser::new(input.as_bytes()); + let parser = SchemaParser::new(input.as_bytes(), DataReaderOptions::default()); let actual = parser.parse(); let expected_ast = Ast { name: "".to_owned(), @@ -698,12 +730,88 @@ mod tests { assert_eq!(actual, expected); } + macro_rules! test_format_options_support { + ($(($name:ident, $input:expr, $options:expr, $success_expected:expr),)*) => ($( + #[test] + fn $name() { + let input = $input; + let parser = SchemaParser::new(input.as_bytes(), $options); + let succeeded = parser.parse().is_ok(); + + assert_eq!(succeeded, $success_expected); + } + )*); + } + + test_format_options_support! { + ( + trailing_comma_not_allowed, + "fld1:[sfld1:<4>NSTR,sfld2:STR,sfld3:INT32,],", + DataReaderOptions::default(), + false + ), + ( + trailing_comma_allowed, + "fld1:[sfld1:<4>NSTR,sfld2:STR,sfld3:INT32,],", + DataReaderOptions::ALLOW_TRAILING_COMMA, + true + ), + ( + multiple_trailing_commas_not_allowed_even_when_trailing_comma_is_allowed, + "fld1:[sfld1:<4>NSTR,sfld2:STR,sfld3:INT32,,],,", + DataReaderOptions::ALLOW_TRAILING_COMMA, + false + ), + ( + double_commas_not_allowed_even_when_trailing_comma_is_allowed, + "fld1:[sfld1:<4>NSTR,sfld2:STR,,sfld3:INT32]", + DataReaderOptions::ALLOW_TRAILING_COMMA, + false + ), + ( + empty_field_name_not_allowed, + ":+UINT8", + DataReaderOptions::default(), + false + ), + ( + empty_field_name_allowed, + ":+UINT8", + DataReaderOptions::ALLOW_EMPTY_FIELD_NAME, + true + ), + ( + empty_field_name_not_allowed_when_there_are_other_fields, + ":UINT8,fld1:INT8", + DataReaderOptions::ALLOW_EMPTY_FIELD_NAME, + false + ), + ( + empty_field_name_not_allowed_when_trailing_comma_exists, + ":UINT8,", + DataReaderOptions::ALLOW_TRAILING_COMMA | DataReaderOptions::ALLOW_EMPTY_FIELD_NAME, + false + ), + ( + str_instead_of_nstr_not_allowed, + "fld1:<4>NSTR,fld2:<4>STR", + DataReaderOptions::default(), + false + ), + ( + str_instead_of_nstr_allowed, + "fld1:<4>NSTR,fld2:<4>STR", + DataReaderOptions::ALLOW_STR_INSTEAD_OF_NSTR, + true + ), + } + macro_rules! test_parse_errors { ($(($name:ident, $input:expr, $kind:ident, $start:expr, $end:expr),)*) => ($( #[test] fn $name() { let input = $input; - let parser = SchemaParser::new(input.as_bytes()); + let parser = SchemaParser::new(input.as_bytes(), DataReaderOptions::default()); let actual = parser.parse(); let expected = Err(SchemaParseError { kind: SchemaParseErrorKind::$kind, diff --git a/src/lib.rs b/src/lib.rs index 9d13891..e9be18c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -9,7 +9,7 @@ mod walker; use std::borrow::Cow; pub use crate::{ - ast::{Ast, AstKind, Len, Location, Schema, SchemaParseError, SchemaParseErrorKind}, + ast::{parse, Ast, AstKind, Len, Location, Schema, SchemaParseError, SchemaParseErrorKind}, reader::{DataReader, DataReaderOptions}, utils::json_escape_str, visitor::{AstVisitor, JsonDisplay, JsonFormattingStyle, SchemaOnelineDisplay}, @@ -100,21 +100,23 @@ mod tests { use super::*; use crate::{ - ast::{Schema, Size}, + ast::{parse, Schema, Size}, value::{Number, Value, ValueTree}, walker::BufWalker, }; fn schema_without_str() -> Result { + let options = DataReaderOptions::default(); let ast = "date:[year:UINT16,month:UINT8,day:UINT8],\ data:{4}[loc:<4>NSTR,temp:INT16,rhum:UINT16],comment:<16>NSTR"; - ast.parse() + parse(ast.as_bytes(), options) } fn schema_with_str() -> Result { + let options = DataReaderOptions::default(); let ast = "date:[year:UINT16,month:UINT8,day:UINT8],\ data:{4}[loc:STR,temp:INT16,rhum:UINT16],comment:<16>NSTR"; - ast.parse() + parse(ast.as_bytes(), options) } #[test] diff --git a/src/reader.rs b/src/reader.rs index 97c9111..707f9d3 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -6,7 +6,10 @@ use std::{ use flate2::read::GzDecoder; pub use options::DataReaderOptions; -use crate::{ast::Schema, Error}; +use crate::{ + ast::{parse, Schema}, + Error, +}; mod options; @@ -36,7 +39,7 @@ where let map = self.read_header_fields()?; let schema = map.get_required_field("format")?; - let schema: Schema = schema.as_slice().try_into()?; + let schema = parse(schema.as_slice(), self.options)?; let body = if self .options diff --git a/src/reader/options.rs b/src/reader/options.rs index b06d016..886be69 100644 --- a/src/reader/options.rs +++ b/src/reader/options.rs @@ -1,6 +1,6 @@ /// [`DataReaderOptions`] is a type representing the various flags of /// [`DataReader`](super::DataReader) and options as the union of those flags. -#[derive(Debug, PartialEq, Eq, Default)] +#[derive(Debug, PartialEq, Eq, Clone, Copy, Default)] pub struct DataReaderOptions(u32); impl DataReaderOptions { @@ -8,6 +8,13 @@ impl DataReaderOptions { pub const ENABLE_READING_BODY: Self = Self(1 << 1); /// Flag to ignore the value of `data_size` header field. pub const IGNORE_DATA_SIZE_FIELD: Self = Self(1 << 2); + /// Flag to allow a trailing comma in the `format` header field. + pub const ALLOW_TRAILING_COMMA: Self = Self(1 << 3); + /// Flag to allow an empty string to be used for a field name when there are + /// no other fields. + pub const ALLOW_EMPTY_FIELD_NAME: Self = Self(1 << 4); + /// Flag to allow use of `STR` instead of `NSTR`. + pub const ALLOW_STR_INSTEAD_OF_NSTR: Self = Self(1 << 5); /// Returns the union of `self` and a `flag`. pub fn union(&self, flag: Self) -> Self { @@ -24,6 +31,15 @@ impl DataReaderOptions { } } +impl std::ops::BitOr for DataReaderOptions { + type Output = Self; + + fn bitor(self, rhs: Self) -> Self::Output { + let inner = self.0 | rhs.0; + Self(inner) + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/visitor.rs b/src/visitor.rs index 05ccf19..a95862e 100644 --- a/src/visitor.rs +++ b/src/visitor.rs @@ -345,14 +345,15 @@ impl IndentLevel { #[cfg(test)] mod tests { use super::*; - use crate::ast::Schema; + use crate::{ast::parse, DataReaderOptions}; macro_rules! test_schema_oneline_display { ($(($name:ident, $schema:expr),)*) => ($( #[test] fn $name() { let input = $schema; - let schema = input.parse::().unwrap(); + let options = DataReaderOptions::default(); + let schema = parse(input.as_bytes(), options).unwrap(); let output = format!("{}", SchemaOnelineDisplay(&schema.ast)); assert_eq!(output, input); @@ -527,7 +528,8 @@ mod tests { ($(($name:ident, $schema:expr, $buf:expr, $expected:expr),)*) => ($( #[test] fn $name() { - let schema = $schema.parse::().unwrap(); + let options = crate::DataReaderOptions::default(); + let schema = parse($schema.as_bytes(), options).unwrap(); let buf = $buf; let actual = format!("{}", JsonDisplay::new(&schema, &buf, JsonFormattingStyle::Minimal)); let expected = $expected @@ -561,7 +563,8 @@ mod tests { #[test] fn json_serialization_with_pretty_printing_style() { - let schema = NESTED_DATA_SCHEMA.parse::().unwrap(); + let options = crate::DataReaderOptions::default(); + let schema = parse(NESTED_DATA_SCHEMA.as_bytes(), options).unwrap(); let actual = format!( "{}", JsonDisplay::new(&schema, NESTED_DATA_BUF, JsonFormattingStyle::Pretty) diff --git a/web/src/main.rs b/web/src/main.rs index a5f414d..94ead54 100644 --- a/web/src/main.rs +++ b/web/src/main.rs @@ -2,6 +2,7 @@ use std::ops::Deref; use drop_area::FileDropArea; use gloo_file::{futures::read_as_bytes, Blob}; +use rrr::DataReaderOptions; use yew::prelude::*; mod drop_area; @@ -55,7 +56,10 @@ fn app() -> Html { if let Ok(bytes) = result { let mut reader = rrr::DataReader::new( std::io::Cursor::new(&bytes), - rrr::DataReaderOptions::ENABLE_READING_BODY, + DataReaderOptions::ALLOW_TRAILING_COMMA + | DataReaderOptions::ALLOW_EMPTY_FIELD_NAME + | DataReaderOptions::ALLOW_STR_INSTEAD_OF_NSTR + | DataReaderOptions::ENABLE_READING_BODY, ); let triplet = reader.read(); file_content.set(triplet.ok()) diff --git a/web/src/tree.rs b/web/src/tree.rs index 970849d..d73000e 100644 --- a/web/src/tree.rs +++ b/web/src/tree.rs @@ -103,7 +103,7 @@ fn prettify_special_field_name(name: &str) -> &str { #[cfg(test)] mod tests { - use rrr::Schema; + use rrr::{parse, DataReaderOptions}; use super::*; @@ -112,7 +112,10 @@ mod tests { #[test] fn $name() { let input = $input; - let schema = input.parse::().unwrap(); + let options = DataReaderOptions::ALLOW_TRAILING_COMMA + | DataReaderOptions::ALLOW_EMPTY_FIELD_NAME + | DataReaderOptions::ALLOW_STR_INSTEAD_OF_NSTR; + let schema = parse(input.as_bytes(), options).unwrap(); let actual = create_schema_tree(&schema.ast).unwrap(); let expected = $expected;