Skip to content

Question about the implementation of the parser #7

Open
@mezoni

Description

@mezoni

Hello, István!
For the sake of interest, I implemented a parser for your queries specification.
To be honest, your specification does not specify everything, so I took some liberties in interpreting the missing definitions.
But this is not a problem, because it can be fixed in a few minutes in the parser definition.
Most of the time was spent studying the source code in order to understand the grammar (and specification too).

It took me no more than a few hours to write the parser definition, and then only because the specification is not comprehensive.

Below is the source code for the parser definition. The source code of the tests are included (for simplicity) in the source code of the parser. By running the generated parser, you are essentially running tests.

To be able to generate this parser, you need to add the package from the Github repository into dependencies.

import 'package:parser_builder/branch.dart';
import 'package:parser_builder/bytes.dart';
import 'package:parser_builder/combinator.dart';
import 'package:parser_builder/fast_build.dart';
import 'package:parser_builder/multi.dart';
import 'package:parser_builder/parser_builder.dart';
import 'package:parser_builder/sequence.dart';
import 'package:parser_builder/transformers.dart';
import 'package:query/src/ast.dart';

Future<void> main(List<String> args) async {
  final context = Context();
  context.optimizeForSize = false;
  await fastBuild(
      context, [_parse, _expression_, _and_], 'bin/query_parser.dart',
      header: __header, publish: {'parseQuery': _parse});
}

const __header = r'''
// ignore_for_file: unused_local_variable

import 'package:query/src/ast.dart';
import 'package:source_span/source_span.dart';
import 'package:test/test.dart';

String debugQuery(String input) => parseQuery(input).toString(debug: true);

void main() {
  group('Base expressions', () {
test('missing `)` or `"`', () {
      expect(debugQuery('(field: "x'), '(field:"<x>")');
      expect(debugQuery('("x'), '("<x>")');
      expect(debugQuery('('), '(<>)');
      expect(debugQuery('(a b'), '((<a> <b>))');
      expect(debugQuery('-(-(a OR -b) -c) | -(d'),
          '(-((-((<a> OR -<b>)) -<c>)) OR -(<d>))');

      // Missing after `c`
      // The parser parses, but the result is unpredictable.
      debugQuery('-(-(a OR -b) -c | -(d)');
    });

    test('1 string', () {
      expect(debugQuery('abc'), '<abc>');
      expect(debugQuery('"abc"'), '"<abc>"');
      expect(debugQuery('abc*'), '<abc*>');
    });

    test('2 strings', () {
      expect(debugQuery('abc def'), '(<abc> <def>)');
      expect(debugQuery('"abc 1" def'), '("<abc> <1>" <def>)');
      expect(debugQuery('abc "def 2"'), '(<abc> "<def> <2>")');
      expect(debugQuery('"abc" "def"'), '("<abc>" "<def>")');
      expect(debugQuery('"abc 1" "def 2"'), '("<abc> <1>" "<def> <2>")');
    });

    test('explicit AND', () {
      expect(debugQuery('a AND b c'), '(<a> <b> <c>)');
    });

    test('negative word', () {
      expect(debugQuery('-abc'), '-<abc>');
      expect(debugQuery('-"abc"'), '-"<abc>"');
      expect(debugQuery('-"abc 1"'), '-"<abc> <1>"');

      expect(debugQuery('NOT abc'), '-<abc>');
      expect(debugQuery('NOT "abc"'), '-"<abc>"');
      expect(debugQuery('NOT "abc 1"'), '-"<abc> <1>"');
    });

    test('scoped', () {
      expect(debugQuery('a:abc'), 'a:<abc>');
      expect(debugQuery('a:"abc"'), 'a:"<abc>"');
      expect(debugQuery('a:"abc 1"'), 'a:"<abc> <1>"');
      expect(debugQuery('a:-"abc 1"'), 'a:-"<abc> <1>"');
      expect(debugQuery('NOT field:abc'), '-field:<abc>');
    });

    test('special scoped', () {
      expect(debugQuery('a*:abc'), 'a*:<abc>');
      expect(debugQuery('a%:"abc"'), 'a%:"<abc>"');
    });

    test('compare', () {
      expect(debugQuery('year < 2000'), '<year<2000>');
      expect(debugQuery('field >= "test case"'), '<field>="test case">');
    });

    test('range', () {
      expect(debugQuery('[1 TO 10]'), '<[<1> TO <10>]>');
      expect(debugQuery(']1 TO 10['), '<]<1> TO <10>[>');
      expect(debugQuery(']"1 a" TO "10 b"['), '<]"<1> <a>" TO "<10> <b>"[>');
    });
  });

  group('or', () {
    test('2 items', () {
      expect(debugQuery('a OR b'), '(<a> OR <b>)');
    });

    test('2 items pipe', () {
      expect(debugQuery('a | b'), '(<a> OR <b>)');
    });

    test('3 items', () {
      expect(debugQuery('a OR b OR c'), '(<a> OR <b> OR <c>)');
    });

    test('3 items pipe', () {
      expect(debugQuery('a | b | c'), '(<a> OR <b> OR <c>)');
    });

    test('3 items pipe mixed', () {
      expect(debugQuery('a OR b | c'), '(<a> OR <b> OR <c>)');
      expect(debugQuery('a | b OR c'), '(<a> OR <b> OR <c>)');
    });

    test('precedence of implicit AND, explicit OR', () {
      expect(debugQuery('a b OR c'), '(<a> (<b> OR <c>))');
      expect(debugQuery('a OR b c'), '(<a> OR (<b> <c>))');
      expect(debugQuery('a OR b c OR d'), '(<a> OR (<b> (<c> OR <d>)))');
    });

    test('precedence of implicit AND, explicit OR pipe', () {
      expect(debugQuery('a b | c'), '(<a> (<b> OR <c>))');
      expect(debugQuery('a | b c'), '(<a> OR (<b> <c>))');
      expect(debugQuery('a | b c | d'), '(<a> OR (<b> (<c> OR <d>)))');
    });
  });

  group('complex cases', () {
    test('#1', () {
      expect(debugQuery('a:-v1 b:(beta OR moon < Deimos OR [a TO e])'),
          '(a:-<v1> b:((<beta> OR <moon<Deimos> OR <[<a> TO <e>]>)))');
    });

    test('#2', () {
      expect(debugQuery('a = 2000 b > 2000 c'), '(<a=2000> <b>2000> <c>)');
    });

    test('#3', () {
      // TODO: fix
      // expect(debugQuery('(f:abc)'), '');
    });
  });

  group('unicode chars', () {
    test('hungarian', () {
      expect(
          debugQuery('árvíztűrő TÜKÖRFÚRÓGÉP'), '(<árvíztűrő> <TÜKÖRFÚRÓGÉP>)');
    });
  });

  group('grouping precedence', () {
    test('empty group', () {
      expect(debugQuery('()'), '(<>)');
    });
    test('empty group with space', () {
      expect(debugQuery('(  )'), '(<>)');
    });
    test('single item group', () {
      expect(debugQuery('(a)'), '(<a>)');
    });
    test('single item group with space', () {
      expect(debugQuery('( a )'), '(<a>)');
    });
    test('grouping with two items implicit AND', () {
      expect(debugQuery('(a b)'), '((<a> <b>))');
    });
    test('grouping with two items explicit AND', () {
      expect(debugQuery('(a AND b)'), '((<a> <b>))');
    });
    test('grouping with multiple items', () {
      expect(debugQuery('(a | b) c (d | e)'),
          '(((<a> OR <b>)) <c> ((<d> OR <e>)))');
    });
    test('nested grouping', () {
      expect(debugQuery('(a OR b) OR c'), '(((<a> OR <b>)) OR <c>)');
      expect(debugQuery('(a OR b) c'), '(((<a> OR <b>)) <c>)');
      expect(debugQuery('((a OR b) c) | d'), '(((((<a> OR <b>)) <c>)) OR <d>)');
    });
    test('negative grouping', () {
      expect(debugQuery('-(a OR b) OR c'), '(-((<a> OR <b>)) OR <c>)');
      expect(debugQuery('(a OR -b) c'), '(((<a> OR -<b>)) <c>)');
      expect(debugQuery('-(-(a OR -b) -c) | -(d)'),
          '(-((-((<a> OR -<b>)) -<c>)) OR -(<d>))');
    });
    test('scoped grouping', () {
      expect(debugQuery('(field:abc)'), '(field:<abc>)');
      expect(debugQuery('(field:abc AND field:def)'),
          '((field:<abc> field:<def>))');
      expect(debugQuery('(field:abc OR field:def)'),
          '((field:<abc> OR field:<def>))');
    });
  });

  group('phrase match', () {
    test('empty phrase', () {
      expect(debugQuery('""'), '""');
    });
    test('empty phrase with space', () {
      expect(debugQuery('"  "'), '""');
    });
    test('simple word phrase', () {
      expect(debugQuery('"a"'), '"<a>"');
    });
    test('single word phrase with space', () {
      expect(debugQuery('" a "'), '"<a>"');
    });
    test('two word phrase', () {
      expect(debugQuery('"a b"'), '"<a> <b>"');
    });
    test('three word phrase', () {
      expect(debugQuery('"a b c"'), '"<a> <b> <c>"');
    });
    test('three word phrase with AND', () {
      expect(debugQuery('"a AND b"'), '"<a> <AND> <b>"');
    });
    test('three word phrase with OR', () {
      expect(debugQuery('"a OR b"'), '"<a> <OR> <b>"');
    });
    test('negative phrase grouping', () {
      expect(debugQuery('-("a OR b") OR c'), '(-("<a> <OR> <b>") OR <c>)');
      expect(debugQuery('(a OR -"b") ("c")'), '(((<a> OR -"<b>")) ("<c>"))');
      expect(debugQuery('-(-"a OR -b" -c) | -"d"'),
          '(-((-"<a> <OR> <-b>" -<c>)) OR -"<d>")');
    });
    test('phrase with parenthesis', () {
      expect(debugQuery('"(a OR -b)" -("-c | []")'),
          '("<(a> <OR> <-b)>" -("<-c> <|> <[]>"))');
    });
  });
}
''';

const _and = Ref<String, Query>('_and');

const _and_ = Named(
    '_and',
    Map1(
        CombinedList1(_or, Preceded(Opt(_andOp), _or)),
        Expr<Query>([
          'children'
        ], '{{children}}.length == 1 ? {{children}}[0] : AndQuery({{children}})')));

const _andOp = Named('_andOp', Terminated(Tag('AND'), _ws));

const _closeBracket = Named('_closeBracket', Terminated(Tag(']'), _ws));

const _closeParen = Named('_closeParen', Terminated(Tag(')'), _ws));

const _colon = Named('_colon', Terminated(Tag(':'), _ws));

const _comparison = Named(
    '_comparison',
    Map3(
        _identifier,
        _comparisonOperator,
        _wordOrText,
        Expr<FieldCompareQuery>(['field', 'op', 'text'],
            'FieldCompareQuery({{field}}, {{op}}, {{text}})')));

const _comparisonOperator = Named('_comparisonOperator',
    Terminated(Tags(['<=', '<', '>=', '>', '!=', '=']), _ws));

const _exclusion = Named(
    '_exclusion',
    Alt2(
      Map1(Preceded(_exclusionOp, _expression),
          Expr<NotQuery>(['child'], 'NotQuery({{child}})')),
      _expression,
    ));

const _exclusionOp = Named(
    '_exclusionOp',
    Alt2(
      Tag('-'),
      Terminated(Tag('NOT'), _ws),
    ));

const _expression = Ref<String, Query>('_expression');

const _expression_ = Named(
    '_expression',
    Alt5(
      _group,
      _phrase,
      _range,
      _comparison,
      _word,
    ));

const _field = Named('_field', TakeWhile1(_isFieldChar));

const _group = Named(
    '_group',
    Preceded(
        _openParen,
        Alt2(
          Map1(Terminated(_and, _recoveryCloseParen),
              Expr<GroupQuery>(['child'], 'GroupQuery({{child}})')),
          Map1(_recoveryCloseParen,
              Expr<TextQuery>(['_'], "GroupQuery(TextQuery(''))")),
        )));

const _identifier =
    Named('_identifier', Terminated(TakeWhile1(_isAllowedChar), _ws));

const _isAllowedChar = NotCharClass('"[" | "]" | [():<!=>] | [#x0-#x20]');

const _isFieldChar = NotCharClass('[#x0-#x20] | [:]');

const _isPhraseWord = NotCharClass('#x9 | #xA | #xD | [" ]');

const _openBracket = Named('_openBracket', Terminated(Tag('['), _ws));

const _openOrCloseBracket =
    Named('_openOrCloseBracket', Alt2(_openBracket, _closeBracket));

const _openParen = Named('_openParen', Terminated(Tag('('), _ws));

const _or = Named(
    '_or',
    Map1(
        CombinedList1(
            Alt2(
              _group,
              _scopedExclusion,
            ),
            Preceded(_orOp, _and)),
        FuncTransformer<Query>(['List<Query> children'], '''
if (children.length == 1) return children.single;
final second = children.last;
if (children.length == 2 && second is OrQuery) {
  second.children.insert(0, children.first);
  return second;
}
return OrQuery(children);''')));

const _orOp = Named(
    '_orOp',
    Alt2(
      Terminated(Tag('|'), _ws),
      Terminated(Tag('OR'), _ws),
    ));

const _parse = Named('_parse', Delimited(_ws, _and, Eof<String>()));

const _phrase = Named(
    '_phrase',
    Map1(
        Delimited(_quote, Many0(_phraseWord), _recoveryCloseQuote),
        Expr<PhraseQuery>([
          'words'
        ], 'PhraseQuery({{words}}.join(\' \'), {{words}}.map((e) => TextQuery(e)).toList())')));

const _phraseWord =
    Named('_phraseWord', Terminated(TakeWhile1(_isPhraseWord), _ws));

const _quote = Named('_closeQuote', Terminated(Tag('"'), _ws));

const _range = Named(
    '_range',
    Map5(
        _openOrCloseBracket,
        _wordOrText,
        _to,
        _wordOrText,
        _openOrCloseBracket,
        Expr<RangeQuery>(['open', 'start', '_', 'end', 'close'], '''
RangeQuery({{start}}, {{end}}, startInclusive: {{open}} == '[', endInclusive: {{close}} == ']')
''')));

const _recoveryCloseParen = Named('_recoveryCloseParen',
    Terminated(Alt2(Tag(')'), Value<String, String>(')')), _ws));

const _recoveryCloseQuote = Named('_recoveryCloseQuote',
    Terminated(Alt2(Tag('"'), Value<String, String>('"')), _ws));

const _scopedExclusion = Named(
    '_scopedExclusion',
    Alt2(
      Map1(Preceded(_exclusionOp, _scopedExpression),
          Expr<NotQuery>(['child'], 'NotQuery({{child}})')),
      _scopedExpression,
    ));

const _scopedExpression = Named(
    '_scopedExpression',
    Alt2(
      Map3(
          _field,
          _colon,
          _exclusion,
          Expr<FieldScope>(
              ['field', '_', 'child'], 'FieldScope({{field}}, {{child}})')),
      _exclusion,
    ));

const _to = Named('_to', Terminated(Tag('TO'), _ws));

const _word = Named('_word',
    Map1(_identifier, Expr<TextQuery>(['text'], 'TextQuery({{text}})')));

const _wordOrText = Named('_wordOrText ', Alt2(_phrase, _word));

const _ws = Named('_ws', SkipWhile(CharClass('#x9 | #xA | #xD | #x20')));

typedef Expr<T> = ExprTransformer<T>;

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions