Trying out PetitParser for Java
Inspired by this SO question, I wanted to find a nice tool for writing a parser in Groovy/Java, hoping for something that is as easy to get started with as Instaparse. So far I am quite pleased with PetitParser.
@Grab("com.github.petitparser:petitparser-core:2.2.0")
import org.petitparser.tools.GrammarDefinition
import org.petitparser.tools.GrammarParser
import org.petitparser.parser.primitive.CharacterParser as CP
import org.petitparser.parser.primitive.StringParser as SP
import org.petitparser.utils.Functions as F

/**
 * Grammar for a small search-query language: terms joined by the operators
 * "and" / "or" (case-insensitive). A term is either a bare or double-quoted
 * value, or a `keyword:value` pair.
 *
 * This class only describes the syntax; the shape of the parse result is
 * attached by SearchQueryParserDefinition via actions.
 */
class SearchQueryGrammerDefinition extends GrammarDefinition {

    SearchQueryGrammerDefinition() {
        // Entry point: term (operation term)*
        define("start",
            ref("term")
                .separatedBy(ref("operation")))

        // "and" / "or" in any letter case, surrounding whitespace skipped,
        // normalised to lower case in the result.
        define("operation",
            SP.ofIgnoringCase("and")
                .or(SP.ofIgnoringCase("or"))
                .trim()
                .map { it.toLowerCase() })

        // keyword-term is tried first; if it does not match, the parser
        // falls back to a plain term.
        define("term",
            ref("keyword-term")
                .or(ref("simple-term")))

        define("simple-term",
            ref("quoted-or-unquoted-term"))

        define("quoted-or-unquoted-term",
            ref("unquoted-term")
                .or(ref("quoted-term")))

        // keyword ':' value
        define("keyword-term",
            ref("keyword")
                .seq(CP.of(":" as Character))
                .seq(ref("quoted-or-unquoted-term")))

        // One or more characters that are neither a space nor a double quote.
        define("unquoted-term",
            CP.anyOf(' "').neg().plus().flatten().trim())

        // Double-quoted text in which \" stands for a literal quote. The
        // escape sequence is tried before the "any character but a quote"
        // alternative so that an escaped quote does not end the term.
        // The result is the inner text with the escapes resolved.
        define("quoted-term",
            CP.anyOf('"')
                .seq(SP.of('\\"').or(CP.pattern('^"')).star().flatten())
                .seq(CP.anyOf('"'))
                .map { it[1].replace('\\"', '"') })

        // A letter followed by any number of word characters.
        // star() must apply to word() only: applying it to the whole
        // (letter word) sequence matches pairs of characters, so only
        // even-length keywords (and the empty keyword) were recognised.
        define("keyword",
            CP.letter()
                .seq(CP.word().star())
                .flatten())
    }

    /**
     * Registers the production `p` under the name `s`.
     * Helper for `def`, which is a keyword in Groovy.
     */
    void define(s, p) {
        super.def(s, p)
    }
}

/**
 * Adds result-building actions on top of the grammar: every term is turned
 * into a single-entry map.
 */
class SearchQueryParserDefinition extends SearchQueryGrammerDefinition {

    SearchQueryParserDefinition() {
        // [keyword, ':', value] -> [keyword: value]
        action("keyword-term", { [(it[0]): it[2]] })
        // Terms without a keyword are stored under the key '*'.
        action("simple-term", { ['*': it] })
    }
}

/** Ready-to-use parser built from SearchQueryParserDefinition. */
class SearchQueryParser extends GrammarParser {

    SearchQueryParser() {
        super(new SearchQueryParserDefinition())
    }
}

println(new SearchQueryParser().parse('metabolism and heart and TAXONOMY:40674 AND curation:"Non \\"curated\\"" and cancer'))
// => Success[1:82]: [{*=metabolism}, and, {*=heart}, and, {TAXONOMY=40674}, and, {curation=Non "curated"}, and, {*=cancer}]