Skip to main content

Trying out PetitParser for Java

Inspired by this SO question, I wanted to find a nice tool for Groovy/Java to write a parser, in the hope of finding something that is as nice to start with as Instaparse. So far I am quite pleased with PetitParser.

@Grab("com.github.petitparser:petitparser-core:2.2.0")
import org.petitparser.tools.GrammarDefinition
import org.petitparser.tools.GrammarParser
import org.petitparser.parser.primitive.CharacterParser as CP
import org.petitparser.parser.primitive.StringParser as SP
import org.petitparser.utils.Functions as F

/**
 * Grammar for a small search-query language.
 *
 * A query is a list of terms separated by the operations `and` / `or`
 * (case-insensitive). A term is either a plain value or `keyword:value`,
 * where the value may be unquoted (no spaces or double quotes) or wrapped in
 * double quotes, with backslash-quote standing for an embedded double quote.
 *
 * This class only defines the productions; subclasses may attach actions to
 * shape the parse result.
 */
class SearchQueryGrammerDefinition extends GrammarDefinition {
    SearchQueryGrammerDefinition() {
        // start := term (operation term)*
        define("start",
                ref("term")
                .separatedBy(ref("operation")))
        // operation := "and" | "or", any letter case, surrounding whitespace
        // skipped; the matched text is normalised to lower case.
        define("operation",
                SP.ofIgnoringCase("and")
                .or(SP.ofIgnoringCase("or"))
                .trim()
                .map{ it.toLowerCase() })
        // keyword-term is tried first so that `key:value` is not swallowed
        // whole by the unquoted form of simple-term.
        define("term",
                ref("keyword-term")
                .or(ref("simple-term")))
        // Separate production name so that an action can be attached to plain
        // terms without affecting the value part of a keyword-term.
        define("simple-term",
                ref("quoted-or-unquoted-term"))
        define("quoted-or-unquoted-term",
                ref("unquoted-term")
                .or(ref("quoted-term")))
        // keyword-term := keyword ':' value  -> yields [keyword, ':', value]
        define("keyword-term",
                ref("keyword")
                .seq(CP.of(":" as Character))
                .seq(ref("quoted-or-unquoted-term")))
        // unquoted-term := one or more characters that are neither a space
        // nor a double quote, with surrounding whitespace skipped.
        define("unquoted-term",
                CP.anyOf(' "').neg().plus().flatten().trim())
        // quoted-term := '"' (backslash-quote | any non-quote char)* '"'
        // The escaped quote is tried before the generic character so the
        // backslash is not consumed on its own; the result is the inner text
        // with every backslash-quote replaced by a plain double quote.
        define("quoted-term",
                CP.anyOf('"')
                .seq(SP.of('\\"').or(CP.pattern('^"')).star().flatten())
                .seq(CP.anyOf('"'))
                .map{ it[1].replace('\\"', '"') })
        // keyword := letter word*
        // The star must bind to word() only. Applying it to the whole
        // letter/word sequence would repeat (letter word) pairs, which
        // rejects keywords such as `tag` and accepts the empty keyword.
        define("keyword",
                CP.letter()
                .seq(CP.word().star())
                .flatten())
    }

    /**
     * Helper for `def`, which is a keyword in groovy.
     *
     * @param s name of the production
     * @param p parser implementing the production
     */
    void define(s, p) { super.def(s,p) }
}

/**
 * Extends the search-query grammar with actions that turn each term into a
 * single-entry map: keyword terms map the keyword to its value, plain terms
 * map the key '*' to their value.
 */
class SearchQueryParserDefinition extends SearchQueryGrammerDefinition {
    SearchQueryParserDefinition() {
        // A keyword-term result is indexed as [0] keyword, [1] ':', [2] value.
        action("keyword-term", { parts ->
            [(parts[0]): parts[2]]
        })
        // Plain terms carry no keyword, so they are filed under '*'.
        action("simple-term", { value ->
            ['*': value]
        })
    }
}

/**
 * Ready-to-use parser for search queries: a GrammarParser constructed from a
 * fresh SearchQueryParserDefinition.
 */
class SearchQueryParser extends GrammarParser {
    SearchQueryParser() {
        // Hand the grammar-plus-actions definition to the GrammarParser base class.
        super(new SearchQueryParserDefinition())
    }
}

// Demo: parse a query mixing plain terms, keyword terms, mixed-case operators
// and a quoted value containing escaped quotes, then print the parse result.
def sampleQuery = 'metabolism and heart and TAXONOMY:40674 AND curation:"Non \\"curated\\"" and cancer'
def parseResult = new SearchQueryParser().parse(sampleQuery)
println(parseResult)
// => Success[1:82]: [{*=metabolism}, and, {*=heart}, and, {TAXONOMY=40674}, and, {curation=Non "curated"}, and, {*=cancer}]