diff options
| author | 2021-07-25 23:50:50 -0400 | |
|---|---|---|
| committer | 2021-07-26 22:55:45 -0400 | |
| commit | 04063088250ff17e7f5f5314b6c06ddd0124769d (patch) | |
| tree | 3e6a4d63c177d1b77161a52f5830ded2f76c5762 | |
| parent | move SQL schema setup to variable (diff) | |
start query parser
| -rw-r--r-- | example/query.md | 14 | ||||
| -rw-r--r-- | queryparse.py | 121 | ||||
| -rwxr-xr-x | test/t_queryparse.py | 10 |
3 files changed, 145 insertions, 0 deletions
diff --git a/example/query.md b/example/query.md new file mode 100644 index 0000000..ada8638 --- /dev/null +++ b/example/query.md @@ -0,0 +1,14 @@ +Queries are passed to the command line, using the keywords OR +and NOT. Grouping is done using [ and ] (these are not reserved by +the shell). Two tildes (`~~`) at the beginning of a string denote +a glob match on tags. A tilde and a dash (`~-`) denote a glob match +on titles. + +The grammar is implemented as a recursive descent parser. + +``` +e1 ::= e2 "OR" e1 | e2; +e2 ::= e3 e2 | e3; +e3 ::= "NOT" e4 | e4; +e4 ::= TAG | "[" e1 "]"; +``` diff --git a/queryparse.py b/queryparse.py new file mode 100644 index 0000000..3fcbbb4 --- /dev/null +++ b/queryparse.py @@ -0,0 +1,121 @@ +from enum import Enum + +class TokType(Enum): + OR = 0 + NOT = 1 + TAGGLOB = 2 + TITLEGLOB = 3 + TAGLIT = 4 + EOF = 5 + AND = 6 + OPEN = 7 + CLOSE = 8 + TAG = 9 + +def classify_head(l): + if len(l) == 0: + return (TokType.EOF, None) + + s = l[0] + if s == "OR": + return (TokType.OR,None) + elif s == "NOT": + return (TokType.NOT,None) + elif s == "[": + return (TokType.OPEN,None) + elif s == "]": + return (TokType.CLOSE,None) + elif s[:2] == "~~": + return (TokType.TAGGLOB, s[2:]) + elif s[:2] == "~-": + return (TokType.TITLEGLOB, s[2:]) + else: + return (TokType.TAG, s) + +class Atom: + def __str__(self): + return f'({self.op} "{self.v}")' + def __init__(self, op, v): + self.op = op + self.v = v + +class ParseTree: + def push(self, pt): + self.l.append(pt) + def __str__(self): + s = f'({self.op}' + for i in self.l: + s = s + ' ' + str(i) + return s + ')' + def __init__(self, op, l): + self.op = op + self.l = l + +def encapsulate(pt, op): + if pt.op != op: + pt = ParseTree(op, [pt]) + return pt + +""" +The parser is implemented like + +e1 ::= e2 ("OR" e2)*; +e2 ::= e3*; +e3 ::= "NOT"? e4; +e4 ::= TAG | "[" e1 "]"; +""" + +class ParseError(Exception): + pass + +def e4(l): + typ,s = classify_head(l) + + if typ == TokType.OPEN: + pt,l = e1(l) + typ,_ = classify_head(l) + if typ != TokType.CLOSE: + raise ParseError + return pt,l[1:] + elif typ == TokType.TAG: + return ParseTree(TokType.TAG, s), l[1:] + else: + raise ParseError + +def e3(l): + typ,_ = classify_head(l) + if typ == TokType.NOT: + return encapsulate(e4(l[1:]), \ + TokType.NOT) + return e4(l) + +def e2(l): + pt,l = e3(l) + try: + pt = encapsulate(pt, TokType.AND) + while True: + pt2,l = e3(l) + pt.append(pt2) + except ParseError: + pass + return pt,l + +def e1(l): + pt,l = e2(l) + typ,_ = classify_head(l) + + try: + pt = encapsulate(pt, TokType.OR) + while typ == TokType.OR: + pt2,l = e2(l[1:]) + pt.push(pt2) + typ,_ = classify_head(l) + except ParseError: + pass + return pt,l + +def parse(l): + x,l = e1(l) + if l != []: + raise ParseError + return x diff --git a/test/t_queryparse.py b/test/t_queryparse.py new file mode 100755 index 0000000..9bf0511 --- /dev/null +++ b/test/t_queryparse.py @@ -0,0 +1,10 @@ +#!/usr/bin/python3 +import sys +sys.path.insert(0, "../") +import queryparse + +def f(x): + print(x) + print(queryparse.parse(x)) + +f(["x", "OR", "y"]) |
