aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGravatar Peter McGoron 2021-07-25 23:50:50 -0400
committerGravatar - 2021-07-26 22:55:45 -0400
commit04063088250ff17e7f5f5314b6c06ddd0124769d (patch)
tree3e6a4d63c177d1b77161a52f5830ded2f76c5762
parentmove SQL schema setup to variable (diff)
start query parser
-rw-r--r--example/query.md14
-rw-r--r--queryparse.py121
-rwxr-xr-xtest/t_queryparse.py10
3 files changed, 145 insertions, 0 deletions
diff --git a/example/query.md b/example/query.md
new file mode 100644
index 0000000..ada8638
--- /dev/null
+++ b/example/query.md
@@ -0,0 +1,14 @@
+Queries are passed to the command line, using the keywords OR
+and NOT. Grouping is done using [ and ] (these are not reserved by
+the shell). Two tildes (`~~`) at the beginning of a string denote
+a glob match on tags. A tilde and a dash (`~-`) denote a glob match
+on titles.
+
+The grammar is implemented as a recursive descent parser.
+
+```
+e1 ::= e2 "OR" e1 | e2;
+e2 ::= e3 e2 | e3;
+e3 ::= "NOT" e4 | e4;
+e4 ::= TAG | "[" e1 "]";
+```
diff --git a/queryparse.py b/queryparse.py
new file mode 100644
index 0000000..3fcbbb4
--- /dev/null
+++ b/queryparse.py
@@ -0,0 +1,121 @@
+from enum import Enum
+
+class TokType(Enum):
+ OR = 0
+ NOT = 1
+ TAGGLOB = 2
+ TITLEGLOB = 3
+ TAGLIT = 4
+ EOF = 5
+ AND = 6
+ OPEN = 7
+ CLOSE = 8
+ TAG = 9
+
+def classify_head(l):
+ if len(l) == 0:
+ return (TokType.EOF, None)
+
+ s = l[0]
+ if s == "OR":
+ return (TokType.OR,None)
+ elif s == "NOT":
+ return (TokType.NOT,None)
+ elif s == "[":
+ return (TokType.OPEN,None)
+ elif s == "]":
+ return (TokType.CLOSE,None)
+ elif s[:2] == "~~":
+ return (TokType.TAGGLOB, s[2:])
+ elif s[:2] == "~-":
+ return (TokType.TITLEGLOB, s[2:])
+ else:
+ return (TokType.TAG, s)
+
+class Atom:
+ def __str__(self):
+ return f'({self.op} "{self.v}")'
+ def __init__(self, op, v):
+ self.op = op
+ self.v = v
+
+class ParseTree:
+ def push(self, pt):
+ self.l.append(pt)
+ def __str__(self):
+ s = f'({self.op}'
+ for i in self.l:
+ s = s + ' ' + str(i)
+ return s + ')'
+ def __init__(self, op, l):
+ self.op = op
+ self.l = l
+
+def encapsulate(pt, op):
+ if pt.op != op:
+ pt = ParseTree(op, [pt])
+ return pt
+
+"""
+The parser is implemented like
+
+e1 ::= e2 ("OR" e2)*;
+e2 ::= e3*;
+e3 ::= "NOT"? e4;
+e4 ::= TAG | "[" e1 "]";
+"""
+
+class ParseError(Exception):
+ pass
+
+def e4(l):
+ typ,s = classify_head(l)
+
+ if typ == TokType.OPEN:
+ pt,l = e1(l)
+ typ,_ = classify_head(l)
+ if typ != TokType.CLOSE:
+ raise ParseError
+ return pt,l[1:]
+ elif typ == TokType.TAG:
+ return ParseTree(TokType.TAG, s), l[1:]
+ else:
+ raise ParseError
+
+def e3(l):
+ typ,_ = classify_head(l)
+ if typ == TokType.NOT:
+ return encapsulate(e4(l[1:]), \
+ TokType.NOT)
+ return e4(l)
+
+def e2(l):
+ pt,l = e3(l)
+ try:
+ pt = encapsulate(pt, TokType.AND)
+ while True:
+ pt2,l = e3(l)
+ pt.append(pt2)
+ except ParseError:
+ pass
+ return pt,l
+
+def e1(l):
+ pt,l = e2(l)
+ typ,_ = classify_head(l)
+
+ try:
+ pt = encapsulate(pt, TokType.OR)
+ while typ == TokType.OR:
+ pt2,l = e2(l[1:])
+ pt.push(pt2)
+ typ,_ = classify_head(l)
+ except ParseError:
+ pass
+ return pt,l
+
+def parse(l):
+ x,l = e1(l)
+ if l != []:
+ raise ParseError
+ return x
diff --git a/test/t_queryparse.py b/test/t_queryparse.py
new file mode 100755
index 0000000..9bf0511
--- /dev/null
+++ b/test/t_queryparse.py
@@ -0,0 +1,10 @@
+#!/usr/bin/python3
+import sys
+sys.path.insert(0, "../")
+import queryparse
+
+def f(x):
+ print(x)
+ print(queryparse.parse(x))
+
+f(["x", "OR", "y"])