aboutsummaryrefslogtreecommitdiffstats
path: root/queryparse.py
blob: 17227c2575a03a86a936a98dcc400cd48ca53b0b (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
from enum import Enum

class TokType(Enum):
	OR = 0
	NOT = 1
	TAGGLOB = 2
	TITLEGLOB = 3
	TAGLIT = 4
	EOF = 5
	AND = 6
	OPEN = 7
	CLOSE = 8
	TAG = 9

def classify_head(l):
	if len(l) == 0:
		return (TokType.EOF, None)

	s = l[0]
	if s == "OR":
		return (TokType.OR,None)
	elif s == "NOT":
		return (TokType.NOT,None)
	elif s == "[":
		return (TokType.OPEN,None)
	elif s == "]":
		return (TokType.CLOSE,None)
	elif s[:2] == "~~":
		return (TokType.TAGGLOB, s[2:])
	elif s[:2] == "~-":
		return (TokType.TITLEGLOB, s[2:])
	else:
		return (TokType.TAG, s)

class Atom:
	def __str__(self):
		return f'({self.op} "{self.v}")'
	def __init__(self, op, v):
		self.op = op
		self.v = v

class ParseTree:
	def push(self, pt):
		self.l.append(pt)
	def __str__(self):
		s = f'({self.op}'
		for i in self.l:
			s = s + ' ' + str(i)
		return s + ')'
	def __init__(self, op, l):
		self.op = op
		self.l = l

def encapsulate(pt, op):
	if pt.op != op:
		pt = ParseTree(op, [pt])
	return pt

"""
The parser is implemented like

e1 ::= e2 ("OR" e2)*;
e2 ::= e3*;
e3 ::= "NOT"? e4;
e4 ::= TAG | "[" e1 "]";
"""

class ParseError(Exception):
	pass

def e4(l):
	typ,s = classify_head(l)

	if typ == TokType.OPEN:
		pt,l = e1(l)
		typ,_ = classify_head(l)
		if typ != TokType.CLOSE:
			raise ParseError
		return pt,l[1:]
	elif typ == TokType.TAG:
		return ParseTree(TokType.TAG, s), l[1:]
	else:
		raise ParseError

def e3(l):
	typ,_ = classify_head(l)
	if typ == TokType.NOT:
		pt,l = e4(l[1:])
		return encapsulate(pt, TokType.NOT), l
	return e4(l)

def e2(l):
	pt,l = e3(l)
	try:
		while True:
			pt2,l = e3(l)
			pt = encapsulate(pt, TokType.AND)
			pt.push(pt2)
	except ParseError:
		pass
	return pt,l

def e1(l):
	pt,l = e2(l)
	typ,_ = classify_head(l)

	try:
		while typ == TokType.OR:
			pt2,l = e2(l[1:])
			pt = encapsulate(pt, TokType.OR)
			pt.push(pt2)
			typ,_ = classify_head(l)
	except ParseError:
		pass
	return pt,l

def parse(l):
	x,l = e1(l)
	if l != []:
		raise ParseError
	return x