1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
|
from enum import Enum
class TokType(Enum):
OR = 0
NOT = 1
TAGGLOB = 2
TITLEGLOB = 3
TAGLIT = 4
EOF = 5
AND = 6
OPEN = 7
CLOSE = 8
TAG = 9
def classify_head(l):
if len(l) == 0:
return (TokType.EOF, None)
s = l[0]
if s == "OR":
return (TokType.OR,None)
elif s == "NOT":
return (TokType.NOT,None)
elif s == "[":
return (TokType.OPEN,None)
elif s == "]":
return (TokType.CLOSE,None)
elif s[:2] == "~~":
return (TokType.TAGGLOB, s[2:])
elif s[:2] == "~-":
return (TokType.TITLEGLOB, s[2:])
else:
return (TokType.TAG, s)
class Atom:
def __str__(self):
return f'({self.op} "{self.v}")'
def __init__(self, op, v):
self.op = op
self.v = v
class ParseTree:
def push(self, pt):
self.l.append(pt)
def __str__(self):
s = f'({self.op}'
for i in self.l:
s = s + ' ' + str(i)
return s + ')'
def __init__(self, op, l):
self.op = op
self.l = l
def encapsulate(pt, op):
if pt.op != op:
pt = ParseTree(op, [pt])
return pt
"""
The parser is implemented like
e1 ::= e2 ("OR" e2)*;
e2 ::= e3*;
e3 ::= "NOT"? e4;
e4 ::= TAG | "[" e1 "]";
"""
class ParseError(Exception):
pass
def e4(l):
typ,s = classify_head(l)
if typ == TokType.OPEN:
pt,l = e1(l)
typ,_ = classify_head(l)
if typ != TokType.CLOSE:
raise ParseError
return pt,l[1:]
elif typ == TokType.TAG:
return ParseTree(TokType.TAG, s), l[1:]
else:
raise ParseError
def e3(l):
typ,_ = classify_head(l)
if typ == TokType.NOT:
return encapsulate(e4(l[1:]), \
TokType.NOT)
return e4(l)
def e2(l):
pt,l = e3(l)
try:
pt = encapsulate(pt, TokType.AND)
while True:
pt2,l = e3(l)
pt.push(pt2)
except ParseError:
pass
return pt,l
def e1(l):
pt,l = e2(l)
typ,_ = classify_head(l)
try:
pt = encapsulate(pt, TokType.OR)
while typ == TokType.OR:
pt2,l = e2(l[1:])
pt.push(pt2)
typ,_ = classify_head(l)
except ParseError:
pass
return pt,l
def parse(l):
x,l = e1(l)
if l != []:
raise ParseError
return x
|