Python의 장점은 쓸 만한 라이브러리를 누군가 이미 만들어 공유해 둔 경우가 많다는 점인데, 이번에는 마땅한 것을 찾지 못해 직접 만들었다. ORM 조작은 다른 곳에서 처리하므로 여기서는 구문 트리만 뽑아낸다.
from ply import lex, yacc
# Public API of this module.
# The trailing comma is required: without it the parentheses are just grouping
# and __all__ would be the plain string 'parse_search_query', which makes
# ``from <module> import *`` try to import one-character names and fail.
__all__ = (
    'parse_search_query',
)
# Token names handed to PLY; the grammar docstrings of the p_* functions in
# this module refer to exactly these names.
tokens = (
    'PAREN_L',
    'PAREN_R',
    'MINUS',
    'FIELD',
    'STRING',
    'AND',
    'OR',
    'NOT'
)
# Regular expressions for the single-character tokens. PLY's lex picks up
# module-level names of the form t_<TOKEN> as the pattern for <TOKEN>.
t_PAREN_L = r"\("
t_PAREN_R = r'\)'
t_MINUS = r'-'  # unary prefix; translated to 'minus' by p_unary_op
t_AND = r'&'    # translated to 'and' by p_binary_op
t_OR = r'\|'    # translated to 'or' by p_binary_op
t_NOT = r'!'    # unary prefix; translated to 'not' by p_unary_op
# Characters the lexer skips between tokens (PLY's t_ignore convention); only
# the space character is listed. The STRING pattern itself accepts spaces, so
# spaces inside or trailing a STRING remain part of that token's value.
t_ignore = r" "
# Lexer rule for a field prefix such as ``name:``.
#
# The string on the first line of the body is NOT documentation: PLY's lex
# reads a rule function's docstring as the token's regular expression, so it
# must stay the first statement and must not be reworded. The pattern accepts
# one optional whitespace character, then one or more lowercase ASCII letters,
# digits, Hangul syllables or underscores, then a colon.
#
# The matched text (colon included) is returned unchanged; p_term removes the
# trailing colon. This rule is defined before t_STRING; PLY tries
# function-defined rules in definition order, so ``name:`` lexes as FIELD.
def t_FIELD(t):
    r'\s?[a-z0-9가-힣_]+:'
    return t
# Lexer rule for a free-text value: one or more lowercase ASCII letters,
# digits, Hangul syllables or spaces.
#
# The docstring is the token's regular expression as consumed by PLY, not
# documentation, and has to remain the first statement of the body.
# The token's value is coerced with str() and the same token object is
# returned to the lexer.
def t_STRING(t):
    r'[a-z0-9가-힣 ]+'
    matched = t.value
    t.value = str(matched)
    return t
# Lexer error hook: report the first character of the unmatched input on
# stdout, then advance the lexer by one character so scanning continues
# instead of stopping at the bad character.
def t_error(t):
    offending = t.value[0]
    print('illegal character %s' % offending)
    t.lexer.skip(1)
# Build the lexer at import time. The returned lexer object is not bound to a
# name here, so later parsing relies on PLY's own module-level state.
lex.lex()
class BinaryOp(object):
    """Syntax-tree node joining two sub-expressions with a binary operator.

    Attributes:
        op: operator name as produced by p_binary_op ('and' or 'or').
        left: node on the left-hand side of the operator.
        right: node on the right-hand side of the operator.
    """

    # Class-level placeholders; __init__ always overrides them per instance.
    left = None
    right = None
    op = ''

    def __init__(self, op, left, right):
        self.op, self.left, self.right = op, left, right
class Term(object):
    """Syntax-tree node pairing a field name with the factor applied to it.

    Attributes:
        factor: the value node attached to the field.
        field: the field name, or None when the caller does not supply one.
    """

    # Class-level placeholders; __init__ always overrides them per instance.
    factor = None
    field = ''

    def __init__(self, factor, field=None):
        self.field = field
        self.factor = factor
class Factor(object):
    """Syntax-tree leaf holding a value and an optional unary operator.

    Attributes:
        value: the text of the value.
        op: unary operator name as produced by p_unary_op ('minus' or 'not'),
            or None when the value has no operator.
    """

    # Class-level placeholders; __init__ always overrides them per instance.
    value = ''
    op = None

    def __init__(self, value, op=None):
        self.op = op
        self.value = value
# Grammar action for ``expr``. The docstring is the grammar specification
# read by PLY's yacc, not documentation, so it must stay the first statement.
#
# Resulting value in p[0]:
#   * ``term``                   -> the term's node, passed through unchanged
#   * ``PAREN_L expr PAREN_R``   -> the inner expression's node
#   * ``expr binary_op expr``    -> a BinaryOp built from the three parts
def p_expr(p):
    """
    expr : expr binary_op expr
         | PAREN_L expr PAREN_R
         | term
    """
    # Short production (a lone term): nothing to combine.
    if len(p) <= 3:
        p[0] = p[1]
        return

    first, middle, last = p[1], p[2], p[3]
    # The parenthesised form is recognised by its literal '(' / ')' values.
    if first == '(' and last == ')':
        p[0] = middle
    else:
        p[0] = BinaryOp(op=middle, left=first, right=last)
# Grammar action for ``binary_op``: translate the operator symbol into the
# operator name stored in the tree ('&' -> 'and', '|' -> 'or').
# The docstring is the grammar specification read by PLY's yacc.
def p_binary_op(p):
    """
    binary_op : AND
              | OR
    """
    symbol = p[1]
    p[0] = {'&': 'and', '|': 'or'}[symbol]
# Grammar action for ``term``. The docstring is the grammar specification
# read by PLY's yacc, not documentation.
#
# Resulting value in p[0]:
#   * ``STRING``         -> a bare Factor holding the text (no Term wrapper)
#   * ``FIELD factor``   -> a Term whose field is the FIELD text minus its
#                           final character (the trailing colon)
def p_term(p):
    """
    term : FIELD factor
         | STRING
    """
    if len(p) <= 2:
        p[0] = Factor(value=p[1])
        return

    field_text, factor_node = p[1], p[2]
    p[0] = Term(field=field_text[:-1], factor=factor_node)
# Grammar action for ``factor``: wrap a STRING, with its unary operator when
# one precedes it, in a Factor node. The docstring is the grammar
# specification read by PLY's yacc, not documentation.
def p_factor(p):
    """
    factor : STRING
           | unary_op STRING
    """
    if len(p) > 2:
        operator, text = p[1], p[2]
    else:
        operator, text = None, p[1]
    p[0] = Factor(op=operator, value=text)
# Grammar action for ``unary_op``: translate the operator symbol into the
# operator name stored on the Factor ('-' -> 'minus', '!' -> 'not').
# The docstring is the grammar specification read by PLY's yacc.
def p_unary_op(p):
    """
    unary_op : MINUS
             | NOT
    """
    symbol = p[1]
    p[0] = {'-': 'minus', '!': 'not'}[symbol]
# Parser error hook: abort parsing by raising SyntaxError whose message embeds
# the string form of whatever the parser passed in as ``error``.
def p_error(error):
    description = str(error)
    raise SyntaxError('QueryParser says: %s' % description)
# Build the parser at import time. The returned parser object is not kept;
# parse_search_query calls the module-level yacc.parse instead, which (per
# PLY) is only usable once yacc.yacc() has run.
yacc.yacc()
def parse_search_query(string):
    """Parse a search-query string and return the root of its syntax tree.

    Parsing is delegated to the module-level ``yacc.parse``. Judging by the
    grammar actions in this module, the result is a BinaryOp, Term or Factor
    node.

    Raises:
        SyntaxError: raised by p_error when the parser reports an error.
    """
    syntax_tree = yacc.parse(string)
    return syntax_tree
Test code:
from unittest import TestCase
from extension.search_query import parse_search_query
class ParserTest(TestCase):
    """Checks the tree that parse_search_query builds for two sample queries."""

    # Two parenthesised AND groups joined by OR, with Hangul text and a '-' prefix.
    text1 = "(주소: 대전광역시 서구 & that: was) | (this: is & that: -was)"
    # A bare value with no field prefix.
    text2 = '9990'

    def test_parser(self):
        tree = parse_search_query(self.text1)
        self.assertEqual('or', tree.op)

        left_group = tree.left
        self.assertEqual('that', left_group.right.field)

        right_group = tree.right
        self.assertEqual('minus', right_group.right.factor.op)
        # The parsed value is stripped before comparison because it can carry
        # surrounding spaces from the query text.
        self.assertEqual('is', right_group.left.factor.value.strip())

        # A bare value comes back as a node exposing .value directly.
        bare = parse_search_query(self.text2)
        self.assertEqual(self.text2, bare.value)
Dependency: PLY (Python Lex-Yacc)