Python의 장점은 쓸 만한 라이브러리를 누군가 이미 만들어 공유해 둔 것이 많다는 점인데, 이번에는 마땅한 것을 찾지 못해 검색 쿼리 파서를 직접 만들었다. ORM 조작은 다른 곳에서 처리하므로 여기서는 구문 트리만 뽑아낸다.
from ply import lex, yacc

# Public API.  The trailing comma matters: without it the parentheses are only
# grouping and ``__all__`` would be a plain string, which breaks
# ``from <module> import *``.
__all__ = ('parse_search_query',)

# Token names required by PLY; each entry is matched to a ``t_<NAME>`` rule.
tokens = (
    'PAREN_L',
    'PAREN_R',
    'MINUS',
    'FIELD',
    'STRING',
    'AND',
    'OR',
    'NOT',
)

# Single-character tokens, given as plain regex strings.
t_PAREN_L = r"\("
t_PAREN_R = r'\)'
t_MINUS = r'-'
t_AND = r'&'
t_OR = r'\|'
t_NOT = r'!'

# Characters skipped between tokens (the space character only).
t_ignore = r" "


# NOTE: PLY uses the docstring of every ``t_*`` function as that token's
# regular expression, so those docstrings must contain the pattern and nothing
# else.  Function rules are tried in definition order: FIELD has to come before
# STRING, otherwise "name:" would lex as STRING followed by an illegal ':'.

def t_FIELD(t):
    r'\s?[a-z0-9가-힣_]+:'
    return t


def t_STRING(t):
    r'[a-z0-9가-힣 ]+'
    # The character class contains a space, so a STRING may span several
    # words and may carry trailing spaces; callers strip the value if needed.
    return t


def t_error(t):
    # Report the offending character and continue lexing after it.
    print('illegal character %s' % t.value[0])
    t.lexer.skip(1)


# Keep an explicit handle on the lexer instead of relying on PLY's
# module-global "most recently built lexer".
_lexer = lex.lex()


class BinaryOp(object):
    """Inner tree node: two sub-expressions joined by ``'and'`` or ``'or'``."""

    # Class-level defaults; every instance overwrites them in ``__init__``.
    op = ''
    left = None
    right = None

    def __init__(self, op, left, right):
        self.op = op
        self.left = left
        self.right = right

    def __repr__(self):
        return '%s(op=%r, left=%r, right=%r)' % (
            type(self).__name__, self.op, self.left, self.right)


class Term(object):
    """A ``field: factor`` pair; ``field`` is the name without the colon."""

    # Class-level defaults; every instance overwrites them in ``__init__``.
    field = ''
    factor = None

    def __init__(self, factor, field=None):
        self.factor = factor
        self.field = field

    def __repr__(self):
        return '%s(field=%r, factor=%r)' % (
            type(self).__name__, self.field, self.factor)


class Factor(object):
    """A string value with an optional unary op (``'minus'`` or ``'not'``)."""

    # Class-level defaults; every instance overwrites them in ``__init__``.
    op = None
    value = ''

    def __init__(self, value, op=None):
        self.value = value
        self.op = op

    def __repr__(self):
        return '%s(op=%r, value=%r)' % (
            type(self).__name__, self.op, self.value)


# Operator symbol -> name stored on the tree nodes.
_BINARY_OPS = {
    '&': 'and',
    '|': 'or',
}
_UNARY_OPS = {
    '-': 'minus',
    '!': 'not',
}


# NOTE: PLY uses the docstring of every ``p_*`` function as its grammar rule,
# so explanations live in comments, never in those docstrings.  The first
# rule defined (``expr``) is the start symbol.

def p_expr(p):
    """
    expr : expr binary_op expr
         | PAREN_L expr PAREN_R
         | term
    """
    # NOTE(review): ``expr binary_op expr`` is ambiguous and no precedence is
    # declared, so PLY presumably reports shift/reduce conflicts here; with
    # its default "shift" resolution ``a & b | c`` nests to the right as
    # ``a & (b | c)``.  Left unchanged so existing parse trees stay the same.
    if len(p) > 3:
        if p[1] == '(' and p[3] == ')':
            # Parenthesised group: pass the inner expression through.
            p[0] = p[2]
        else:
            p[0] = BinaryOp(op=p[2], left=p[1], right=p[3])
    else:
        p[0] = p[1]


def p_binary_op(p):
    """
    binary_op : AND
              | OR
    """
    p[0] = _BINARY_OPS[p[1]]


def p_term(p):
    """
    term : FIELD factor
         | STRING
    """
    if len(p) > 2:
        # Drop the trailing ':' and any whitespace the FIELD pattern's
        # leading ``\s?`` captured (t_ignore only skips the space character,
        # so e.g. a tab would otherwise end up inside the field name).
        p[0] = Term(field=p[1][:-1].strip(), factor=p[2])
    else:
        # A bare string without a field yields a Factor, not a Term.
        p[0] = Factor(value=p[1])


def p_factor(p):
    """
    factor : STRING
           | unary_op STRING
    """
    if len(p) > 2:
        p[0] = Factor(op=p[1], value=p[2])
    else:
        p[0] = Factor(op=None, value=p[1])


def p_unary_op(p):
    """
    unary_op : MINUS
             | NOT
    """
    p[0] = _UNARY_OPS[p[1]]


def p_error(error):
    # Abort on the first syntax error instead of attempting recovery.
    raise SyntaxError('QueryParser says: %s' % str(error))


# Keep an explicit handle on the parser for the same reason as the lexer.
_parser = yacc.yacc()


def parse_search_query(string):
    """Parse a search query and return the root node of its syntax tree.

    Only the tree is built here; nothing is evaluated against a data store.

    Args:
        string: The query text, e.g. ``"(this: is & that: -was) | other: x"``.

    Returns:
        A ``BinaryOp`` for ``&`` / ``|`` expressions, a ``Term`` for a single
        ``field: value`` pair, or a ``Factor`` for a bare string.

    Raises:
        SyntaxError: Raised by ``p_error`` when the parser reports an error.
    """
    # Pass the lexer explicitly so that another PLY lexer built elsewhere in
    # the process cannot be picked up by accident.
    return _parser.parse(string, lexer=_lexer)
Test code:
from unittest import TestCase

from extension.search_query import parse_search_query


class ParserTest(TestCase):
    """Checks the syntax tree produced by ``parse_search_query``."""

    # Two parenthesised groups joined by '|', each holding two 'field: value'
    # terms joined by '&'; the last value carries a unary '-'.
    text1 = "(주소: 대전광역시 서구 & that: was) | (this: is & that: -was)"
    # A bare value with no field name.
    text2 = '9990'

    def test_parser(self):
        # Compound query: the root joins the two groups with 'or'.
        tree = parse_search_query(self.text1)
        self.assertEqual('or', tree.op)

        # Second term of the first group.
        first_group = tree.left
        self.assertEqual('that', first_group.right.field)

        # Second group: unary minus on the last term, plain value on the first
        # (stripped before comparing).
        second_group = tree.right
        self.assertEqual('minus', second_group.right.factor.op)
        self.assertEqual('is', second_group.left.factor.value.strip())

        # Bare value: the returned node exposes the text directly as ``value``.
        bare = parse_search_query(self.text2)
        self.assertEqual(self.text2, bare.value)
Dependency: PLY (Python Lex-Yacc)