Python의 장점은 적당히 쓸 만한 라이브러리를 누군가 이미 만들어 공유해 둔 것이 잔뜩 있다는 점인데, 이번에는 운이 없었는지 마땅한 것을 찾지 못해 검색 쿼리 파서를 직접 만들었다. ORM 조작은 다른 곳에서 처리하므로 여기서는 구문 트리만 뽑아낸다.



from ply import lex, yacc

# Public API of this module. The trailing comma is required: without it the
# parentheses are just grouping and __all__ would be a plain string, which
# `from module import *` would iterate character by character.
__all__ = (
    'parse_search_query',
)


# Token names handed to PLY; each one has a matching ``t_<NAME>`` rule below.
tokens = (
    # grouping
    'PAREN_L', 'PAREN_R',
    # unary minus, field prefix and plain text
    'MINUS', 'FIELD', 'STRING',
    # logical operators
    'AND', 'OR', 'NOT',
)

# Single-character tokens, each given as a regular expression.
t_PAREN_L = r'\('   # opening parenthesis
t_PAREN_R = r'\)'   # closing parenthesis
t_MINUS = r'-'      # unary "minus" marker in front of a value
t_AND = r'&'        # binary AND
t_OR = r'\|'        # binary OR
t_NOT = r'!'        # unary NOT

# Characters skipped between tokens: a single space character.
t_ignore = r' '


# FIELD token: a field name (lowercase ASCII letters, digits, Hangul syllables
# or underscore) immediately followed by ':'.  The docstring below is the
# regular expression PLY uses for this rule, so it must not be reworded.
#
# The pattern tolerates one leading whitespace character (for example a tab or
# a newline).  That character is trimmed from the token value here; otherwise
# it would leak into the field name, because the parser only removes the
# trailing ':' from this token.
def t_FIELD(t):
    r'\s?[a-z0-9가-힣_]+:'
    t.value = t.value.lstrip()
    return t


# STRING token: a run of lowercase ASCII letters, digits, Hangul syllables and
# spaces.  The docstring below is the regular expression PLY uses for this
# rule.  Because the character class includes the space, the token value can
# carry embedded and trailing spaces.
def t_STRING(t):
    r'[a-z0-9가-힣 ]+'
    # NOTE(review): str() leaves a Python 3 text value unchanged; presumably a
    # leftover from Python 2 -- confirm before removing.
    t.value = str(t.value)
    return t


# Lexer error hook: report the first character of the unmatched input and
# step over it so that scanning can continue.
def t_error(t):
    offending_char = t.value[0]
    print('illegal character %s' % offending_char)
    t.lexer.skip(1)


# Build the lexer at import time from the token rules declared above.
lex.lex()


class BinaryOp(object):
    """Syntax-tree node joining two sub-expressions with a binary operator."""

    # Class-level defaults; every instance overrides them in __init__.
    op = ''        # operator name as produced by the grammar ('and' / 'or')
    left = None    # left operand node
    right = None   # right operand node

    def __init__(self, op, left, right):
        """Store the operator name and both operand nodes."""
        self.op, self.left, self.right = op, left, right


class Term(object):
    """Syntax-tree node pairing a field name with the factor it applies to."""

    # Class-level defaults; every instance overrides them in __init__.
    field = ''      # field name
    factor = None   # the value node attached to the field

    def __init__(self, factor, field=None):
        """Store the factor and the (optional) field name."""
        self.field = field
        self.factor = factor


class Factor(object):
    """Syntax-tree leaf holding a value and an optional unary operator."""

    # Class-level defaults; every instance overrides them in __init__.
    op = None    # unary operator name ('minus' / 'not') or None
    value = ''   # the matched text

    def __init__(self, value, op=None):
        """Store the value and the (optional) unary operator name."""
        self.op = op
        self.value = value


# Grammar rule for expressions.  The docstring is the grammar PLY reads, so it
# must stay exactly as written.
def p_expr(p):
    """
    expr : expr binary_op expr
         | PAREN_L expr PAREN_R
         | term
    """
    # Single-symbol production (``term``): pass the node through unchanged.
    if len(p) <= 3:
        p[0] = p[1]
        return

    first, middle, last = p[1], p[2], p[3]
    if first == '(' and last == ')':
        # Parenthesised expression: the result is the inner expression.
        p[0] = middle
    else:
        # ``expr binary_op expr``: combine both sides under the operator.
        p[0] = BinaryOp(op=middle, left=first, right=last)


# Grammar rule translating a binary operator symbol into its name.  The
# docstring is the grammar PLY reads, so it must stay exactly as written.
def p_binary_op(p):
    """
    binary_op : AND
              | OR
    """
    symbol = p[1]
    name_by_symbol = {'&': 'and', '|': 'or'}
    p[0] = name_by_symbol[symbol]


# Grammar rule for terms.  The docstring is the grammar PLY reads, so it must
# stay exactly as written.
def p_term(p):
    """
    term : FIELD factor
         | STRING
    """
    # Bare STRING: wrap the text in a Factor without any field.
    if len(p) <= 2:
        p[0] = Factor(value=p[1])
        return

    # FIELD factor: drop the last character of the FIELD token (the ':').
    field_name = p[1][:-1]
    p[0] = Term(field=field_name, factor=p[2])


# Grammar rule for factors.  The docstring is the grammar PLY reads, so it must
# stay exactly as written.
def p_factor(p):
    """
    factor : STRING
           | unary_op STRING
    """
    if len(p) > 2:
        # unary_op STRING
        operator, text = p[1], p[2]
    else:
        # plain STRING, no operator
        operator, text = None, p[1]
    p[0] = Factor(op=operator, value=text)


# Grammar rule translating a unary operator symbol into its name.  The
# docstring is the grammar PLY reads, so it must stay exactly as written.
def p_unary_op(p):
    """
    unary_op : MINUS
             | NOT
    """
    p[0] = {'-': 'minus', '!': 'not'}[p[1]]


# Parser error hook: turn any syntax error into a SyntaxError exception.
# PLY passes the offending token, or None when the input ends before a
# complete expression has been read; the None case gets its own message
# instead of the uninformative "QueryParser says: None".
def p_error(error):
    if error is None:
        raise SyntaxError('QueryParser says: unexpected end of input')
    raise SyntaxError('QueryParser says: %s' % str(error))


# Build the parser at import time from the p_* grammar functions above.
# NOTE(review): `expr : expr binary_op expr` has no precedence declaration, so
# the grammar looks ambiguous for chained operators -- confirm how PLY
# resolves it before relying on the grouping of `a & b | c`.
yacc.yacc()


def parse_search_query(string):
    """Parse a search query and return its syntax tree.

    The text is handed to the module-level PLY parser; the result is whatever
    the grammar actions build (BinaryOp, Term or Factor nodes).  Syntax errors
    surface as SyntaxError, raised by the p_error hook.
    """
    syntax_tree = yacc.parse(string)
    return syntax_tree


Test code:

from unittest import TestCase
from extension.search_query import parse_search_query


class ParserTest(TestCase):
    """Checks the syntax tree that parse_search_query builds for sample queries."""

    # Two parenthesised AND groups joined by OR, with a field-prefixed value
    # and a unary minus.
    text1 = "(주소: 대전광역시 서구 & that: was) | (this: is & that: -was)"

    # A bare value without any field.
    text2 = '9990'

    def test_parser(self):
        """Verify operators, field names and values in the resulting trees."""
        tree = parse_search_query(self.text1)
        self.assertEqual('or', tree.op)
        self.assertEqual('that', tree.left.right.field)

        right_group = tree.right
        self.assertEqual('minus', right_group.right.factor.op)
        # The STRING token may carry trailing spaces, hence the strip().
        self.assertEqual('is', right_group.left.factor.value.strip())

        bare = parse_search_query(self.text2)
        self.assertEqual(self.text2, bare.value)

Dependency: PLY (Python Lex-Yacc)