From 2374a6a6cfe4619a266a65a6eda904c53ff696ed Mon Sep 17 00:00:00 2001
From: Nikolai Kondrashov
Date: Tue, 8 Sep 2020 16:38:10 +0300
Subject: [PATCH] Add parse()

Add an implementation of the parse() function accepting either text or a
text iterator and producing an iterable returning parsed values.

This allows parsing JSON and JSON streams without passing them through a
program.
---
Reviewer note: _ready_next_bytes() now passes len(self._bytes) instead of
len(cbytes), and docstrings/comments were added. The commit id above and
the post-image blob hash on the jq.pyx "index" line predate these edits.

 jq.pyx            | 84 +++++++++++++++++++++++++++++++++++++++++++++++
 tests/jq_tests.py | 47 ++++++++++++++++++++++++++
 2 files changed, 131 insertions(+)

diff --git a/jq.pyx b/jq.pyx
index cbb7b5d..c5835e2 100644
--- a/jq.pyx
+++ b/jq.pyx
@@ -106,6 +106,77 @@ cdef object _jv_to_python(jv value):
     return python_value
 
 
+cdef class _Parser(object):
+    """
+    Iterator over the JSON values parsed from an iterator of text chunks.
+
+    Chunks are pulled from the text iterator one at a time and fed to a jq
+    parser, so a single value may be split across several chunks.
+    """
+    cdef jv_parser* _parser
+    cdef object _text_iter
+    # The encoded chunk currently handed to the parser, or None. The parser
+    # is given a pointer into this object's buffer, so keeping the reference
+    # here keeps that buffer alive while the parser reads from it.
+    cdef object _bytes
+
+    def __dealloc__(self):
+        jv_parser_free(self._parser)
+
+    def __cinit__(self, text_iter):
+        self._parser = jv_parser_new(0)
+        self._text_iter = text_iter
+        self._bytes = None
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        """
+        Return the next parsed value.
+
+        Raises ValueError if the parser reports an error, and StopIteration
+        once the text iterator is exhausted and no values are left.
+        """
+        cdef jv value
+        while True:
+            # If we have no bytes to parse
+            if self._bytes is None:
+                # Ready some more
+                self._ready_next_bytes()
+            # Parse whatever we've readied, if any
+            value = jv_parser_next(self._parser)
+            if jv_is_valid(value):
+                return _jv_to_python(value)
+            elif jv_invalid_has_msg(jv_copy(value)):
+                error_message = jv_invalid_get_msg(value)
+                message = jv_string_value(error_message).decode("utf8")
+                jv_free(error_message)
+                raise ValueError(u"parse error: " + message)
+            else:
+                jv_free(value)
+            # If we didn't ready any bytes
+            if self._bytes is None:
+                raise StopIteration
+            self._bytes = None
+
+    cdef bint _ready_next_bytes(self) except 1:
+        # Fetch and encode the next text chunk and hand it to the parser as
+        # partial input; once the text iterator is exhausted, hand over an
+        # empty, final buffer instead and leave self._bytes as None.
+        cdef char* cbytes
+        try:
+            self._bytes = next(self._text_iter).encode("utf8")
+            cbytes = PyBytes_AsString(self._bytes)
+            # Pass the length of the bytes object: len() of a char* is a
+            # strlen(), which would cut the chunk at the first NUL byte.
+            jv_parser_set_buf(self._parser, cbytes, len(self._bytes), 1)
+        except StopIteration:
+            self._bytes = None
+            jv_parser_set_buf(self._parser, "", 0, 0)
+        return 0
+
+
 def compile(object program):
     cdef object program_bytes = program.encode("utf8")
     return _Program(program_bytes)
@@ -341,6 +412,19 @@ def text(program, value=_NO_VALUE, text=_NO_VALUE):
     return compile(program).input(value, text=text).text()
 
 
+def parse(text=_NO_VALUE, text_iter=_NO_VALUE):
+    """
+    Return an iterator over the JSON values parsed from the given text.
+
+    Exactly one of the two arguments must be given: text is a single string,
+    text_iter is an iterator returning strings to be parsed as one stream.
+    Raises ValueError if neither or both of them are given.
+    """
+    if (text is _NO_VALUE) == (text_iter is _NO_VALUE):
+        raise ValueError("Either the text or text_iter argument should be set")
+    return _Parser(text_iter if text_iter is not _NO_VALUE else _iter((text,)))
+
+
 # Support the 0.1.x API for backwards compatibility
 def jq(object program):
     return compile(program)
diff --git a/tests/jq_tests.py b/tests/jq_tests.py
index 6b63556..f5e7abe 100644
--- a/tests/jq_tests.py
+++ b/tests/jq_tests.py
@@ -196,6 +196,53 @@ def program_string_can_be_retrieved_from_program():
     program = jq.compile(".")
     assert_equal(".", program.program_string)
 
+@istest
+def parse_both_text_and_text_iter_accepted():
+    assert_equal(True, next(jq.parse(text="true")))
+    assert_equal(True, next(jq.parse(text_iter=iter(["true"]))))
+
+@istest
+def parse_empty_text_iter_stops():
+    assert_raises(StopIteration, next, jq.parse(text_iter=iter([])))
+    assert_raises(StopIteration, next, jq.parse(text_iter=iter([""])))
+    assert_raises(StopIteration, next, jq.parse(text_iter=iter(["", ""])))
+
+@istest
+def parse_single_complete_text_iter_works():
+    assert_equal(False, next(jq.parse(text_iter=iter(["false"]))))
+    assert_equal(True, next(jq.parse(text_iter=iter(["true"]))))
+    assert_equal(42, next(jq.parse(text_iter=iter(["42"]))))
+    assert_equal(-42, next(jq.parse(text_iter=iter(["-42"]))))
+    assert_equal("42", next(jq.parse(text_iter=iter(['"42"']))))
+    assert_equal([42], next(jq.parse(text_iter=iter(["[42]"]))))
+    assert_equal(dict(a=42), next(jq.parse(text_iter=iter(['{"a": 42}']))))
+
+@istest
+def parse_multi_complete_text_iter_works():
+    assert_equal(False, next(jq.parse(text_iter=iter(["fa", "lse"]))))
+    assert_equal(True, next(jq.parse(text_iter=iter(["tr", "ue"]))))
+    assert_equal(42, next(jq.parse(text_iter=iter(["4", "2"]))))
+    assert_equal(-42, next(jq.parse(text_iter=iter(["-4", "2"]))))
+    assert_equal("42", next(jq.parse(text_iter=iter(['"4', '2"']))))
+    assert_equal([42], next(jq.parse(text_iter=iter(["[4", "2]"]))))
+    assert_equal(dict(a=42), next(jq.parse(text_iter=iter(['{"a":', ' 42}']))))
+
+@istest
+def parse_single_incomplete_text_iter_breaks():
+    assert_raises(ValueError, next, jq.parse(text_iter=iter(["fals"])))
+    assert_raises(ValueError, next, jq.parse(text_iter=iter(["tru"])))
+    assert_raises(ValueError, next, jq.parse(text_iter=iter(["-"])))
+    assert_raises(ValueError, next, jq.parse(text_iter=iter(['"42'])))
+    assert_raises(ValueError, next, jq.parse(text_iter=iter(["[42"])))
+    assert_raises(ValueError, next, jq.parse(text_iter=iter(['{"a": 42'])))
+
+@istest
+def parse_multi_incomplete_text_iter_breaks():
+    assert_raises(ValueError, next, jq.parse(text_iter=iter(["fa", "ls"])))
+    assert_raises(ValueError, next, jq.parse(text_iter=iter(["tr", "u"])))
+    assert_raises(ValueError, next, jq.parse(text_iter=iter(['"4', '2'])))
+    assert_raises(ValueError, next, jq.parse(text_iter=iter(["[4", "2"])))
+    assert_raises(ValueError, next, jq.parse(text_iter=iter(['{"a":', ' 42'])))
 
 @istest
 def program_preserves_null():