Skip to content

Commit

Permalink
Add parse_json() and parse_json_file()
Browse files Browse the repository at this point in the history
Add an implementation of parse_json() function accepting either text or
a text iterator and producing an iterable returning parsed values.

Add a naive implementation of parse_json_file() function accepting a
text file object and producing an iterable returning parsed values.

This allows parsing JSON and JSON streams without passing them through a
program.
  • Loading branch information
spbnick committed Feb 10, 2021
1 parent 0c95036 commit 6b8bf75
Show file tree
Hide file tree
Showing 2 changed files with 171 additions and 0 deletions.
105 changes: 105 additions & 0 deletions jq.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,71 @@ cdef object _jv_to_python(jv value):
return python_value


class JSONParseError(Exception):
    """Raised when input text cannot be parsed as JSON."""


cdef class _JSONParser(object):
    """
    Iterator parsing a stream of JSON text pieces into Python values.

    Pieces are pulled lazily from the supplied text iterator, encoded as
    UTF-8 and fed to a jq parser; each call to next() returns one parsed
    value.
    """
    # The underlying jq parser state, owned by this object
    cdef jv_parser* _parser
    # The iterator supplying pieces (strings) of the JSON stream
    cdef object _text_iter
    # The encoded piece currently handed to the parser, or None if the
    # parser needs more input. The reference is held for as long as the
    # parser may read from the buffer.
    cdef object _bytes

    def __dealloc__(self):
        jv_parser_free(self._parser)

    def __cinit__(self, text_iter):
        self._parser = jv_parser_new(0)
        self._text_iter = text_iter
        self._bytes = None

    def __iter__(self):
        return self

    def __next__(self):
        """
        Retrieve next parsed JSON value.

        Returns:
            The next parsed JSON value.

        Raises:
            JSONParseError: failed parsing the input JSON.
            StopIteration: no more values available.
        """
        cdef jv value
        while True:
            # If we have no bytes to parse
            if self._bytes is None:
                # Ready some more
                self._ready_next_bytes()
            # Parse whatever we've readied, if any
            value = jv_parser_next(self._parser)
            if jv_is_valid(value):
                return _jv_to_python(value)
            elif jv_invalid_has_msg(jv_copy(value)):
                # An invalid value carrying a message is a parse error;
                # jv_invalid_get_msg() takes over the reference to "value"
                error_message = jv_invalid_get_msg(value)
                message = jv_string_value(error_message).decode("utf8")
                jv_free(error_message)
                raise JSONParseError(message)
            else:
                # An invalid value without a message: the parser has
                # consumed everything it was given
                jv_free(value)
            # If we didn't ready any bytes
            if self._bytes is None:
                raise StopIteration
            # The current piece is used up, fetch another one next time
            self._bytes = None

    cdef bint _ready_next_bytes(self) except 1:
        """
        Hand the next piece of text to the parser.

        Sets self._bytes to the encoded piece, or to None (and marks the
        parser input as finished) if the text iterator is exhausted.

        Returns:
            Zero on success; exceptions other than StopIteration raised by
            the text iterator or by encoding are propagated.
        """
        cdef char* cbytes
        try:
            self._bytes = next(self._text_iter).encode("utf8")
        except StopIteration:
            # No more text: supply an empty, final (non-partial) buffer
            self._bytes = None
            jv_parser_set_buf(self._parser, "", 0, 0)
            return 0
        cbytes = PyBytes_AsString(self._bytes)
        # Take the length from the bytes object: len() of a char* is a
        # strlen(), which would silently drop everything after an embedded
        # NUL byte instead of letting the parser see (and reject) it.
        jv_parser_set_buf(self._parser, cbytes, len(self._bytes), 1)
        return 0


def compile(object program, args=None):
    """
    Create a program object from a jq program string.

    Args:
        program: The program text; it is encoded as UTF-8.
        args: Passed through to the program object as "args".

    Returns:
        The created _Program instance.
    """
    return _Program(program.encode("utf8"), args=args)
Expand Down Expand Up @@ -356,6 +421,46 @@ def text(program, value=_NO_VALUE, text=_NO_VALUE):
return compile(program).input(value, text=text).text()


def parse_json(text=_NO_VALUE, text_iter=_NO_VALUE):
    """
    Parse a JSON stream.

    Exactly one of "text" and "text_iter" must be specified.

    Args:
        text: A string containing the JSON stream to parse.
        text_iter: An iterator returning strings - pieces of the JSON
            stream to parse.

    Returns:
        An iterator returning parsed values.

    Raises:
        ValueError: neither or both of the arguments were specified.
        JSONParseError: failed parsing the input JSON stream.
    """
    has_text = text is not _NO_VALUE
    has_text_iter = text_iter is not _NO_VALUE
    if has_text == has_text_iter:
        raise ValueError("Either the text or text_iter argument should be set")
    if has_text_iter:
        return _JSONParser(text_iter)
    # Present the single string as a one-piece stream
    return _JSONParser(_iter((text,)))


def parse_json_file(fp):
    """
    Parse a JSON stream read from a file.

    The whole file is read with a single fp.read() call before parsing.

    Args:
        fp: The file-like object to read the JSON stream from.
            Must be in text mode.

    Returns:
        An iterator returning parsed values.

    Raises:
        JSONParseError: failed parsing the JSON stream.
    """
    contents = fp.read()
    return parse_json(text=contents)


# Kept so that code written against the 0.1.x API keeps working
def jq(object program):
    """Compile a jq program; equivalent to calling compile(program)."""
    return compile(program)
66 changes: 66 additions & 0 deletions tests/jq_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from nose.tools import istest, assert_equal, assert_is, assert_raises

import io
import jq


Expand Down Expand Up @@ -204,6 +205,71 @@ def program_string_can_be_retrieved_from_program():
program = jq.compile(".")
assert_equal(".", program.program_string)

@istest
def parse_json_both_text_and_text_iter_accepted():
    # The same input must be accepted through either keyword argument
    for kwargs in (dict(text="true"), dict(text_iter=iter(["true"]))):
        assert_equal(True, next(jq.parse_json(**kwargs)))

@istest
def parse_json_file_works():
    # A text-mode file object holding a single JSON object
    stream = io.StringIO('{"abc": "def"}')
    parsed = list(jq.parse_json_file(stream))
    assert_equal([{"abc": "def"}], parsed)

@istest
def parse_json_empty_text_iter_stops():
    # No pieces at all, or only empty pieces, must yield no values
    for pieces in ([], [""], ["", ""]):
        parser = jq.parse_json(text_iter=iter(pieces))
        assert_raises(StopIteration, next, parser)

@istest
def parse_json_single_complete_text_iter_works():
    # Each complete JSON text is delivered as a single piece
    cases = [
        (False, "false"),
        (True, "true"),
        (42, "42"),
        (-42, "-42"),
        ("42", '"42"'),
        ([42], "[42]"),
        (dict(a=42), '{"a": 42}'),
    ]
    for expected, piece in cases:
        parser = jq.parse_json(text_iter=iter([piece]))
        assert_equal(expected, next(parser))

@istest
def parse_json_multi_complete_text_iter_works():
    # Each complete JSON text is split across two pieces
    cases = [
        (False, ["fa", "lse"]),
        (True, ["tr", "ue"]),
        (42, ["4", "2"]),
        (-42, ["-4", "2"]),
        ("42", ['"4', '2"']),
        ([42], ["[4", "2]"]),
        (dict(a=42), ['{"a":', ' 42}']),
    ]
    for expected, pieces in cases:
        parser = jq.parse_json(text_iter=iter(pieces))
        assert_equal(expected, next(parser))

@istest
def parse_json_single_incomplete_text_iter_breaks():
    # A truncated JSON text in a single piece must be reported as an error
    truncated = ["fals", "tru", "-", '"42', "[42", '{"a": 42']
    for piece in truncated:
        parser = jq.parse_json(text_iter=iter([piece]))
        assert_raises(jq.JSONParseError, next, parser)

@istest
def parse_json_multi_incomplete_text_iter_breaks():
    # A truncated JSON text split across pieces must be reported as an error
    truncated = [
        ["fa", "ls"],
        ["tr", "u"],
        ['"4', '2'],
        ["[4", "2"],
        ['{"a":', ' 42'],
    ]
    for pieces in truncated:
        parser = jq.parse_json(text_iter=iter(pieces))
        assert_raises(jq.JSONParseError, next, parser)

@istest
class TestJvToPython(object):
Expand Down

0 comments on commit 6b8bf75

Please sign in to comment.