Skip to content

Commit

Permalink
Merge pull request #65 from trailofbits/pydiff
Browse files Browse the repository at this point in the history
Adds a new edit digest output, as well as the ability to diff in-memory Python objects
  • Loading branch information
ESultanik authored Jan 9, 2023
2 parents f9849dd + 8ebb6d3 commit 4596c52
Show file tree
Hide file tree
Showing 13 changed files with 618 additions and 66 deletions.
8 changes: 7 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,9 @@ Use `--condensed` or `-j` to apply both of these options:

The `--only-edits` or `-e` option will print out a list of edits rather than applying them to the input file in place.

The `--edit-digest` or `-d` option is like `--only-edits`, but prints a more concise, human-readable context for
each edit.

### Matching Options
By default, Graphtage tries to match all possible pairs of elements in a dictionary.

Expand Down Expand Up @@ -139,6 +142,9 @@ itself. See [our documentation](https://trailofbits.github.io/graphtage/latest/h

## Using Graphtage as a Library

Graphtage has a complete API for using its diffing capabilities programmatically.
When using Graphtage as a library, it is also capable of diffing in-memory Python objects.
This can be useful for debugging Python code, for example, to determine a differential between two objects.
See [our documentation](https://trailofbits.github.io/graphtage/latest/library.html) for more information.

## Extending Graphtage
Expand All @@ -155,4 +161,4 @@ This research was developed by [Trail of Bits](https://www.trailofbits.com/) wit
Advanced Research Projects Agency (DARPA) under the SafeDocs program as a subcontractor to [Galois](https://galois.com).
It is licensed under the [GNU Lesser General Public License v3.0](LICENSE).
[Contact us](mailto:[email protected]) if you're looking for an exception to the terms.
© 2020, Trail of Bits.
© 2020–2023, Trail of Bits.
1 change: 1 addition & 0 deletions docs/_templates/layout.html
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
{% endfor %}
{% else %}
<dd><a href="/graphtage/latest">latest</a></dd>
<dd><a href="/graphtage/v0.2.7">0.2.7</a></dd>
<dd><a href="/graphtage/v0.2.6">0.2.6</a></dd>
<dd><a href="/graphtage/v0.2.5">0.2.5</a></dd>
<dd><a href="/graphtage/v0.2.4">0.2.4</a></dd>
Expand Down
22 changes: 22 additions & 0 deletions docs/library.rst
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,25 @@ just as easily output the diff in another format, like YAML::
- 3
- 4

Diffing In-Memory Python Objects
--------------------------------

When used as a library, Graphtage has the ability to diff in-memory Python objects. This can be useful when debugging,
for example, to quickly determine the difference between two Python objects that cause a differential::

>>> from graphtage.pydiff import print_diff
>>> with printer.DEFAULT_PRINTER as p:
... obj1 = [1, 2, {3: "three"}, 4]
... obj2 = [1, 2, {3: 3}, "four"]
... print_diff(obj1, obj2, printer=p)
[1,2,{3: "three" -> 3},++"four"++~~4~~]

Python object diffing also works with custom classes::

>>> class Foo:
... def __init__(self, bar, baz):
... self.bar = bar
... self.baz = baz
>>> with printer.DEFAULT_PRINTER as p:
... print_diff(Foo("bar", "baz"), Foo("bar", "bak"), printer=p)
Foo(bar="bar", baz="ba++k++~~z~~")
2 changes: 1 addition & 1 deletion graphtage/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from .edits import *

from .version import __version__, VERSION_STRING
from . import bounds, edits, expressions, fibonacci, formatter, levenshtein, matching, printer, \
from . import bounds, edits, expressions, fibonacci, formatter, levenshtein, matching, printer, pydiff, \
search, sequences, tree, utils
from . import csv, json, xml, yaml, plist

Expand Down
34 changes: 31 additions & 3 deletions graphtage/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
from abc import ABCMeta, abstractmethod
from typing import Optional

from colorama.ansi import Fore

from .edits import Edit
from . import expressions
from . import graphtage
Expand Down Expand Up @@ -113,9 +115,19 @@ def main(argv=None) -> int:
default=None,
help=f'equivalent to `--to-mime {mime}`'
)
parser.add_argument('--match-if', '-m', type=str, default=None, help='only attempt to match two dictionaries if the provided expression is satisfied. For example, `--match-if "from[\'foo\'] == to[\'bar\']"` will mean that only a dictionary which has a "foo" key that has the same value as the other dictionary\'s "bar" key will be attempted to be paired')
parser.add_argument('--match-unless', '-u', type=str, default=None, help='similar to `--match-if`, but only attempt a match if the provided expression evaluates to `False`')
parser.add_argument('--only-edits', '-e', action='store_true', help='only print the edits rather than a full diff')
parser.add_argument('--match-if', '-m', type=str, default=None,
help='only attempt to match two dictionaries if the provided expression is satisfied. For '
'example, `--match-if "from[\'foo\'] == to[\'bar\']"` will mean that only a dictionary '
'which has a "foo" key that has the same value as the other dictionary\'s "bar" key will '
'be attempted to be paired')
parser.add_argument('--match-unless', '-u', type=str, default=None,
help='similar to `--match-if`, but only attempt a match if the provided expression evaluates '
'to `False`')
edit_output = parser.add_mutually_exclusive_group()
edit_output.add_argument('--only-edits', '-e', action='store_true',
help='only print the edits rather than a full diff')
edit_output.add_argument('--edit-digest', '-d', action='store_true',
help='similar to `--only-edits`, but prints a more concise context for edits')
formatting = parser.add_argument_group(title='output formatting')
formatting.add_argument('--format', '-f', choices=graphtage.FILETYPES_BY_TYPENAME.keys(), default=None,
help='output format for the diff (default is to use the format of FROM_PATH)')
Expand Down Expand Up @@ -330,6 +342,22 @@ def printer_type(*pos_args, **kwargs):
printer.write(str(edit))
printer.newline()
had_edits = had_edits or edit.has_non_zero_cost()
elif args.edit_digest:
if args.format is not None:
formatter = graphtage.FILETYPES_BY_TYPENAME[args.format].get_default_formatter()
else:
formatter = from_format.get_default_formatter()

for ancestors, edit in from_tree.get_all_edit_contexts(to_tree):
for i, node in enumerate(ancestors):
if node.parent is not None:
node.parent.print_parent_context(printer, for_child=node)
if i == len(ancestors) - 1:
with printer.color(Fore.BLUE):
printer.write(" -> ")
formatter.print(printer, edit)
printer.newline()
had_edits = had_edits or edit.has_non_zero_cost()
else:
diff = from_tree.diff(to_tree)
if args.format is not None:
Expand Down
2 changes: 1 addition & 1 deletion graphtage/formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,7 @@ def __new__(cls, *args, **kwargs) -> 'Formatter[T]':
:attr:`parent<Formatter.parent>` to this new formatter.
"""
ret: Formatter[T] = super().__new__(cls, *args, **kwargs)
ret: Formatter[T] = super().__new__(cls)
setattr(ret, 'sub_formatters', [])
for sub_formatter in ret.sub_format_types:
ret.sub_formatters.append(sub_formatter())
Expand Down
60 changes: 49 additions & 11 deletions graphtage/graphtage.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,10 +180,23 @@ def __init__(self, key: LeafNode, value: TreeNode, allow_key_edits: bool = True)
def to_obj(self):
return self.key, self.value

def print_parent_context(self, printer: Printer, for_child: TreeNode):
    """Writes this pair's key, wrapped in blue square brackets, as context for its value.

    Nothing is written when ``for_child`` is not a direct child of this node, or when
    ``for_child`` is the key itself.

    Args:
        printer: The printer to which the context is written.
        for_child: The child node whose context is being printed.

    """
    if for_child.parent is not self:
        # this is not one of our children!
        return
    elif for_child is self.key:
        # we only print the context for the value
        return
    # Only the brackets are colored; the key is printed with its own formatting.
    with printer.color(Fore.BLUE):
        printer.write("[")
    self.key.print(printer)
    with printer.color(Fore.BLUE):
        printer.write("]")

def editable_dict(self) -> Dict[str, Any]:
ret = dict(self.__dict__)
ret['key'] = self.key.make_edited()
ret['value'] = self.value.make_edited()
ret["key"] = self.key.make_edited()
ret["value"] = self.value.make_edited()
return ret

def children(self) -> Tuple[LeafNode, TreeNode]:
Expand Down Expand Up @@ -378,11 +391,33 @@ def __repr__(self):
class MappingNode(ContainerNode, ABC):
"""An abstract base class for nodes that represent mappings."""

@classmethod
def make_key_value_pair_node(cls, key: LeafNode, value: TreeNode, allow_key_edits: bool = True) -> KeyValuePairNode:
    """Constructs the :class:`KeyValuePairNode` used to hold one entry of this mapping.

    Args:
        key: The key of the pair.
        value: The value of the pair.
        allow_key_edits: Passed through unchanged to :class:`KeyValuePairNode`.

    Returns:
        KeyValuePairNode: A new pair node built from the given arguments.

    """
    return KeyValuePairNode(key=key, value=value, allow_key_edits=allow_key_edits)

@classmethod
@abstractmethod
def from_dict(cls: Type[T], source_dict: Dict[LeafNode, TreeNode]) -> T:
    """Constructs a :class:`MappingNode` from a mapping of :class:`LeafNode` to :class:`TreeNode`.

    Args:
        source_dict: The source mapping.

    Returns:
        MappingNode: The resulting :class:`MappingNode`.

    """
    raise NotImplementedError()

def to_obj(self) -> Dict[Any, Any]:
    """Returns a :class:`dict` built by calling ``to_obj()`` on every key and value from :meth:`items`."""
    return {
        k.to_obj(): v.to_obj() for k, v in self.items()
    }

def print_parent_context(self, printer: Printer, for_child: "TreeNode"):
    """Intentionally writes nothing for a mapping.

    Args:
        printer: Unused.
        for_child: Unused.

    """
    # this is handled by KeyValuePairNode
    pass

def items(self) -> Iterator[Tuple[TreeNode, TreeNode]]:
"""Iterates over the key/value pairs in this mapping, similar to :meth:`dict.items`.
Expand Down Expand Up @@ -458,8 +493,8 @@ class DictNode(MappingNode, MultiSetNode[KeyValuePairNode]):
"""

@staticmethod
def from_dict(source_dict: Dict[LeafNode, TreeNode]) -> 'DictNode':
@classmethod
def from_dict(cls: Type[T], source_dict: Dict[LeafNode, TreeNode]) -> T:
"""Constructs a :class:`DictNode` from a mapping of :class:`LeafNode` to :class:`TreeNode`.
Args:
Expand All @@ -469,8 +504,8 @@ def from_dict(source_dict: Dict[LeafNode, TreeNode]) -> 'DictNode':
DictNode: The resulting :class:`DictNode`.
"""
return DictNode(
sorted(KeyValuePairNode(key, value, allow_key_edits=True) for key, value in source_dict.items())
return cls(
sorted(cls.make_key_value_pair_node(key, value, allow_key_edits=True) for key, value in source_dict.items())
)

def edits(self, node: TreeNode) -> Edit:
Expand Down Expand Up @@ -520,8 +555,8 @@ def container_type(self) -> Type[Dict[LeafNode, KeyValuePairNode]]:
"""
return dict

@staticmethod
def from_dict(source_dict: Dict[LeafNode, TreeNode]) -> 'FixedKeyDictNode':
@classmethod
def from_dict(cls: Type[T], source_dict: Dict[LeafNode, TreeNode]) -> T:
"""Constructs a :class:`FixedKeyDictNode` from a mapping of :class:`LeafNode` to :class:`TreeNode`.
Args:
Expand All @@ -535,9 +570,12 @@ def from_dict(source_dict: Dict[LeafNode, TreeNode]) -> 'FixedKeyDictNode':
FixedKeyDictNode: The resulting :class:`FixedKeyDictNode`
"""
return FixedKeyDictNode({
return cls({
kvp.key: kvp
for kvp in (KeyValuePairNode(key, value, allow_key_edits=False) for key, value in source_dict.items())
for kvp in (
cls.make_key_value_pair_node(key, value, allow_key_edits=False)
for key, value in source_dict.items()
)
})

def __getitem__(self, item: LeafNode):
Expand Down Expand Up @@ -579,7 +617,7 @@ def items(self) -> Iterator[Tuple[LeafNode, TreeNode]]:

def editable_dict(self) -> Dict[str, Any]:
ret = dict(self.__dict__)
ret['_children'] = {e.key: e for e in (kvp.make_edited() for kvp in self)}
ret["_children"] = {e.key: e for e in (kvp.make_edited() for kvp in self)}
return ret

def __hash__(self):
Expand Down
Loading

0 comments on commit 4596c52

Please sign in to comment.