diff --git a/bdbag/__init__.py b/bdbag/__init__.py index e096df6..f20a6a5 100644 --- a/bdbag/__init__.py +++ b/bdbag/__init__.py @@ -28,7 +28,7 @@ logger = logging.getLogger(__name__) -__version__ = "1.7.2" +__version__ = "1.7.3" __bagit_version__ = "1.8.1" __bagit_profile_version__ = "1.3.1" @@ -146,11 +146,11 @@ def parse_content_disposition(value): # pragma: no cover # Per the bagit spec, we just want to replace (%,\r,\n,\t, ) for storage in fetch.txt, but this is also applicable # to URI/IRI storage in ro-metadata as well -def escape_uri(uri, encode_whitespace=True): +def escape_uri(uri, encode_whitespace=True, encode_other=False): if not uri: return uri - - uri = uri.replace("%", "%25").replace("\n", "%0A").replace("\r", "%0D") + if encode_other: + uri = uri.replace("%", "%25").replace("\n", "%0A").replace("\r", "%0D") if encode_whitespace: uri = uri.replace(" ", "%20").replace("\t", "%09") diff --git a/bdbag/bdbag_ro.py b/bdbag/bdbag_ro.py index 4665f5b..b3af2b7 100644 --- a/bdbag/bdbag_ro.py +++ b/bdbag/bdbag_ro.py @@ -146,7 +146,7 @@ def add_file_metadata(manifest, retrieved_from = None if local_path: - uri = escape_uri(ensure_payload_path_prefix(local_path), encode_whitespace=False) + uri = escape_uri(ensure_payload_path_prefix(local_path), encode_whitespace=False, encode_other=True) if source_url: retrieved_from = dict(retrievedFrom=source_url) diff --git a/bdbag/bdbagit.py b/bdbag/bdbagit.py index 476ad01..48cfb36 100644 --- a/bdbag/bdbagit.py +++ b/bdbag/bdbagit.py @@ -285,7 +285,7 @@ def _make_fetch_file(path, remote_entries): fetch_file.write("%s\t%s\t%s\n" % (escape_uri(remote_entries[filename]['url']), remote_entries[filename]['length'], - escape_uri(_denormalize_filename(filename), encode_whitespace=False))) + escape_uri(_denormalize_filename(filename), encode_whitespace=False, encode_other=True))) def _find_tag_files(bag_dir): diff --git a/test/test-data/test-config/test-fetch-manifest-encoding.json b/test/test-data/test-config/test-fetch-manifest-encoding.json index a327600..b37d523 100644 --- a/test/test-data/test-config/test-fetch-manifest-encoding.json +++ b/test/test-data/test-config/test-fetch-manifest-encoding.json @@ -1,6 +1,6 @@ [ { - "url":"https://raw.githubusercontent.com/fair-research/bdbag/master/test/test-data/test-http-encoded/test fetch%http®.txt", + "url":"https://raw.githubusercontent.com/fair-research/bdbag/master/test/test-data/test-http-encoded/test fetch%25http®.txt", "length":201, "filename":"test fetch http®.txt", "md5":"f3ad851f4213d41ce9690542010bffa0", @@ -9,7 +9,7 @@ "sha512":"1bdbb4fc33dbea55ee0e5473062cbd003221c571c6110e2b3d2ebe5288a95a74ccd60fbbbcbd42fcfa4cd320b980ca27d537e6d29e60f73822e301a7ba6c1bfa" }, { - "url":"https://raw.githubusercontent.com/fair-research/bdbag/master/test/test-data/test-http-encoded/test fetch%http®.txt", + "url":"https://raw.githubusercontent.com/fair-research/bdbag/master/test/test-data/test-http-encoded/test\tfetch%25http®.txt", "length":201, "filename":"test\nfetch\nhttp®.txt", "md5":"f3ad851f4213d41ce9690542010bffa0", @@ -18,7 +18,7 @@ "sha512":"1bdbb4fc33dbea55ee0e5473062cbd003221c571c6110e2b3d2ebe5288a95a74ccd60fbbbcbd42fcfa4cd320b980ca27d537e6d29e60f73822e301a7ba6c1bfa" }, { - "url":"https://raw.githubusercontent.com/fair-research/bdbag/master/test/test-data/test-http-encoded/test fetch%http®.txt", + "url":"https://raw.githubusercontent.com/fair-research/bdbag/master/test/test-data/test-http-encoded/test%20fetch%25http®.txt", "length":201, "filename":"test%fetch http®\r\n.txt", "md5":"f3ad851f4213d41ce9690542010bffa0", diff --git a/test/test_remote.py b/test/test_remote.py index 8c43017..06660aa 100644 --- a/test/test_remote.py +++ b/test/test_remote.py @@ -131,7 +131,7 @@ def test_bag_with_unencoded_utf8_remote_file_manifest(self): fetch_txt = ff.read() self.assertIn( u'https://raw.githubusercontent.com/fair-research/bdbag/master/test/test-data/test-http-encoded/' - u'test%20fetch%25http®.txt\t201\tdata/test%0Afetch%0Ahttp®.txt', fetch_txt) + u'test%09fetch%25http®.txt\t201\tdata/test%0Afetch%0Ahttp®.txt', fetch_txt) self.assertIn( u'https://raw.githubusercontent.com/fair-research/bdbag/master/test/test-data/test-http-encoded/' u'test%20fetch%25http®.txt\t201\tdata/test fetch http®.txt', fetch_txt)