Skip to content

Commit

Permalink
Merge pull request Backblaze#503 from reef-technologies/fix_sync_regr…
Browse files Browse the repository at this point in the history
…ession

fix sync reuploading files over and over
  • Loading branch information
mjurbanski-reef authored Jun 19, 2024
2 parents 21f12c4 + 246d274 commit 248edf8
Show file tree
Hide file tree
Showing 8 changed files with 121 additions and 16 deletions.
15 changes: 11 additions & 4 deletions b2sdk/_internal/scan/folder.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def all_files(self, reporter: ProgressReport | None,
policies_manager=DEFAULT_SCAN_MANAGER) -> Iterator[AbstractPath]:
"""
Return an iterator over all of the files in the folder, in
the order that B2 uses.
the order that B2 uses (lexicographic by object path).
It also performs filtering using policies manager.
Expand Down Expand Up @@ -138,11 +138,18 @@ def all_files(self, reporter: ProgressReport | None,
"""
Yield all files.
Yield a File object for each of the files anywhere under this folder, in the
order they would appear in B2, unless the path is excluded by policies manager.
:param reporter: a place to report errors
:param policies_manager: a policy manager object, default is DEFAULT_SCAN_MANAGER
:return: an iterator over all files in the folder in the order they would appear in B2
"""
root_path = Path(self.root)
yield from self._walk_relative_paths(root_path, Path(''), reporter, policies_manager)

local_paths = self._walk_relative_paths(root_path, Path(''), reporter, policies_manager)
# Crucial to return the "correct" order of the files
yield from sorted(local_paths, key=lambda lp: lp.relative_path)

def make_full_path(self, file_name):
"""
Expand Down Expand Up @@ -196,8 +203,8 @@ def _walk_relative_paths(
visited_symlinks: set[int] | None = None,
):
"""
Yield a File object for each of the files anywhere under this folder, in the
order they would appear in B2, unless the path is excluded by policies manager.
Yield a File object for each of the files anywhere under this folder,
unless the path is excluded by policies manager.
:param local_dir: the path to the local directory that we are currently inspecting
:param relative_dir_path: the path of this dir relative to the scan point, or Path('') if at scan point
Expand Down
2 changes: 2 additions & 0 deletions changelog.d/502.fixed.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Fix `LocalFolder` regression (introduced in 2.4.0) which caused `LocalFolder` to not list files by path lexicographical order.
This is also a fix for `synchronizer` re-uploading files on every run in some cases.
7 changes: 7 additions & 0 deletions test/integration/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import http
import http.client
import os
import secrets
from test.integration import get_b2_auth_data
from test.integration.bucket_cleaner import BucketCleaner
from test.integration.helpers import (
Expand Down Expand Up @@ -93,3 +94,9 @@ def bucket(b2_api, bucket_name_prefix, bucket_cleaner):
)
yield bucket
bucket_cleaner.cleanup_bucket(bucket)


@pytest.fixture
def b2_subfolder(bucket, request):
subfolder_name = f"{request.node.name}_{secrets.token_urlsafe(4)}"
return f"b2://{bucket.name}/{subfolder_name}"
67 changes: 67 additions & 0 deletions test/integration/test_sync.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
######################################################################
#
# File: test/integration/test_sync.py
#
# Copyright 2024 Backblaze Inc. All Rights Reserved.
#
# License https://www.backblaze.com/using_b2_code.html
#
######################################################################
from __future__ import annotations

import io
import time

import pytest

from b2sdk.v2 import (
CompareVersionMode,
NewerFileSyncMode,
Synchronizer,
SyncReport,
parse_folder,
)


@pytest.fixture
def local_folder_with_files(tmp_path):
folder = tmp_path / "test"
folder.mkdir()
(folder / "a").mkdir()
(folder / "a" / "foo").write_bytes(b"foo")
# space in the name is important as it influences lexicographical sorting used by B2
(folder / "a b").mkdir()
(folder / "a b" / "bar").write_bytes(b"bar")
return folder


def test_sync_folder(b2_api, local_folder_with_files, b2_subfolder):
source_folder = parse_folder(str(local_folder_with_files), b2_api)
dest_folder = parse_folder(b2_subfolder, b2_api)

synchronizer = Synchronizer(
max_workers=10,
newer_file_mode=NewerFileSyncMode.REPLACE,
compare_version_mode=CompareVersionMode.MODTIME,
compare_threshold=10, # ms
)

def sync_and_report():
buf = io.StringIO()
reporter = SyncReport(buf, no_progress=True)
with reporter:
synchronizer.sync_folders(
source_folder=source_folder,
dest_folder=dest_folder,
now_millis=int(1000 * time.time()),
reporter=reporter,
)
return reporter

report = sync_and_report()
assert report.total_transfer_files == 2
assert report.total_transfer_bytes == 6

second_sync_report = sync_and_report()
assert second_sync_report.total_transfer_files == 0
assert second_sync_report.total_transfer_bytes == 0
4 changes: 2 additions & 2 deletions test/unit/fixtures/folder.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def __init__(self, test_files):
super().__init__('test-bucket', 'folder', mock.MagicMock())

def get_file_versions(self):
yield from iter(self.file_versions)
yield from sorted(self.file_versions, key=lambda x: x.file_name)

def _file_versions(self, name, mod_times, size=10):
"""
Expand Down Expand Up @@ -70,7 +70,7 @@ def __init__(self, test_files):
self.local_paths = [self._local_path(*test_file) for test_file in test_files]

def all_files(self, reporter, policies_manager=DEFAULT_SCAN_MANAGER):
for single_path in self.local_paths:
for single_path in sorted(self.local_paths, key=lambda x: x.relative_path):
if single_path.relative_path.endswith('/'):
if policies_manager.should_exclude_b2_directory(single_path.relative_path):
continue
Expand Down
22 changes: 22 additions & 0 deletions test/unit/sync/test_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -969,6 +969,28 @@ def check_path_and_get_size(self):
# Order of indices: call index, pick arguments, pick first argument, first element of the first argument.
assert isinstance(bucket.mock_calls[0][1][0][0], CopySource)

def test_sync_lexicographical_order(self, synchronizer_factory):
"""
Test sync is successful when dir tree order is different from absolute path lexicographical order
Regression test for #502
"""
synchronizer = synchronizer_factory()

files = [('a/foo', [100], 10), ('a b/bar', [100], 10)]
src = self.folder_factory('local', *files)
dst = self.folder_factory('b2')

self.assert_folder_sync_actions(
synchronizer, src, dst, [
'b2_upload(/dir/a b/bar, folder/a b/bar, 100)',
'b2_upload(/dir/a/foo, folder/a/foo, 100)'
]
)

dst_with_files = self.folder_factory('b2', *files)
self.assert_folder_sync_actions(synchronizer, src, dst_with_files, [])


class TstEncryptionSettingsProvider(AbstractSyncEncryptionSettingsProvider):
def __init__(self, source_encryption_setting, destination_encryption_setting):
Expand Down
10 changes: 5 additions & 5 deletions test/unit/v0/test_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,10 @@ class TestFolder(TestSync):

NAMES = [
'.dot_file',
'hello.',
os.path.join('hello', 'a', '1'),
os.path.join('hello', 'a', '2'),
os.path.join('hello', 'b'),
'hello.',
'hello0',
os.path.join('inner', 'a.bin'),
os.path.join('inner', 'a.txt'),
Expand Down Expand Up @@ -106,10 +106,10 @@ def assert_filtered_files(self, scan_results, expected_scan_results):
def test_exclusions(self):
expected_list = [
'.dot_file',
'hello.',
'hello/a/1',
'hello/a/2',
'hello/b',
'hello.',
'hello0',
'inner/a.txt',
'inner/b.txt',
Expand All @@ -129,10 +129,10 @@ def test_exclude_all(self):
def test_exclusions_inclusions(self):
expected_list = [
'.dot_file',
'hello.',
'hello/a/1',
'hello/a/2',
'hello/b',
'hello.',
'hello0',
'inner/a.bin',
'inner/a.txt',
Expand All @@ -151,8 +151,8 @@ def test_exclusions_inclusions(self):
def test_exclude_matches_prefix(self):
expected_list = [
'.dot_file',
'hello/b',
'hello.',
'hello/b',
'hello0',
'inner/b.bin',
'inner/b.txt',
Expand Down Expand Up @@ -204,10 +204,10 @@ def test_exclude_directory_trailing_slash_does_not_match(self):
def test_exclusion_with_exact_match(self):
expected_list = [
'.dot_file',
'hello.',
'hello/a/1',
'hello/a/2',
'hello/b',
'hello.',
'inner/a.bin',
'inner/a.txt',
'inner/b.bin',
Expand Down
10 changes: 5 additions & 5 deletions test/unit/v1/test_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,10 @@ class TestFolder(TestSync):

NAMES = [
'.dot_file',
'hello.',
os.path.join('hello', 'a', '1'),
os.path.join('hello', 'a', '2'),
os.path.join('hello', 'b'),
'hello.',
'hello0',
os.path.join('inner', 'a.bin'),
os.path.join('inner', 'a.txt'),
Expand Down Expand Up @@ -109,10 +109,10 @@ def assert_filtered_files(self, scan_results, expected_scan_results):
def test_exclusions(self):
expected_list = [
'.dot_file',
'hello.',
'hello/a/1',
'hello/a/2',
'hello/b',
'hello.',
'hello0',
'inner/a.txt',
'inner/b.txt',
Expand All @@ -132,10 +132,10 @@ def test_exclude_all(self):
def test_exclusions_inclusions(self):
expected_list = [
'.dot_file',
'hello.',
'hello/a/1',
'hello/a/2',
'hello/b',
'hello.',
'hello0',
'inner/a.bin',
'inner/a.txt',
Expand All @@ -154,8 +154,8 @@ def test_exclusions_inclusions(self):
def test_exclude_matches_prefix(self):
expected_list = [
'.dot_file',
'hello/b',
'hello.',
'hello/b',
'hello0',
'inner/b.bin',
'inner/b.txt',
Expand Down Expand Up @@ -207,10 +207,10 @@ def test_exclude_directory_trailing_slash_does_not_match(self):
def test_exclusion_with_exact_match(self):
expected_list = [
'.dot_file',
'hello.',
'hello/a/1',
'hello/a/2',
'hello/b',
'hello.',
'inner/a.bin',
'inner/a.txt',
'inner/b.bin',
Expand Down

0 comments on commit 248edf8

Please sign in to comment.