From 8d339fbab5ffc0dd583ddadf4feaf6d08c3343f6 Mon Sep 17 00:00:00 2001 From: activesoull Date: Wed, 11 Sep 2024 11:15:46 +0400 Subject: [PATCH 1/5] changes to work with av 13.0.0 --- deeplake/core/compression.py | 57 +++++++++++++------------------ deeplake/requirements/common.txt | 2 +- deeplake/requirements/plugins.txt | 2 +- 3 files changed, 25 insertions(+), 36 deletions(-) diff --git a/deeplake/core/compression.py b/deeplake/core/compression.py index c793a312d9..024414074b 100644 --- a/deeplake/core/compression.py +++ b/deeplake/core/compression.py @@ -961,14 +961,9 @@ def _decompress_video( container, vstream = _open_video(file) nframes, height, width, _ = _read_metadata_from_vstream(container, vstream)[0] - if start is None: - start = 0 - - if stop is None: - stop = nframes - - if step is None: - step = 1 + start = start or 0 + stop = stop or nframes + step = step or 1 nframes = math.ceil((stop - start) / step) @@ -977,37 +972,35 @@ def _decompress_video( seek_target = _frame_to_stamp(start, vstream) step_time = _frame_to_stamp(step, vstream) - gop_size = ( - vstream.codec_context.gop_size - ) # gop size is distance (in frames) between 2 I-frames - if step > gop_size: - step_seeking = True - else: - step_seeking = False + gop_size = 1 + if vstream.codec_context.is_encoder: + gop_size = vstream.codec_context.gop_size + + step_seeking = step > gop_size seekable = True try: container.seek(seek_target, stream=vstream) except av.error.FFmpegError: seekable = False - container, vstream = _open_video(file) # try again but this time don't seek + container, vstream = _open_video(file) # Retry without seeking warning( - "Cannot seek. Possibly a corrupted video file. Retrying with seeking disabled..." + "Cannot seek. Possibly a corrupted video file. Retrying without seeking." ) i = 0 for packet in container.demux(video=0): for frame in packet.decode(): if packet.pts and packet.pts >= seek_target: - arr = frame.to_ndarray(format="rgb24") - video[i] = arr + if (i % step) == 0: + arr = frame.to_ndarray(format="rgb24") + video[i // step] = arr i += 1 seek_target += step_time if step_seeking and seekable: container.seek(seek_target, stream=vstream) - - if i == nframes: - break + if i >= stop: + break if reverse: return video[::-1] @@ -1031,25 +1024,20 @@ def _read_timestamps( step_time = _frame_to_stamp(step, vstream) stamps = [] - if vstream.duration is None: - time_base = 1 / av.time_base # type: ignore - else: - time_base = vstream.time_base.numerator / vstream.time_base.denominator + time_base = vstream.time_base.numerator / vstream.time_base.denominator - gop_size = ( - vstream.codec_context.gop_size - ) # gop size is distance (in frames) between 2 I-frames - if step > gop_size: - step_seeking = True - else: - step_seeking = False + gop_size = 1 + if vstream.codec_context.is_encoder: + gop_size = vstream.codec_context.gop_size + + step_seeking = step > gop_size seekable = True try: container.seek(seek_target, stream=vstream) except av.error.FFmpegError: seekable = False - container, vstream = _open_video(file) # try again but this time don't seek + container, vstream = _open_video(file) # Retry without seeking warning( "Cannot seek. Possibly a corrupted video file. Retrying with seeking disabled..." ) @@ -1070,6 +1058,7 @@ def _read_timestamps( # need to sort because when demuxing, frames are in order of dts (decoder timestamp) # we need it in order of pts (presentation timestamp) stamps.sort() + stamps_arr = np.zeros((nframes,), dtype=np.float32) stamps_arr[: len(stamps)] = stamps diff --git a/deeplake/requirements/common.txt b/deeplake/requirements/common.txt index ceb4fd23b1..0261388e22 100644 --- a/deeplake/requirements/common.txt +++ b/deeplake/requirements/common.txt @@ -11,7 +11,7 @@ pathos humbug>=0.3.1 tqdm lz4 -av>=8.1.0,<=12.3.0; python_version >= '3.7' or sys_platform != 'win32' +av>=8.1.0; python_version >= '3.7' or sys_platform != 'win32' pydicom IPython flask diff --git a/deeplake/requirements/plugins.txt b/deeplake/requirements/plugins.txt index ddf7cb5e51..68757a698a 100644 --- a/deeplake/requirements/plugins.txt +++ b/deeplake/requirements/plugins.txt @@ -10,4 +10,4 @@ mmdet==2.28.1; platform_system == "Linux" and python_version >= "3.7" mmsegmentation==0.30.0; platform_system == "Linux" and python_version >= "3.7" mmengine pandas -av==12.3.0 \ No newline at end of file +av \ No newline at end of file From 19acc9b773196d7b9361f6cc5e8aeeb115fd8566 Mon Sep 17 00:00:00 2001 From: activesoull Date: Wed, 11 Sep 2024 12:38:13 +0400 Subject: [PATCH 2/5] fix --- deeplake/core/compression.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/deeplake/core/compression.py b/deeplake/core/compression.py index 024414074b..a441f5868b 100644 --- a/deeplake/core/compression.py +++ b/deeplake/core/compression.py @@ -992,15 +992,14 @@ def _decompress_video( for packet in container.demux(video=0): for frame in packet.decode(): if packet.pts and packet.pts >= seek_target: - if (i % step) == 0: - arr = frame.to_ndarray(format="rgb24") - video[i // step] = arr + arr = frame.to_ndarray(format="rgb24") + video[i] = arr i += 1 seek_target += step_time if step_seeking and seekable: container.seek(seek_target, stream=vstream) - if i >= stop: - break + if i == nframes: + break if reverse: return video[::-1] From a6592b8b77ed2e2b6eaac9443e8b25d968f01f43 Mon Sep 17 00:00:00 2001 From: activesoull Date: Wed, 11 Sep 2024 14:26:42 +0400 Subject: [PATCH 3/5] fix flaky --- deeplake/tests/path_fixtures.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/deeplake/tests/path_fixtures.py b/deeplake/tests/path_fixtures.py index df44af4ea1..95c3ae38db 100644 --- a/deeplake/tests/path_fixtures.py +++ b/deeplake/tests/path_fixtures.py @@ -536,9 +536,11 @@ def corpus_query_pair_path(hub_cloud_dev_token): corpus = f"hub://{HUB_CLOUD_DEV_USERNAME}/deepmemory_test_corpus_managed_2" query = corpus + "_eval_queries_2" delete_if_exists(query, hub_cloud_dev_token) + delete_if_exists(corpus, hub_cloud_dev_token) yield corpus, query delete_if_exists(query, hub_cloud_dev_token) + delete_if_exists(corpus, hub_cloud_dev_token) @pytest.fixture From 0bce30f87f1b5d35649f516cb1705734d0a9b4c1 Mon Sep 17 00:00:00 2001 From: activesoull Date: Wed, 11 Sep 2024 17:03:37 +0400 Subject: [PATCH 4/5] test fix --- deeplake/core/vectorstore/deep_memory/test_deepmemory.py | 9 +++++---- deeplake/tests/path_fixtures.py | 2 -- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/deeplake/core/vectorstore/deep_memory/test_deepmemory.py b/deeplake/core/vectorstore/deep_memory/test_deepmemory.py index 2565ff8b9a..b7a9a5674e 100644 --- a/deeplake/core/vectorstore/deep_memory/test_deepmemory.py +++ b/deeplake/core/vectorstore/deep_memory/test_deepmemory.py @@ -461,7 +461,7 @@ def test_deepmemory_evaluate_without_embedding_function( questions_embeddings_and_relevances, hub_cloud_dev_token, ): - corpus, _, _, query_path = corpus_query_relevances_copy + corpus, _, _, _ = corpus_query_relevances_copy ( questions_embeddings, question_relevances, @@ -618,7 +618,7 @@ def test_unsupported_deepmemory_users(local_ds): def test_deepmemory_list_jobs_with_no_jobs( corpus_query_relevances_copy, hub_cloud_dev_token ): - corpus, queries, relevances, _ = corpus_query_relevances_copy + corpus, _, _, _ = corpus_query_relevances_copy db = VectorStore( path=corpus, @@ -749,7 +749,7 @@ def test_deepmemory_train_with_embedding_function_specified_in_constructor_shoul embedding_function=embedding_fn, ) - job_id = db.deep_memory.train( + _ = db.deep_memory.train( queries=queries, relevance=relevances, ) @@ -776,13 +776,14 @@ def test_deepmemory_evaluate_with_embedding_function_specified_in_constructor_sh runtime={"tensor_db": True}, token=hub_cloud_dev_token, embedding_function=embedding_fn, + overwrite=True, ) queries = queries_vs.dataset[:10].text.data()["value"] relevance = queries_vs.dataset[:10].metadata.data()["value"] relevance = [rel["relevance"] for rel in relevance] - recall = db.deep_memory.evaluate( + _ = db.deep_memory.evaluate( queries=queries, relevance=relevance, ) diff --git a/deeplake/tests/path_fixtures.py b/deeplake/tests/path_fixtures.py index 95c3ae38db..df44af4ea1 100644 --- a/deeplake/tests/path_fixtures.py +++ b/deeplake/tests/path_fixtures.py @@ -536,11 +536,9 @@ def corpus_query_pair_path(hub_cloud_dev_token): corpus = f"hub://{HUB_CLOUD_DEV_USERNAME}/deepmemory_test_corpus_managed_2" query = corpus + "_eval_queries_2" delete_if_exists(query, hub_cloud_dev_token) - delete_if_exists(corpus, hub_cloud_dev_token) yield corpus, query delete_if_exists(query, hub_cloud_dev_token) - delete_if_exists(corpus, hub_cloud_dev_token) @pytest.fixture From e622d9686262c1850aaaddfcc5578b6a374ed584 Mon Sep 17 00:00:00 2001 From: activesoull Date: Wed, 11 Sep 2024 18:36:45 +0400 Subject: [PATCH 5/5] make read_only --- deeplake/core/vectorstore/deep_memory/test_deepmemory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deeplake/core/vectorstore/deep_memory/test_deepmemory.py b/deeplake/core/vectorstore/deep_memory/test_deepmemory.py index b7a9a5674e..bb189eb271 100644 --- a/deeplake/core/vectorstore/deep_memory/test_deepmemory.py +++ b/deeplake/core/vectorstore/deep_memory/test_deepmemory.py @@ -776,7 +776,7 @@ def test_deepmemory_evaluate_with_embedding_function_specified_in_constructor_sh runtime={"tensor_db": True}, token=hub_cloud_dev_token, embedding_function=embedding_fn, - overwrite=True, + read_only=True, ) queries = queries_vs.dataset[:10].text.data()["value"]