diff --git a/deeplake/core/compression.py b/deeplake/core/compression.py index c793a312d9..a441f5868b 100644 --- a/deeplake/core/compression.py +++ b/deeplake/core/compression.py @@ -961,14 +961,9 @@ def _decompress_video( container, vstream = _open_video(file) nframes, height, width, _ = _read_metadata_from_vstream(container, vstream)[0] - if start is None: - start = 0 - - if stop is None: - stop = nframes - - if step is None: - step = 1 + start = start or 0 + stop = stop or nframes + step = step or 1 nframes = math.ceil((stop - start) / step) @@ -977,22 +972,20 @@ def _decompress_video( seek_target = _frame_to_stamp(start, vstream) step_time = _frame_to_stamp(step, vstream) - gop_size = ( - vstream.codec_context.gop_size - ) # gop size is distance (in frames) between 2 I-frames - if step > gop_size: - step_seeking = True - else: - step_seeking = False + gop_size = 1 + if vstream.codec_context.is_encoder: + gop_size = vstream.codec_context.gop_size + + step_seeking = step > gop_size seekable = True try: container.seek(seek_target, stream=vstream) except av.error.FFmpegError: seekable = False - container, vstream = _open_video(file) # try again but this time don't seek + container, vstream = _open_video(file) # Retry without seeking warning( - "Cannot seek. Possibly a corrupted video file. Retrying with seeking disabled..." + "Cannot seek. Possibly a corrupted video file. Retrying without seeking." ) i = 0 @@ -1005,7 +998,6 @@ def _decompress_video( seek_target += step_time if step_seeking and seekable: container.seek(seek_target, stream=vstream) - if i == nframes: break @@ -1031,25 +1023,20 @@ def _read_timestamps( step_time = _frame_to_stamp(step, vstream) stamps = [] - if vstream.duration is None: - time_base = 1 / av.time_base # type: ignore - else: - time_base = vstream.time_base.numerator / vstream.time_base.denominator + time_base = vstream.time_base.numerator / vstream.time_base.denominator - gop_size = ( - vstream.codec_context.gop_size - ) # gop size is distance (in frames) between 2 I-frames - if step > gop_size: - step_seeking = True - else: - step_seeking = False + gop_size = 1 + if vstream.codec_context.is_encoder: + gop_size = vstream.codec_context.gop_size + + step_seeking = step > gop_size seekable = True try: container.seek(seek_target, stream=vstream) except av.error.FFmpegError: seekable = False - container, vstream = _open_video(file) # try again but this time don't seek + container, vstream = _open_video(file) # Retry without seeking warning( "Cannot seek. Possibly a corrupted video file. Retrying with seeking disabled..." ) @@ -1070,6 +1057,7 @@ def _read_timestamps( # need to sort because when demuxing, frames are in order of dts (decoder timestamp) # we need it in order of pts (presentation timestamp) stamps.sort() + stamps_arr = np.zeros((nframes,), dtype=np.float32) stamps_arr[: len(stamps)] = stamps diff --git a/deeplake/core/vectorstore/deep_memory/test_deepmemory.py b/deeplake/core/vectorstore/deep_memory/test_deepmemory.py index 2565ff8b9a..bb189eb271 100644 --- a/deeplake/core/vectorstore/deep_memory/test_deepmemory.py +++ b/deeplake/core/vectorstore/deep_memory/test_deepmemory.py @@ -461,7 +461,7 @@ def test_deepmemory_evaluate_without_embedding_function( questions_embeddings_and_relevances, hub_cloud_dev_token, ): - corpus, _, _, query_path = corpus_query_relevances_copy + corpus, _, _, _ = corpus_query_relevances_copy ( questions_embeddings, question_relevances, @@ -618,7 +618,7 @@ def test_unsupported_deepmemory_users(local_ds): def test_deepmemory_list_jobs_with_no_jobs( corpus_query_relevances_copy, hub_cloud_dev_token ): - corpus, queries, relevances, _ = corpus_query_relevances_copy + corpus, _, _, _ = corpus_query_relevances_copy db = VectorStore( path=corpus, @@ -749,7 +749,7 @@ def test_deepmemory_train_with_embedding_function_specified_in_constructor_shoul embedding_function=embedding_fn, ) - job_id = db.deep_memory.train( + _ = db.deep_memory.train( queries=queries, relevance=relevances, ) @@ -776,13 +776,14 @@ def test_deepmemory_evaluate_with_embedding_function_specified_in_constructor_sh runtime={"tensor_db": True}, token=hub_cloud_dev_token, embedding_function=embedding_fn, + read_only=True, ) queries = queries_vs.dataset[:10].text.data()["value"] relevance = queries_vs.dataset[:10].metadata.data()["value"] relevance = [rel["relevance"] for rel in relevance] - recall = db.deep_memory.evaluate( + _ = db.deep_memory.evaluate( queries=queries, relevance=relevance, ) diff --git a/deeplake/requirements/common.txt b/deeplake/requirements/common.txt index ceb4fd23b1..0261388e22 100644 --- a/deeplake/requirements/common.txt +++ b/deeplake/requirements/common.txt @@ -11,7 +11,7 @@ pathos humbug>=0.3.1 tqdm lz4 -av>=8.1.0,<=12.3.0; python_version >= '3.7' or sys_platform != 'win32' +av>=8.1.0; python_version >= '3.7' or sys_platform != 'win32' pydicom IPython flask diff --git a/deeplake/requirements/plugins.txt b/deeplake/requirements/plugins.txt index ddf7cb5e51..68757a698a 100644 --- a/deeplake/requirements/plugins.txt +++ b/deeplake/requirements/plugins.txt @@ -10,4 +10,4 @@ mmdet==2.28.1; platform_system == "Linux" and python_version >= "3.7" mmsegmentation==0.30.0; platform_system == "Linux" and python_version >= "3.7" mmengine pandas -av==12.3.0 \ No newline at end of file +av \ No newline at end of file