Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

trace_repo() gives a list index out of error message after storing stuff in cache #203

Closed
riya-avronna opened this issue Aug 28, 2024 · 1 comment

Comments

@riya-avronna
Copy link

Description
What happened?
There seems to be a bug in data_extraction.py which crops up as an IndexError

{
"name": "RayTaskError(IndexError)",
"message": "ray::_TracedRepoHelper.parse_traced_file() (pid=37672, ip=10.99.75.46, actor_id=905f0f2588be42075534030b01000000, repr=<lean_dojo.data_extraction.traced_data._TracedRepoHelper object at 0x7ec3aa19d7f0>)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/iwnbaw/miniforge3/envs/lean-sat-jupyter/lib/python3.12/site-packages/lean_dojo/data_extraction/traced_data.py", line 990, in parse_traced_file
return TracedFile.from_traced_file(self.root_dir, path, self.repo)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/iwnbaw/miniforge3/envs/lean-sat-jupyter/lib/python3.12/site-packages/lean_dojo/data_extraction/traced_data.py", line 536, in from_traced_file
return cls._from_lean4_traced_file(root_dir, json_path, repo)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/iwnbaw/miniforge3/envs/lean-sat-jupyter/lib/python3.12/site-packages/lean_dojo/data_extraction/traced_data.py", line 556, in _from_lean4_traced_file
ast = FileNode.from_data(data, lean_file)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/iwnbaw/miniforge3/envs/lean-sat-jupyter/lib/python3.12/site-packages/lean_dojo/data_extraction/ast.py", line 257, in from_data
return cls(lean_file, lean_file.start_pos, lean_file.end_pos, children)
^^^^^^^^^^^^^^^^^
File "/home/iwnbaw/miniforge3/envs/lean-sat-jupyter/lib/python3.12/site-packages/lean_dojo/data_extraction/lean.py", line 338, in end_pos
column_nb = 1 + len(self.code[-1])
~~~~~~~~~^^^^
IndexError: list index out of range",
"stack": "---------------------------------------------------------------------------
RayTaskError(IndexError) Traceback (most recent call last)
Cell In[4], line 1
----> 1 traced_repo = trace(repo)
2 traced_repo.traced_files_graph

File ~/miniforge3/envs/lean-sat-jupyter/lib/python3.12/site-packages/lean_dojo/data_extraction/trace.py:248, in trace(repo, dst_dir, build_deps)
243 dst_dir = Path(dst_dir)
244 assert (
245 not dst_dir.exists()
246 ), f"The destination directory {dst_dir} already exists."
--> 248 cached_path = get_traced_repo_path(repo, build_deps)
249 logger.info(f"Loading the traced repo from {cached_path}")
250 traced_repo = TracedRepo.load_from_disk(cached_path, build_deps)

File ~/miniforge3/envs/lean-sat-jupyter/lib/python3.12/site-packages/lean_dojo/data_extraction/trace.py:216, in get_traced_repo_path(repo, build_deps)
214 _trace(repo, build_deps)
215 src_dir = tmp_dir / repo.name
--> 216 traced_repo = TracedRepo.from_traced_files(src_dir, build_deps)
217 traced_repo.save_to_disk()
218 path = cache.store(src_dir, rel_cache_dir)

File ~/miniforge3/envs/lean-sat-jupyter/lib/python3.12/site-packages/lean_dojo/data_extraction/traced_data.py:1124, in TracedRepo.from_traced_files(cls, root_dir, build_deps)
1122 else:
1123 with ray_actor_pool(_TracedRepoHelper, root_dir, repo) as pool:
-> 1124 traced_files = list(
1125 tqdm(
1126 pool.map_unordered(
1127 lambda a, p: a.parse_traced_file.remote(p), json_paths
1128 ),
1129 total=len(json_paths),
1130 )
1131 )
1133 dependencies = repo.get_dependencies(root_dir)
1134 if build_deps:

File ~/.local/lib/python3.12/site-packages/tqdm/std.py:1181, in tqdm.iter(self)
1178 time = self._time
1180 try:
-> 1181 for obj in iterable:
1182 yield obj
1183 # Update and possibly print the progressbar.
1184 # Note: does not call self.update(1) for speed optimisation.

File ~/miniforge3/envs/lean-sat-jupyter/lib/python3.12/site-packages/ray/util/actor_pool.py:170, in ActorPool.map_unordered..get_generator()
168 def get_generator():
169 while self.has_next():
--> 170 yield self.get_next_unordered()

File ~/miniforge3/envs/lean-sat-jupyter/lib/python3.12/site-packages/ray/util/actor_pool.py:370, in ActorPool.get_next_unordered(self, timeout, ignore_if_timedout)
366 if raise_timeout_after_ignore:
367 raise TimeoutError(
368 timeout_msg + ". The task {} has been ignored.".format(future)
369 )
--> 370 return ray.get(future)

File ~/miniforge3/envs/lean-sat-jupyter/lib/python3.12/site-packages/ray/_private/auto_init_hook.py:21, in wrap_auto_init..auto_init_wrapper(*args, **kwargs)
18 @wraps(fn)
19 def auto_init_wrapper(*args, **kwargs):
20 auto_init_ray()
---> 21 return fn(*args, **kwargs)

File ~/miniforge3/envs/lean-sat-jupyter/lib/python3.12/site-packages/ray/_private/client_mode_hook.py:103, in client_mode_hook..wrapper(*args, **kwargs)
101 if func.name != "init" or is_client_mode_enabled_by_default:
102 return getattr(ray, func.name)(*args, **kwargs)
--> 103 return func(*args, **kwargs)

File ~/miniforge3/envs/lean-sat-jupyter/lib/python3.12/site-packages/ray/_private/worker.py:2661, in get(object_refs, timeout)
2655 raise ValueError(
2656 f"Invalid type of object refs, {type(object_refs)}, is given. "
2657 "'object_refs' must either be an ObjectRef or a list of ObjectRefs. "
2658 )
2660 # TODO(ujvl): Consider how to allow user to retrieve the ready objects.
-> 2661 values, debugger_breakpoint = worker.get_objects(object_refs, timeout=timeout)
2662 for i, value in enumerate(values):
2663 if isinstance(value, RayError):

File ~/miniforge3/envs/lean-sat-jupyter/lib/python3.12/site-packages/ray/_private/worker.py:871, in Worker.get_objects(self, object_refs, timeout, return_exceptions)
869 global_worker.core_worker.dump_object_store_memory_usage()
870 if isinstance(value, RayTaskError):
--> 871 raise value.as_instanceof_cause()
872 else:
873 raise value

RayTaskError(IndexError): ray::_TracedRepoHelper.parse_traced_file() (pid=37672, ip=10.99.75.46, actor_id=905f0f2588be42075534030b01000000, repr=<lean_dojo.data_extraction.traced_data._TracedRepoHelper object at 0x7ec3aa19d7f0>)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/iwnbaw/miniforge3/envs/lean-sat-jupyter/lib/python3.12/site-packages/lean_dojo/data_extraction/traced_data.py", line 990, in parse_traced_file
return TracedFile.from_traced_file(self.root_dir, path, self.repo)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/iwnbaw/miniforge3/envs/lean-sat-jupyter/lib/python3.12/site-packages/lean_dojo/data_extraction/traced_data.py", line 536, in from_traced_file
return cls._from_lean4_traced_file(root_dir, json_path, repo)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/iwnbaw/miniforge3/envs/lean-sat-jupyter/lib/python3.12/site-packages/lean_dojo/data_extraction/traced_data.py", line 556, in _from_lean4_traced_file
ast = FileNode.from_data(data, lean_file)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/iwnbaw/miniforge3/envs/lean-sat-jupyter/lib/python3.12/site-packages/lean_dojo/data_extraction/ast.py", line 257, in from_data
return cls(lean_file, lean_file.start_pos, lean_file.end_pos, children)
^^^^^^^^^^^^^^^^^
File "/home/iwnbaw/miniforge3/envs/lean-sat-jupyter/lib/python3.12/site-packages/lean_dojo/data_extraction/lean.py", line 338, in end_pos
column_nb = 1 + len(self.code[-1])
~~~~~~~~~^^^^
IndexError: list index out of range"
}

Detailed Steps to Reproduce the Behavior

I am running a jupyter cell with the following code:

from lean_dojo import *
repo = LeanGitRepo("https://github.com/leanprover/leansat", "30e2ed2")
repo.get_config("lean-toolchain")
traced_repo = trace(repo)

Logs in Debug Mode
Set the environment variable VERBOSE=1 and paste the logs here.

Platform Information

  • OS: [e.g. macOS]
  • Ubuntu 24.04
  • Browser [e.g. chrome, safari]
  • LeanDojo Version [e.g. 1.1.2]
  • 2.1.2
    Lake version 5.0.0-c375e19 (Lean version 4.10.0)
    elan 3.1.1 (71ddc6633 2024-02-22)
@yangky11
Copy link
Member

yangky11 commented Sep 6, 2024

Just fixed it in the latest main branch (can be installed by pip install .). Could you try and let me know if the issue persists?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants