-
Notifications
You must be signed in to change notification settings - Fork 13
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Revert "fix: get model info concurrently" (#6)
This reverts commit 5abbce1.
- Loading branch information
Showing
4 changed files
with
164 additions
and
159 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,4 +2,3 @@ bentoml | |
fastapi | ||
comfy-cli | ||
googlesearch-python | ||
anyio |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,63 +1,122 @@ | ||
from __future__ import annotations | ||
|
||
import contextlib | ||
import hashlib | ||
import asyncio | ||
import json | ||
import os | ||
import subprocess | ||
import sys | ||
from concurrent.futures import ThreadPoolExecutor | ||
from datetime import datetime | ||
from typing import TypedDict | ||
|
||
import anyio | ||
from functools import partial | ||
from typing import Dict, List | ||
|
||
from .const import SHA_CACHE_FILE | ||
|
||
# Small hashing script executed in a fresh interpreter: argv[1] is the file
# path, argv[2] the read chunk size; it prints the hex digest to stdout.
CALC_CMD = """
import hashlib
import sys
filepath = sys.argv[1]
chunk_size = int(sys.argv[2])
sha256 = hashlib.sha256()
with open(filepath, "rb") as f:
    for chunk in iter(lambda: f.read(chunk_size), b""):
        sha256.update(chunk)
print(sha256.hexdigest())
"""


def calculate_sha256_worker(filepath: str, chunk_size: int = 4 * 1024 * 1024) -> str:
    """Calculate SHA-256 of *filepath* in a separate Python process.

    Hashing happens in a child interpreter running ``CALC_CMD`` so the heavy
    work does not block (or contend on the GIL of) the calling process.

    Args:
        filepath: Path of the file to hash.
        chunk_size: Bytes read per iteration in the child (default 4 MiB).

    Returns:
        The hex-encoded SHA-256 digest.

    Raises:
        RuntimeError: If the subprocess exits non-zero (e.g. unreadable file).
    """
    result = subprocess.run(
        [sys.executable, "-c", CALC_CMD, filepath, str(chunk_size)],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )
    # The original used `assert result.returncode == 0`, which is stripped
    # under `python -O` and would silently return an empty/garbage digest.
    if result.returncode != 0:
        raise RuntimeError(
            f"sha256 subprocess failed for {filepath!r}: {result.stderr}"
        )
    return result.stdout.strip()
|
||
|
||
def get_sha256(filepath: str) -> str:
    """Return the SHA-256 hex digest for a single file via the batch API."""
    hashes = batch_get_sha256([filepath])
    return hashes[filepath]
|
||
|
||
def async_get_sha256(filepath: str) -> str:
    """Drive the async batch API for a single file and return its digest.

    NOTE(review): despite the name this is a regular blocking function —
    it spins up its own event loop with ``asyncio.run``.
    """
    batch = asyncio.run(async_batch_get_sha256([filepath]))
    return batch[filepath]
|
||
|
||
def batch_get_sha256(filepaths: List[str], cache_only: bool = False) -> Dict[str, str]:
    """Blocking wrapper: run ``async_batch_get_sha256`` in a fresh event loop."""
    coro = async_batch_get_sha256(filepaths, cache_only=cache_only)
    return asyncio.run(coro)
|
||
|
||
async def async_batch_get_sha256(
    filepaths: List[str],
    cache_only: bool = False,
) -> Dict[str, str]:
    """Compute SHA-256 digests for *filepaths*, consulting a JSON disk cache.

    A cached digest is reused when the file's size and st_ctime still match
    its cache entry; otherwise the digest is recomputed in a worker
    subprocess (via a thread pool) and the cache file is updated.

    Args:
        filepaths: Files to hash.
        cache_only: When True, never compute; cache misses map to "".

    Returns:
        Mapping of filepath -> hex digest. Non-existent paths map to None
        (kept as-is for backward compatibility, despite the declared
        ``Dict[str, str]`` return type).
    """
    # Load cache — best effort: a missing or corrupt file means empty cache.
    cache = {}
    if SHA_CACHE_FILE.exists():
        try:
            with SHA_CACHE_FILE.open("r") as f:
                cache = json.load(f)
        except (json.JSONDecodeError, IOError):
            pass

    # One worker thread per CPU; each thread mostly waits on its subprocess.
    max_workers = max(1, (os.cpu_count() or 1))

    # NOTE: the original wrapped this section in `async with asyncio.Lock():`
    # on a freshly created lock — that can never be contended and provided no
    # mutual exclusion, so it has been removed (no behavior change).
    results: Dict[str, str] = {}
    new_cache: Dict[str, dict] = {}
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        # get_event_loop() is deprecated inside a coroutine; use the
        # running loop explicitly.
        loop = asyncio.get_running_loop()

        for filepath in filepaths:
            if not os.path.exists(filepath):
                results[filepath] = None  # kept as None for compatibility
                continue

            # File metadata used as the cache-invalidation key.
            stat = os.stat(filepath)
            current_size = stat.st_size
            current_time = stat.st_ctime

            # Cache hit requires both size and birthtime to match exactly.
            cache_entry = cache.get(filepath)
            if cache_entry:
                if (
                    cache_entry["size"] == current_size
                    and cache_entry["birthtime"] == current_time
                ):
                    results[filepath] = cache_entry["sha256"]
                    continue

            if cache_only:
                results[filepath] = ""
                continue

            # Recompute in a subprocess via the thread pool.
            calc_func = partial(calculate_sha256_worker, filepath)
            sha256 = await loop.run_in_executor(pool, calc_func)

            new_cache[filepath] = {
                "sha256": sha256,
                "size": current_size,
                "birthtime": current_time,
                "last_verified": datetime.now().isoformat(),
            }
            results[filepath] = sha256

    # Persist new entries. Re-read the file first so entries written by
    # concurrent callers are merged rather than clobbered. The original
    # opened with "r" unconditionally, so a not-yet-existing cache file
    # raised FileNotFoundError and new entries were silently never saved;
    # a corrupt file (JSONDecodeError) also escaped the except clause.
    if new_cache:
        try:
            with SHA_CACHE_FILE.open("r") as f:
                merged = json.load(f)
        except (OSError, json.JSONDecodeError):
            merged = {}
        merged.update(new_cache)
        try:
            with SHA_CACHE_FILE.open("w") as f:
                json.dump(merged, f, indent=2)
        except OSError:
            pass

    # The original body fell off the end (returning None); the intended
    # `return results` was present only in the diff's other hunk.
    return results
|
||
class ModelCache(TypedDict):
    """Schema of a single entry in the on-disk SHA cache file."""

    # Hex-encoded SHA-256 digest of the file.
    sha256: str
    # File size in bytes when hashed (cache-invalidation key).
    size: int
    # File st_ctime when hashed (cache-invalidation key).
    birthtime: float
    # ISO-8601 timestamp of the last time the digest was computed/verified.
    last_verified: str
|
||
|
||
class ModelHashes:
    """In-memory SHA-256 cache backed by SHA_CACHE_FILE, using anyio I/O."""

    def __init__(self) -> None:
        # filepath -> cached digest metadata (see ModelCache).
        self._data: dict[str, ModelCache] = {}

    async def load(self) -> None:
        """Populate the in-memory cache from disk; no-op if the file is absent."""
        cache_path = anyio.Path(SHA_CACHE_FILE)
        if not await cache_path.exists():
            return
        async with await cache_path.open("r") as fh:
            raw = await fh.read()
        self._data = json.loads(raw)

    async def save(self) -> None:
        """Best-effort write of the cache to disk; OS errors are swallowed."""
        payload = json.dumps(self._data, indent=2)
        with contextlib.suppress(OSError):
            async with await anyio.open_file(SHA_CACHE_FILE, "w") as fh:
                await fh.write(payload)

    async def get(self, filepath: str, cache_only: bool = False) -> str:
        """Return the file's digest, recomputing on a size/ctime mismatch.

        Returns "" when *cache_only* is True and the cache entry is stale
        or missing.
        """
        stat = await anyio.Path(filepath).stat()
        entry = self._data.get(filepath)
        # Cache hit requires both size and birthtime to match exactly.
        fresh = (
            entry is not None
            and entry["size"] == stat.st_size
            and entry["birthtime"] == stat.st_ctime
        )
        if fresh:
            return entry["sha256"]
        if cache_only:
            return ""
        digest = await self.calculate_sha256(filepath)
        self._data[filepath] = {
            "sha256": digest,
            "size": stat.st_size,
            "birthtime": stat.st_ctime,
            "last_verified": datetime.now().isoformat(),
        }
        return digest

    async def calculate_sha256(self, filepath: str, chunk_size: int = 8192) -> str:
        """Stream the file asynchronously and return its SHA-256 hex digest."""
        hasher = hashlib.sha256()
        async with await anyio.open_file(filepath, "rb") as fh:
            while block := await fh.read(chunk_size):
                hasher.update(block)
        return hasher.hexdigest()
return results |
Oops, something went wrong.