-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Update zipped_files to return name as well as contents; update json_records and csv_records to take new structure; bump to 0.2 (#9)
* Add more logging; add py.typed file to mark library as typed
* Change zipped_files to return OpenedFileRef instead of just open file, update to version 0.2
* Update json_records and csv_records to either take IO or OpenedFileRef
- Loading branch information
Showing
10 changed files
with
130 additions
and
26 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,7 +5,7 @@ build-backend = "poetry.core.masonry.api" | |
|
||
[tool.poetry] | ||
name = "pipedata" | ||
version = "0.1.1" | ||
version = "0.2" | ||
description = "Framework for building pipelines for data processing" | ||
authors = ["Simon Wicks <[email protected]>"] | ||
readme = "README.md" | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
__version__ = "0.1.1" | ||
__version__ = "0.2" | ||
|
||
__all__ = [ | ||
"__version__", | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,16 +1,32 @@ | ||
import logging | ||
import zipfile | ||
from dataclasses import dataclass | ||
from typing import IO, Iterator | ||
|
||
import fsspec # type: ignore | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
def zipped_files(file_refs: Iterator[str]) -> Iterator[IO[bytes]]: | ||
@dataclass | ||
class OpenedFileRef: | ||
name: str | ||
contents: IO[bytes] | ||
|
||
|
||
def zipped_files(file_refs: Iterator[str]) -> Iterator[OpenedFileRef]: | ||
logger.info("Initializing zipped files reader") | ||
for file_ref in file_refs: | ||
logger.info(f"Opening zip file at {file_ref}") | ||
with fsspec.open(file_ref, "rb") as file: | ||
with zipfile.ZipFile(file) as zip_file: | ||
for name in zip_file.namelist(): | ||
infos = zip_file.infolist() | ||
logger.info(f"Found {len(infos)} files in zip file") | ||
for i, info in enumerate(infos): | ||
name = info.filename | ||
logger.info(f"Reading file {i} ({name}) from zip file") | ||
with zip_file.open(name) as inner_file: | ||
yield inner_file | ||
yield OpenedFileRef( | ||
name=name, | ||
contents=inner_file, | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters