diff --git a/cmoncrawl/aggregator/index_query.py b/cmoncrawl/aggregator/index_query.py index 009d76e7..afc53b1d 100644 --- a/cmoncrawl/aggregator/index_query.py +++ b/cmoncrawl/aggregator/index_query.py @@ -24,7 +24,7 @@ MatchType, ) -from aiohttp import ClientError, ClientSession, ContentTypeError +from aiohttp import ClientError, ClientSession, ContentTypeError, ServerConnectionError import asyncio import random @@ -32,6 +32,33 @@ class IndexAggregator(AsyncIterable[DomainRecord]): + """ + This class is responsible for aggregating the index files from commoncrawl. + It is an async context manager which can then be used as an async iterator + which yields DomainRecord objects, found in the index files of commoncrawl. + + It uses the commoncrawl index server to find the index files. + + + Args: + domains (List[str]): A list of domains to search for. + cc_indexes_server (str, optional): The commoncrawl index server to use. Defaults to "http://index.commoncrawl.org/collinfo.json". + match_type (MatchType, optional): Match type for cdx-api. Defaults to None. + cc_servers (List[str], optional): A list of commoncrawl servers to use. If [], then indexes will be retrieved from the cc_indexes_server. Defaults to []. + since (datetime, optional): The start date for the search. Defaults to datetime.min. + to (datetime, optional): The end date for the search. Defaults to datetime.max. + limit (int, optional): The maximum number of results to return. Defaults to None. + max_retry (int, optional): The maximum number of retries for a single request. Defaults to 5. + prefetch_size (int, optional): The number of indexes to fetch concurrently. Defaults to 3. + sleep_step (int, optional): Sleep increase time between retries. Defaults to 20. 
+ + Examples: + >>> async with IndexAggregator(["example.com"]) as aggregator: + >>> async for domain_record in aggregator: + >>> print(domain_record) + + """ + def __init__( self, domains: List[str], @@ -142,21 +169,20 @@ def should_retry(retry: int, reason: str, status: int, **args: Any): if not should_retry(retry, reason, status, **args): break else: - try: - content = await response.json( - content_type=content_type, loads=Decoder().decode - ) - except ContentTypeError as e: - all_purpose_logger.error(str(e), exc_info=True) - all_purpose_logger.error(e.message, exc_info=True) - all_purpose_logger.error(response.content) - break + content = await response.json( + content_type=content_type, loads=Decoder().decode + ) all_purpose_logger.info( f"Successfully retrieved page of {domain} from {cdx_server} add_info: {args}" ) break - except (ClientError, TimeoutError) as e: + except ( + ClientError, + TimeoutError, + ServerConnectionError, + ContentTypeError, + ) as e: reason = f"{type(e)} {str(e)}" if not should_retry(retry, reason, 500, **args): break @@ -251,6 +277,9 @@ async def get_captured_responses( @staticmethod async def get_all_CC_indexes(client: ClientSession, cdx_server: str) -> List[str]: + """ + Get all CC index servers from a given CDX server + """ for _ in range(3): async with client.get(cdx_server) as response: r_json = await response.json(content_type="application/json") @@ -308,6 +337,9 @@ def init_crawls_queue( ) async def __prefetch_next_crawl(self) -> int: + """ + Prefetch the next index server + """ while len(self.__crawls_remaining) > 0: next_crawl = self.__crawls_remaining.popleft() @@ -333,6 +365,9 @@ async def __prefetch_next_crawl(self) -> int: return 0 async def __await_next_prefetch(self): + """ + Gets the next index retry + """ # Wait for the next prefetch to finish # Don't prefetch if limit is set to avoid overfetching while len(self.__crawls_remaining) > 0 and ( diff --git a/cmoncrawl/common/types.py b/cmoncrawl/common/types.py index 
65772fa0..eb0a994a 100644 --- a/cmoncrawl/common/types.py +++ b/cmoncrawl/common/types.py @@ -12,6 +12,10 @@ @dataclass_json @dataclass class DomainRecord: + """ + Domain record. + """ + filename: str url: str | None offset: int @@ -42,6 +46,10 @@ def __post_init__(self): @dataclass class RetrieveResponse: + """ + Response from retrieve. + """ + status: int content: Any reason: None | str @@ -49,6 +57,10 @@ class RetrieveResponse: @dataclass class DomainCrawl: + """ + Domain crawl. + """ + domain: str = "" cdx_server: str = "" page: int = 0 diff --git a/cmoncrawl/integrations/commands.py b/cmoncrawl/integrations/commands.py index d83e8af3..34ac6d6f 100644 --- a/cmoncrawl/integrations/commands.py +++ b/cmoncrawl/integrations/commands.py @@ -10,7 +10,9 @@ def add_args(parser: argparse.ArgumentParser): - parser.add_argument("--debug", action="store_true", default=False) + parser.add_argument( + "--debug", action="store_true", default=False, help="Debug mode" + ) return parser @@ -23,7 +25,9 @@ def add_subparsers(parser: Any): def get_args(): parser = argparse.ArgumentParser() - subparser = parser.add_subparsers(dest="command", required=True) + subparser = parser.add_subparsers( + dest="command", required=True, help="Command to run" + ) add_subparsers(subparser) return parser diff --git a/cmoncrawl/integrations/download.py b/cmoncrawl/integrations/download.py index f1e2dfe2..faeb3e1e 100644 --- a/cmoncrawl/integrations/download.py +++ b/cmoncrawl/integrations/download.py @@ -8,7 +8,7 @@ from cmoncrawl.processor.pipeline.pipeline import ProcessorPipeline from cmoncrawl.processor.pipeline.streamer import StreamerFileHTML from cmoncrawl.processor.pipeline.extractor import HTMLExtractor, DomainRecordExtractor -from cmoncrawl.middleware.synchronized import index_and_extract +from cmoncrawl.middleware.synchronized import query_and_extract import argparse import asyncio from cmoncrawl.processor.pipeline.streamer import ( @@ -24,35 +24,83 @@ class DownloadOutputFormat(Enum): 
def add_mode_args(subparser: Any): - record_parser = subparser.add_parser(DownloadOutputFormat.RECORD.value) - record_parser.add_argument("--max_crawls_per_file", type=int, default=500_000) - subparser.add_parser(DownloadOutputFormat.HTML.value) + record_parser = subparser.add_parser( + DownloadOutputFormat.RECORD.value, + help="Download record files from Common Crawl", + ) + record_parser.add_argument( + "--max_crawls_per_file", + type=int, + default=500_000, + help="Max number of domain records per file output", + ) + subparser.add_parser( + DownloadOutputFormat.HTML.value, help="Download HTML files from Common Crawl" + ) return subparser def add_args(subparser: Any): - parser = subparser.add_parser("download") - parser.add_argument("url") - parser.add_argument("output", type=Path) - mode_subparser = parser.add_subparsers(dest="mode", required=True) + parser = subparser.add_parser("download", help="Download data from Common Crawl") + parser.add_argument("url", type=str, help="URL to query") + parser.add_argument("output", type=Path, help="Path to output directory") + mode_subparser = parser.add_subparsers( + dest="mode", required=True, help="Download mode" + ) mode_subparser = add_mode_args(mode_subparser) - parser.add_argument("--limit", type=int, default=5) parser.add_argument( - "--since", type=datetime.fromisoformat, default=str(datetime.min) + "--limit", type=int, default=5, help="Max number of urls to download" + ) + parser.add_argument( + "--since", + type=datetime.fromisoformat, + default=str(datetime.min), + help="Start date in ISO format e.g. 2020-01-01", + ) + parser.add_argument( + "--to", + type=datetime.fromisoformat, + default=str(datetime.max), + help="End date in ISO format e.g. 2020-01-01", + ) + parser.add_argument( + "--cc_server", + nargs="+", + type=str, + default=None, + help="Common Crawl indexes to query, must provide whole url e.g. 
https://index.commoncrawl.org/CC-MAIN-2023-14-index", + ) + parser.add_argument( + "--max_retry", + type=int, + default=30, + help="Max number of retries for a request, when the requests are failing increase this number", + ) + parser.add_argument( + "--sleep_step", + type=int, + default=4, + help="Number of increased second to add to sleep time between each failed download attempt, increase this number if the server tell you to slow down", ) - parser.add_argument("--to", type=datetime.fromisoformat, default=str(datetime.max)) - parser.add_argument("--cc_server", nargs="+", type=str, default=None) - parser.add_argument("--max_retry", type=int, default=30) - parser.add_argument("--sleep_step", type=int, default=4) # Add option to output to either json or html parser.add_argument( "--match_type", type=MatchType, choices=list(MatchType.__members__.values()), - default=MatchType.PREFIX, + help="Match type for the url, see cdx-api for more info", + ) + parser.add_argument( + "--max_directory_size", + type=int, + default=1000, + help="Max number of files per directory", + ) + parser.add_argument( + "--filter_non_200", + action="store_true", + default=True, + help="Filter out non 200 status code", ) - parser.add_argument("--max_directory_size", type=int, default=1000) - parser.add_argument("--filter_non_200", action="store_true", default=True) parser.set_defaults(func=run_download) @@ -123,7 +171,7 @@ async def url_download( max_retry=max_retry, sleep_step=sleep_step, ) - await index_and_extract(index_agg, pipeline) + await query_and_extract(index_agg, pipeline) def run_download(args: argparse.Namespace): diff --git a/cmoncrawl/integrations/extract.py b/cmoncrawl/integrations/extract.py index efadcce5..25bad429 100644 --- a/cmoncrawl/integrations/extract.py +++ b/cmoncrawl/integrations/extract.py @@ -27,30 +27,72 @@ class ExtractMode(Enum): def add_mode_args(subparser: Any): - record_parser = subparser.add_parser(ExtractMode.RECORD.value) - 
record_parser.add_argument("--max_retry", type=int, default=30) - record_parser.add_argument("--sleep_step", type=int, default=4) + record_parser = subparser.add_parser( + ExtractMode.RECORD.value, help="Extract data from jsonl record files" + ) + record_parser.add_argument( + "--max_retry", type=int, default=30, help="Max number of warc download attempts" + ) + record_parser.add_argument( + "--sleep_step", + type=int, + default=4, + help="Number of increased second to add to sleep time between each failed download attempt", + ) - html_parser = subparser.add_parser(ExtractMode.HTML.value) + html_parser = subparser.add_parser( + ExtractMode.HTML.value, help="Extract data from HTML files" + ) html_parser.add_argument( - "--date", type=datetime.fromisoformat, default=str(datetime.now()) + "--date", + type=datetime.fromisoformat, + default=str(datetime.now()), + help="Date of extraction of HTML files in iso format e.g. 2021-01-01, default is today", + ) + html_parser.add_argument( + "--url", + type=str, + default="", + help="URL from which the HTML files were downloaded, by default it will try to infer from file content", ) - html_parser.add_argument("--url", type=str, default="") return subparser def add_args(subparser: Any): - parser = subparser.add_parser("extract") + parser = subparser.add_parser( + "extract", help="Extract data from records/html files" + ) parser.add_argument( "config_path", type=Path, + help="Path to config file containing extraction rules", + ) + parser.add_argument("output_path", type=Path, help="Path to output directory") + parser.add_argument( + "files", nargs="+", type=Path, help="Files to extract data from" + ) + parser.add_argument( + "--max_crawls_per_file", + type=int, + default=500_000, + help="Max number of extractions per file output", + ) + parser.add_argument( + "--max_directory_size", + type=int, + default=1000, + help="Max number of extraction files per directory", + ) + parser.add_argument( + "--n_proc", + type=int, + default=1, 
+ help="Number of processes to use for extraction. The paralelization is on file level, thus for single file it's useless to use more than one process.", + ) + + mode_subparser = parser.add_subparsers( + dest="mode", required=True, help="Extraction mode" ) - parser.add_argument("output_path", type=Path) - parser.add_argument("files", nargs="+", type=Path) - parser.add_argument("--max_crawls_per_file", type=int, default=500_000) - parser.add_argument("--max_directory_size", type=int, default=1000) - parser.add_argument("--n_proc", type=int, default=1) - mode_subparser = parser.add_subparsers(dest="mode", required=True) mode_subparser = add_mode_args(mode_subparser) parser.set_defaults(func=run_extract) @@ -91,7 +133,7 @@ def get_domain_records_html( url: str | None, date: datetime | None ) -> List[Tuple[DomainRecord, Dict[str, Any]]]: # Just return dummy as correct crawl will be loaded from dummy downloader - return [DomainRecord("", url=url, offset=0, length=0, timestamp=date), {}] + return [(DomainRecord("", url=url, offset=0, length=0, timestamp=date), {})] def load_config(config_path: Path) -> ExtractConfig: diff --git a/cmoncrawl/middleware/stompware.py b/cmoncrawl/middleware/stompware.py index 59140885..0f054fe2 100644 --- a/cmoncrawl/middleware/stompware.py +++ b/cmoncrawl/middleware/stompware.py @@ -35,6 +35,23 @@ class ListnerStats: class ArtemisAggregator: + """ + Aggregator that listens queries the common crawl index and sends the results to a queue + using the stomp protocol. It the creates a queue + with name `queue.{url}` and sends the results to it. + It also creates a topic with name `topic.poisson_pill.{url}` + and sends a message with type `poisson_pill` to it when it finishes. + + Args: + queue_host (str): The host of the queue + queue_port (int): The port of the queue + url (str): The url of the queue + index_agg (IndexAggregator): The index aggregator + heartbeat (int, optional): The heartbeat of the connection. Defaults to 10000. 
+ + + """ + def __init__( self, queue_host: str, @@ -59,6 +76,11 @@ def _init_connection(self): return conn async def aggregate(self, filter_duplicates: bool = True): + """ + Aggregates the results of the index aggregator and sends them to the queue. + If `filter_duplicates` is True, it will use the `DUPL_ID_HEADER` header, + which Artemis uses to filter duplicates. + """ while True: try: conn = self._init_connection() @@ -104,6 +126,24 @@ async def aggregate(self, filter_duplicates: bool = True): class ArtemisProcessor: + """ + Processor that listens to a queues and processes the messages using a pipeline. + When it receives a message with type enough `poisson_pill` messages, it will + stop listening if it doesn't receive any messages for `timeout` minutes. + + + Args: + queue_host (str): The host of the queue + queue_port (int): The port of the queue + pills_to_die (int, optional): The number of `poisson_pill` messages to receive before dying. Defaults to None. + queue_size (int): The size of the queue + timeout (int): The timeout in minutes + addresses (List[str]): The addresses of the queues + pipeline (ProcessorPipeline): The pipeline to use for processing + heartbeat (int, optional): The heartbeat of the connection. Defaults to 10000. + + """ + def __init__( self, queue_host: str, diff --git a/cmoncrawl/middleware/synchronized.py b/cmoncrawl/middleware/synchronized.py index bb300ea1..74db062f 100644 --- a/cmoncrawl/middleware/synchronized.py +++ b/cmoncrawl/middleware/synchronized.py @@ -9,11 +9,22 @@ import asyncio -async def index_and_extract( +async def query_and_extract( index_agg: IndexAggregator, pipeline: ProcessorPipeline, filter_non_unique_url: bool = False, ): + """ + Query the index and extracts the results using the pipeline + + Args: + index_agg (IndexAggregator): Index aggregator + pipeline (ProcessorPipeline): Pipeline to use + filter_non_unique_url (bool, optional): Filter non unique urls. 
+ if True, only first successful extraction of a url will be processed, + the rest will be skipped. Defaults to False. + + """ processed_urls: Set[str] = set() total_extracted: int = 0 @@ -28,6 +39,7 @@ async def index_and_extract( try: await pipeline.process_domain_record(domain_record, {}) total_extracted += 1 + processed_urls.add(unify_url_id(url)) except KeyboardInterrupt as e: break @@ -35,13 +47,12 @@ async def index_and_extract( all_purpose_logger.error( f"Failed to process {domain_record.url} with {e}" ) - continue - processed_urls.add(unify_url_id(url)) finally: if hasattr(pipeline.downloader, "__aexit__"): await pipeline.downloader.__aexit__(None, None, None) all_purpose_logger.info(f"Extracted {total_extracted} urls") + return processed_urls async def _extract_task( @@ -66,8 +77,18 @@ async def extract( records: List[Tuple[DomainRecord, Dict[str, Any]]], pipeline: ProcessorPipeline, concurrent_length: int = 20, - timeout: int = 5, ): + """ + Extracts the records using the pipeline, with at most `concurrent_length` + records being processed at the same time. + + Args: + records (List[Tuple[DomainRecord, Dict[str, Any]]]): List of records to process and additional info + pipeline (ProcessorPipeline): Pipeline to use + concurrent_length (int, optional): Number of concurrent records to process. + Defaults to 20. 
+ + """ domain_records_iterator = iter(tqdm(records)) domains_exausted = False total_extracted: int = 0 @@ -90,9 +111,7 @@ async def extract( ) ) - done, queue = await asyncio.wait( - queue, timeout=timeout, return_when=asyncio.FIRST_COMPLETED - ) + done, queue = await asyncio.wait(queue, return_when=asyncio.FIRST_COMPLETED) for task in done: try: await task diff --git a/cmoncrawl/processor/extraction/filters.py b/cmoncrawl/processor/extraction/filters.py index 5e0e6405..4469c63f 100644 --- a/cmoncrawl/processor/extraction/filters.py +++ b/cmoncrawl/processor/extraction/filters.py @@ -7,6 +7,11 @@ def must_exist_filter(soup: BeautifulSoup, filter_list: List[str]): This function takes in a BeautifulSoup object and a list of CSS selectors. If all selectors are found in the soup, this function returns True. + + Args: + soup (BeautifulSoup): BeautifulSoup object + filter_list (List[str]): List of CSS selectors + """ must_exist = [soup.select_one(css_selector) for css_selector in filter_list] if any(map(lambda x: x is None, must_exist)): @@ -20,6 +25,9 @@ def must_not_exist_filter(soup: BeautifulSoup, filter_list: List[str]): This function takes in a BeautifulSoup object and a list of CSS selectors. If any selector is found in the soup, this function returns False. + Args: + soup (BeautifulSoup): BeautifulSoup object + filter_list (List[str]): List of CSS selectors """ must_not_exist = [soup.select_one(css_selector) for css_selector in filter_list] if any(map(lambda x: x is not None, must_not_exist)): diff --git a/cmoncrawl/processor/extraction/utils.py b/cmoncrawl/processor/extraction/utils.py index 6852308e..b0b4ffc1 100644 --- a/cmoncrawl/processor/extraction/utils.py +++ b/cmoncrawl/processor/extraction/utils.py @@ -18,6 +18,15 @@ def get_tag_transform(tag_desc: str): + """ + Returns a function that takes a bs4 tag and returns the first tag + that matches the tag_desc. 
+ + Args: + tag_desc (str): CSS selector + + """ + def transform(tag: Tag): return tag.select_one(tag_desc) @@ -25,6 +34,15 @@ def transform(tag: Tag): def get_tags_transform(tag_desc: str): + """ + Returns a function that takes a bs4 tag and returns a list of tags + that match the tag_desc. + + Args: + tag_desc (str): CSS selector + + """ + def transform(tag: Tag): return tag.select(tag_desc) @@ -32,6 +50,14 @@ def transform(tag: Tag): def get_attribute_transform(attr_name: str): + """ + Returns a function that takes a bs4 tag and returns the value + of the attribute `attr_name` or None if the attribute doesn't exist. + + Args: + attr_name (str): Name of the attribute to get from the tag + """ + def transform(tag: Tag): return tag.get(attr_name, None) @@ -39,6 +65,17 @@ def transform(tag: Tag): def get_text_transform(tag: Tag, recursive: bool = True): + """ + Returns text from tag. If recursive is True then + all text from all children is returned. + + Args: + tag (Tag): bs4 tag + recursive (bool, optional): If True then all text from all children is returned. Defaults to True. + + + """ + if recursive: return tag.text tag_text = tag.find(text=True, recursive=False) @@ -49,6 +86,14 @@ def get_text_transform(tag: Tag, recursive: bool = True): def get_text_list_transform(sep: str = ""): + """ + Returns a function that takes a list of bs4 tags and returns + a string with all the text from the tags joined with `sep`. + + Args: + sep (str, optional): Separator to use when joining the text. Defaults to "". + """ + def transform(tag: List[Tag]): return sep.join([tag.text for tag in tag]) @@ -58,10 +103,30 @@ def transform(tag: List[Tag]): def all_same_transform( dict: Dict[str, Any], fc: Callable[[Any], Any] | List[Callable[[Any], Any]] ): + """ + Applies `fc` to all values in `dict` and returns a dict with same keys + but with transformed values. + + Args: + dict (Dict[str, Any]): Dict to transform. 
+ fc (Callable[[Any], Any] | List[Callable[[Any], Any]]): Function to apply to all values in dict. + + + """ return {key: fc for key in dict.keys()} def chain_transforms(trans: List[Callable[[Any], Any]]): + """ + Chains transforms together. If any of the transforms returns None + the chain is broken and None is returned. + + Args: + trans (List[Callable[[Any], Any]]): List of transforms to chain together. + + + """ + def inner(initial_value: Any): result = initial_value for fc in trans: @@ -77,6 +142,17 @@ def transform( dict: Dict[str, Any], transforms: Dict[str, Callable[[Any], Any] | List[Callable[[Any], Any]]], ): + """ + Transforms dict using `transforms` dict. `transforms` dict is of format + `{key: [transform1, transform2, ...]}` where transform is a function that takes previous value + + Args: + dict (Dict[str, Any]): Dict to transform. + transforms (Dict[str, Callable[[Any], Any] | List[Callable[[Any], Any]]]): Dict defining + how to transform the dict. Format is "{name: [transform1, transform2, ...]}" where + transform is a function that takes previous value and returns new value. + """ + def transform_fc(key: str, value: Any): key_trans = transforms.get(key, []) if not isinstance(key_trans, list): @@ -97,6 +173,19 @@ def extract_transform( str, Callable[[Any], Any] | List[Callable[[Any], Any]] ], ) -> Dict[str, Any]: + """ + Extracts data from tag using `extract_dict` defining what to extract and how to name it, + and `extract_transform_dict` defining how to transform the extracted data. + + Args: + tag (Tag | None): Tag to extract data from. + extract_dict (Dict[str, str]): Dict defining what to extract and how to name it. format + is `{"name": "css selector"}`. + extract_transform_dict (Dict[str, Callable[[Any], Any] | List[Callable[[Any], Any]]]): Dict + defining how to transform the extracted data. Format is "{name: [transform1, transform2, ...]}" + where transform is a function that takes previous value and returns new value. 
+ """ + if tag is None: return dict() @@ -109,6 +198,13 @@ def extract_transform( def combine_dicts(dicts: List[Dict[str, Any]]): + """ + Combines list of dictionaries into one. If there are multiple values for the same key + then the first one that is not None is chosen. + + Args: + dicts (List[Dict[str, Any]]): List of dicts to combine. + """ # Combines dicts choose the first one that is not None. def recursive_get(key: str, dicts: List[Dict[str, Any]], i: int) -> Any: if i >= len(dicts): @@ -125,6 +221,20 @@ def recursive_get(key: str, dicts: List[Dict[str, Any]], i: int) -> Any: def check_required( required_fields: Dict[str, bool], extractor_name: str, non_empty: bool = False ): + """ + Checks if required fields are present in the extracted dict. + + Args: + required_fields (Dict[str, bool]): Dict of required fields if defining which + fields must be present and which can be None. + + extractor_name (str): Name of the extractor for logging purposes. + + non_empty (bool, optional): If True then empty strings and empty lists are considered + as not present. Defaults to False. 
+ + """ + def inner(extracted_dict: Dict[Any, Any], metadata: PipeMetadata): for key, value in required_fields.items(): if key not in extracted_dict: diff --git a/cmoncrawl/processor/pipeline/downloader.py b/cmoncrawl/processor/pipeline/downloader.py index d11d4f07..3753e65b 100644 --- a/cmoncrawl/processor/pipeline/downloader.py +++ b/cmoncrawl/processor/pipeline/downloader.py @@ -6,7 +6,7 @@ import random import re from types import TracebackType -from aiohttp import ClientError, ClientSession +from aiohttp import ClientError, ClientSession, ContentTypeError, ServerConnectionError from typing import List, Tuple, Type from aiofiles import open as asyncOpen @@ -24,6 +24,10 @@ class IDownloader: + """ + Base class for all downloaders + """ + async def download( self, domain_record: DomainRecord ) -> (List[Tuple[str, PipeMetadata]]): @@ -31,6 +35,18 @@ async def download( class AsyncDownloader(IDownloader): + """ + Downloader which asynchronously downloads the data for the domain_record + + Args: + base_url (str, optional): Base url where to download data from. Defaults to "https://data.commoncrawl.org/". + digest_verification (bool, optional): Whether to verify the digest of the downloaded data. Defaults to True. + max_retry (int, optional): Maximum number of retries. Defaults to 5. + sleep_step (int, optional): Sleep increase time between retries. Defaults to 10. 
+ encoding: Default encoding to be used + + """ + def __init__( self, base_url: str = "https://data.commoncrawl.org/", @@ -87,8 +103,10 @@ def should_retry(retry: int, reason: str, status: int, **args: str): except ( ClientError, TimeoutError, + ServerConnectionError, + ContentTypeError, ) as e: - if not should_retry(retry, f"{str(e)} {type(e)}", 0): + if not should_retry(retry, f"{str(e)} {type(e)}", 500): raise e await asyncio.sleep(random.randint(0, (retry + 1) * self.__sleep_step)) ret: List[Tuple[str, PipeMetadata]] = [] @@ -138,6 +156,11 @@ class DownloaderDummy(IDownloader): Dummy downloader for testing It doesn't download anything but return files passed in the constructor and extracts metadata from the file + + Args: + files (List[Path]): List of files to return + url (str, optional): Url to use for metadata. Defaults to None. + date (datetime, optional): Date to add to metadata. Defaults to None. """ def __init__( diff --git a/cmoncrawl/processor/pipeline/extractor.py b/cmoncrawl/processor/pipeline/extractor.py index c2b94fbd..4405fb33 100644 --- a/cmoncrawl/processor/pipeline/extractor.py +++ b/cmoncrawl/processor/pipeline/extractor.py @@ -8,12 +8,32 @@ class IExtractor(ABC): + """ + Base class for all extractors + """ + @abstractmethod def extract(self, response: str, metadata: PipeMetadata) -> Dict[str, Any] | None: + """ + Extracts the data from the response, if the extractor fails to extract the data it should return None + return None + + Args: + response (str): response from the downloader + metadata (PipeMetadata): Metadata of the response + """ raise NotImplementedError() class BaseExtractor(IExtractor, ABC): + """ + Base class for all soup extractors + + Args: + encoding (str, optional): Default encoding to be used. Defaults to None. 
+ + """ + def __init__(self, encoding: str | None = None): self.encoding = encoding @@ -85,6 +105,9 @@ def preprocess(self, response: str, metadata: PipeMetadata) -> str: class HTMLExtractor(BaseExtractor): """ Dummy Extractor which simply extracts the html + + Args: + filter_non_ok (bool, optional): If True, only 200 status codes will be extracted. Defaults to True. """ def __init__(self, filter_non_ok: bool = True): @@ -116,7 +139,10 @@ def filter_raw(self, response: str, metadata: PipeMetadata): class DomainRecordExtractor(BaseExtractor): """ - Dummy Extractor which simply extracts the html + Dummy Extractor which simply extracts the domain record + + Args: + filter_non_ok (bool, optional): If True, only 200 status codes will be extracted. Defaults to True. """ def __init__(self, filter_non_ok: bool = True): diff --git a/cmoncrawl/processor/pipeline/router.py b/cmoncrawl/processor/pipeline/router.py index 69445317..d8001ed4 100644 --- a/cmoncrawl/processor/pipeline/router.py +++ b/cmoncrawl/processor/pipeline/router.py @@ -29,10 +29,17 @@ class Route: class IRouter(ABC): + """ + Base class for all routers + """ + @abstractmethod def route( self, url: str | None, time: datetime | None, metadata: PipeMetadata ) -> IExtractor: + """ + Routes the url to the correct extractor + """ raise NotImplementedError() @@ -56,6 +63,9 @@ def load_module(self, module_path: Path): return module, module_name def load_module_as_extractor(self, module_path: Path): + """ + Loads a module and returns its extractor + """ module, module_name = self.load_module(module_path) name: str = getattr(module, "NAME", module_name) extractor: IExtractor | None = getattr(module, "extractor", None) @@ -88,6 +98,16 @@ def register_route( since: datetime | None = None, to: datetime | None = None, ): + """ + Registers a route for a given extractor name and regex + + Args: + name (str): The name of the extractor + regex (Union[str, List[str]]): The regex to match against + since (datetime | None, 
optional): The earliest time to route to this extractor. Defaults to None. + to (datetime | None, optional): The latest time to route to this extractor. Defaults to None. + + """ if isinstance(regex, str): regex = [regex] regex_compiled = [re.compile(regex) for regex in regex] @@ -121,6 +141,14 @@ def _as_offset_aware(self, time: datetime) -> datetime: def route( self, url: str | None, time: datetime | None, metadata: PipeMetadata ) -> IExtractor: + """ + Routes the url to the correct extractor based on the url and time + + Args: + url (str | None): The url to route + time (datetime | None): The time to route + metadata (PipeMetadata): The metadata for the current pipeline + """ # check if offset naive datetime if so then convert to utc if url is None: raise ValueError("Url must not be None") diff --git a/docs/build/_templates/module.rst b/docs/build/_templates/module.rst deleted file mode 100644 index b10fefa4..00000000 --- a/docs/build/_templates/module.rst +++ /dev/null @@ -1,66 +0,0 @@ -{{ fullname | escape | underline}} - -.. automodule:: {{ fullname }} - - {% block attributes %} - {% if attributes %} - .. rubric:: Module attributes - - .. autosummary:: - :toctree: - {% for item in attributes %} - {{ item }} - {%- endfor %} - {% endif %} - {% endblock %} - - {% block functions %} - {% if functions %} - .. rubric:: {{ _('Functions') }} - - .. autosummary:: - :toctree: - :nosignatures: - {% for item in functions %} - {{ item }} - {%- endfor %} - {% endif %} - {% endblock %} - - {% block classes %} - {% if classes %} - .. rubric:: {{ _('Classes') }} - - .. autosummary:: - :toctree: - :template: class.rst - :nosignatures: - {% for item in classes %} - {{ item }} - {%- endfor %} - {% endif %} - {% endblock %} - - {% block exceptions %} - {% if exceptions %} - .. rubric:: {{ _('Exceptions') }} - - .. autosummary:: - :toctree: - {% for item in exceptions %} - {{ item }} - {%- endfor %} - {% endif %} - {% endblock %} - -{% block modules %} -{% if modules %} -.. 
autosummary:: - :toctree: - :template: module.rst - :recursive: -{% for item in modules %} - {{ item }} -{%- endfor %} -{% endif %} -{% endblock %} \ No newline at end of file diff --git a/docs/build/doctrees/api.doctree b/docs/build/doctrees/api.doctree index 95fbc4dc..7b41a2df 100644 Binary files a/docs/build/doctrees/api.doctree and b/docs/build/doctrees/api.doctree differ diff --git a/docs/build/doctrees/environment.pickle b/docs/build/doctrees/environment.pickle index 4789c48b..17964d75 100644 Binary files a/docs/build/doctrees/environment.pickle and b/docs/build/doctrees/environment.pickle differ diff --git a/docs/build/doctrees/generated/Aggregator.App.doctree b/docs/build/doctrees/generated/Aggregator.App.doctree deleted file mode 100644 index 8a9d83d9..00000000 Binary files a/docs/build/doctrees/generated/Aggregator.App.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Aggregator.App.index_query.DomainCrawl.__init__.doctree b/docs/build/doctrees/generated/Aggregator.App.index_query.DomainCrawl.__init__.doctree deleted file mode 100644 index c4cf45ee..00000000 Binary files a/docs/build/doctrees/generated/Aggregator.App.index_query.DomainCrawl.__init__.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Aggregator.App.index_query.DomainCrawl.doctree b/docs/build/doctrees/generated/Aggregator.App.index_query.DomainCrawl.doctree deleted file mode 100644 index 4f3fd013..00000000 Binary files a/docs/build/doctrees/generated/Aggregator.App.index_query.DomainCrawl.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Aggregator.App.index_query.DomainRecord.__init__.doctree b/docs/build/doctrees/generated/Aggregator.App.index_query.DomainRecord.__init__.doctree deleted file mode 100644 index 5e1d05d9..00000000 Binary files a/docs/build/doctrees/generated/Aggregator.App.index_query.DomainRecord.__init__.doctree and /dev/null differ diff --git 
a/docs/build/doctrees/generated/Aggregator.App.index_query.DomainRecord.doctree b/docs/build/doctrees/generated/Aggregator.App.index_query.DomainRecord.doctree deleted file mode 100644 index 83928956..00000000 Binary files a/docs/build/doctrees/generated/Aggregator.App.index_query.DomainRecord.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Aggregator.App.index_query.IndexAggregator.__init__.doctree b/docs/build/doctrees/generated/Aggregator.App.index_query.IndexAggregator.__init__.doctree deleted file mode 100644 index f5d79072..00000000 Binary files a/docs/build/doctrees/generated/Aggregator.App.index_query.IndexAggregator.__init__.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Aggregator.App.index_query.IndexAggregator.aclose.doctree b/docs/build/doctrees/generated/Aggregator.App.index_query.IndexAggregator.aclose.doctree deleted file mode 100644 index 147185c3..00000000 Binary files a/docs/build/doctrees/generated/Aggregator.App.index_query.IndexAggregator.aclose.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Aggregator.App.index_query.IndexAggregator.aopen.doctree b/docs/build/doctrees/generated/Aggregator.App.index_query.IndexAggregator.aopen.doctree deleted file mode 100644 index d5284fa1..00000000 Binary files a/docs/build/doctrees/generated/Aggregator.App.index_query.IndexAggregator.aopen.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Aggregator.App.index_query.IndexAggregator.doctree b/docs/build/doctrees/generated/Aggregator.App.index_query.IndexAggregator.doctree deleted file mode 100644 index d5991c3f..00000000 Binary files a/docs/build/doctrees/generated/Aggregator.App.index_query.IndexAggregator.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Aggregator.App.index_query.IndexAggregator.get_all_CC_indexes.doctree b/docs/build/doctrees/generated/Aggregator.App.index_query.IndexAggregator.get_all_CC_indexes.doctree deleted file mode 
100644 index 9e20071a..00000000 Binary files a/docs/build/doctrees/generated/Aggregator.App.index_query.IndexAggregator.get_all_CC_indexes.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Aggregator.App.index_query.IndexAggregator.get_captured_responses.doctree b/docs/build/doctrees/generated/Aggregator.App.index_query.IndexAggregator.get_captured_responses.doctree deleted file mode 100644 index 5348badd..00000000 Binary files a/docs/build/doctrees/generated/Aggregator.App.index_query.IndexAggregator.get_captured_responses.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Aggregator.App.index_query.IndexAggregator.get_number_of_pages.doctree b/docs/build/doctrees/generated/Aggregator.App.index_query.IndexAggregator.get_number_of_pages.doctree deleted file mode 100644 index d36288a1..00000000 Binary files a/docs/build/doctrees/generated/Aggregator.App.index_query.IndexAggregator.get_number_of_pages.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Aggregator.App.index_query.RetrieveResponse.__init__.doctree b/docs/build/doctrees/generated/Aggregator.App.index_query.RetrieveResponse.__init__.doctree deleted file mode 100644 index eea165f4..00000000 Binary files a/docs/build/doctrees/generated/Aggregator.App.index_query.RetrieveResponse.__init__.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Aggregator.App.index_query.RetrieveResponse.doctree b/docs/build/doctrees/generated/Aggregator.App.index_query.RetrieveResponse.doctree deleted file mode 100644 index b87e2e2f..00000000 Binary files a/docs/build/doctrees/generated/Aggregator.App.index_query.RetrieveResponse.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Aggregator.App.index_query.doctree b/docs/build/doctrees/generated/Aggregator.App.index_query.doctree deleted file mode 100644 index ce3dfc6a..00000000 Binary files a/docs/build/doctrees/generated/Aggregator.App.index_query.doctree and /dev/null 
differ diff --git a/docs/build/doctrees/generated/Aggregator.App.ndjson_decoder.Decoder.__init__.doctree b/docs/build/doctrees/generated/Aggregator.App.ndjson_decoder.Decoder.__init__.doctree deleted file mode 100644 index 80135542..00000000 Binary files a/docs/build/doctrees/generated/Aggregator.App.ndjson_decoder.Decoder.__init__.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Aggregator.App.ndjson_decoder.Decoder.decode.doctree b/docs/build/doctrees/generated/Aggregator.App.ndjson_decoder.Decoder.decode.doctree deleted file mode 100644 index a1cff4bd..00000000 Binary files a/docs/build/doctrees/generated/Aggregator.App.ndjson_decoder.Decoder.decode.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Aggregator.App.ndjson_decoder.Decoder.doctree b/docs/build/doctrees/generated/Aggregator.App.ndjson_decoder.Decoder.doctree deleted file mode 100644 index b6370185..00000000 Binary files a/docs/build/doctrees/generated/Aggregator.App.ndjson_decoder.Decoder.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Aggregator.App.ndjson_decoder.Decoder.raw_decode.doctree b/docs/build/doctrees/generated/Aggregator.App.ndjson_decoder.Decoder.raw_decode.doctree deleted file mode 100644 index 2aa49acf..00000000 Binary files a/docs/build/doctrees/generated/Aggregator.App.ndjson_decoder.Decoder.raw_decode.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Aggregator.App.ndjson_decoder.doctree b/docs/build/doctrees/generated/Aggregator.App.ndjson_decoder.doctree deleted file mode 100644 index 42f88e3f..00000000 Binary files a/docs/build/doctrees/generated/Aggregator.App.ndjson_decoder.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Aggregator.App.utils.doctree b/docs/build/doctrees/generated/Aggregator.App.utils.doctree deleted file mode 100644 index 5ff54744..00000000 Binary files a/docs/build/doctrees/generated/Aggregator.App.utils.doctree and /dev/null differ diff --git 
a/docs/build/doctrees/generated/Aggregator.aggregator.doctree b/docs/build/doctrees/generated/Aggregator.aggregator.doctree deleted file mode 100644 index 48d40e59..00000000 Binary files a/docs/build/doctrees/generated/Aggregator.aggregator.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Aggregator.doctree b/docs/build/doctrees/generated/Aggregator.doctree deleted file mode 100644 index 97f03677..00000000 Binary files a/docs/build/doctrees/generated/Aggregator.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.__init__.doctree b/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.__init__.doctree deleted file mode 100644 index a7f5e39d..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.__init__.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.article_extract.doctree b/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.article_extract.doctree deleted file mode 100644 index 40a58a2a..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.article_extract.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.check_required.doctree b/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.check_required.doctree deleted file mode 100644 index 0429497b..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.check_required.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.custom_extract.doctree 
b/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.custom_extract.doctree deleted file mode 100644 index ef3b1284..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.custom_extract.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.custom_filter_raw.doctree b/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.custom_filter_raw.doctree deleted file mode 100644 index 6a978cfb..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.custom_filter_raw.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.custom_filter_soup.doctree b/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.custom_filter_soup.doctree deleted file mode 100644 index 37a94430..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.custom_filter_soup.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.doctree b/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.doctree deleted file mode 100644 index 23199605..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.extract.doctree b/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.extract.doctree deleted file mode 100644 index 1f2173b4..00000000 Binary files 
a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.extract.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.extract_soup.doctree b/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.extract_soup.doctree deleted file mode 100644 index ae7093bc..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.extract_soup.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.filter_raw.doctree b/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.filter_raw.doctree deleted file mode 100644 index 662f5c20..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.filter_raw.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.filter_soup.doctree b/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.filter_soup.doctree deleted file mode 100644 index b0d28d18..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.filter_soup.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.preprocess.doctree b/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.preprocess.doctree deleted file mode 100644 index 9b1f7bf5..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.preprocess.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.doctree 
b/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.doctree deleted file mode 100644 index 38346ba6..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_utils.doctree b/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_utils.doctree deleted file mode 100644 index 66f2929f..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_utils.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.ArticleUtils.doctree b/docs/build/doctrees/generated/Processor.App.ArticleUtils.doctree deleted file mode 100644 index b6e15cf9..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.ArticleUtils.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.Downloader.doctree b/docs/build/doctrees/generated/Processor.App.Downloader.doctree deleted file mode 100644 index 1be21036..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.Downloader.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.Downloader.downloader.Downloader.__init__.doctree b/docs/build/doctrees/generated/Processor.App.Downloader.downloader.Downloader.__init__.doctree deleted file mode 100644 index 441b50c4..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.Downloader.downloader.Downloader.__init__.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.Downloader.downloader.Downloader.doctree b/docs/build/doctrees/generated/Processor.App.Downloader.downloader.Downloader.doctree deleted file mode 100644 index 4a420c04..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.Downloader.downloader.Downloader.doctree and /dev/null differ diff --git 
a/docs/build/doctrees/generated/Processor.App.Downloader.downloader.Downloader.download.doctree b/docs/build/doctrees/generated/Processor.App.Downloader.downloader.Downloader.download.doctree deleted file mode 100644 index eb5bc1eb..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.Downloader.downloader.Downloader.download.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.Downloader.downloader.DownloaderFull.__init__.doctree b/docs/build/doctrees/generated/Processor.App.Downloader.downloader.DownloaderFull.__init__.doctree deleted file mode 100644 index 8d2c7344..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.Downloader.downloader.DownloaderFull.__init__.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.Downloader.downloader.DownloaderFull.aclose.doctree b/docs/build/doctrees/generated/Processor.App.Downloader.downloader.DownloaderFull.aclose.doctree deleted file mode 100644 index fc8ac10d..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.Downloader.downloader.DownloaderFull.aclose.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.Downloader.downloader.DownloaderFull.aopen.doctree b/docs/build/doctrees/generated/Processor.App.Downloader.downloader.DownloaderFull.aopen.doctree deleted file mode 100644 index 475aa448..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.Downloader.downloader.DownloaderFull.aopen.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.Downloader.downloader.DownloaderFull.doctree b/docs/build/doctrees/generated/Processor.App.Downloader.downloader.DownloaderFull.doctree deleted file mode 100644 index dc834f76..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.Downloader.downloader.DownloaderFull.doctree and /dev/null differ diff --git 
a/docs/build/doctrees/generated/Processor.App.Downloader.downloader.DownloaderFull.download.doctree b/docs/build/doctrees/generated/Processor.App.Downloader.downloader.DownloaderFull.download.doctree deleted file mode 100644 index 5549854e..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.Downloader.downloader.DownloaderFull.download.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.Downloader.downloader.DownloaderFull.unwrap.doctree b/docs/build/doctrees/generated/Processor.App.Downloader.downloader.DownloaderFull.unwrap.doctree deleted file mode 100644 index 360bf494..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.Downloader.downloader.DownloaderFull.unwrap.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.Downloader.downloader.doctree b/docs/build/doctrees/generated/Processor.App.Downloader.downloader.doctree deleted file mode 100644 index 7296ad12..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.Downloader.downloader.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.__init__.doctree b/docs/build/doctrees/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.__init__.doctree deleted file mode 100644 index c3de8472..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.__init__.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.doctree b/docs/build/doctrees/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.doctree deleted file mode 100644 index b1911f6c..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.doctree and /dev/null differ diff --git 
a/docs/build/doctrees/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.download.doctree b/docs/build/doctrees/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.download.doctree deleted file mode 100644 index 3ff8ed54..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.download.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.extract_url.doctree b/docs/build/doctrees/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.extract_url.doctree deleted file mode 100644 index 26b61be1..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.extract_url.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.extract_year.doctree b/docs/build/doctrees/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.extract_year.doctree deleted file mode 100644 index cfbd32f8..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.extract_year.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.mine_metadata.doctree b/docs/build/doctrees/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.mine_metadata.doctree deleted file mode 100644 index 4ee001d2..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.mine_metadata.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.Downloader.dummy_downloader.doctree b/docs/build/doctrees/generated/Processor.App.Downloader.dummy_downloader.doctree deleted file mode 100644 index 1756590b..00000000 Binary files 
a/docs/build/doctrees/generated/Processor.App.Downloader.dummy_downloader.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.Extractor.doctree b/docs/build/doctrees/generated/Processor.App.Extractor.doctree deleted file mode 100644 index c20dbe79..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.Extractor.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.Extractor.dummy_extractor.Extractor.__init__.doctree b/docs/build/doctrees/generated/Processor.App.Extractor.dummy_extractor.Extractor.__init__.doctree deleted file mode 100644 index f38c1ede..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.Extractor.dummy_extractor.Extractor.__init__.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.Extractor.dummy_extractor.Extractor.doctree b/docs/build/doctrees/generated/Processor.App.Extractor.dummy_extractor.Extractor.doctree deleted file mode 100644 index 396e935a..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.Extractor.dummy_extractor.Extractor.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.Extractor.dummy_extractor.Extractor.extract.doctree b/docs/build/doctrees/generated/Processor.App.Extractor.dummy_extractor.Extractor.extract.doctree deleted file mode 100644 index 275321c3..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.Extractor.dummy_extractor.Extractor.extract.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.Extractor.dummy_extractor.Extractor.extract_soup.doctree b/docs/build/doctrees/generated/Processor.App.Extractor.dummy_extractor.Extractor.extract_soup.doctree deleted file mode 100644 index 67dfacbb..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.Extractor.dummy_extractor.Extractor.extract_soup.doctree and /dev/null differ diff --git 
a/docs/build/doctrees/generated/Processor.App.Extractor.dummy_extractor.Extractor.filter_raw.doctree b/docs/build/doctrees/generated/Processor.App.Extractor.dummy_extractor.Extractor.filter_raw.doctree deleted file mode 100644 index 6b9a0f18..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.Extractor.dummy_extractor.Extractor.filter_raw.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.Extractor.dummy_extractor.Extractor.filter_soup.doctree b/docs/build/doctrees/generated/Processor.App.Extractor.dummy_extractor.Extractor.filter_soup.doctree deleted file mode 100644 index 9e06f2df..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.Extractor.dummy_extractor.Extractor.filter_soup.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.Extractor.dummy_extractor.Extractor.preprocess.doctree b/docs/build/doctrees/generated/Processor.App.Extractor.dummy_extractor.Extractor.preprocess.doctree deleted file mode 100644 index 4fe1f96b..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.Extractor.dummy_extractor.Extractor.preprocess.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.Extractor.dummy_extractor.doctree b/docs/build/doctrees/generated/Processor.App.Extractor.dummy_extractor.doctree deleted file mode 100644 index 52ea35a9..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.Extractor.dummy_extractor.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.Extractor.extractor.BaseExtractor.__init__.doctree b/docs/build/doctrees/generated/Processor.App.Extractor.extractor.BaseExtractor.__init__.doctree deleted file mode 100644 index a4a76ccb..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.Extractor.extractor.BaseExtractor.__init__.doctree and /dev/null differ diff --git 
a/docs/build/doctrees/generated/Processor.App.Extractor.extractor.BaseExtractor.doctree b/docs/build/doctrees/generated/Processor.App.Extractor.extractor.BaseExtractor.doctree deleted file mode 100644 index 34acbe56..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.Extractor.extractor.BaseExtractor.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.Extractor.extractor.BaseExtractor.extract.doctree b/docs/build/doctrees/generated/Processor.App.Extractor.extractor.BaseExtractor.extract.doctree deleted file mode 100644 index 2f5eee7b..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.Extractor.extractor.BaseExtractor.extract.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.Extractor.extractor.BaseExtractor.extract_soup.doctree b/docs/build/doctrees/generated/Processor.App.Extractor.extractor.BaseExtractor.extract_soup.doctree deleted file mode 100644 index ddb17171..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.Extractor.extractor.BaseExtractor.extract_soup.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.Extractor.extractor.BaseExtractor.filter_raw.doctree b/docs/build/doctrees/generated/Processor.App.Extractor.extractor.BaseExtractor.filter_raw.doctree deleted file mode 100644 index 6bc85c78..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.Extractor.extractor.BaseExtractor.filter_raw.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.Extractor.extractor.BaseExtractor.filter_soup.doctree b/docs/build/doctrees/generated/Processor.App.Extractor.extractor.BaseExtractor.filter_soup.doctree deleted file mode 100644 index a21f0a8a..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.Extractor.extractor.BaseExtractor.filter_soup.doctree and /dev/null differ diff --git 
a/docs/build/doctrees/generated/Processor.App.Extractor.extractor.BaseExtractor.preprocess.doctree b/docs/build/doctrees/generated/Processor.App.Extractor.extractor.BaseExtractor.preprocess.doctree deleted file mode 100644 index 77c26130..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.Extractor.extractor.BaseExtractor.preprocess.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.Extractor.extractor.doctree b/docs/build/doctrees/generated/Processor.App.Extractor.extractor.doctree deleted file mode 100644 index e1826168..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.Extractor.extractor.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.Extractor.extractor_utils.doctree b/docs/build/doctrees/generated/Processor.App.Extractor.extractor_utils.doctree deleted file mode 100644 index fb99fee5..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.Extractor.extractor_utils.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.doctree deleted file mode 100644 index b76245e1..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.dummy_streamer.DummyStreamer.__init__.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.dummy_streamer.DummyStreamer.__init__.doctree deleted file mode 100644 index ffd7567d..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.dummy_streamer.DummyStreamer.__init__.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.dummy_streamer.DummyStreamer.clean_up.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.dummy_streamer.DummyStreamer.clean_up.doctree deleted file mode 100644 index 
197a910a..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.dummy_streamer.DummyStreamer.clean_up.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.dummy_streamer.DummyStreamer.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.dummy_streamer.DummyStreamer.doctree deleted file mode 100644 index 59d1a0fd..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.dummy_streamer.DummyStreamer.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.dummy_streamer.DummyStreamer.stream.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.dummy_streamer.DummyStreamer.stream.doctree deleted file mode 100644 index f052a232..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.dummy_streamer.DummyStreamer.stream.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.dummy_streamer.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.dummy_streamer.doctree deleted file mode 100644 index b7505f89..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.dummy_streamer.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.outstreamer.OutStreamer.__init__.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.outstreamer.OutStreamer.__init__.doctree deleted file mode 100644 index ff149b6d..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.outstreamer.OutStreamer.__init__.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.outstreamer.OutStreamer.clean_up.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.outstreamer.OutStreamer.clean_up.doctree deleted file mode 100644 index ff4cd35c..00000000 Binary files 
a/docs/build/doctrees/generated/Processor.App.OutStreamer.outstreamer.OutStreamer.clean_up.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.outstreamer.OutStreamer.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.outstreamer.OutStreamer.doctree deleted file mode 100644 index 2c73c738..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.outstreamer.OutStreamer.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.outstreamer.OutStreamer.stream.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.outstreamer.OutStreamer.stream.doctree deleted file mode 100644 index 256048bf..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.outstreamer.OutStreamer.stream.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.outstreamer.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.outstreamer.doctree deleted file mode 100644 index 16675dcb..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.outstreamer.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.__init__.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.__init__.doctree deleted file mode 100644 index 47cbf438..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.__init__.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.clean_up.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.clean_up.doctree deleted file mode 100644 index 1d6a449f..00000000 Binary files 
a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.clean_up.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.doctree deleted file mode 100644 index f5e24d68..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.get_file_name.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.get_file_name.doctree deleted file mode 100644 index 041a1771..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.get_file_name.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.metadata_to_string.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.metadata_to_string.doctree deleted file mode 100644 index 082285bc..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.metadata_to_string.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.stream.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.stream.doctree deleted file mode 100644 index da5e7065..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.stream.doctree and /dev/null differ diff --git 
a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.__init__.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.__init__.doctree deleted file mode 100644 index a96eb078..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.__init__.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.clean_up.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.clean_up.doctree deleted file mode 100644 index 803da1c2..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.clean_up.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.doctree deleted file mode 100644 index f98a4e89..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.get_file_name.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.get_file_name.doctree deleted file mode 100644 index 94b85f29..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.get_file_name.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.metadata_to_string.doctree 
b/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.metadata_to_string.doctree deleted file mode 100644 index e235b2e7..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.metadata_to_string.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.stream.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.stream.doctree deleted file mode 100644 index b46ce5d4..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.stream.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.__init__.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.__init__.doctree deleted file mode 100644 index 24861d2b..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.__init__.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.clean_up.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.clean_up.doctree deleted file mode 100644 index 84f81b9b..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.clean_up.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.doctree deleted file mode 100644 index e6b71a0e..00000000 Binary files 
a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.get_file_name.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.get_file_name.doctree deleted file mode 100644 index 7b261d9e..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.get_file_name.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.metadata_to_string.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.metadata_to_string.doctree deleted file mode 100644 index 42be412d..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.metadata_to_string.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.stream.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.stream.doctree deleted file mode 100644 index 9810a4ff..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.stream.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.doctree deleted file mode 100644 index c0bca8f6..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.Pipeline.doctree b/docs/build/doctrees/generated/Processor.App.Pipeline.doctree deleted file mode 100644 index 08915d93..00000000 Binary files 
a/docs/build/doctrees/generated/Processor.App.Pipeline.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.Pipeline.pipeline.ProcessorPipeline.__init__.doctree b/docs/build/doctrees/generated/Processor.App.Pipeline.pipeline.ProcessorPipeline.__init__.doctree deleted file mode 100644 index 277eb958..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.Pipeline.pipeline.ProcessorPipeline.__init__.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.Pipeline.pipeline.ProcessorPipeline.doctree b/docs/build/doctrees/generated/Processor.App.Pipeline.pipeline.ProcessorPipeline.doctree deleted file mode 100644 index 67e3b0e6..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.Pipeline.pipeline.ProcessorPipeline.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.Pipeline.pipeline.ProcessorPipeline.process_domain_record.doctree b/docs/build/doctrees/generated/Processor.App.Pipeline.pipeline.ProcessorPipeline.process_domain_record.doctree deleted file mode 100644 index d77a3264..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.Pipeline.pipeline.ProcessorPipeline.process_domain_record.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.Pipeline.pipeline.doctree b/docs/build/doctrees/generated/Processor.App.Pipeline.pipeline.doctree deleted file mode 100644 index cc6a0c76..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.Pipeline.pipeline.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.Router.doctree b/docs/build/doctrees/generated/Processor.App.Router.doctree deleted file mode 100644 index fe17917f..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.Router.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.Router.router.Route.__init__.doctree 
b/docs/build/doctrees/generated/Processor.App.Router.router.Route.__init__.doctree deleted file mode 100644 index 42ea51e0..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.Router.router.Route.__init__.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.Router.router.Route.doctree b/docs/build/doctrees/generated/Processor.App.Router.router.Route.doctree deleted file mode 100644 index dc3dcb4b..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.Router.router.Route.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.Router.router.Router.__init__.doctree b/docs/build/doctrees/generated/Processor.App.Router.router.Router.__init__.doctree deleted file mode 100644 index 8a2b0c24..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.Router.router.Router.__init__.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.Router.router.Router.doctree b/docs/build/doctrees/generated/Processor.App.Router.router.Router.doctree deleted file mode 100644 index bde97845..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.Router.router.Router.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.Router.router.Router.load_module.doctree b/docs/build/doctrees/generated/Processor.App.Router.router.Router.load_module.doctree deleted file mode 100644 index b3d225d0..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.Router.router.Router.load_module.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.Router.router.Router.load_modules.doctree b/docs/build/doctrees/generated/Processor.App.Router.router.Router.load_modules.doctree deleted file mode 100644 index 371e2087..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.Router.router.Router.load_modules.doctree and /dev/null differ diff --git 
a/docs/build/doctrees/generated/Processor.App.Router.router.Router.register_route.doctree b/docs/build/doctrees/generated/Processor.App.Router.router.Router.register_route.doctree deleted file mode 100644 index 4a1f1184..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.Router.router.Router.register_route.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.Router.router.Router.register_routes.doctree b/docs/build/doctrees/generated/Processor.App.Router.router.Router.register_routes.doctree deleted file mode 100644 index 87072183..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.Router.router.Router.register_routes.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.Router.router.Router.route.doctree b/docs/build/doctrees/generated/Processor.App.Router.router.Router.route.doctree deleted file mode 100644 index 63d1e5c0..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.Router.router.Router.route.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.Router.router.doctree b/docs/build/doctrees/generated/Processor.App.Router.router.doctree deleted file mode 100644 index 7aa3ad47..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.Router.router.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.doctree b/docs/build/doctrees/generated/Processor.App.doctree deleted file mode 100644 index 0d9c9c84..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.processor_utils.DomainRecord.__init__.doctree b/docs/build/doctrees/generated/Processor.App.processor_utils.DomainRecord.__init__.doctree deleted file mode 100644 index 1619f359..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.processor_utils.DomainRecord.__init__.doctree and /dev/null differ diff --git 
a/docs/build/doctrees/generated/Processor.App.processor_utils.DomainRecord.doctree b/docs/build/doctrees/generated/Processor.App.processor_utils.DomainRecord.doctree deleted file mode 100644 index aa81adcf..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.processor_utils.DomainRecord.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.processor_utils.PipeMetadata.__init__.doctree b/docs/build/doctrees/generated/Processor.App.processor_utils.PipeMetadata.__init__.doctree deleted file mode 100644 index 27abae50..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.processor_utils.PipeMetadata.__init__.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.processor_utils.PipeMetadata.doctree b/docs/build/doctrees/generated/Processor.App.processor_utils.PipeMetadata.doctree deleted file mode 100644 index 4ed8a9b6..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.processor_utils.PipeMetadata.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.App.processor_utils.doctree b/docs/build/doctrees/generated/Processor.App.processor_utils.doctree deleted file mode 100644 index 20a5deb2..00000000 Binary files a/docs/build/doctrees/generated/Processor.App.processor_utils.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.doctree b/docs/build/doctrees/generated/Processor.doctree deleted file mode 100644 index 7829de79..00000000 Binary files a/docs/build/doctrees/generated/Processor.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.process_article.doctree b/docs/build/doctrees/generated/Processor.process_article.doctree deleted file mode 100644 index 541faf14..00000000 Binary files a/docs/build/doctrees/generated/Processor.process_article.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.processor.Listener.__init__.doctree 
b/docs/build/doctrees/generated/Processor.processor.Listener.__init__.doctree deleted file mode 100644 index 15c2ea3b..00000000 Binary files a/docs/build/doctrees/generated/Processor.processor.Listener.__init__.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.processor.Listener.doctree b/docs/build/doctrees/generated/Processor.processor.Listener.doctree deleted file mode 100644 index 71dcd299..00000000 Binary files a/docs/build/doctrees/generated/Processor.processor.Listener.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.processor.Listener.on_before_message.doctree b/docs/build/doctrees/generated/Processor.processor.Listener.on_before_message.doctree deleted file mode 100644 index 65480dbd..00000000 Binary files a/docs/build/doctrees/generated/Processor.processor.Listener.on_before_message.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.processor.Listener.on_connected.doctree b/docs/build/doctrees/generated/Processor.processor.Listener.on_connected.doctree deleted file mode 100644 index 0548d732..00000000 Binary files a/docs/build/doctrees/generated/Processor.processor.Listener.on_connected.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.processor.Listener.on_connecting.doctree b/docs/build/doctrees/generated/Processor.processor.Listener.on_connecting.doctree deleted file mode 100644 index ea14ea3a..00000000 Binary files a/docs/build/doctrees/generated/Processor.processor.Listener.on_connecting.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.processor.Listener.on_disconnected.doctree b/docs/build/doctrees/generated/Processor.processor.Listener.on_disconnected.doctree deleted file mode 100644 index 229773a1..00000000 Binary files a/docs/build/doctrees/generated/Processor.processor.Listener.on_disconnected.doctree and /dev/null differ diff --git 
a/docs/build/doctrees/generated/Processor.processor.Listener.on_disconnecting.doctree b/docs/build/doctrees/generated/Processor.processor.Listener.on_disconnecting.doctree deleted file mode 100644 index 901aed43..00000000 Binary files a/docs/build/doctrees/generated/Processor.processor.Listener.on_disconnecting.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.processor.Listener.on_error.doctree b/docs/build/doctrees/generated/Processor.processor.Listener.on_error.doctree deleted file mode 100644 index 9a89bd63..00000000 Binary files a/docs/build/doctrees/generated/Processor.processor.Listener.on_error.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.processor.Listener.on_heartbeat.doctree b/docs/build/doctrees/generated/Processor.processor.Listener.on_heartbeat.doctree deleted file mode 100644 index 515d6b6c..00000000 Binary files a/docs/build/doctrees/generated/Processor.processor.Listener.on_heartbeat.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.processor.Listener.on_heartbeat_timeout.doctree b/docs/build/doctrees/generated/Processor.processor.Listener.on_heartbeat_timeout.doctree deleted file mode 100644 index 6cdf623e..00000000 Binary files a/docs/build/doctrees/generated/Processor.processor.Listener.on_heartbeat_timeout.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.processor.Listener.on_message.doctree b/docs/build/doctrees/generated/Processor.processor.Listener.on_message.doctree deleted file mode 100644 index 2ff64cc0..00000000 Binary files a/docs/build/doctrees/generated/Processor.processor.Listener.on_message.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.processor.Listener.on_receipt.doctree b/docs/build/doctrees/generated/Processor.processor.Listener.on_receipt.doctree deleted file mode 100644 index ea7fc41b..00000000 Binary files 
a/docs/build/doctrees/generated/Processor.processor.Listener.on_receipt.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.processor.Listener.on_receiver_loop_completed.doctree b/docs/build/doctrees/generated/Processor.processor.Listener.on_receiver_loop_completed.doctree deleted file mode 100644 index 96857c8e..00000000 Binary files a/docs/build/doctrees/generated/Processor.processor.Listener.on_receiver_loop_completed.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.processor.Listener.on_send.doctree b/docs/build/doctrees/generated/Processor.processor.Listener.on_send.doctree deleted file mode 100644 index 68ac1b4d..00000000 Binary files a/docs/build/doctrees/generated/Processor.processor.Listener.on_send.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.processor.ListnerStats.__init__.doctree b/docs/build/doctrees/generated/Processor.processor.ListnerStats.__init__.doctree deleted file mode 100644 index 38ac0c49..00000000 Binary files a/docs/build/doctrees/generated/Processor.processor.ListnerStats.__init__.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.processor.ListnerStats.doctree b/docs/build/doctrees/generated/Processor.processor.ListnerStats.doctree deleted file mode 100644 index e2aa4cbd..00000000 Binary files a/docs/build/doctrees/generated/Processor.processor.ListnerStats.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.processor.Message.__init__.doctree b/docs/build/doctrees/generated/Processor.processor.Message.__init__.doctree deleted file mode 100644 index 57d734d8..00000000 Binary files a/docs/build/doctrees/generated/Processor.processor.Message.__init__.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.processor.Message.doctree b/docs/build/doctrees/generated/Processor.processor.Message.doctree deleted file mode 100644 index a5db1259..00000000 Binary 
files a/docs/build/doctrees/generated/Processor.processor.Message.doctree and /dev/null differ diff --git a/docs/build/doctrees/generated/Processor.processor.doctree b/docs/build/doctrees/generated/Processor.processor.doctree deleted file mode 100644 index 15cf4376..00000000 Binary files a/docs/build/doctrees/generated/Processor.processor.doctree and /dev/null differ diff --git a/docs/build/doctrees/index.doctree b/docs/build/doctrees/index.doctree index bdb9102b..68f64384 100644 Binary files a/docs/build/doctrees/index.doctree and b/docs/build/doctrees/index.doctree differ diff --git a/docs/build/doctrees/installation.doctree b/docs/build/doctrees/installation.doctree deleted file mode 100644 index 234c09af..00000000 Binary files a/docs/build/doctrees/installation.doctree and /dev/null differ diff --git a/docs/build/doctrees/quickstart/artemis-queue.doctree b/docs/build/doctrees/quickstart/artemis-queue.doctree deleted file mode 100644 index e05e3710..00000000 Binary files a/docs/build/doctrees/quickstart/artemis-queue.doctree and /dev/null differ diff --git a/docs/build/doctrees/quickstart/download_article.doctree b/docs/build/doctrees/quickstart/download_article.doctree deleted file mode 100644 index a5a2b00b..00000000 Binary files a/docs/build/doctrees/quickstart/download_article.doctree and /dev/null differ diff --git a/docs/build/doctrees/quickstart/index.doctree b/docs/build/doctrees/quickstart/index.doctree deleted file mode 100644 index 86cb124d..00000000 Binary files a/docs/build/doctrees/quickstart/index.doctree and /dev/null differ diff --git a/docs/build/doctrees/quickstart/installation.doctree b/docs/build/doctrees/quickstart/installation.doctree deleted file mode 100644 index 33fe40fe..00000000 Binary files a/docs/build/doctrees/quickstart/installation.doctree and /dev/null differ diff --git a/docs/build/doctrees/quickstart/middleware.doctree b/docs/build/doctrees/quickstart/middleware.doctree deleted file mode 100644 index 28f501cb..00000000 Binary 
files a/docs/build/doctrees/quickstart/middleware.doctree and /dev/null differ diff --git a/docs/build/doctrees/quickstart/overview.doctree b/docs/build/doctrees/quickstart/overview.doctree deleted file mode 100644 index 34dd3019..00000000 Binary files a/docs/build/doctrees/quickstart/overview.doctree and /dev/null differ diff --git a/docs/build/doctrees/quickstart/quick-start.doctree b/docs/build/doctrees/quickstart/quick-start.doctree deleted file mode 100644 index 58ae2918..00000000 Binary files a/docs/build/doctrees/quickstart/quick-start.doctree and /dev/null differ diff --git a/docs/build/html/.buildinfo b/docs/build/html/.buildinfo index ed36e0d1..251c030d 100644 --- a/docs/build/html/.buildinfo +++ b/docs/build/html/.buildinfo @@ -1,4 +1,4 @@ # Sphinx build info version 1 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. -config: f83e2738440146eb7aee758ea06be062 +config: 455dcb0e361cb666a31520aa4f46b2fc tags: 645f666f9bcd5a90fca523b33c5a78b7 diff --git a/docs/build/html/_sources/api.rst.txt b/docs/build/html/_sources/api.rst.txt index 76c4a0e7..2ed3eeb5 100644 --- a/docs/build/html/_sources/api.rst.txt +++ b/docs/build/html/_sources/api.rst.txt @@ -6,8 +6,7 @@ API :toctree: generated - Aggregator - Processor + cmoncrawl diff --git a/docs/build/html/_sources/generated/Aggregator.App.index_query.DomainCrawl.__init__.rst.txt b/docs/build/html/_sources/generated/Aggregator.App.index_query.DomainCrawl.__init__.rst.txt deleted file mode 100644 index 85e4d70b..00000000 --- a/docs/build/html/_sources/generated/Aggregator.App.index_query.DomainCrawl.__init__.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Aggregator.App.index\_query.DomainCrawl.\_\_init\_\_ -==================================================== - -.. currentmodule:: Aggregator.App.index_query - -.. 
automethod:: DomainCrawl.__init__ \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Aggregator.App.index_query.DomainCrawl.rst.txt b/docs/build/html/_sources/generated/Aggregator.App.index_query.DomainCrawl.rst.txt deleted file mode 100644 index f72f8295..00000000 --- a/docs/build/html/_sources/generated/Aggregator.App.index_query.DomainCrawl.rst.txt +++ /dev/null @@ -1,32 +0,0 @@ -Aggregator.App.index\_query.DomainCrawl -======================================= - -.. currentmodule:: Aggregator.App.index_query - -.. autoclass:: DomainCrawl - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - :toctree: - - - ~DomainCrawl.__init__ - - - - - - .. rubric:: Attributes - - .. autosummary:: - - ~DomainCrawl.cdx_server - ~DomainCrawl.domain - ~DomainCrawl.page - - \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Aggregator.App.index_query.DomainRecord.__init__.rst.txt b/docs/build/html/_sources/generated/Aggregator.App.index_query.DomainRecord.__init__.rst.txt deleted file mode 100644 index ad2f70fa..00000000 --- a/docs/build/html/_sources/generated/Aggregator.App.index_query.DomainRecord.__init__.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Aggregator.App.index\_query.DomainRecord.\_\_init\_\_ -===================================================== - -.. currentmodule:: Aggregator.App.index_query - -.. automethod:: DomainRecord.__init__ \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Aggregator.App.index_query.DomainRecord.rst.txt b/docs/build/html/_sources/generated/Aggregator.App.index_query.DomainRecord.rst.txt deleted file mode 100644 index 15196f89..00000000 --- a/docs/build/html/_sources/generated/Aggregator.App.index_query.DomainRecord.rst.txt +++ /dev/null @@ -1,36 +0,0 @@ -Aggregator.App.index\_query.DomainRecord -======================================== - -.. currentmodule:: Aggregator.App.index_query - -.. autoclass:: DomainRecord - - - .. 
automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - :toctree: - - - ~DomainRecord.__init__ - - - - - - .. rubric:: Attributes - - .. autosummary:: - - ~DomainRecord.digest - ~DomainRecord.encoding - ~DomainRecord.timestamp - ~DomainRecord.filename - ~DomainRecord.url - ~DomainRecord.offset - ~DomainRecord.length - - \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Aggregator.App.index_query.IndexAggregator.__init__.rst.txt b/docs/build/html/_sources/generated/Aggregator.App.index_query.IndexAggregator.__init__.rst.txt deleted file mode 100644 index 9bf74300..00000000 --- a/docs/build/html/_sources/generated/Aggregator.App.index_query.IndexAggregator.__init__.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Aggregator.App.index\_query.IndexAggregator.\_\_init\_\_ -======================================================== - -.. currentmodule:: Aggregator.App.index_query - -.. automethod:: IndexAggregator.__init__ \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Aggregator.App.index_query.IndexAggregator.aclose.rst.txt b/docs/build/html/_sources/generated/Aggregator.App.index_query.IndexAggregator.aclose.rst.txt deleted file mode 100644 index 3de6bcdf..00000000 --- a/docs/build/html/_sources/generated/Aggregator.App.index_query.IndexAggregator.aclose.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Aggregator.App.index\_query.IndexAggregator.aclose -================================================== - -.. currentmodule:: Aggregator.App.index_query - -.. 
automethod:: IndexAggregator.aclose \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Aggregator.App.index_query.IndexAggregator.aopen.rst.txt b/docs/build/html/_sources/generated/Aggregator.App.index_query.IndexAggregator.aopen.rst.txt deleted file mode 100644 index f62b5568..00000000 --- a/docs/build/html/_sources/generated/Aggregator.App.index_query.IndexAggregator.aopen.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Aggregator.App.index\_query.IndexAggregator.aopen -================================================= - -.. currentmodule:: Aggregator.App.index_query - -.. automethod:: IndexAggregator.aopen \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Aggregator.App.index_query.IndexAggregator.get_all_CC_indexes.rst.txt b/docs/build/html/_sources/generated/Aggregator.App.index_query.IndexAggregator.get_all_CC_indexes.rst.txt deleted file mode 100644 index 49c1a573..00000000 --- a/docs/build/html/_sources/generated/Aggregator.App.index_query.IndexAggregator.get_all_CC_indexes.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Aggregator.App.index\_query.IndexAggregator.get\_all\_CC\_indexes -================================================================= - -.. currentmodule:: Aggregator.App.index_query - -.. automethod:: IndexAggregator.get_all_CC_indexes \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Aggregator.App.index_query.IndexAggregator.get_captured_responses.rst.txt b/docs/build/html/_sources/generated/Aggregator.App.index_query.IndexAggregator.get_captured_responses.rst.txt deleted file mode 100644 index 61a404c4..00000000 --- a/docs/build/html/_sources/generated/Aggregator.App.index_query.IndexAggregator.get_captured_responses.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Aggregator.App.index\_query.IndexAggregator.get\_captured\_responses -==================================================================== - -.. currentmodule:: Aggregator.App.index_query - -.. 
automethod:: IndexAggregator.get_captured_responses \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Aggregator.App.index_query.IndexAggregator.get_number_of_pages.rst.txt b/docs/build/html/_sources/generated/Aggregator.App.index_query.IndexAggregator.get_number_of_pages.rst.txt deleted file mode 100644 index 2a2427bd..00000000 --- a/docs/build/html/_sources/generated/Aggregator.App.index_query.IndexAggregator.get_number_of_pages.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Aggregator.App.index\_query.IndexAggregator.get\_number\_of\_pages -================================================================== - -.. currentmodule:: Aggregator.App.index_query - -.. automethod:: IndexAggregator.get_number_of_pages \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Aggregator.App.index_query.IndexAggregator.rst.txt b/docs/build/html/_sources/generated/Aggregator.App.index_query.IndexAggregator.rst.txt deleted file mode 100644 index 33ebd99b..00000000 --- a/docs/build/html/_sources/generated/Aggregator.App.index_query.IndexAggregator.rst.txt +++ /dev/null @@ -1,29 +0,0 @@ -Aggregator.App.index\_query.IndexAggregator -=========================================== - -.. currentmodule:: Aggregator.App.index_query - -.. autoclass:: IndexAggregator - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. 
autosummary:: - :toctree: - - - ~IndexAggregator.__init__ - ~IndexAggregator.aclose - ~IndexAggregator.aopen - ~IndexAggregator.get_all_CC_indexes - ~IndexAggregator.get_captured_responses - ~IndexAggregator.get_number_of_pages - - - - - - \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Aggregator.App.index_query.RetrieveResponse.__init__.rst.txt b/docs/build/html/_sources/generated/Aggregator.App.index_query.RetrieveResponse.__init__.rst.txt deleted file mode 100644 index 90655f25..00000000 --- a/docs/build/html/_sources/generated/Aggregator.App.index_query.RetrieveResponse.__init__.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Aggregator.App.index\_query.RetrieveResponse.\_\_init\_\_ -========================================================= - -.. currentmodule:: Aggregator.App.index_query - -.. automethod:: RetrieveResponse.__init__ \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Aggregator.App.index_query.RetrieveResponse.rst.txt b/docs/build/html/_sources/generated/Aggregator.App.index_query.RetrieveResponse.rst.txt deleted file mode 100644 index 49c3fbaa..00000000 --- a/docs/build/html/_sources/generated/Aggregator.App.index_query.RetrieveResponse.rst.txt +++ /dev/null @@ -1,32 +0,0 @@ -Aggregator.App.index\_query.RetrieveResponse -============================================ - -.. currentmodule:: Aggregator.App.index_query - -.. autoclass:: RetrieveResponse - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - :toctree: - - - ~RetrieveResponse.__init__ - - - - - - .. rubric:: Attributes - - .. 
autosummary:: - - ~RetrieveResponse.status - ~RetrieveResponse.content - ~RetrieveResponse.reason - - \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Aggregator.App.index_query.rst.txt b/docs/build/html/_sources/generated/Aggregator.App.index_query.rst.txt deleted file mode 100644 index 21d0ce61..00000000 --- a/docs/build/html/_sources/generated/Aggregator.App.index_query.rst.txt +++ /dev/null @@ -1,41 +0,0 @@ -Aggregator.App.index\_query -=========================== - -.. automodule:: Aggregator.App.index_query - - - - - - - - .. rubric:: Functions - - .. autosummary:: - - crawl_to_year - timestamp_to_datetime - to_timestamp_format - - - - - - .. rubric:: Classes - - .. autosummary:: - :toctree: - - DomainCrawl - DomainRecord - IndexAggregator - RetrieveResponse - - - - - - - - - diff --git a/docs/build/html/_sources/generated/Aggregator.App.ndjson_decoder.Decoder.__init__.rst.txt b/docs/build/html/_sources/generated/Aggregator.App.ndjson_decoder.Decoder.__init__.rst.txt deleted file mode 100644 index 9519a827..00000000 --- a/docs/build/html/_sources/generated/Aggregator.App.ndjson_decoder.Decoder.__init__.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Aggregator.App.ndjson\_decoder.Decoder.\_\_init\_\_ -=================================================== - -.. currentmodule:: Aggregator.App.ndjson_decoder - -.. automethod:: Decoder.__init__ \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Aggregator.App.ndjson_decoder.Decoder.decode.rst.txt b/docs/build/html/_sources/generated/Aggregator.App.ndjson_decoder.Decoder.decode.rst.txt deleted file mode 100644 index 347d669d..00000000 --- a/docs/build/html/_sources/generated/Aggregator.App.ndjson_decoder.Decoder.decode.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Aggregator.App.ndjson\_decoder.Decoder.decode -============================================= - -.. currentmodule:: Aggregator.App.ndjson_decoder - -.. 
automethod:: Decoder.decode \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Aggregator.App.ndjson_decoder.Decoder.raw_decode.rst.txt b/docs/build/html/_sources/generated/Aggregator.App.ndjson_decoder.Decoder.raw_decode.rst.txt deleted file mode 100644 index d8582fe2..00000000 --- a/docs/build/html/_sources/generated/Aggregator.App.ndjson_decoder.Decoder.raw_decode.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Aggregator.App.ndjson\_decoder.Decoder.raw\_decode -================================================== - -.. currentmodule:: Aggregator.App.ndjson_decoder - -.. automethod:: Decoder.raw_decode \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Aggregator.App.ndjson_decoder.Decoder.rst.txt b/docs/build/html/_sources/generated/Aggregator.App.ndjson_decoder.Decoder.rst.txt deleted file mode 100644 index 38606e34..00000000 --- a/docs/build/html/_sources/generated/Aggregator.App.ndjson_decoder.Decoder.rst.txt +++ /dev/null @@ -1,26 +0,0 @@ -Aggregator.App.ndjson\_decoder.Decoder -====================================== - -.. currentmodule:: Aggregator.App.ndjson_decoder - -.. autoclass:: Decoder - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - :toctree: - - - ~Decoder.__init__ - ~Decoder.decode - ~Decoder.raw_decode - - - - - - \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Aggregator.App.ndjson_decoder.rst.txt b/docs/build/html/_sources/generated/Aggregator.App.ndjson_decoder.rst.txt deleted file mode 100644 index e23af3d3..00000000 --- a/docs/build/html/_sources/generated/Aggregator.App.ndjson_decoder.rst.txt +++ /dev/null @@ -1,30 +0,0 @@ -Aggregator.App.ndjson\_decoder -============================== - -.. automodule:: Aggregator.App.ndjson_decoder - - - - - - - - - - - - .. rubric:: Classes - - .. 
autosummary:: - :toctree: - - Decoder - - - - - - - - - diff --git a/docs/build/html/_sources/generated/Aggregator.App.rst.txt b/docs/build/html/_sources/generated/Aggregator.App.rst.txt deleted file mode 100644 index d895b720..00000000 --- a/docs/build/html/_sources/generated/Aggregator.App.rst.txt +++ /dev/null @@ -1,33 +0,0 @@ -Aggregator.App -============== - -.. automodule:: Aggregator.App - - - - - - - - - - - - - - - - - - - -.. rubric:: Modules - -.. autosummary:: - :toctree: - :recursive: - - Aggregator.App.index_query - Aggregator.App.ndjson_decoder - Aggregator.App.utils - diff --git a/docs/build/html/_sources/generated/Aggregator.App.utils.rst.txt b/docs/build/html/_sources/generated/Aggregator.App.utils.rst.txt deleted file mode 100644 index 98dabc43..00000000 --- a/docs/build/html/_sources/generated/Aggregator.App.utils.rst.txt +++ /dev/null @@ -1,23 +0,0 @@ -Aggregator.App.utils -==================== - -.. automodule:: Aggregator.App.utils - - - - - - - - - - - - - - - - - - - diff --git a/docs/build/html/_sources/generated/Aggregator.aggregator.rst.txt b/docs/build/html/_sources/generated/Aggregator.aggregator.rst.txt deleted file mode 100644 index a181dc26..00000000 --- a/docs/build/html/_sources/generated/Aggregator.aggregator.rst.txt +++ /dev/null @@ -1,31 +0,0 @@ -Aggregator.aggregator -===================== - -.. automodule:: Aggregator.aggregator - - - - - - - - .. rubric:: Functions - - .. autosummary:: - - aggregate - init_connection - unify_url_id - - - - - - - - - - - - - diff --git a/docs/build/html/_sources/generated/Aggregator.rst.txt b/docs/build/html/_sources/generated/Aggregator.rst.txt deleted file mode 100644 index 32185271..00000000 --- a/docs/build/html/_sources/generated/Aggregator.rst.txt +++ /dev/null @@ -1,32 +0,0 @@ -Aggregator -========== - -.. automodule:: Aggregator - - - - - - - - - - - - - - - - - - - -.. rubric:: Modules - -.. 
autosummary:: - :toctree: - :recursive: - - Aggregator.App - Aggregator.aggregator - diff --git a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.__init__.rst.txt b/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.__init__.rst.txt deleted file mode 100644 index 0249c1c4..00000000 --- a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.__init__.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.ArticleUtils.article\_extractor.ArticleExtractor.\_\_init\_\_ -=========================================================================== - -.. currentmodule:: Processor.App.ArticleUtils.article_extractor - -.. automethod:: ArticleExtractor.__init__ \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.article_extract.rst.txt b/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.article_extract.rst.txt deleted file mode 100644 index 597704ce..00000000 --- a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.article_extract.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.ArticleUtils.article\_extractor.ArticleExtractor.article\_extract -=============================================================================== - -.. currentmodule:: Processor.App.ArticleUtils.article_extractor - -.. 
automethod:: ArticleExtractor.article_extract \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.check_required.rst.txt b/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.check_required.rst.txt deleted file mode 100644 index c3b3734c..00000000 --- a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.check_required.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.ArticleUtils.article\_extractor.ArticleExtractor.check\_required -============================================================================== - -.. currentmodule:: Processor.App.ArticleUtils.article_extractor - -.. automethod:: ArticleExtractor.check_required \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.custom_extract.rst.txt b/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.custom_extract.rst.txt deleted file mode 100644 index abd8be1c..00000000 --- a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.custom_extract.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.ArticleUtils.article\_extractor.ArticleExtractor.custom\_extract -============================================================================== - -.. currentmodule:: Processor.App.ArticleUtils.article_extractor - -.. 
automethod:: ArticleExtractor.custom_extract \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.custom_filter_raw.rst.txt b/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.custom_filter_raw.rst.txt deleted file mode 100644 index ad21e89b..00000000 --- a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.custom_filter_raw.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.ArticleUtils.article\_extractor.ArticleExtractor.custom\_filter\_raw -================================================================================== - -.. currentmodule:: Processor.App.ArticleUtils.article_extractor - -.. automethod:: ArticleExtractor.custom_filter_raw \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.custom_filter_soup.rst.txt b/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.custom_filter_soup.rst.txt deleted file mode 100644 index 046ed0bd..00000000 --- a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.custom_filter_soup.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.ArticleUtils.article\_extractor.ArticleExtractor.custom\_filter\_soup -=================================================================================== - -.. currentmodule:: Processor.App.ArticleUtils.article_extractor - -.. 
automethod:: ArticleExtractor.custom_filter_soup \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.extract.rst.txt b/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.extract.rst.txt deleted file mode 100644 index 7c40a2f9..00000000 --- a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.extract.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.ArticleUtils.article\_extractor.ArticleExtractor.extract -====================================================================== - -.. currentmodule:: Processor.App.ArticleUtils.article_extractor - -.. automethod:: ArticleExtractor.extract \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.extract_soup.rst.txt b/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.extract_soup.rst.txt deleted file mode 100644 index 0b613f02..00000000 --- a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.extract_soup.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.ArticleUtils.article\_extractor.ArticleExtractor.extract\_soup -============================================================================ - -.. currentmodule:: Processor.App.ArticleUtils.article_extractor - -.. 
automethod:: ArticleExtractor.extract_soup \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.filter_raw.rst.txt b/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.filter_raw.rst.txt deleted file mode 100644 index 932fc5f9..00000000 --- a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.filter_raw.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.ArticleUtils.article\_extractor.ArticleExtractor.filter\_raw -========================================================================== - -.. currentmodule:: Processor.App.ArticleUtils.article_extractor - -.. automethod:: ArticleExtractor.filter_raw \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.filter_soup.rst.txt b/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.filter_soup.rst.txt deleted file mode 100644 index e39b8e4e..00000000 --- a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.filter_soup.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.ArticleUtils.article\_extractor.ArticleExtractor.filter\_soup -=========================================================================== - -.. currentmodule:: Processor.App.ArticleUtils.article_extractor - -.. 
automethod:: ArticleExtractor.filter_soup \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.preprocess.rst.txt b/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.preprocess.rst.txt deleted file mode 100644 index 19d01c84..00000000 --- a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.preprocess.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.ArticleUtils.article\_extractor.ArticleExtractor.preprocess -========================================================================= - -.. currentmodule:: Processor.App.ArticleUtils.article_extractor - -.. automethod:: ArticleExtractor.preprocess \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.rst.txt b/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.rst.txt deleted file mode 100644 index 835252a0..00000000 --- a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.rst.txt +++ /dev/null @@ -1,42 +0,0 @@ -Processor.App.ArticleUtils.article\_extractor.ArticleExtractor -============================================================== - -.. currentmodule:: Processor.App.ArticleUtils.article_extractor - -.. autoclass:: ArticleExtractor - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - :toctree: - - - ~ArticleExtractor.__init__ - ~ArticleExtractor.article_extract - ~ArticleExtractor.check_required - ~ArticleExtractor.custom_extract - ~ArticleExtractor.custom_filter_raw - ~ArticleExtractor.custom_filter_soup - ~ArticleExtractor.extract - ~ArticleExtractor.extract_soup - ~ArticleExtractor.filter_raw - ~ArticleExtractor.filter_soup - ~ArticleExtractor.preprocess - - - - - - .. rubric:: Attributes - - .. 
autosummary:: - - ~ArticleExtractor.ENCODING - ~ArticleExtractor.SINCE - ~ArticleExtractor.TO - - \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.rst.txt b/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.rst.txt deleted file mode 100644 index bb03ee57..00000000 --- a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.rst.txt +++ /dev/null @@ -1,30 +0,0 @@ -Processor.App.ArticleUtils.article\_extractor -============================================= - -.. automodule:: Processor.App.ArticleUtils.article_extractor - - - - - - - - - - - - .. rubric:: Classes - - .. autosummary:: - :toctree: - - ArticleExtractor - - - - - - - - - diff --git a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_utils.rst.txt b/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_utils.rst.txt deleted file mode 100644 index 6b1370a8..00000000 --- a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_utils.rst.txt +++ /dev/null @@ -1,45 +0,0 @@ -Processor.App.ArticleUtils.article\_utils -========================================= - -.. automodule:: Processor.App.ArticleUtils.article_utils - - - - - - - - .. rubric:: Functions - - .. 
autosummary:: - - article_content_transform - author_transform - brief_transform - category_transform - comments_num_transform - cz_date_transform - date_complex_extract - format_date_transform - headline_transform - iso_date_transform - keywords_transform - must_exist_filter - must_not_exist_filter - remove_day_transform - text_unification_transform - text_unifications_transform - url_category_transform - - - - - - - - - - - - - diff --git a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.rst.txt b/docs/build/html/_sources/generated/Processor.App.ArticleUtils.rst.txt deleted file mode 100644 index 6a0e4336..00000000 --- a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.rst.txt +++ /dev/null @@ -1,32 +0,0 @@ -Processor.App.ArticleUtils -========================== - -.. automodule:: Processor.App.ArticleUtils - - - - - - - - - - - - - - - - - - - -.. rubric:: Modules - -.. autosummary:: - :toctree: - :recursive: - - Processor.App.ArticleUtils.article_extractor - Processor.App.ArticleUtils.article_utils - diff --git a/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.Downloader.__init__.rst.txt b/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.Downloader.__init__.rst.txt deleted file mode 100644 index ea9c52eb..00000000 --- a/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.Downloader.__init__.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.Downloader.downloader.Downloader.\_\_init\_\_ -=========================================================== - -.. currentmodule:: Processor.App.Downloader.downloader - -.. 
automethod:: Downloader.__init__ \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.Downloader.download.rst.txt b/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.Downloader.download.rst.txt deleted file mode 100644 index 9762af81..00000000 --- a/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.Downloader.download.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.Downloader.downloader.Downloader.download -======================================================= - -.. currentmodule:: Processor.App.Downloader.downloader - -.. automethod:: Downloader.download \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.Downloader.rst.txt b/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.Downloader.rst.txt deleted file mode 100644 index 0e2076a2..00000000 --- a/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.Downloader.rst.txt +++ /dev/null @@ -1,25 +0,0 @@ -Processor.App.Downloader.downloader.Downloader -============================================== - -.. currentmodule:: Processor.App.Downloader.downloader - -.. autoclass:: Downloader - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - :toctree: - - - ~Downloader.__init__ - ~Downloader.download - - - - - - \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.DownloaderFull.__init__.rst.txt b/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.DownloaderFull.__init__.rst.txt deleted file mode 100644 index c7c7c660..00000000 --- a/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.DownloaderFull.__init__.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.Downloader.downloader.DownloaderFull.\_\_init\_\_ -=============================================================== - -.. 
currentmodule:: Processor.App.Downloader.downloader - -.. automethod:: DownloaderFull.__init__ \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.DownloaderFull.aclose.rst.txt b/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.DownloaderFull.aclose.rst.txt deleted file mode 100644 index 829f1b2d..00000000 --- a/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.DownloaderFull.aclose.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.Downloader.downloader.DownloaderFull.aclose -========================================================= - -.. currentmodule:: Processor.App.Downloader.downloader - -.. automethod:: DownloaderFull.aclose \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.DownloaderFull.aopen.rst.txt b/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.DownloaderFull.aopen.rst.txt deleted file mode 100644 index b5086136..00000000 --- a/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.DownloaderFull.aopen.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.Downloader.downloader.DownloaderFull.aopen -======================================================== - -.. currentmodule:: Processor.App.Downloader.downloader - -.. automethod:: DownloaderFull.aopen \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.DownloaderFull.download.rst.txt b/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.DownloaderFull.download.rst.txt deleted file mode 100644 index ef0762f8..00000000 --- a/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.DownloaderFull.download.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.Downloader.downloader.DownloaderFull.download -=========================================================== - -.. 
currentmodule:: Processor.App.Downloader.downloader - -.. automethod:: DownloaderFull.download \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.DownloaderFull.rst.txt b/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.DownloaderFull.rst.txt deleted file mode 100644 index e52f8a01..00000000 --- a/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.DownloaderFull.rst.txt +++ /dev/null @@ -1,28 +0,0 @@ -Processor.App.Downloader.downloader.DownloaderFull -================================================== - -.. currentmodule:: Processor.App.Downloader.downloader - -.. autoclass:: DownloaderFull - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - :toctree: - - - ~DownloaderFull.__init__ - ~DownloaderFull.aclose - ~DownloaderFull.aopen - ~DownloaderFull.download - ~DownloaderFull.unwrap - - - - - - \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.DownloaderFull.unwrap.rst.txt b/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.DownloaderFull.unwrap.rst.txt deleted file mode 100644 index 3aa67a89..00000000 --- a/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.DownloaderFull.unwrap.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.Downloader.downloader.DownloaderFull.unwrap -========================================================= - -.. currentmodule:: Processor.App.Downloader.downloader - -.. 
automethod:: DownloaderFull.unwrap \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.rst.txt b/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.rst.txt deleted file mode 100644 index d6740402..00000000 --- a/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.rst.txt +++ /dev/null @@ -1,31 +0,0 @@ -Processor.App.Downloader.downloader -=================================== - -.. automodule:: Processor.App.Downloader.downloader - - - - - - - - - - - - .. rubric:: Classes - - .. autosummary:: - :toctree: - - Downloader - DownloaderFull - - - - - - - - - diff --git a/docs/build/html/_sources/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.__init__.rst.txt b/docs/build/html/_sources/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.__init__.rst.txt deleted file mode 100644 index f7e15cdd..00000000 --- a/docs/build/html/_sources/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.__init__.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.Downloader.dummy\_downloader.DownloaderDummy.\_\_init\_\_ -======================================================================= - -.. currentmodule:: Processor.App.Downloader.dummy_downloader - -.. automethod:: DownloaderDummy.__init__ \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.download.rst.txt b/docs/build/html/_sources/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.download.rst.txt deleted file mode 100644 index a83e022e..00000000 --- a/docs/build/html/_sources/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.download.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.Downloader.dummy\_downloader.DownloaderDummy.download -=================================================================== - -.. 
currentmodule:: Processor.App.Downloader.dummy_downloader - -.. automethod:: DownloaderDummy.download \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.extract_url.rst.txt b/docs/build/html/_sources/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.extract_url.rst.txt deleted file mode 100644 index 325a504c..00000000 --- a/docs/build/html/_sources/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.extract_url.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.Downloader.dummy\_downloader.DownloaderDummy.extract\_url -======================================================================= - -.. currentmodule:: Processor.App.Downloader.dummy_downloader - -.. automethod:: DownloaderDummy.extract_url \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.extract_year.rst.txt b/docs/build/html/_sources/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.extract_year.rst.txt deleted file mode 100644 index d9d7db93..00000000 --- a/docs/build/html/_sources/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.extract_year.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.Downloader.dummy\_downloader.DownloaderDummy.extract\_year -======================================================================== - -.. currentmodule:: Processor.App.Downloader.dummy_downloader - -.. 
automethod:: DownloaderDummy.extract_year \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.mine_metadata.rst.txt b/docs/build/html/_sources/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.mine_metadata.rst.txt deleted file mode 100644 index 9ec464b6..00000000 --- a/docs/build/html/_sources/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.mine_metadata.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.Downloader.dummy\_downloader.DownloaderDummy.mine\_metadata -========================================================================= - -.. currentmodule:: Processor.App.Downloader.dummy_downloader - -.. automethod:: DownloaderDummy.mine_metadata \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.rst.txt b/docs/build/html/_sources/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.rst.txt deleted file mode 100644 index fdc29bc9..00000000 --- a/docs/build/html/_sources/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.rst.txt +++ /dev/null @@ -1,28 +0,0 @@ -Processor.App.Downloader.dummy\_downloader.DownloaderDummy -========================================================== - -.. currentmodule:: Processor.App.Downloader.dummy_downloader - -.. autoclass:: DownloaderDummy - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. 
autosummary:: - :toctree: - - - ~DownloaderDummy.__init__ - ~DownloaderDummy.download - ~DownloaderDummy.extract_url - ~DownloaderDummy.extract_year - ~DownloaderDummy.mine_metadata - - - - - - \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.Downloader.dummy_downloader.rst.txt b/docs/build/html/_sources/generated/Processor.App.Downloader.dummy_downloader.rst.txt deleted file mode 100644 index 05782b33..00000000 --- a/docs/build/html/_sources/generated/Processor.App.Downloader.dummy_downloader.rst.txt +++ /dev/null @@ -1,30 +0,0 @@ -Processor.App.Downloader.dummy\_downloader -========================================== - -.. automodule:: Processor.App.Downloader.dummy_downloader - - - - - - - - - - - - .. rubric:: Classes - - .. autosummary:: - :toctree: - - DownloaderDummy - - - - - - - - - diff --git a/docs/build/html/_sources/generated/Processor.App.Downloader.rst.txt b/docs/build/html/_sources/generated/Processor.App.Downloader.rst.txt deleted file mode 100644 index f3f6bf57..00000000 --- a/docs/build/html/_sources/generated/Processor.App.Downloader.rst.txt +++ /dev/null @@ -1,32 +0,0 @@ -Processor.App.Downloader -======================== - -.. automodule:: Processor.App.Downloader - - - - - - - - - - - - - - - - - - - -.. rubric:: Modules - -.. autosummary:: - :toctree: - :recursive: - - Processor.App.Downloader.downloader - Processor.App.Downloader.dummy_downloader - diff --git a/docs/build/html/_sources/generated/Processor.App.Extractor.dummy_extractor.Extractor.__init__.rst.txt b/docs/build/html/_sources/generated/Processor.App.Extractor.dummy_extractor.Extractor.__init__.rst.txt deleted file mode 100644 index 722a74df..00000000 --- a/docs/build/html/_sources/generated/Processor.App.Extractor.dummy_extractor.Extractor.__init__.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.Extractor.dummy\_extractor.Extractor.\_\_init\_\_ -=============================================================== - -.. 
currentmodule:: Processor.App.Extractor.dummy_extractor - -.. automethod:: Extractor.__init__ \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.Extractor.dummy_extractor.Extractor.extract.rst.txt b/docs/build/html/_sources/generated/Processor.App.Extractor.dummy_extractor.Extractor.extract.rst.txt deleted file mode 100644 index 020a42ef..00000000 --- a/docs/build/html/_sources/generated/Processor.App.Extractor.dummy_extractor.Extractor.extract.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.Extractor.dummy\_extractor.Extractor.extract -========================================================== - -.. currentmodule:: Processor.App.Extractor.dummy_extractor - -.. automethod:: Extractor.extract \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.Extractor.dummy_extractor.Extractor.extract_soup.rst.txt b/docs/build/html/_sources/generated/Processor.App.Extractor.dummy_extractor.Extractor.extract_soup.rst.txt deleted file mode 100644 index d63c6eae..00000000 --- a/docs/build/html/_sources/generated/Processor.App.Extractor.dummy_extractor.Extractor.extract_soup.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.Extractor.dummy\_extractor.Extractor.extract\_soup -================================================================ - -.. currentmodule:: Processor.App.Extractor.dummy_extractor - -.. 
automethod:: Extractor.extract_soup \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.Extractor.dummy_extractor.Extractor.filter_raw.rst.txt b/docs/build/html/_sources/generated/Processor.App.Extractor.dummy_extractor.Extractor.filter_raw.rst.txt deleted file mode 100644 index 9dee323f..00000000 --- a/docs/build/html/_sources/generated/Processor.App.Extractor.dummy_extractor.Extractor.filter_raw.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.Extractor.dummy\_extractor.Extractor.filter\_raw -============================================================== - -.. currentmodule:: Processor.App.Extractor.dummy_extractor - -.. automethod:: Extractor.filter_raw \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.Extractor.dummy_extractor.Extractor.filter_soup.rst.txt b/docs/build/html/_sources/generated/Processor.App.Extractor.dummy_extractor.Extractor.filter_soup.rst.txt deleted file mode 100644 index 45f63d7f..00000000 --- a/docs/build/html/_sources/generated/Processor.App.Extractor.dummy_extractor.Extractor.filter_soup.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.Extractor.dummy\_extractor.Extractor.filter\_soup -=============================================================== - -.. currentmodule:: Processor.App.Extractor.dummy_extractor - -.. automethod:: Extractor.filter_soup \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.Extractor.dummy_extractor.Extractor.preprocess.rst.txt b/docs/build/html/_sources/generated/Processor.App.Extractor.dummy_extractor.Extractor.preprocess.rst.txt deleted file mode 100644 index 75d1ef77..00000000 --- a/docs/build/html/_sources/generated/Processor.App.Extractor.dummy_extractor.Extractor.preprocess.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.Extractor.dummy\_extractor.Extractor.preprocess -============================================================= - -.. 
currentmodule:: Processor.App.Extractor.dummy_extractor - -.. automethod:: Extractor.preprocess \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.Extractor.dummy_extractor.Extractor.rst.txt b/docs/build/html/_sources/generated/Processor.App.Extractor.dummy_extractor.Extractor.rst.txt deleted file mode 100644 index ed37ef1b..00000000 --- a/docs/build/html/_sources/generated/Processor.App.Extractor.dummy_extractor.Extractor.rst.txt +++ /dev/null @@ -1,37 +0,0 @@ -Processor.App.Extractor.dummy\_extractor.Extractor -================================================== - -.. currentmodule:: Processor.App.Extractor.dummy_extractor - -.. autoclass:: Extractor - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - :toctree: - - - ~Extractor.__init__ - ~Extractor.extract - ~Extractor.extract_soup - ~Extractor.filter_raw - ~Extractor.filter_soup - ~Extractor.preprocess - - - - - - .. rubric:: Attributes - - .. autosummary:: - - ~Extractor.ENCODING - ~Extractor.SINCE - ~Extractor.TO - - \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.Extractor.dummy_extractor.rst.txt b/docs/build/html/_sources/generated/Processor.App.Extractor.dummy_extractor.rst.txt deleted file mode 100644 index 9d0a2a94..00000000 --- a/docs/build/html/_sources/generated/Processor.App.Extractor.dummy_extractor.rst.txt +++ /dev/null @@ -1,30 +0,0 @@ -Processor.App.Extractor.dummy\_extractor -======================================== - -.. automodule:: Processor.App.Extractor.dummy_extractor - - - - - - - - - - - - .. rubric:: Classes - - .. 
autosummary:: - :toctree: - - Extractor - - - - - - - - - diff --git a/docs/build/html/_sources/generated/Processor.App.Extractor.extractor.BaseExtractor.__init__.rst.txt b/docs/build/html/_sources/generated/Processor.App.Extractor.extractor.BaseExtractor.__init__.rst.txt deleted file mode 100644 index e26ae44c..00000000 --- a/docs/build/html/_sources/generated/Processor.App.Extractor.extractor.BaseExtractor.__init__.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.Extractor.extractor.BaseExtractor.\_\_init\_\_ -============================================================ - -.. currentmodule:: Processor.App.Extractor.extractor - -.. automethod:: BaseExtractor.__init__ \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.Extractor.extractor.BaseExtractor.extract.rst.txt b/docs/build/html/_sources/generated/Processor.App.Extractor.extractor.BaseExtractor.extract.rst.txt deleted file mode 100644 index 498e1eef..00000000 --- a/docs/build/html/_sources/generated/Processor.App.Extractor.extractor.BaseExtractor.extract.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.Extractor.extractor.BaseExtractor.extract -======================================================= - -.. currentmodule:: Processor.App.Extractor.extractor - -.. automethod:: BaseExtractor.extract \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.Extractor.extractor.BaseExtractor.extract_soup.rst.txt b/docs/build/html/_sources/generated/Processor.App.Extractor.extractor.BaseExtractor.extract_soup.rst.txt deleted file mode 100644 index f843eff5..00000000 --- a/docs/build/html/_sources/generated/Processor.App.Extractor.extractor.BaseExtractor.extract_soup.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.Extractor.extractor.BaseExtractor.extract\_soup -============================================================= - -.. currentmodule:: Processor.App.Extractor.extractor - -.. 
automethod:: BaseExtractor.extract_soup \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.Extractor.extractor.BaseExtractor.filter_raw.rst.txt b/docs/build/html/_sources/generated/Processor.App.Extractor.extractor.BaseExtractor.filter_raw.rst.txt deleted file mode 100644 index c211b6d0..00000000 --- a/docs/build/html/_sources/generated/Processor.App.Extractor.extractor.BaseExtractor.filter_raw.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.Extractor.extractor.BaseExtractor.filter\_raw -=========================================================== - -.. currentmodule:: Processor.App.Extractor.extractor - -.. automethod:: BaseExtractor.filter_raw \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.Extractor.extractor.BaseExtractor.filter_soup.rst.txt b/docs/build/html/_sources/generated/Processor.App.Extractor.extractor.BaseExtractor.filter_soup.rst.txt deleted file mode 100644 index 5d0cb513..00000000 --- a/docs/build/html/_sources/generated/Processor.App.Extractor.extractor.BaseExtractor.filter_soup.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.Extractor.extractor.BaseExtractor.filter\_soup -============================================================ - -.. currentmodule:: Processor.App.Extractor.extractor - -.. automethod:: BaseExtractor.filter_soup \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.Extractor.extractor.BaseExtractor.preprocess.rst.txt b/docs/build/html/_sources/generated/Processor.App.Extractor.extractor.BaseExtractor.preprocess.rst.txt deleted file mode 100644 index bf213dc6..00000000 --- a/docs/build/html/_sources/generated/Processor.App.Extractor.extractor.BaseExtractor.preprocess.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.Extractor.extractor.BaseExtractor.preprocess -========================================================== - -.. currentmodule:: Processor.App.Extractor.extractor - -.. 
automethod:: BaseExtractor.preprocess \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.Extractor.extractor.BaseExtractor.rst.txt b/docs/build/html/_sources/generated/Processor.App.Extractor.extractor.BaseExtractor.rst.txt deleted file mode 100644 index 61c49d40..00000000 --- a/docs/build/html/_sources/generated/Processor.App.Extractor.extractor.BaseExtractor.rst.txt +++ /dev/null @@ -1,37 +0,0 @@ -Processor.App.Extractor.extractor.BaseExtractor -=============================================== - -.. currentmodule:: Processor.App.Extractor.extractor - -.. autoclass:: BaseExtractor - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - :toctree: - - - ~BaseExtractor.__init__ - ~BaseExtractor.extract - ~BaseExtractor.extract_soup - ~BaseExtractor.filter_raw - ~BaseExtractor.filter_soup - ~BaseExtractor.preprocess - - - - - - .. rubric:: Attributes - - .. autosummary:: - - ~BaseExtractor.ENCODING - ~BaseExtractor.SINCE - ~BaseExtractor.TO - - \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.Extractor.extractor.rst.txt b/docs/build/html/_sources/generated/Processor.App.Extractor.extractor.rst.txt deleted file mode 100644 index 96733184..00000000 --- a/docs/build/html/_sources/generated/Processor.App.Extractor.extractor.rst.txt +++ /dev/null @@ -1,30 +0,0 @@ -Processor.App.Extractor.extractor -================================= - -.. automodule:: Processor.App.Extractor.extractor - - - - - - - - - - - - .. rubric:: Classes - - .. 
autosummary:: - :toctree: - - BaseExtractor - - - - - - - - - diff --git a/docs/build/html/_sources/generated/Processor.App.Extractor.extractor_utils.rst.txt b/docs/build/html/_sources/generated/Processor.App.Extractor.extractor_utils.rst.txt deleted file mode 100644 index 636bbbfe..00000000 --- a/docs/build/html/_sources/generated/Processor.App.Extractor.extractor_utils.rst.txt +++ /dev/null @@ -1,37 +0,0 @@ -Processor.App.Extractor.extractor\_utils -======================================== - -.. automodule:: Processor.App.Extractor.extractor_utils - - - - - - - - .. rubric:: Functions - - .. autosummary:: - - all_same_transform - combine_dicts - extract_transform - get_attribute_transform - get_tag_transform - get_tags_transform - get_text_list_transform - get_text_transform - transform - - - - - - - - - - - - - diff --git a/docs/build/html/_sources/generated/Processor.App.Extractor.rst.txt b/docs/build/html/_sources/generated/Processor.App.Extractor.rst.txt deleted file mode 100644 index 27e0561d..00000000 --- a/docs/build/html/_sources/generated/Processor.App.Extractor.rst.txt +++ /dev/null @@ -1,33 +0,0 @@ -Processor.App.Extractor -======================= - -.. automodule:: Processor.App.Extractor - - - - - - - - - - - - - - - - - - - -.. rubric:: Modules - -.. 
autosummary:: - :toctree: - :recursive: - - Processor.App.Extractor.dummy_extractor - Processor.App.Extractor.extractor - Processor.App.Extractor.extractor_utils - diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.dummy_streamer.DummyStreamer.__init__.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.dummy_streamer.DummyStreamer.__init__.rst.txt deleted file mode 100644 index 965c0790..00000000 --- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.dummy_streamer.DummyStreamer.__init__.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.OutStreamer.dummy\_streamer.DummyStreamer.\_\_init\_\_ -==================================================================== - -.. currentmodule:: Processor.App.OutStreamer.dummy_streamer - -.. automethod:: DummyStreamer.__init__ \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.dummy_streamer.DummyStreamer.clean_up.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.dummy_streamer.DummyStreamer.clean_up.rst.txt deleted file mode 100644 index ddb9fc08..00000000 --- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.dummy_streamer.DummyStreamer.clean_up.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.OutStreamer.dummy\_streamer.DummyStreamer.clean\_up -================================================================= - -.. currentmodule:: Processor.App.OutStreamer.dummy_streamer - -.. 
automethod:: DummyStreamer.clean_up \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.dummy_streamer.DummyStreamer.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.dummy_streamer.DummyStreamer.rst.txt deleted file mode 100644 index ad575bea..00000000 --- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.dummy_streamer.DummyStreamer.rst.txt +++ /dev/null @@ -1,26 +0,0 @@ -Processor.App.OutStreamer.dummy\_streamer.DummyStreamer -======================================================= - -.. currentmodule:: Processor.App.OutStreamer.dummy_streamer - -.. autoclass:: DummyStreamer - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - :toctree: - - - ~DummyStreamer.__init__ - ~DummyStreamer.clean_up - ~DummyStreamer.stream - - - - - - \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.dummy_streamer.DummyStreamer.stream.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.dummy_streamer.DummyStreamer.stream.rst.txt deleted file mode 100644 index 7b1278c6..00000000 --- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.dummy_streamer.DummyStreamer.stream.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.OutStreamer.dummy\_streamer.DummyStreamer.stream -============================================================== - -.. currentmodule:: Processor.App.OutStreamer.dummy_streamer - -.. automethod:: DummyStreamer.stream \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.dummy_streamer.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.dummy_streamer.rst.txt deleted file mode 100644 index d724469a..00000000 --- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.dummy_streamer.rst.txt +++ /dev/null @@ -1,30 +0,0 @@ -Processor.App.OutStreamer.dummy\_streamer -========================================= - -.. 
automodule:: Processor.App.OutStreamer.dummy_streamer - - - - - - - - - - - - .. rubric:: Classes - - .. autosummary:: - :toctree: - - DummyStreamer - - - - - - - - - diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.outstreamer.OutStreamer.__init__.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.outstreamer.OutStreamer.__init__.rst.txt deleted file mode 100644 index ba586959..00000000 --- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.outstreamer.OutStreamer.__init__.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.OutStreamer.outstreamer.OutStreamer.\_\_init\_\_ -============================================================== - -.. currentmodule:: Processor.App.OutStreamer.outstreamer - -.. automethod:: OutStreamer.__init__ \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.outstreamer.OutStreamer.clean_up.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.outstreamer.OutStreamer.clean_up.rst.txt deleted file mode 100644 index 57076dc4..00000000 --- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.outstreamer.OutStreamer.clean_up.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.OutStreamer.outstreamer.OutStreamer.clean\_up -=========================================================== - -.. currentmodule:: Processor.App.OutStreamer.outstreamer - -.. automethod:: OutStreamer.clean_up \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.outstreamer.OutStreamer.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.outstreamer.OutStreamer.rst.txt deleted file mode 100644 index f4ed6b19..00000000 --- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.outstreamer.OutStreamer.rst.txt +++ /dev/null @@ -1,26 +0,0 @@ -Processor.App.OutStreamer.outstreamer.OutStreamer -================================================= - -.. 
currentmodule:: Processor.App.OutStreamer.outstreamer - -.. autoclass:: OutStreamer - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - :toctree: - - - ~OutStreamer.__init__ - ~OutStreamer.clean_up - ~OutStreamer.stream - - - - - - \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.outstreamer.OutStreamer.stream.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.outstreamer.OutStreamer.stream.rst.txt deleted file mode 100644 index 2b776c8f..00000000 --- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.outstreamer.OutStreamer.stream.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.OutStreamer.outstreamer.OutStreamer.stream -======================================================== - -.. currentmodule:: Processor.App.OutStreamer.outstreamer - -.. automethod:: OutStreamer.stream \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.outstreamer.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.outstreamer.rst.txt deleted file mode 100644 index c96dca60..00000000 --- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.outstreamer.rst.txt +++ /dev/null @@ -1,30 +0,0 @@ -Processor.App.OutStreamer.outstreamer -===================================== - -.. automodule:: Processor.App.OutStreamer.outstreamer - - - - - - - - - - - - .. rubric:: Classes - - .. autosummary:: - :toctree: - - OutStreamer - - - - - - - - - diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.rst.txt deleted file mode 100644 index f554c110..00000000 --- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.rst.txt +++ /dev/null @@ -1,33 +0,0 @@ -Processor.App.OutStreamer -========================= - -.. automodule:: Processor.App.OutStreamer - - - - - - - - - - - - - - - - - - - -.. rubric:: Modules - -.. 
autosummary:: - :toctree: - :recursive: - - Processor.App.OutStreamer.dummy_streamer - Processor.App.OutStreamer.outstreamer - Processor.App.OutStreamer.stream_to_file - diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.__init__.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.__init__.rst.txt deleted file mode 100644 index 3e71a599..00000000 --- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.__init__.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.OutStreamer.stream\_to\_file.OutStreamerFileDefault.\_\_init\_\_ -============================================================================== - -.. currentmodule:: Processor.App.OutStreamer.stream_to_file - -.. automethod:: OutStreamerFileDefault.__init__ \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.clean_up.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.clean_up.rst.txt deleted file mode 100644 index 96db9ed7..00000000 --- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.clean_up.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.OutStreamer.stream\_to\_file.OutStreamerFileDefault.clean\_up -=========================================================================== - -.. currentmodule:: Processor.App.OutStreamer.stream_to_file - -.. 
automethod:: OutStreamerFileDefault.clean_up \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.get_file_name.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.get_file_name.rst.txt deleted file mode 100644 index 2b06059f..00000000 --- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.get_file_name.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.OutStreamer.stream\_to\_file.OutStreamerFileDefault.get\_file\_name -================================================================================= - -.. currentmodule:: Processor.App.OutStreamer.stream_to_file - -.. automethod:: OutStreamerFileDefault.get_file_name \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.metadata_to_string.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.metadata_to_string.rst.txt deleted file mode 100644 index 6c994f6b..00000000 --- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.metadata_to_string.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.OutStreamer.stream\_to\_file.OutStreamerFileDefault.metadata\_to\_string -====================================================================================== - -.. currentmodule:: Processor.App.OutStreamer.stream_to_file - -.. 
automethod:: OutStreamerFileDefault.metadata_to_string \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.rst.txt deleted file mode 100644 index 81e7a152..00000000 --- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.rst.txt +++ /dev/null @@ -1,28 +0,0 @@ -Processor.App.OutStreamer.stream\_to\_file.OutStreamerFileDefault -================================================================= - -.. currentmodule:: Processor.App.OutStreamer.stream_to_file - -.. autoclass:: OutStreamerFileDefault - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - :toctree: - - - ~OutStreamerFileDefault.__init__ - ~OutStreamerFileDefault.clean_up - ~OutStreamerFileDefault.get_file_name - ~OutStreamerFileDefault.metadata_to_string - ~OutStreamerFileDefault.stream - - - - - - \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.stream.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.stream.rst.txt deleted file mode 100644 index 61876e12..00000000 --- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.stream.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.OutStreamer.stream\_to\_file.OutStreamerFileDefault.stream -======================================================================== - -.. currentmodule:: Processor.App.OutStreamer.stream_to_file - -.. 
automethod:: OutStreamerFileDefault.stream \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.__init__.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.__init__.rst.txt deleted file mode 100644 index 492ac309..00000000 --- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.__init__.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.OutStreamer.stream\_to\_file.OutStreamerFileHTMLContent.\_\_init\_\_ -================================================================================== - -.. currentmodule:: Processor.App.OutStreamer.stream_to_file - -.. automethod:: OutStreamerFileHTMLContent.__init__ \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.clean_up.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.clean_up.rst.txt deleted file mode 100644 index 5ee2f762..00000000 --- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.clean_up.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.OutStreamer.stream\_to\_file.OutStreamerFileHTMLContent.clean\_up -=============================================================================== - -.. currentmodule:: Processor.App.OutStreamer.stream_to_file - -.. 
automethod:: OutStreamerFileHTMLContent.clean_up \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.get_file_name.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.get_file_name.rst.txt deleted file mode 100644 index e8fc71a1..00000000 --- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.get_file_name.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.OutStreamer.stream\_to\_file.OutStreamerFileHTMLContent.get\_file\_name -===================================================================================== - -.. currentmodule:: Processor.App.OutStreamer.stream_to_file - -.. automethod:: OutStreamerFileHTMLContent.get_file_name \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.metadata_to_string.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.metadata_to_string.rst.txt deleted file mode 100644 index 5041edba..00000000 --- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.metadata_to_string.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.OutStreamer.stream\_to\_file.OutStreamerFileHTMLContent.metadata\_to\_string -========================================================================================== - -.. currentmodule:: Processor.App.OutStreamer.stream_to_file - -.. 
automethod:: OutStreamerFileHTMLContent.metadata_to_string \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.rst.txt deleted file mode 100644 index 6c387be1..00000000 --- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.rst.txt +++ /dev/null @@ -1,28 +0,0 @@ -Processor.App.OutStreamer.stream\_to\_file.OutStreamerFileHTMLContent -===================================================================== - -.. currentmodule:: Processor.App.OutStreamer.stream_to_file - -.. autoclass:: OutStreamerFileHTMLContent - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - :toctree: - - - ~OutStreamerFileHTMLContent.__init__ - ~OutStreamerFileHTMLContent.clean_up - ~OutStreamerFileHTMLContent.get_file_name - ~OutStreamerFileHTMLContent.metadata_to_string - ~OutStreamerFileHTMLContent.stream - - - - - - \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.stream.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.stream.rst.txt deleted file mode 100644 index 76b773a4..00000000 --- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.stream.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.OutStreamer.stream\_to\_file.OutStreamerFileHTMLContent.stream -============================================================================ - -.. currentmodule:: Processor.App.OutStreamer.stream_to_file - -.. 
automethod:: OutStreamerFileHTMLContent.stream \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.__init__.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.__init__.rst.txt deleted file mode 100644 index fb03c5ca..00000000 --- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.__init__.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.OutStreamer.stream\_to\_file.OutStreamerFileJSON.\_\_init\_\_ -=========================================================================== - -.. currentmodule:: Processor.App.OutStreamer.stream_to_file - -.. automethod:: OutStreamerFileJSON.__init__ \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.clean_up.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.clean_up.rst.txt deleted file mode 100644 index f769b5df..00000000 --- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.clean_up.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.OutStreamer.stream\_to\_file.OutStreamerFileJSON.clean\_up -======================================================================== - -.. currentmodule:: Processor.App.OutStreamer.stream_to_file - -.. 
automethod:: OutStreamerFileJSON.clean_up \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.get_file_name.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.get_file_name.rst.txt deleted file mode 100644 index be184ebe..00000000 --- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.get_file_name.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.OutStreamer.stream\_to\_file.OutStreamerFileJSON.get\_file\_name -============================================================================== - -.. currentmodule:: Processor.App.OutStreamer.stream_to_file - -.. automethod:: OutStreamerFileJSON.get_file_name \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.metadata_to_string.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.metadata_to_string.rst.txt deleted file mode 100644 index 332848a9..00000000 --- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.metadata_to_string.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.OutStreamer.stream\_to\_file.OutStreamerFileJSON.metadata\_to\_string -=================================================================================== - -.. currentmodule:: Processor.App.OutStreamer.stream_to_file - -.. 
automethod:: OutStreamerFileJSON.metadata_to_string \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.rst.txt deleted file mode 100644 index f5663dfc..00000000 --- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.rst.txt +++ /dev/null @@ -1,28 +0,0 @@ -Processor.App.OutStreamer.stream\_to\_file.OutStreamerFileJSON -============================================================== - -.. currentmodule:: Processor.App.OutStreamer.stream_to_file - -.. autoclass:: OutStreamerFileJSON - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - :toctree: - - - ~OutStreamerFileJSON.__init__ - ~OutStreamerFileJSON.clean_up - ~OutStreamerFileJSON.get_file_name - ~OutStreamerFileJSON.metadata_to_string - ~OutStreamerFileJSON.stream - - - - - - \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.stream.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.stream.rst.txt deleted file mode 100644 index 5f6ef1f9..00000000 --- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.stream.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.OutStreamer.stream\_to\_file.OutStreamerFileJSON.stream -===================================================================== - -.. currentmodule:: Processor.App.OutStreamer.stream_to_file - -.. 
automethod:: OutStreamerFileJSON.stream \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.rst.txt deleted file mode 100644 index 8a3725d9..00000000 --- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.rst.txt +++ /dev/null @@ -1,32 +0,0 @@ -Processor.App.OutStreamer.stream\_to\_file -========================================== - -.. automodule:: Processor.App.OutStreamer.stream_to_file - - - - - - - - - - - - .. rubric:: Classes - - .. autosummary:: - :toctree: - - OutStreamerFileDefault - OutStreamerFileHTMLContent - OutStreamerFileJSON - - - - - - - - - diff --git a/docs/build/html/_sources/generated/Processor.App.Pipeline.pipeline.ProcessorPipeline.__init__.rst.txt b/docs/build/html/_sources/generated/Processor.App.Pipeline.pipeline.ProcessorPipeline.__init__.rst.txt deleted file mode 100644 index 77b2fd3f..00000000 --- a/docs/build/html/_sources/generated/Processor.App.Pipeline.pipeline.ProcessorPipeline.__init__.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.Pipeline.pipeline.ProcessorPipeline.\_\_init\_\_ -============================================================== - -.. currentmodule:: Processor.App.Pipeline.pipeline - -.. 
automethod:: ProcessorPipeline.__init__ \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.Pipeline.pipeline.ProcessorPipeline.process_domain_record.rst.txt b/docs/build/html/_sources/generated/Processor.App.Pipeline.pipeline.ProcessorPipeline.process_domain_record.rst.txt deleted file mode 100644 index 598ebbad..00000000 --- a/docs/build/html/_sources/generated/Processor.App.Pipeline.pipeline.ProcessorPipeline.process_domain_record.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.Pipeline.pipeline.ProcessorPipeline.process\_domain\_record -========================================================================= - -.. currentmodule:: Processor.App.Pipeline.pipeline - -.. automethod:: ProcessorPipeline.process_domain_record \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.Pipeline.pipeline.ProcessorPipeline.rst.txt b/docs/build/html/_sources/generated/Processor.App.Pipeline.pipeline.ProcessorPipeline.rst.txt deleted file mode 100644 index 02b3c1cb..00000000 --- a/docs/build/html/_sources/generated/Processor.App.Pipeline.pipeline.ProcessorPipeline.rst.txt +++ /dev/null @@ -1,25 +0,0 @@ -Processor.App.Pipeline.pipeline.ProcessorPipeline -================================================= - -.. currentmodule:: Processor.App.Pipeline.pipeline - -.. autoclass:: ProcessorPipeline - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - :toctree: - - - ~ProcessorPipeline.__init__ - ~ProcessorPipeline.process_domain_record - - - - - - \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.Pipeline.pipeline.rst.txt b/docs/build/html/_sources/generated/Processor.App.Pipeline.pipeline.rst.txt deleted file mode 100644 index a3c12dc7..00000000 --- a/docs/build/html/_sources/generated/Processor.App.Pipeline.pipeline.rst.txt +++ /dev/null @@ -1,30 +0,0 @@ -Processor.App.Pipeline.pipeline -=============================== - -.. 
automodule:: Processor.App.Pipeline.pipeline - - - - - - - - - - - - .. rubric:: Classes - - .. autosummary:: - :toctree: - - ProcessorPipeline - - - - - - - - - diff --git a/docs/build/html/_sources/generated/Processor.App.Pipeline.rst.txt b/docs/build/html/_sources/generated/Processor.App.Pipeline.rst.txt deleted file mode 100644 index da6f2f8c..00000000 --- a/docs/build/html/_sources/generated/Processor.App.Pipeline.rst.txt +++ /dev/null @@ -1,31 +0,0 @@ -Processor.App.Pipeline -====================== - -.. automodule:: Processor.App.Pipeline - - - - - - - - - - - - - - - - - - - -.. rubric:: Modules - -.. autosummary:: - :toctree: - :recursive: - - Processor.App.Pipeline.pipeline - diff --git a/docs/build/html/_sources/generated/Processor.App.Router.router.Route.__init__.rst.txt b/docs/build/html/_sources/generated/Processor.App.Router.router.Route.__init__.rst.txt deleted file mode 100644 index 8e322777..00000000 --- a/docs/build/html/_sources/generated/Processor.App.Router.router.Route.__init__.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.Router.router.Route.\_\_init\_\_ -============================================== - -.. currentmodule:: Processor.App.Router.router - -.. automethod:: Route.__init__ \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.Router.router.Route.rst.txt b/docs/build/html/_sources/generated/Processor.App.Router.router.Route.rst.txt deleted file mode 100644 index d5d789f0..00000000 --- a/docs/build/html/_sources/generated/Processor.App.Router.router.Route.rst.txt +++ /dev/null @@ -1,33 +0,0 @@ -Processor.App.Router.router.Route -================================= - -.. currentmodule:: Processor.App.Router.router - -.. autoclass:: Route - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - :toctree: - - - ~Route.__init__ - - - - - - .. rubric:: Attributes - - .. 
autosummary:: - - ~Route.name - ~Route.regexes - ~Route.since - ~Route.to - - \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.Router.router.Router.__init__.rst.txt b/docs/build/html/_sources/generated/Processor.App.Router.router.Router.__init__.rst.txt deleted file mode 100644 index b83aa285..00000000 --- a/docs/build/html/_sources/generated/Processor.App.Router.router.Router.__init__.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.Router.router.Router.\_\_init\_\_ -=============================================== - -.. currentmodule:: Processor.App.Router.router - -.. automethod:: Router.__init__ \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.Router.router.Router.load_module.rst.txt b/docs/build/html/_sources/generated/Processor.App.Router.router.Router.load_module.rst.txt deleted file mode 100644 index 4732d19f..00000000 --- a/docs/build/html/_sources/generated/Processor.App.Router.router.Router.load_module.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.Router.router.Router.load\_module -=============================================== - -.. currentmodule:: Processor.App.Router.router - -.. automethod:: Router.load_module \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.Router.router.Router.load_modules.rst.txt b/docs/build/html/_sources/generated/Processor.App.Router.router.Router.load_modules.rst.txt deleted file mode 100644 index 59d81478..00000000 --- a/docs/build/html/_sources/generated/Processor.App.Router.router.Router.load_modules.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.Router.router.Router.load\_modules -================================================ - -.. currentmodule:: Processor.App.Router.router - -.. 
automethod:: Router.load_modules \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.Router.router.Router.register_route.rst.txt b/docs/build/html/_sources/generated/Processor.App.Router.router.Router.register_route.rst.txt deleted file mode 100644 index 20952e26..00000000 --- a/docs/build/html/_sources/generated/Processor.App.Router.router.Router.register_route.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.Router.router.Router.register\_route -================================================== - -.. currentmodule:: Processor.App.Router.router - -.. automethod:: Router.register_route \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.Router.router.Router.register_routes.rst.txt b/docs/build/html/_sources/generated/Processor.App.Router.router.Router.register_routes.rst.txt deleted file mode 100644 index ce3eb617..00000000 --- a/docs/build/html/_sources/generated/Processor.App.Router.router.Router.register_routes.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.Router.router.Router.register\_routes -=================================================== - -.. currentmodule:: Processor.App.Router.router - -.. automethod:: Router.register_routes \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.Router.router.Router.route.rst.txt b/docs/build/html/_sources/generated/Processor.App.Router.router.Router.route.rst.txt deleted file mode 100644 index 012c88e8..00000000 --- a/docs/build/html/_sources/generated/Processor.App.Router.router.Router.route.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.Router.router.Router.route -======================================== - -.. currentmodule:: Processor.App.Router.router - -.. 
automethod:: Router.route \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.Router.router.Router.rst.txt b/docs/build/html/_sources/generated/Processor.App.Router.router.Router.rst.txt deleted file mode 100644 index 3425cf67..00000000 --- a/docs/build/html/_sources/generated/Processor.App.Router.router.Router.rst.txt +++ /dev/null @@ -1,29 +0,0 @@ -Processor.App.Router.router.Router -================================== - -.. currentmodule:: Processor.App.Router.router - -.. autoclass:: Router - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - :toctree: - - - ~Router.__init__ - ~Router.load_module - ~Router.load_modules - ~Router.register_route - ~Router.register_routes - ~Router.route - - - - - - \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.Router.router.rst.txt b/docs/build/html/_sources/generated/Processor.App.Router.router.rst.txt deleted file mode 100644 index ff8944bf..00000000 --- a/docs/build/html/_sources/generated/Processor.App.Router.router.rst.txt +++ /dev/null @@ -1,31 +0,0 @@ -Processor.App.Router.router -=========================== - -.. automodule:: Processor.App.Router.router - - - - - - - - - - - - .. rubric:: Classes - - .. autosummary:: - :toctree: - - Route - Router - - - - - - - - - diff --git a/docs/build/html/_sources/generated/Processor.App.Router.rst.txt b/docs/build/html/_sources/generated/Processor.App.Router.rst.txt deleted file mode 100644 index 794bc0ca..00000000 --- a/docs/build/html/_sources/generated/Processor.App.Router.rst.txt +++ /dev/null @@ -1,31 +0,0 @@ -Processor.App.Router -==================== - -.. automodule:: Processor.App.Router - - - - - - - - - - - - - - - - - - - -.. rubric:: Modules - -.. 
autosummary:: - :toctree: - :recursive: - - Processor.App.Router.router - diff --git a/docs/build/html/_sources/generated/Processor.App.processor_utils.DomainRecord.__init__.rst.txt b/docs/build/html/_sources/generated/Processor.App.processor_utils.DomainRecord.__init__.rst.txt deleted file mode 100644 index 9ec73e9d..00000000 --- a/docs/build/html/_sources/generated/Processor.App.processor_utils.DomainRecord.__init__.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.processor\_utils.DomainRecord.\_\_init\_\_ -======================================================== - -.. currentmodule:: Processor.App.processor_utils - -.. automethod:: DomainRecord.__init__ \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.processor_utils.DomainRecord.rst.txt b/docs/build/html/_sources/generated/Processor.App.processor_utils.DomainRecord.rst.txt deleted file mode 100644 index 90d82a77..00000000 --- a/docs/build/html/_sources/generated/Processor.App.processor_utils.DomainRecord.rst.txt +++ /dev/null @@ -1,36 +0,0 @@ -Processor.App.processor\_utils.DomainRecord -=========================================== - -.. currentmodule:: Processor.App.processor_utils - -.. autoclass:: DomainRecord - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - :toctree: - - - ~DomainRecord.__init__ - - - - - - .. rubric:: Attributes - - .. 
autosummary:: - - ~DomainRecord.digest - ~DomainRecord.encoding - ~DomainRecord.timestamp - ~DomainRecord.filename - ~DomainRecord.url - ~DomainRecord.offset - ~DomainRecord.length - - \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.processor_utils.PipeMetadata.__init__.rst.txt b/docs/build/html/_sources/generated/Processor.App.processor_utils.PipeMetadata.__init__.rst.txt deleted file mode 100644 index 24a6df00..00000000 --- a/docs/build/html/_sources/generated/Processor.App.processor_utils.PipeMetadata.__init__.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.App.processor\_utils.PipeMetadata.\_\_init\_\_ -======================================================== - -.. currentmodule:: Processor.App.processor_utils - -.. automethod:: PipeMetadata.__init__ \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.processor_utils.PipeMetadata.rst.txt b/docs/build/html/_sources/generated/Processor.App.processor_utils.PipeMetadata.rst.txt deleted file mode 100644 index 3b77b49f..00000000 --- a/docs/build/html/_sources/generated/Processor.App.processor_utils.PipeMetadata.rst.txt +++ /dev/null @@ -1,35 +0,0 @@ -Processor.App.processor\_utils.PipeMetadata -=========================================== - -.. currentmodule:: Processor.App.processor_utils - -.. autoclass:: PipeMetadata - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - :toctree: - - - ~PipeMetadata.__init__ - - - - - - .. rubric:: Attributes - - .. 
autosummary:: - - ~PipeMetadata.encoding - ~PipeMetadata.name - ~PipeMetadata.domain_record - ~PipeMetadata.article_data - ~PipeMetadata.warc_header - ~PipeMetadata.http_header - - \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.App.processor_utils.rst.txt b/docs/build/html/_sources/generated/Processor.App.processor_utils.rst.txt deleted file mode 100644 index a93397d5..00000000 --- a/docs/build/html/_sources/generated/Processor.App.processor_utils.rst.txt +++ /dev/null @@ -1,31 +0,0 @@ -Processor.App.processor\_utils -============================== - -.. automodule:: Processor.App.processor_utils - - - - - - - - - - - - .. rubric:: Classes - - .. autosummary:: - :toctree: - - DomainRecord - PipeMetadata - - - - - - - - - diff --git a/docs/build/html/_sources/generated/Processor.App.rst.txt b/docs/build/html/_sources/generated/Processor.App.rst.txt deleted file mode 100644 index fb937e59..00000000 --- a/docs/build/html/_sources/generated/Processor.App.rst.txt +++ /dev/null @@ -1,37 +0,0 @@ -Processor.App -============= - -.. automodule:: Processor.App - - - - - - - - - - - - - - - - - - - -.. rubric:: Modules - -.. autosummary:: - :toctree: - :recursive: - - Processor.App.Downloader - Processor.App.Extractor - Processor.App.OutStreamer - Processor.App.Pipeline - Processor.App.Router - Processor.App.processor_utils - Processor.App.ArticleUtils - diff --git a/docs/build/html/_sources/generated/Processor.process_article.rst.txt b/docs/build/html/_sources/generated/Processor.process_article.rst.txt deleted file mode 100644 index a11fa785..00000000 --- a/docs/build/html/_sources/generated/Processor.process_article.rst.txt +++ /dev/null @@ -1,30 +0,0 @@ -Processor.process\_article -========================== - -.. automodule:: Processor.process_article - - - - - - - - .. rubric:: Functions - - .. 
autosummary:: - - article_process - main - - - - - - - - - - - - - diff --git a/docs/build/html/_sources/generated/Processor.processor.Listener.__init__.rst.txt b/docs/build/html/_sources/generated/Processor.processor.Listener.__init__.rst.txt deleted file mode 100644 index 260c0116..00000000 --- a/docs/build/html/_sources/generated/Processor.processor.Listener.__init__.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.processor.Listener.\_\_init\_\_ -========================================= - -.. currentmodule:: Processor.processor - -.. automethod:: Listener.__init__ \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.processor.Listener.on_before_message.rst.txt b/docs/build/html/_sources/generated/Processor.processor.Listener.on_before_message.rst.txt deleted file mode 100644 index c8886fcc..00000000 --- a/docs/build/html/_sources/generated/Processor.processor.Listener.on_before_message.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.processor.Listener.on\_before\_message -================================================ - -.. currentmodule:: Processor.processor - -.. automethod:: Listener.on_before_message \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.processor.Listener.on_connected.rst.txt b/docs/build/html/_sources/generated/Processor.processor.Listener.on_connected.rst.txt deleted file mode 100644 index 103399f6..00000000 --- a/docs/build/html/_sources/generated/Processor.processor.Listener.on_connected.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.processor.Listener.on\_connected -========================================== - -.. currentmodule:: Processor.processor - -.. 
automethod:: Listener.on_connected \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.processor.Listener.on_connecting.rst.txt b/docs/build/html/_sources/generated/Processor.processor.Listener.on_connecting.rst.txt deleted file mode 100644 index d67c467e..00000000 --- a/docs/build/html/_sources/generated/Processor.processor.Listener.on_connecting.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.processor.Listener.on\_connecting -=========================================== - -.. currentmodule:: Processor.processor - -.. automethod:: Listener.on_connecting \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.processor.Listener.on_disconnected.rst.txt b/docs/build/html/_sources/generated/Processor.processor.Listener.on_disconnected.rst.txt deleted file mode 100644 index 8a4de99f..00000000 --- a/docs/build/html/_sources/generated/Processor.processor.Listener.on_disconnected.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.processor.Listener.on\_disconnected -============================================= - -.. currentmodule:: Processor.processor - -.. automethod:: Listener.on_disconnected \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.processor.Listener.on_disconnecting.rst.txt b/docs/build/html/_sources/generated/Processor.processor.Listener.on_disconnecting.rst.txt deleted file mode 100644 index 732d2212..00000000 --- a/docs/build/html/_sources/generated/Processor.processor.Listener.on_disconnecting.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.processor.Listener.on\_disconnecting -============================================== - -.. currentmodule:: Processor.processor - -.. 
automethod:: Listener.on_disconnecting \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.processor.Listener.on_error.rst.txt b/docs/build/html/_sources/generated/Processor.processor.Listener.on_error.rst.txt deleted file mode 100644 index 878082af..00000000 --- a/docs/build/html/_sources/generated/Processor.processor.Listener.on_error.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.processor.Listener.on\_error -====================================== - -.. currentmodule:: Processor.processor - -.. automethod:: Listener.on_error \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.processor.Listener.on_heartbeat.rst.txt b/docs/build/html/_sources/generated/Processor.processor.Listener.on_heartbeat.rst.txt deleted file mode 100644 index a3b6d6c3..00000000 --- a/docs/build/html/_sources/generated/Processor.processor.Listener.on_heartbeat.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.processor.Listener.on\_heartbeat -========================================== - -.. currentmodule:: Processor.processor - -.. automethod:: Listener.on_heartbeat \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.processor.Listener.on_heartbeat_timeout.rst.txt b/docs/build/html/_sources/generated/Processor.processor.Listener.on_heartbeat_timeout.rst.txt deleted file mode 100644 index d5cbc54e..00000000 --- a/docs/build/html/_sources/generated/Processor.processor.Listener.on_heartbeat_timeout.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.processor.Listener.on\_heartbeat\_timeout -=================================================== - -.. currentmodule:: Processor.processor - -.. 
automethod:: Listener.on_heartbeat_timeout \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.processor.Listener.on_message.rst.txt b/docs/build/html/_sources/generated/Processor.processor.Listener.on_message.rst.txt deleted file mode 100644 index 5132b722..00000000 --- a/docs/build/html/_sources/generated/Processor.processor.Listener.on_message.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.processor.Listener.on\_message -======================================== - -.. currentmodule:: Processor.processor - -.. automethod:: Listener.on_message \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.processor.Listener.on_receipt.rst.txt b/docs/build/html/_sources/generated/Processor.processor.Listener.on_receipt.rst.txt deleted file mode 100644 index 4d19d384..00000000 --- a/docs/build/html/_sources/generated/Processor.processor.Listener.on_receipt.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.processor.Listener.on\_receipt -======================================== - -.. currentmodule:: Processor.processor - -.. automethod:: Listener.on_receipt \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.processor.Listener.on_receiver_loop_completed.rst.txt b/docs/build/html/_sources/generated/Processor.processor.Listener.on_receiver_loop_completed.rst.txt deleted file mode 100644 index 54925ad9..00000000 --- a/docs/build/html/_sources/generated/Processor.processor.Listener.on_receiver_loop_completed.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.processor.Listener.on\_receiver\_loop\_completed -========================================================== - -.. currentmodule:: Processor.processor - -.. 
automethod:: Listener.on_receiver_loop_completed \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.processor.Listener.on_send.rst.txt b/docs/build/html/_sources/generated/Processor.processor.Listener.on_send.rst.txt deleted file mode 100644 index 33268c0b..00000000 --- a/docs/build/html/_sources/generated/Processor.processor.Listener.on_send.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.processor.Listener.on\_send -===================================== - -.. currentmodule:: Processor.processor - -.. automethod:: Listener.on_send \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.processor.Listener.rst.txt b/docs/build/html/_sources/generated/Processor.processor.Listener.rst.txt deleted file mode 100644 index 78923da9..00000000 --- a/docs/build/html/_sources/generated/Processor.processor.Listener.rst.txt +++ /dev/null @@ -1,36 +0,0 @@ -Processor.processor.Listener -============================ - -.. currentmodule:: Processor.processor - -.. autoclass:: Listener - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - :toctree: - - - ~Listener.__init__ - ~Listener.on_before_message - ~Listener.on_connected - ~Listener.on_connecting - ~Listener.on_disconnected - ~Listener.on_disconnecting - ~Listener.on_error - ~Listener.on_heartbeat - ~Listener.on_heartbeat_timeout - ~Listener.on_message - ~Listener.on_receipt - ~Listener.on_receiver_loop_completed - ~Listener.on_send - - - - - - \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.processor.ListnerStats.__init__.rst.txt b/docs/build/html/_sources/generated/Processor.processor.ListnerStats.__init__.rst.txt deleted file mode 100644 index 3db79398..00000000 --- a/docs/build/html/_sources/generated/Processor.processor.ListnerStats.__init__.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.processor.ListnerStats.\_\_init\_\_ -============================================= - -.. 
currentmodule:: Processor.processor - -.. automethod:: ListnerStats.__init__ \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.processor.ListnerStats.rst.txt b/docs/build/html/_sources/generated/Processor.processor.ListnerStats.rst.txt deleted file mode 100644 index 415cdbd2..00000000 --- a/docs/build/html/_sources/generated/Processor.processor.ListnerStats.rst.txt +++ /dev/null @@ -1,31 +0,0 @@ -Processor.processor.ListnerStats -================================ - -.. currentmodule:: Processor.processor - -.. autoclass:: ListnerStats - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - :toctree: - - - ~ListnerStats.__init__ - - - - - - .. rubric:: Attributes - - .. autosummary:: - - ~ListnerStats.last_message_time - ~ListnerStats.messages - - \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.processor.Message.__init__.rst.txt b/docs/build/html/_sources/generated/Processor.processor.Message.__init__.rst.txt deleted file mode 100644 index 8ae337eb..00000000 --- a/docs/build/html/_sources/generated/Processor.processor.Message.__init__.rst.txt +++ /dev/null @@ -1,6 +0,0 @@ -Processor.processor.Message.\_\_init\_\_ -======================================== - -.. currentmodule:: Processor.processor - -.. automethod:: Message.__init__ \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.processor.Message.rst.txt b/docs/build/html/_sources/generated/Processor.processor.Message.rst.txt deleted file mode 100644 index 33a1bd62..00000000 --- a/docs/build/html/_sources/generated/Processor.processor.Message.rst.txt +++ /dev/null @@ -1,31 +0,0 @@ -Processor.processor.Message -=========================== - -.. currentmodule:: Processor.processor - -.. autoclass:: Message - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - :toctree: - - - ~Message.__init__ - - - - - - .. rubric:: Attributes - - .. 
autosummary:: - - ~Message.dr - ~Message.headers - - \ No newline at end of file diff --git a/docs/build/html/_sources/generated/Processor.processor.rst.txt b/docs/build/html/_sources/generated/Processor.processor.rst.txt deleted file mode 100644 index 39334ada..00000000 --- a/docs/build/html/_sources/generated/Processor.processor.rst.txt +++ /dev/null @@ -1,42 +0,0 @@ -Processor.processor -=================== - -.. automodule:: Processor.processor - - - - - - - - .. rubric:: Functions - - .. autosummary:: - - call_pipeline_with_ack - get_hostname_output_path - init_connection - init_pipeline - processor - - - - - - .. rubric:: Classes - - .. autosummary:: - :toctree: - - Listener - ListnerStats - Message - - - - - - - - - diff --git a/docs/build/html/_sources/generated/Processor.rst.txt b/docs/build/html/_sources/generated/Processor.rst.txt deleted file mode 100644 index e7964123..00000000 --- a/docs/build/html/_sources/generated/Processor.rst.txt +++ /dev/null @@ -1,33 +0,0 @@ -Processor -========= - -.. automodule:: Processor - - - - - - - - - - - - - - - - - - - -.. rubric:: Modules - -.. autosummary:: - :toctree: - :recursive: - - Processor.App - Processor.process_article - Processor.processor - diff --git a/docs/build/html/_sources/index.rst.txt b/docs/build/html/_sources/index.rst.txt index ec503ff9..ff1fa92a 100644 --- a/docs/build/html/_sources/index.rst.txt +++ b/docs/build/html/_sources/index.rst.txt @@ -10,15 +10,15 @@ Welcome to CommonCrawl Extractor's documentation! 
:maxdepth: 3 :caption: Contents: - installation - quickstart/index + usage + cli/index + extraction/index + prog_guide/index + misc/index api - - - Indices and tables ================== diff --git a/docs/build/html/_sources/installation.rst.txt b/docs/build/html/_sources/installation.rst.txt deleted file mode 100644 index b97c2f7e..00000000 --- a/docs/build/html/_sources/installation.rst.txt +++ /dev/null @@ -1,17 +0,0 @@ -Installation -============ - - -The project was developed using Python 3.10. It's not recommended to use oldered versions of Python as dataclass and type hints are used. -To install packages required to run the project you can use either pip or conda. - -1. Install packages using pip: `$ pip install -r requirements.txt` - -2. Install packages using conda: `$ conda env create -f environment.yml` -> `$ conda activate extractor` - -====== -Docker -====== -While it's possible to run the project without Docker it's recommended to use it as it will make the process of running the project much easier. -Please install docker and docker-compose before running the project. - diff --git a/docs/build/html/_sources/quickstart/artemis-queue.rst.txt b/docs/build/html/_sources/quickstart/artemis-queue.rst.txt deleted file mode 100644 index b3b266c5..00000000 --- a/docs/build/html/_sources/quickstart/artemis-queue.rst.txt +++ /dev/null @@ -1,13 +0,0 @@ -Artemis Queue -============= - -`Artemis `_ is a message queue that is designed to be fast, scalable, and easy to use - -We use artemis for communication between Aggregator and Processor. -If you use docker approach you don't need to install artemis manually. -However it's good to know that artemis provides a web console for monitoring and management. -You can connect to this console while running the docker containers by opening the following URL in your browser: `Console `_. -The password is `admin` and the username is `admin`. There you can see the status of the queues. 
- - - diff --git a/docs/build/html/_sources/quickstart/download_article.rst.txt b/docs/build/html/_sources/quickstart/download_article.rst.txt deleted file mode 100644 index a99225fe..00000000 --- a/docs/build/html/_sources/quickstart/download_article.rst.txt +++ /dev/null @@ -1 +0,0 @@ -.. automodule:: download_article \ No newline at end of file diff --git a/docs/build/html/_sources/quickstart/index.rst.txt b/docs/build/html/_sources/quickstart/index.rst.txt deleted file mode 100644 index ce5b60e9..00000000 --- a/docs/build/html/_sources/quickstart/index.rst.txt +++ /dev/null @@ -1,10 +0,0 @@ -Quick Start Guide -================= - -.. toctree:: - :maxdepth: 2 - :caption: Contents: - - overview - quick-start - artemis-queue \ No newline at end of file diff --git a/docs/build/html/_sources/quickstart/installation.rst.txt b/docs/build/html/_sources/quickstart/installation.rst.txt deleted file mode 100644 index 976968fa..00000000 --- a/docs/build/html/_sources/quickstart/installation.rst.txt +++ /dev/null @@ -1,10 +0,0 @@ -Installation -============ - - -The project was developed using Python 3.10. It's not recommended to use oldered versions of Python as dataclass and type hints are used. -To install packages required to run the project you can use either pip or conda. -1. Install packages using pip: `$ pip install -r requirements.txt` -2. 
Install packages using conda: `$ conda env create -f environment.yml` -> `$ conda activate extractor` - - diff --git a/docs/build/html/_sources/quickstart/middleware.rst.txt b/docs/build/html/_sources/quickstart/middleware.rst.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/build/html/_sources/quickstart/overview.rst.txt b/docs/build/html/_sources/quickstart/overview.rst.txt deleted file mode 100644 index 5b17d502..00000000 --- a/docs/build/html/_sources/quickstart/overview.rst.txt +++ /dev/null @@ -1,113 +0,0 @@ -Quick Overview -============== - -The process of getting one parsed web page from CommonCrawl can be described as a pipeline. - -1. Query CommmonCrawl to find a link to a file that contains the web page we want. -2. Download a file -3. Choose parser for the web page -4. Filter out the web page if not matching the conditions -5. Extract fields from the page -6. Save the fields to a file - - - -The first step is handled by `Aggregator` while the rest is handled by `Processor`. -We will now go through each step in detail. We will be describing the usage where we use the Artemis Queue as middleware between the `Aggregator` and `Processor` to communicate. -However it should not be much different with other middleware of your choice. Please refere to `download_article.py` which should give you an idea how to create a custom pipeline where you can insert your own middleware. - - - -======================= -1. Querying CommonCrawl -======================= -what WARC File how - `WARC `_ is a file format that is used for storing multitudes of web resources. - In our case these files contain a bunch of downloaded web pages and their metadata. - It's possible to get only part of the file by specifying the offset in file and length of the part we want. - - -what - Common Crawl Index -how - A CommonCrawl index is a collection which maps crawled urls to WARC file which contain the crawl of that url. 
- -Every month a CommonCrawl releases a new index which contains all links to web pages that were crawled that month. - -.. warning:: - It is important to understand that even if the index was released in a certain month, it can contain the links to web pages that might be older. - -Thus in order to download an page we query the index to get link to respective WARC file, offset and length of page. -Since there are multiples of the indexes we should query all of them to make sure we don't miss the page. -With the link to the WARC and offset and length we can continue to another step. - -All this is handled by :py:class:`Aggregator.App.index_query.IndexAggregator`. But for basic use you will not need to use it directly. - - -The :py:mod:`Aggregator.aggregator` is the file you will work with. -It's command line utility which will start and instance of aggregator. Such an instance will query the specified indexes with given url and send the results to the Middleware(Artemis Queue). - - -.. code-block:: bash - :caption: This will start and instance of aggregator which will query the indexes from 2014-12-30 to 2016-01-30 for the url bbc.com - - $ python -m Aggregator.aggregator --url bbc.com --since=2014-12-30 --to=2016-01-30 - - - - -===================== -2. Downloading a file -===================== -The Processor node than downloads the url and related information from queue and downloads the appropriate WARC file. -This step is handled by :py:mod:`Processor.App.Downloader.downloader.Downloader`. -It simply downloads and extracts the page from the WARC file. - - -================ -3. Choose parser -================ - -Once the page is downloaded we first need to choose a parser for it. -Parsers are dynamically loaded based on definitions in config file. By default such a config -is `Processor/config.json`. All loaded processors are then matched against the url and publication date and first matching is used. 
-This functionality is handled by :py:class:`Processor.App.Router.router.Router`. - - -============================= -4. Filtering out the web page -============================= - -Once the parser is chosen the filtering function defined by the extractor is used to either drop or pass a page. - -=============================== -5. Extract fields from the page -=============================== - -The extracting function defined by the extractor is used to extract the fields from the page. -The extracting rules can be defined in 2 ways. - -1. Using parsed version of the page (BeautifulSoup) and the extracting the respected fields yourself. -2. Using predefined transfomations. - -Using first method should be straightforward. Just extract the values and return them in dict. - -To use transformations we first need to define what html tags we want to extract from head and body respectively. -Then for each such a tag we need to provide a list of transformations. These transformations then run in sequence where previous result is passed to the next one as parameter. -Whenever the None is encountered the transformation will not raise and error but will not continue and set respective field to None. -You can think about it as a composition of functions. - -When we were developing our extractors we found the second approached to be much more readable and easier to use -because it's very obnoxious to handle the None values with complex logic. - - -============== -6. File saving -============== -With the field extracted we need to save them to a file. -By default the fields are saved in json file. -The way the file is saved is defined by outstreamers. We have implemented the json outstreamer -and field per line outstreamer -:py:class:`Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON` and :py:class:`Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault` respectively. 
- -If you would like different format you can create your own saver by inheriting from :py:class:`Processor.App.OutStreamer.outstreamer.OutStreamer` and then changing pipeline creation with your new outstreamer. \ No newline at end of file diff --git a/docs/build/html/_sources/quickstart/quick-start.rst.txt b/docs/build/html/_sources/quickstart/quick-start.rst.txt deleted file mode 100644 index 67d6bc20..00000000 --- a/docs/build/html/_sources/quickstart/quick-start.rst.txt +++ /dev/null @@ -1,301 +0,0 @@ -Quickstart -========== - -In this chapter we will show how to use the program to fetch a data from an url. -We will show this by an example. - - -========= -Extractor -========= - -We would like to fetch all data from bbc.com containing the word "war" fetched since 20.1.2021 to 20.3.2021. - -.. note:: It's important to emphasis the program can only fetch data based on crawl time. There is no way for program to know when was article published. It's possible to first find the published date and then drop all articles that are out of range. However all since/to dates are based on crawl time NOT published time. - - -For such pages we would like to extract the title and the content of the page. - -Having our task established we will first have to write an extractor for the bbc pages. -This is done by creating a class that extends the class :py:class:`Processor.App.ArticleUtils.article_extractor.ArticleExtractor`. -We will create this class in the file :file:`Processor/UserDefined/Extractors/bbc_extractor.py`. - - - - -.. code-block:: python - - from Processor.App.ArticleUtils.article_extractor import ArticleExtractor - from datetime import datetime - - class BBCExtractor(ArticleExtractor): - SINCE = datetime(2021, 1 , 20) - TO = datetime(2021, 3, 20) - - def __init__(self): - pass - - extractor = BBCExtractor() - - -As you can see we have also created and instance which is required as it allows parametric constructors for extractors. 
- - -===================== -`download_article.py` -===================== - -But what now? We have no idea how the bbc site looked at the time of extracting. -That is why the :py:mod:`download_article` exists! -Thus we run: - -.. code-block:: bash - :caption: This will download 1000 articles from bbc.com and save them to the directory `out1`. - - $ python download_article.py --since=2021-01-20 --to=2021-03-20 --limit=1000 bbc.com out1 - -Bear in mind that it will take some time to download all the pages. -It's possible that it will download some pages without any content but it's impossible to filter them. - -.. note:: In my case it didn't download any English articles, only ones in Chinese and Arabic. However it shouldn't matter as we only care about structure. If you want English articles just raise the limit and wait. - -Once downloaded we can inspect the pages in a browser. -We can see that the title can be found in the `h1#content` tag. -The article content can be found under the `main[role=main]` tag and -the text is mostly in <p>

 tags (Usually you want to be more precise with this but we assume this for simplicity). - -.. warning:: Always make sure that the tags you found are unique. Cross-check this with other articles fetched. - -============================ -Extracting (Transformations) -============================ - - -With this information we can write the extractor. - -.. code-block:: python - - - from Processor.App.ArticleUtils.article_utils import aritcle_content_transform, headline_transform, get_text_transform - REQUIRED_FIELDS = { - "title": False, - "content": True - } - - def content_transform(soup): - return [p.text for p in soup.find_all("p", recursive=True)] - - - def __init__(self): - super().__init__( - article_css_dict={ - "title": "h1#content", - "content": "main[role=main]", - }, - # Here we define how to transform the content of the tag into a string. - article_extract_dict= { - "title": [get_text_transform, headline_transform], - "content": [content_transform, text_unifications_transform, lambda lines : "\n".join(lines)] - }, - - - # Here we define how to bind a tag that contains all fields we will use in article_css_dict - # If you don't know just use body - article_css_selector="body", - required_fields=REQUIRED_FIELDS, - non_empty = True - ) - - -`REQUIRED_FIELDS` is a dictionary that defines which fields must be extracted (Must be contained in resulting dictionary). -This is useful if you write multiple extractors and you want to make sure that all of them contain the same fields. -As you can see we have set the title to False, this means that its value can be None. We have set content to True which -means that it must not be None. Because we have set non_empty to True the title also cannot be empty string or empty list. - -`article_css_dict` defines where to find the title and content. -`article_extract_dict` defines how to extract the title and content from the tag.
-We have used some predefined functions from :py:mod:`Processor.App.ArticleUtils.article_utils` to help us with this. -Please look at the :py:mod:`Processor.App.ArticleUtils.article_utils` to check what exactly the transformations do! It should be clear from the code. -For content we created our own transform which returns a list of the texts of the p tags. - - -`article_css_selector` simply defines where to start looking for the tags defined in `article_css_dict`. - -.. note:: `header_css_dict` and `header_extract_dict` can also be set in constructor for extracting from html tag. The get_attribute method is used to extract the attribute value. - - -======================== -Extracting (BS4 version) -======================== - -Now the extracting part is finished. If it feels too complicated then you can use the BeautifulSoup approach. -In that case you don't set `article_css_dict` and `article_extract_dict` but you have to implement the :py:meth:`Processor.App.ArticleUtils.article_extractor.ArticleExtractor.custom_extract` method. - -.. code-block:: python - - def custom_extract(self, soup, metadata): - # Find the field with the title - title = soup.find("div", {"class": "title"}) - title_text = title.text if title else None - return {"title": title_text} - -In this case you can also access metadata from the warc file! - -========= -Filtering -========= - - -We almost forgot that we want to filter the articles by the word "war". -We just need to override the :py:meth:`Processor.App.ArticleUtils.article_extractor.ArticleExtractor.custom_filter_soup` method. -Let's do it now! - - -.. code-block:: python - - def custom_filter_soup(self, soup, metadata): - result = soup.find_all("p", lambda tag: "war" in tag.text) - if result: - return True - return False - -.. note:: You can also use the :py:meth:`Processor.App.ArticleUtils.article_extractor.ArticleExtractor.custom_filter_raw` which takes the raw html as a parameter. It's useful if you don't need parsed html as processing is faster. - -.. 
note:: This is where you could filter by date. metadata is instance of :py:class:`Processor.App.processor_utils.PipeMetadata` and it has warc_header property. You could get `Last-Modifier `_ from it but it's no guarantee that is published date. Or you could find it in parsed html. - - -=========== -config.json -=========== -To register our extractor create :file:`Processor/UserDefined/config.json` as: - -.. code-block:: json - - { - "addresses": [ "queue.bbc.com", ], - "extractors_path": "./Extractors" - "routes":[ - { - "regexes": [".*bbc\\.com.*"], - "extractors": ["bbc_extractor.py"] - }, - } - - -This defines three things: - -1. The address of the queue. This is tell artemis that we want to accept message from bbc.com queue. Basically always set this to "queue.{your domain}". - -2. The path to the extractors (w.r.t config.json location) - -3. The routes. This defines which extractor to use for which url. In this case we want to use bbc_extractor.py for all urls that contain bbc.com. - -===================== -Testing our extractor -===================== - -Before we run the extractor we should test it. -That is why we have the :py:mod:`Processsor.process_article` ! - -.. code-block:: bash - :caption: This will process the article and output result to out2 folder if succesful. - - $ python -m Processor.process_article --date=2021-02-01 --config=Processor/UserDefined/config.json out1/directory_1/371_https\:__www.bbc.com_yoruba_afrika-44296108.html out2 - - -.. note:: We have to use date as the extractor has no idea when the article was fetched because we have no infromation from warc anymore. You can also use url to specify the url of the article as again we don't have that infromation from warc. However in most cases it will correctly guess the url. - - -===================== -Running the extractor -===================== -We are now ready to run the extractor! -You can manually run the extractor node, aggregator node and artemis queue. 
-However it's not really convenient. And also requires some knowledge how to run atemis queue. -We thus added support for docker. -First we need to create :file:`docker-compose.yml`. - -.. code-block:: bash - - version: "3.9" - services: - # Creates the artemis queue - artemis: - build: "./Artemis" - container_name: "artemis" - # Persistent volume - # Make sure you prune to get clean state for test runs - - artemis-data:/var/lib/artemis/data - ports: - - "8161:8161" - # You can set up limits here - # But make sure you also correct java memory in artemis Dockerfile in order to have effect. - deploy: - resources: - reservations: - memory: 8g - - producer-bbc.com: - # This is the producer service, you can have multiple for different urls - # Will run the aggregator part as you can see we set the date range - build: ./Aggregator - command: [ "--to=2021-03-20", "--since=2021-01-20", "bbc.com" ] - depends_on: - - consumer - - # This is consumer spawn as many as you want, you ideally want to have pills set up to number of producers. - # Make sure you use use-hostname-output to have different output folders for each consumer. - consumer: - build: ./Processor - command: - [ - "--use_hostname_output", - "--timeout=1", - "--pills_to_die=1", - "--queue_size=200" - ] - volumes: - - ./output:/output:z - deploy: - # Number of replicas, more = faster processing - replicas: 4 - depends_on: - - artemis - - volumes: - artemis-data: - - -This one is a bit more complex. It's standard docker-compose file so if you have experience with docker it sould be familiar. -It defines three services: - -1. Artemis queue - This is the queue that will be used to communicate between the aggregator and the processor. As one of the goal of the project is reliability we setup persistent volume for the queue in which it will store the urls that it has already processed. 
However this creates a problem when you want to test the extractor, as by running it consecutively it will no longer process the urls it has already seen. Thus the storage needs to be cleared before each run. This is done by running `$ docker volume rm rocnikovyprojekt_artemis-data`. - -2. Producer - This is the aggregator part of the project. It will query the indexes and send the found urls to the queue. - -3. Consumer - This is the processor part of the project. It will fetch the urls from the queue and extract them to folder output. As we want the consumers to automatically close when the producers are done we set the `pills_to_die` to 1. This will make the consumer die when it receives 1 pill from the queue. Every queue produces exactly one pill when it has no more urls to process. This is why we set the `queue_size` to 1. This will make the queue produce a pill when it has no more urls to process. - - -Now we just need to run the `$ docker-compose up`. -We have also created a shell script that will prune before running the docker-compose. -You can run it by `$ ./run.sh` (Probably the best idea). - - - -.. note:: It's a good idea to clear the output folder when testing the extractor. The problem is that it is created by the docker container so you might need admin privileges to remove it. - - -.. 
note:: Files created at this tutorial can be found at :file:`examples/extractor_tutorial` - - - - - - - - - - - - - diff --git a/docs/build/html/_static/documentation_options.js b/docs/build/html/_static/documentation_options.js index aecf594a..8bd721dd 100644 --- a/docs/build/html/_static/documentation_options.js +++ b/docs/build/html/_static/documentation_options.js @@ -1,6 +1,6 @@ var DOCUMENTATION_OPTIONS = { URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'), - VERSION: '1.0', + VERSION: '0.9.3', LANGUAGE: 'en', COLLAPSE_INDEX: false, BUILDER: 'html', diff --git a/docs/build/html/api.html b/docs/build/html/api.html index ca00c77d..bac1b09c 100644 --- a/docs/build/html/api.html +++ b/docs/build/html/api.html @@ -6,7 +6,7 @@ - API — CommonCrawl Extractor 1.0 documentation + API — CmonCrawl 0.9.3 documentation @@ -39,8 +39,8 @@ - - + + @@ -79,7 +79,7 @@ -

CommonCrawl Extractor 1.0 documentation

+

CmonCrawl 0.9.3 documentation