diff --git a/cmoncrawl/aggregator/index_query.py b/cmoncrawl/aggregator/index_query.py
index 009d76e7..afc53b1d 100644
--- a/cmoncrawl/aggregator/index_query.py
+++ b/cmoncrawl/aggregator/index_query.py
@@ -24,7 +24,7 @@
MatchType,
)
-from aiohttp import ClientError, ClientSession, ContentTypeError
+from aiohttp import ClientError, ClientSession, ContentTypeError, ServerConnectionError
import asyncio
import random
@@ -32,6 +32,33 @@
class IndexAggregator(AsyncIterable[DomainRecord]):
+ """
+ This class is responsible for aggregating the index files from commoncrawl.
+ It is an async context manager which can then be used as an async iterator
+ which yields DomainRecord objects, found in the index files of commoncrawl.
+
+ It uses the commoncrawl index server to find the index files.
+
+
+ Args:
+ domains (List[str]): A list of domains to search for.
+ cc_indexes_server (str, optional): The commoncrawl index server to use. Defaults to "http://index.commoncrawl.org/collinfo.json".
+ match_type (MatchType, optional): Match type for cdx-api. Defaults to None.
+ cc_servers (List[str], optional): A list of commoncrawl servers to use. If [], then indexes will be retrieved from the cc_indexes_server. Defaults to [].
+ since (datetime, optional): The start date for the search. Defaults to datetime.min.
+ to (datetime, optional): The end date for the search. Defaults to datetime.max.
+ limit (int, optional): The maximum number of results to return. Defaults to None.
+ max_retry (int, optional): The maximum number of retries for a single request. Defaults to 5.
+ prefetch_size (int, optional): The number of indexes to fetch concurrently. Defaults to 3.
+ sleep_step (int, optional): Sleep increase time between retries. Defaults to 20.
+
+ Examples:
+ >>> async with IndexAggregator(["example.com"]) as aggregator:
+ >>> async for domain_record in aggregator:
+ >>> print(domain_record)
+
+ """
+
def __init__(
self,
domains: List[str],
@@ -142,21 +169,20 @@ def should_retry(retry: int, reason: str, status: int, **args: Any):
if not should_retry(retry, reason, status, **args):
break
else:
- try:
- content = await response.json(
- content_type=content_type, loads=Decoder().decode
- )
- except ContentTypeError as e:
- all_purpose_logger.error(str(e), exc_info=True)
- all_purpose_logger.error(e.message, exc_info=True)
- all_purpose_logger.error(response.content)
- break
+ content = await response.json(
+ content_type=content_type, loads=Decoder().decode
+ )
all_purpose_logger.info(
f"Successfully retrieved page of {domain} from {cdx_server} add_info: {args}"
)
break
- except (ClientError, TimeoutError) as e:
+ except (
+ ClientError,
+ TimeoutError,
+ ServerConnectionError,
+ ContentTypeError,
+ ) as e:
reason = f"{type(e)} {str(e)}"
if not should_retry(retry, reason, 500, **args):
break
@@ -251,6 +277,9 @@ async def get_captured_responses(
@staticmethod
async def get_all_CC_indexes(client: ClientSession, cdx_server: str) -> List[str]:
+ """
+ Get all CC index servers from a given CDX server
+ """
for _ in range(3):
async with client.get(cdx_server) as response:
r_json = await response.json(content_type="application/json")
@@ -308,6 +337,9 @@ def init_crawls_queue(
)
async def __prefetch_next_crawl(self) -> int:
+ """
+ Prefetch the next index server
+ """
while len(self.__crawls_remaining) > 0:
next_crawl = self.__crawls_remaining.popleft()
@@ -333,6 +365,9 @@ async def __prefetch_next_crawl(self) -> int:
return 0
async def __await_next_prefetch(self):
+ """
+ Waits for the next prefetch to finish
+ """
# Wait for the next prefetch to finish
# Don't prefetch if limit is set to avoid overfetching
while len(self.__crawls_remaining) > 0 and (
diff --git a/cmoncrawl/common/types.py b/cmoncrawl/common/types.py
index 65772fa0..eb0a994a 100644
--- a/cmoncrawl/common/types.py
+++ b/cmoncrawl/common/types.py
@@ -12,6 +12,10 @@
@dataclass_json
@dataclass
class DomainRecord:
+ """
+ Domain record.
+ """
+
filename: str
url: str | None
offset: int
@@ -42,6 +46,10 @@ def __post_init__(self):
@dataclass
class RetrieveResponse:
+ """
+ Response from retrieve.
+ """
+
status: int
content: Any
reason: None | str
@@ -49,6 +57,10 @@ class RetrieveResponse:
@dataclass
class DomainCrawl:
+ """
+ Domain crawl.
+ """
+
domain: str = ""
cdx_server: str = ""
page: int = 0
diff --git a/cmoncrawl/integrations/commands.py b/cmoncrawl/integrations/commands.py
index d83e8af3..34ac6d6f 100644
--- a/cmoncrawl/integrations/commands.py
+++ b/cmoncrawl/integrations/commands.py
@@ -10,7 +10,9 @@
def add_args(parser: argparse.ArgumentParser):
- parser.add_argument("--debug", action="store_true", default=False)
+ parser.add_argument(
+ "--debug", action="store_true", default=False, help="Debug mode"
+ )
return parser
@@ -23,7 +25,9 @@ def add_subparsers(parser: Any):
def get_args():
parser = argparse.ArgumentParser()
- subparser = parser.add_subparsers(dest="command", required=True)
+ subparser = parser.add_subparsers(
+ dest="command", required=True, help="Command to run"
+ )
add_subparsers(subparser)
return parser
diff --git a/cmoncrawl/integrations/download.py b/cmoncrawl/integrations/download.py
index f1e2dfe2..faeb3e1e 100644
--- a/cmoncrawl/integrations/download.py
+++ b/cmoncrawl/integrations/download.py
@@ -8,7 +8,7 @@
from cmoncrawl.processor.pipeline.pipeline import ProcessorPipeline
from cmoncrawl.processor.pipeline.streamer import StreamerFileHTML
from cmoncrawl.processor.pipeline.extractor import HTMLExtractor, DomainRecordExtractor
-from cmoncrawl.middleware.synchronized import index_and_extract
+from cmoncrawl.middleware.synchronized import query_and_extract
import argparse
import asyncio
from cmoncrawl.processor.pipeline.streamer import (
@@ -24,35 +24,83 @@ class DownloadOutputFormat(Enum):
def add_mode_args(subparser: Any):
- record_parser = subparser.add_parser(DownloadOutputFormat.RECORD.value)
- record_parser.add_argument("--max_crawls_per_file", type=int, default=500_000)
- subparser.add_parser(DownloadOutputFormat.HTML.value)
+ record_parser = subparser.add_parser(
+ DownloadOutputFormat.RECORD.value,
+ help="Download record files from Common Crawl",
+ )
+ record_parser.add_argument(
+ "--max_crawls_per_file",
+ type=int,
+ default=500_000,
+ help="Max number of domain records per file output",
+ )
+ subparser.add_parser(
+ DownloadOutputFormat.HTML.value, help="Download HTML files from Common Crawl"
+ )
return subparser
def add_args(subparser: Any):
- parser = subparser.add_parser("download")
- parser.add_argument("url")
- parser.add_argument("output", type=Path)
- mode_subparser = parser.add_subparsers(dest="mode", required=True)
+ parser = subparser.add_parser("download", help="Download data from Common Crawl")
+ parser.add_argument("url", type=str, help="URL to query")
+ parser.add_argument("output", type=Path, help="Path to output directory")
+ mode_subparser = parser.add_subparsers(
+ dest="mode", required=True, help="Download mode"
+ )
mode_subparser = add_mode_args(mode_subparser)
- parser.add_argument("--limit", type=int, default=5)
parser.add_argument(
- "--since", type=datetime.fromisoformat, default=str(datetime.min)
+ "--limit", type=int, default=5, help="Max number of urls to download"
+ )
+ parser.add_argument(
+ "--since",
+ type=datetime.fromisoformat,
+ default=str(datetime.min),
+ help="Start date in ISO format e.g. 2020-01-01",
+ )
+ parser.add_argument(
+ "--to",
+ type=datetime.fromisoformat,
+ default=str(datetime.max),
+ help="End date in ISO format e.g. 2020-01-01",
+ )
+ parser.add_argument(
+ "--cc_server",
+ nargs="+",
+ type=str,
+ default=None,
+ help="Common Crawl indexes to query, must provide whole url e.g. https://index.commoncrawl.org/CC-MAIN-2023-14-index",
+ )
+ parser.add_argument(
+ "--max_retry",
+ type=int,
+ default=30,
+ help="Max number of retries for a request, when the requests are failing increase this number",
+ )
+ parser.add_argument(
+ "--sleep_step",
+ type=int,
+ default=4,
+ help="Number of increased second to add to sleep time between each failed download attempt, increase this number if the server tell you to slow down",
)
- parser.add_argument("--to", type=datetime.fromisoformat, default=str(datetime.max))
- parser.add_argument("--cc_server", nargs="+", type=str, default=None)
- parser.add_argument("--max_retry", type=int, default=30)
- parser.add_argument("--sleep_step", type=int, default=4)
# Add option to output to either json or html
parser.add_argument(
"--match_type",
type=MatchType,
choices=list(MatchType.__members__.values()),
- default=MatchType.PREFIX,
+ help="Match type for the url, see cdx-api for more info",
+ )
+ parser.add_argument(
+ "--max_directory_size",
+ type=int,
+ default=1000,
+ help="Max number of files per directory",
+ )
+ parser.add_argument(
+ "--filter_non_200",
+ action="store_true",
+ default=True,
+ help="Filter out non 200 status code",
)
- parser.add_argument("--max_directory_size", type=int, default=1000)
- parser.add_argument("--filter_non_200", action="store_true", default=True)
parser.set_defaults(func=run_download)
@@ -123,7 +171,7 @@ async def url_download(
max_retry=max_retry,
sleep_step=sleep_step,
)
- await index_and_extract(index_agg, pipeline)
+ await query_and_extract(index_agg, pipeline)
def run_download(args: argparse.Namespace):
diff --git a/cmoncrawl/integrations/extract.py b/cmoncrawl/integrations/extract.py
index efadcce5..25bad429 100644
--- a/cmoncrawl/integrations/extract.py
+++ b/cmoncrawl/integrations/extract.py
@@ -27,30 +27,72 @@ class ExtractMode(Enum):
def add_mode_args(subparser: Any):
- record_parser = subparser.add_parser(ExtractMode.RECORD.value)
- record_parser.add_argument("--max_retry", type=int, default=30)
- record_parser.add_argument("--sleep_step", type=int, default=4)
+ record_parser = subparser.add_parser(
+ ExtractMode.RECORD.value, help="Extract data from jsonl record files"
+ )
+ record_parser.add_argument(
+ "--max_retry", type=int, default=30, help="Max number of warc download attempts"
+ )
+ record_parser.add_argument(
+ "--sleep_step",
+ type=int,
+ default=4,
+ help="Number of increased second to add to sleep time between each failed download attempt",
+ )
- html_parser = subparser.add_parser(ExtractMode.HTML.value)
+ html_parser = subparser.add_parser(
+ ExtractMode.HTML.value, help="Extract data from HTML files"
+ )
html_parser.add_argument(
- "--date", type=datetime.fromisoformat, default=str(datetime.now())
+ "--date",
+ type=datetime.fromisoformat,
+ default=str(datetime.now()),
+ help="Date of extraction of HTML files in iso format e.g. 2021-01-01, default is today",
+ )
+ html_parser.add_argument(
+ "--url",
+ type=str,
+ default="",
+ help="URL from which the HTML files were downloaded, by default it will try to infer from file content",
)
- html_parser.add_argument("--url", type=str, default="")
return subparser
def add_args(subparser: Any):
- parser = subparser.add_parser("extract")
+ parser = subparser.add_parser(
+ "extract", help="Extract data from records/html files"
+ )
parser.add_argument(
"config_path",
type=Path,
+ help="Path to config file containing extraction rules",
+ )
+ parser.add_argument("output_path", type=Path, help="Path to output directory")
+ parser.add_argument(
+ "files", nargs="+", type=Path, help="Files to extract data from"
+ )
+ parser.add_argument(
+ "--max_crawls_per_file",
+ type=int,
+ default=500_000,
+ help="Max number of extractions per file output",
+ )
+ parser.add_argument(
+ "--max_directory_size",
+ type=int,
+ default=1000,
+ help="Max number of extraction files per directory",
+ )
+ parser.add_argument(
+ "--n_proc",
+ type=int,
+ default=1,
+ help="Number of processes to use for extraction. The paralelization is on file level, thus for single file it's useless to use more than one process.",
+ )
+
+ mode_subparser = parser.add_subparsers(
+ dest="mode", required=True, help="Extraction mode"
)
- parser.add_argument("output_path", type=Path)
- parser.add_argument("files", nargs="+", type=Path)
- parser.add_argument("--max_crawls_per_file", type=int, default=500_000)
- parser.add_argument("--max_directory_size", type=int, default=1000)
- parser.add_argument("--n_proc", type=int, default=1)
- mode_subparser = parser.add_subparsers(dest="mode", required=True)
mode_subparser = add_mode_args(mode_subparser)
parser.set_defaults(func=run_extract)
@@ -91,7 +133,7 @@ def get_domain_records_html(
url: str | None, date: datetime | None
) -> List[Tuple[DomainRecord, Dict[str, Any]]]:
# Just return dummy as correct crawl will be loaded from dummy downloader
- return [DomainRecord("", url=url, offset=0, length=0, timestamp=date), {}]
+ return [(DomainRecord("", url=url, offset=0, length=0, timestamp=date), {})]
def load_config(config_path: Path) -> ExtractConfig:
diff --git a/cmoncrawl/middleware/stompware.py b/cmoncrawl/middleware/stompware.py
index 59140885..0f054fe2 100644
--- a/cmoncrawl/middleware/stompware.py
+++ b/cmoncrawl/middleware/stompware.py
@@ -35,6 +35,23 @@ class ListnerStats:
class ArtemisAggregator:
+ """
+ Aggregator that queries the common crawl index and sends the results to a queue
+ using the stomp protocol. It then creates a queue
+ with name `queue.{url}` and sends the results to it.
+ It also creates a topic with name `topic.poisson_pill.{url}`
+ and sends a message with type `poisson_pill` to it when it finishes.
+
+ Args:
+ queue_host (str): The host of the queue
+ queue_port (int): The port of the queue
+ url (str): The url of the queue
+ index_agg (IndexAggregator): The index aggregator
+ heartbeat (int, optional): The heartbeat of the connection. Defaults to 10000.
+
+
+ """
+
def __init__(
self,
queue_host: str,
@@ -59,6 +76,11 @@ def _init_connection(self):
return conn
async def aggregate(self, filter_duplicates: bool = True):
+ """
+ Aggregates the results of the index aggregator and sends them to the queue.
+ If `filter_duplicates` is True, it will use the `DUPL_ID_HEADER` header,
+ which Artemis uses to filter duplicates.
+ """
while True:
try:
conn = self._init_connection()
@@ -104,6 +126,24 @@ async def aggregate(self, filter_duplicates: bool = True):
class ArtemisProcessor:
+ """
+ Processor that listens to queues and processes the messages using a pipeline.
+ When it receives enough messages of type `poisson_pill`, it will
+ stop listening if it doesn't receive any messages for `timeout` minutes.
+
+
+ Args:
+ queue_host (str): The host of the queue
+ queue_port (int): The port of the queue
+ pills_to_die (int, optional): The number of `poisson_pill` messages to receive before dying. Defaults to None.
+ queue_size (int): The size of the queue
+ timeout (int): The timeout in minutes
+ addresses (List[str]): The addresses of the queues
+ pipeline (ProcessorPipeline): The pipeline to use for processing
+ heartbeat (int, optional): The heartbeat of the connection. Defaults to 10000.
+
+ """
+
def __init__(
self,
queue_host: str,
diff --git a/cmoncrawl/middleware/synchronized.py b/cmoncrawl/middleware/synchronized.py
index bb300ea1..74db062f 100644
--- a/cmoncrawl/middleware/synchronized.py
+++ b/cmoncrawl/middleware/synchronized.py
@@ -9,11 +9,22 @@
import asyncio
-async def index_and_extract(
+async def query_and_extract(
index_agg: IndexAggregator,
pipeline: ProcessorPipeline,
filter_non_unique_url: bool = False,
):
+ """
+ Queries the index and extracts the results using the pipeline
+
+ Args:
+ index_agg (IndexAggregator): Index aggregator
+ pipeline (ProcessorPipeline): Pipeline to use
+ filter_non_unique_url (bool, optional): Filter non unique urls.
+ if True, only first successful extraction of a url will be processed,
+ the rest will be skipped. Defaults to False.
+
+ """
processed_urls: Set[str] = set()
total_extracted: int = 0
@@ -28,6 +39,7 @@ async def index_and_extract(
try:
await pipeline.process_domain_record(domain_record, {})
total_extracted += 1
+ processed_urls.add(unify_url_id(url))
except KeyboardInterrupt as e:
break
@@ -35,13 +47,12 @@ async def index_and_extract(
all_purpose_logger.error(
f"Failed to process {domain_record.url} with {e}"
)
- continue
- processed_urls.add(unify_url_id(url))
finally:
if hasattr(pipeline.downloader, "__aexit__"):
await pipeline.downloader.__aexit__(None, None, None)
all_purpose_logger.info(f"Extracted {total_extracted} urls")
+ return processed_urls
async def _extract_task(
@@ -66,8 +77,18 @@ async def extract(
records: List[Tuple[DomainRecord, Dict[str, Any]]],
pipeline: ProcessorPipeline,
concurrent_length: int = 20,
- timeout: int = 5,
):
+ """
+ Extracts the records using the pipeline, with at most `concurrent_length`
+ records being processed at the same time.
+
+ Args:
+ records (List[Tuple[DomainRecord, Dict[str, Any]]]): List of records to process and additional info
+ pipeline (ProcessorPipeline): Pipeline to use
+ concurrent_length (int, optional): Number of concurrent records to process.
+ Defaults to 20.
+
+ """
domain_records_iterator = iter(tqdm(records))
domains_exausted = False
total_extracted: int = 0
@@ -90,9 +111,7 @@ async def extract(
)
)
- done, queue = await asyncio.wait(
- queue, timeout=timeout, return_when=asyncio.FIRST_COMPLETED
- )
+ done, queue = await asyncio.wait(queue, return_when=asyncio.FIRST_COMPLETED)
for task in done:
try:
await task
diff --git a/cmoncrawl/processor/extraction/filters.py b/cmoncrawl/processor/extraction/filters.py
index 5e0e6405..4469c63f 100644
--- a/cmoncrawl/processor/extraction/filters.py
+++ b/cmoncrawl/processor/extraction/filters.py
@@ -7,6 +7,11 @@ def must_exist_filter(soup: BeautifulSoup, filter_list: List[str]):
This function takes in a BeautifulSoup object and a list of
CSS selectors.
If all selectors are found in the soup, this function returns True.
+
+ Args:
+ soup (BeautifulSoup): BeautifulSoup object
+ filter_list (List[str]): List of CSS selectors
+
"""
must_exist = [soup.select_one(css_selector) for css_selector in filter_list]
if any(map(lambda x: x is None, must_exist)):
@@ -20,6 +25,9 @@ def must_not_exist_filter(soup: BeautifulSoup, filter_list: List[str]):
This function takes in a BeautifulSoup object and a list of
CSS selectors.
If any selector is found in the soup, this function returns False.
+ Args:
+ soup (BeautifulSoup): BeautifulSoup object
+ filter_list (List[str]): List of CSS selectors
"""
must_not_exist = [soup.select_one(css_selector) for css_selector in filter_list]
if any(map(lambda x: x is not None, must_not_exist)):
diff --git a/cmoncrawl/processor/extraction/utils.py b/cmoncrawl/processor/extraction/utils.py
index 6852308e..b0b4ffc1 100644
--- a/cmoncrawl/processor/extraction/utils.py
+++ b/cmoncrawl/processor/extraction/utils.py
@@ -18,6 +18,15 @@
def get_tag_transform(tag_desc: str):
+ """
+ Returns a function that takes a bs4 tag and returns the first tag
+ that matches the tag_desc.
+
+ Args:
+ tag_desc (str): CSS selector
+
+ """
+
def transform(tag: Tag):
return tag.select_one(tag_desc)
@@ -25,6 +34,15 @@ def transform(tag: Tag):
def get_tags_transform(tag_desc: str):
+ """
+ Returns a function that takes a bs4 tag and returns a list of tags
+ that match the tag_desc.
+
+ Args:
+ tag_desc (str): CSS selector
+
+ """
+
def transform(tag: Tag):
return tag.select(tag_desc)
@@ -32,6 +50,14 @@ def transform(tag: Tag):
def get_attribute_transform(attr_name: str):
+ """
+ Returns a function that takes a bs4 tag and returns the value
+ of the attribute `attr_name` or None if the attribute doesn't exist.
+
+ Args:
+ attr_name (str): Name of the attribute to get from the tag
+ """
+
def transform(tag: Tag):
return tag.get(attr_name, None)
@@ -39,6 +65,17 @@ def transform(tag: Tag):
def get_text_transform(tag: Tag, recursive: bool = True):
+ """
+ Returns text from tag. If recursive is True then
+ all text from all children is returned.
+
+ Args:
+ tag (Tag): bs4 tag
+ recursive (bool, optional): If True then all text from all children is returned. Defaults to True.
+
+
+ """
+
if recursive:
return tag.text
tag_text = tag.find(text=True, recursive=False)
@@ -49,6 +86,14 @@ def get_text_transform(tag: Tag, recursive: bool = True):
def get_text_list_transform(sep: str = ""):
+ """
+ Returns a function that takes a list of bs4 tags and returns
+ a string with all the text from the tags joined with `sep`.
+
+ Args:
+ sep (str, optional): Separator to use when joining the text. Defaults to "".
+ """
+
def transform(tag: List[Tag]):
return sep.join([tag.text for tag in tag])
@@ -58,10 +103,30 @@ def transform(tag: List[Tag]):
def all_same_transform(
dict: Dict[str, Any], fc: Callable[[Any], Any] | List[Callable[[Any], Any]]
):
+ """
+ Applies `fc` to all values in `dict` and returns a dict with same keys
+ but with transformed values.
+
+ Args:
+ dict (Dict[str, Any]): Dict to transform.
+ fc (Callable[[Any], Any] | List[Callable[[Any], Any]]): Function to apply to all values in dict.
+
+
+ """
return {key: fc for key in dict.keys()}
def chain_transforms(trans: List[Callable[[Any], Any]]):
+ """
+ Chains transforms together. If any of the transforms returns None
+ the chain is broken and None is returned.
+
+ Args:
+ trans (List[Callable[[Any], Any]]): List of transforms to chain together.
+
+
+ """
+
def inner(initial_value: Any):
result = initial_value
for fc in trans:
@@ -77,6 +142,17 @@ def transform(
dict: Dict[str, Any],
transforms: Dict[str, Callable[[Any], Any] | List[Callable[[Any], Any]]],
):
+ """
+ Transforms dict using `transforms` dict. `transforms` dict is of format
+ `{key: [transform1, transform2, ...]}` where each transform is a function that takes the previous value and returns a new value.
+
+ Args:
+ dict (Dict[str, Any]): Dict to transform.
+ transforms (Dict[str, Callable[[Any], Any] | List[Callable[[Any], Any]]]): Dict defining
+ how to transform the dict. Format is "{name: [transform1, transform2, ...]}" where
+ transform is a function that takes previous value and returns new value.
+ """
+
def transform_fc(key: str, value: Any):
key_trans = transforms.get(key, [])
if not isinstance(key_trans, list):
@@ -97,6 +173,19 @@ def extract_transform(
str, Callable[[Any], Any] | List[Callable[[Any], Any]]
],
) -> Dict[str, Any]:
+ """
+ Extracts data from tag using `extract_dict` defining what to extract and how to name it,
+ and `extract_transform_dict` defining how to transform the extracted data.
+
+ Args:
+ tag (Tag | None): Tag to extract data from.
+ extract_dict (Dict[str, str]): Dict defining what to extract and how to name it. format
+ is `{"name": "css selector"}`.
+ extract_transform_dict (Dict[str, Callable[[Any], Any] | List[Callable[[Any], Any]]]): Dict
+ defining how to transform the extracted data. Format is "{name: [transform1, transform2, ...]}"
+ where transform is a function that takes previous value and returns new value.
+ """
+
if tag is None:
return dict()
@@ -109,6 +198,13 @@ def extract_transform(
def combine_dicts(dicts: List[Dict[str, Any]]):
+ """
+ Combines list of dictionaries into one. If there are multiple values for the same key
+ then the first one that is not None is chosen.
+
+ Args:
+ dicts (List[Dict[str, Any]]): List of dicts to combine.
+ """
# Combines dicts choose the first one that is not None.
def recursive_get(key: str, dicts: List[Dict[str, Any]], i: int) -> Any:
if i >= len(dicts):
@@ -125,6 +221,20 @@ def recursive_get(key: str, dicts: List[Dict[str, Any]], i: int) -> Any:
def check_required(
required_fields: Dict[str, bool], extractor_name: str, non_empty: bool = False
):
+ """
+ Checks if required fields are present in the extracted dict.
+
+ Args:
+ required_fields (Dict[str, bool]): Dict of required fields defining which
+ fields must be present and which can be None.
+
+ extractor_name (str): Name of the extractor for logging purposes.
+
+ non_empty (bool, optional): If True then empty strings and empty lists are considered
+ as not present. Defaults to False.
+
+ """
+
def inner(extracted_dict: Dict[Any, Any], metadata: PipeMetadata):
for key, value in required_fields.items():
if key not in extracted_dict:
diff --git a/cmoncrawl/processor/pipeline/downloader.py b/cmoncrawl/processor/pipeline/downloader.py
index d11d4f07..3753e65b 100644
--- a/cmoncrawl/processor/pipeline/downloader.py
+++ b/cmoncrawl/processor/pipeline/downloader.py
@@ -6,7 +6,7 @@
import random
import re
from types import TracebackType
-from aiohttp import ClientError, ClientSession
+from aiohttp import ClientError, ClientSession, ContentTypeError, ServerConnectionError
from typing import List, Tuple, Type
from aiofiles import open as asyncOpen
@@ -24,6 +24,10 @@
class IDownloader:
+ """
+ Base class for all downloaders
+ """
+
async def download(
self, domain_record: DomainRecord
) -> (List[Tuple[str, PipeMetadata]]):
@@ -31,6 +35,18 @@ async def download(
class AsyncDownloader(IDownloader):
+ """
+ Downloader which asynchronously downloads the data for the domain_record
+
+ Args:
+ base_url (str, optional): Base url where to download data from. Defaults to "https://data.commoncrawl.org/".
+ digest_verification (bool, optional): Whether to verify the digest of the downloaded data. Defaults to True.
+ max_retry (int, optional): Maximum number of retries. Defaults to 5.
+ sleep_step (int, optional): Sleep increase time between retries. Defaults to 10.
+ encoding: Default encoding to be used
+
+ """
+
def __init__(
self,
base_url: str = "https://data.commoncrawl.org/",
@@ -87,8 +103,10 @@ def should_retry(retry: int, reason: str, status: int, **args: str):
except (
ClientError,
TimeoutError,
+ ServerConnectionError,
+ ContentTypeError,
) as e:
- if not should_retry(retry, f"{str(e)} {type(e)}", 0):
+ if not should_retry(retry, f"{str(e)} {type(e)}", 500):
raise e
await asyncio.sleep(random.randint(0, (retry + 1) * self.__sleep_step))
ret: List[Tuple[str, PipeMetadata]] = []
@@ -138,6 +156,11 @@ class DownloaderDummy(IDownloader):
Dummy downloader for testing
It doesn't download anything but return files passed in the constructor
and extracts metadata from the file
+
+ Args:
+ files (List[Path]): List of files to return
+ url (str, optional): Url to use for metadata. Defaults to None.
+ date (datetime, optional): Date to add to metadata. Defaults to None.
"""
def __init__(
diff --git a/cmoncrawl/processor/pipeline/extractor.py b/cmoncrawl/processor/pipeline/extractor.py
index c2b94fbd..4405fb33 100644
--- a/cmoncrawl/processor/pipeline/extractor.py
+++ b/cmoncrawl/processor/pipeline/extractor.py
@@ -8,12 +8,32 @@
class IExtractor(ABC):
+ """
+ Base class for all extractors
+ """
+
@abstractmethod
def extract(self, response: str, metadata: PipeMetadata) -> Dict[str, Any] | None:
+ """
+ Extracts the data from the response. If the extractor fails to extract the data,
+ it should return None.
+
+ Args:
+ response (str): response from the downloader
+ metadata (PipeMetadata): Metadata of the response
+ """
raise NotImplementedError()
class BaseExtractor(IExtractor, ABC):
+ """
+ Base class for all soup extractors
+
+ Args:
+ encoding (str, optional): Default encoding to be used. Defaults to None.
+
+ """
+
def __init__(self, encoding: str | None = None):
self.encoding = encoding
@@ -85,6 +105,9 @@ def preprocess(self, response: str, metadata: PipeMetadata) -> str:
class HTMLExtractor(BaseExtractor):
"""
Dummy Extractor which simply extracts the html
+
+ Args:
+ filter_non_ok (bool, optional): If True, only 200 status codes will be extracted. Defaults to True.
"""
def __init__(self, filter_non_ok: bool = True):
@@ -116,7 +139,10 @@ def filter_raw(self, response: str, metadata: PipeMetadata):
class DomainRecordExtractor(BaseExtractor):
"""
- Dummy Extractor which simply extracts the html
+ Dummy Extractor which simply extracts the domain record
+
+ Args:
+ filter_non_ok (bool, optional): If True, only 200 status codes will be extracted. Defaults to True.
"""
def __init__(self, filter_non_ok: bool = True):
diff --git a/cmoncrawl/processor/pipeline/router.py b/cmoncrawl/processor/pipeline/router.py
index 69445317..d8001ed4 100644
--- a/cmoncrawl/processor/pipeline/router.py
+++ b/cmoncrawl/processor/pipeline/router.py
@@ -29,10 +29,17 @@ class Route:
class IRouter(ABC):
+ """
+ Base class for all routers
+ """
+
@abstractmethod
def route(
self, url: str | None, time: datetime | None, metadata: PipeMetadata
) -> IExtractor:
+ """
+ Routes the url to the correct extractor
+ """
raise NotImplementedError()
@@ -56,6 +63,9 @@ def load_module(self, module_path: Path):
return module, module_name
def load_module_as_extractor(self, module_path: Path):
+ """
+ Loads a module and returns its extractor
+ """
module, module_name = self.load_module(module_path)
name: str = getattr(module, "NAME", module_name)
extractor: IExtractor | None = getattr(module, "extractor", None)
@@ -88,6 +98,16 @@ def register_route(
since: datetime | None = None,
to: datetime | None = None,
):
+ """
+ Registers a route for a given extractor name and regex
+
+ Args:
+ name (str): The name of the extractor
+ regex (Union[str, List[str]]): The regex to match against
+ since (datetime | None, optional): The earliest time to route to this extractor. Defaults to None.
+ to (datetime | None, optional): The latest time to route to this extractor. Defaults to None.
+
+ """
if isinstance(regex, str):
regex = [regex]
regex_compiled = [re.compile(regex) for regex in regex]
@@ -121,6 +141,14 @@ def _as_offset_aware(self, time: datetime) -> datetime:
def route(
self, url: str | None, time: datetime | None, metadata: PipeMetadata
) -> IExtractor:
+ """
+ Routes the url to the correct extractor based on the url and time
+
+ Args:
+ url (str | None): The url to route
+ time (datetime | None): The time to route
+ metadata (PipeMetadata): The metadata for the current pipeline
+ """
# check if offset naive datetime if so then convert to utc
if url is None:
raise ValueError("Url must not be None")
diff --git a/docs/build/_templates/module.rst b/docs/build/_templates/module.rst
deleted file mode 100644
index b10fefa4..00000000
--- a/docs/build/_templates/module.rst
+++ /dev/null
@@ -1,66 +0,0 @@
-{{ fullname | escape | underline}}
-
-.. automodule:: {{ fullname }}
-
- {% block attributes %}
- {% if attributes %}
- .. rubric:: Module attributes
-
- .. autosummary::
- :toctree:
- {% for item in attributes %}
- {{ item }}
- {%- endfor %}
- {% endif %}
- {% endblock %}
-
- {% block functions %}
- {% if functions %}
- .. rubric:: {{ _('Functions') }}
-
- .. autosummary::
- :toctree:
- :nosignatures:
- {% for item in functions %}
- {{ item }}
- {%- endfor %}
- {% endif %}
- {% endblock %}
-
- {% block classes %}
- {% if classes %}
- .. rubric:: {{ _('Classes') }}
-
- .. autosummary::
- :toctree:
- :template: class.rst
- :nosignatures:
- {% for item in classes %}
- {{ item }}
- {%- endfor %}
- {% endif %}
- {% endblock %}
-
- {% block exceptions %}
- {% if exceptions %}
- .. rubric:: {{ _('Exceptions') }}
-
- .. autosummary::
- :toctree:
- {% for item in exceptions %}
- {{ item }}
- {%- endfor %}
- {% endif %}
- {% endblock %}
-
-{% block modules %}
-{% if modules %}
-.. autosummary::
- :toctree:
- :template: module.rst
- :recursive:
-{% for item in modules %}
- {{ item }}
-{%- endfor %}
-{% endif %}
-{% endblock %}
\ No newline at end of file
diff --git a/docs/build/doctrees/api.doctree b/docs/build/doctrees/api.doctree
index 95fbc4dc..7b41a2df 100644
Binary files a/docs/build/doctrees/api.doctree and b/docs/build/doctrees/api.doctree differ
diff --git a/docs/build/doctrees/environment.pickle b/docs/build/doctrees/environment.pickle
index 4789c48b..17964d75 100644
Binary files a/docs/build/doctrees/environment.pickle and b/docs/build/doctrees/environment.pickle differ
diff --git a/docs/build/doctrees/generated/Aggregator.App.doctree b/docs/build/doctrees/generated/Aggregator.App.doctree
deleted file mode 100644
index 8a9d83d9..00000000
Binary files a/docs/build/doctrees/generated/Aggregator.App.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Aggregator.App.index_query.DomainCrawl.__init__.doctree b/docs/build/doctrees/generated/Aggregator.App.index_query.DomainCrawl.__init__.doctree
deleted file mode 100644
index c4cf45ee..00000000
Binary files a/docs/build/doctrees/generated/Aggregator.App.index_query.DomainCrawl.__init__.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Aggregator.App.index_query.DomainCrawl.doctree b/docs/build/doctrees/generated/Aggregator.App.index_query.DomainCrawl.doctree
deleted file mode 100644
index 4f3fd013..00000000
Binary files a/docs/build/doctrees/generated/Aggregator.App.index_query.DomainCrawl.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Aggregator.App.index_query.DomainRecord.__init__.doctree b/docs/build/doctrees/generated/Aggregator.App.index_query.DomainRecord.__init__.doctree
deleted file mode 100644
index 5e1d05d9..00000000
Binary files a/docs/build/doctrees/generated/Aggregator.App.index_query.DomainRecord.__init__.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Aggregator.App.index_query.DomainRecord.doctree b/docs/build/doctrees/generated/Aggregator.App.index_query.DomainRecord.doctree
deleted file mode 100644
index 83928956..00000000
Binary files a/docs/build/doctrees/generated/Aggregator.App.index_query.DomainRecord.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Aggregator.App.index_query.IndexAggregator.__init__.doctree b/docs/build/doctrees/generated/Aggregator.App.index_query.IndexAggregator.__init__.doctree
deleted file mode 100644
index f5d79072..00000000
Binary files a/docs/build/doctrees/generated/Aggregator.App.index_query.IndexAggregator.__init__.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Aggregator.App.index_query.IndexAggregator.aclose.doctree b/docs/build/doctrees/generated/Aggregator.App.index_query.IndexAggregator.aclose.doctree
deleted file mode 100644
index 147185c3..00000000
Binary files a/docs/build/doctrees/generated/Aggregator.App.index_query.IndexAggregator.aclose.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Aggregator.App.index_query.IndexAggregator.aopen.doctree b/docs/build/doctrees/generated/Aggregator.App.index_query.IndexAggregator.aopen.doctree
deleted file mode 100644
index d5284fa1..00000000
Binary files a/docs/build/doctrees/generated/Aggregator.App.index_query.IndexAggregator.aopen.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Aggregator.App.index_query.IndexAggregator.doctree b/docs/build/doctrees/generated/Aggregator.App.index_query.IndexAggregator.doctree
deleted file mode 100644
index d5991c3f..00000000
Binary files a/docs/build/doctrees/generated/Aggregator.App.index_query.IndexAggregator.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Aggregator.App.index_query.IndexAggregator.get_all_CC_indexes.doctree b/docs/build/doctrees/generated/Aggregator.App.index_query.IndexAggregator.get_all_CC_indexes.doctree
deleted file mode 100644
index 9e20071a..00000000
Binary files a/docs/build/doctrees/generated/Aggregator.App.index_query.IndexAggregator.get_all_CC_indexes.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Aggregator.App.index_query.IndexAggregator.get_captured_responses.doctree b/docs/build/doctrees/generated/Aggregator.App.index_query.IndexAggregator.get_captured_responses.doctree
deleted file mode 100644
index 5348badd..00000000
Binary files a/docs/build/doctrees/generated/Aggregator.App.index_query.IndexAggregator.get_captured_responses.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Aggregator.App.index_query.IndexAggregator.get_number_of_pages.doctree b/docs/build/doctrees/generated/Aggregator.App.index_query.IndexAggregator.get_number_of_pages.doctree
deleted file mode 100644
index d36288a1..00000000
Binary files a/docs/build/doctrees/generated/Aggregator.App.index_query.IndexAggregator.get_number_of_pages.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Aggregator.App.index_query.RetrieveResponse.__init__.doctree b/docs/build/doctrees/generated/Aggregator.App.index_query.RetrieveResponse.__init__.doctree
deleted file mode 100644
index eea165f4..00000000
Binary files a/docs/build/doctrees/generated/Aggregator.App.index_query.RetrieveResponse.__init__.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Aggregator.App.index_query.RetrieveResponse.doctree b/docs/build/doctrees/generated/Aggregator.App.index_query.RetrieveResponse.doctree
deleted file mode 100644
index b87e2e2f..00000000
Binary files a/docs/build/doctrees/generated/Aggregator.App.index_query.RetrieveResponse.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Aggregator.App.index_query.doctree b/docs/build/doctrees/generated/Aggregator.App.index_query.doctree
deleted file mode 100644
index ce3dfc6a..00000000
Binary files a/docs/build/doctrees/generated/Aggregator.App.index_query.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Aggregator.App.ndjson_decoder.Decoder.__init__.doctree b/docs/build/doctrees/generated/Aggregator.App.ndjson_decoder.Decoder.__init__.doctree
deleted file mode 100644
index 80135542..00000000
Binary files a/docs/build/doctrees/generated/Aggregator.App.ndjson_decoder.Decoder.__init__.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Aggregator.App.ndjson_decoder.Decoder.decode.doctree b/docs/build/doctrees/generated/Aggregator.App.ndjson_decoder.Decoder.decode.doctree
deleted file mode 100644
index a1cff4bd..00000000
Binary files a/docs/build/doctrees/generated/Aggregator.App.ndjson_decoder.Decoder.decode.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Aggregator.App.ndjson_decoder.Decoder.doctree b/docs/build/doctrees/generated/Aggregator.App.ndjson_decoder.Decoder.doctree
deleted file mode 100644
index b6370185..00000000
Binary files a/docs/build/doctrees/generated/Aggregator.App.ndjson_decoder.Decoder.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Aggregator.App.ndjson_decoder.Decoder.raw_decode.doctree b/docs/build/doctrees/generated/Aggregator.App.ndjson_decoder.Decoder.raw_decode.doctree
deleted file mode 100644
index 2aa49acf..00000000
Binary files a/docs/build/doctrees/generated/Aggregator.App.ndjson_decoder.Decoder.raw_decode.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Aggregator.App.ndjson_decoder.doctree b/docs/build/doctrees/generated/Aggregator.App.ndjson_decoder.doctree
deleted file mode 100644
index 42f88e3f..00000000
Binary files a/docs/build/doctrees/generated/Aggregator.App.ndjson_decoder.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Aggregator.App.utils.doctree b/docs/build/doctrees/generated/Aggregator.App.utils.doctree
deleted file mode 100644
index 5ff54744..00000000
Binary files a/docs/build/doctrees/generated/Aggregator.App.utils.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Aggregator.aggregator.doctree b/docs/build/doctrees/generated/Aggregator.aggregator.doctree
deleted file mode 100644
index 48d40e59..00000000
Binary files a/docs/build/doctrees/generated/Aggregator.aggregator.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Aggregator.doctree b/docs/build/doctrees/generated/Aggregator.doctree
deleted file mode 100644
index 97f03677..00000000
Binary files a/docs/build/doctrees/generated/Aggregator.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.__init__.doctree b/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.__init__.doctree
deleted file mode 100644
index a7f5e39d..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.__init__.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.article_extract.doctree b/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.article_extract.doctree
deleted file mode 100644
index 40a58a2a..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.article_extract.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.check_required.doctree b/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.check_required.doctree
deleted file mode 100644
index 0429497b..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.check_required.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.custom_extract.doctree b/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.custom_extract.doctree
deleted file mode 100644
index ef3b1284..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.custom_extract.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.custom_filter_raw.doctree b/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.custom_filter_raw.doctree
deleted file mode 100644
index 6a978cfb..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.custom_filter_raw.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.custom_filter_soup.doctree b/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.custom_filter_soup.doctree
deleted file mode 100644
index 37a94430..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.custom_filter_soup.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.doctree b/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.doctree
deleted file mode 100644
index 23199605..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.extract.doctree b/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.extract.doctree
deleted file mode 100644
index 1f2173b4..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.extract.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.extract_soup.doctree b/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.extract_soup.doctree
deleted file mode 100644
index ae7093bc..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.extract_soup.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.filter_raw.doctree b/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.filter_raw.doctree
deleted file mode 100644
index 662f5c20..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.filter_raw.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.filter_soup.doctree b/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.filter_soup.doctree
deleted file mode 100644
index b0d28d18..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.filter_soup.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.preprocess.doctree b/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.preprocess.doctree
deleted file mode 100644
index 9b1f7bf5..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.preprocess.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.doctree b/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.doctree
deleted file mode 100644
index 38346ba6..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_extractor.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_utils.doctree b/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_utils.doctree
deleted file mode 100644
index 66f2929f..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.ArticleUtils.article_utils.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.ArticleUtils.doctree b/docs/build/doctrees/generated/Processor.App.ArticleUtils.doctree
deleted file mode 100644
index b6e15cf9..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.ArticleUtils.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.Downloader.doctree b/docs/build/doctrees/generated/Processor.App.Downloader.doctree
deleted file mode 100644
index 1be21036..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.Downloader.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.Downloader.downloader.Downloader.__init__.doctree b/docs/build/doctrees/generated/Processor.App.Downloader.downloader.Downloader.__init__.doctree
deleted file mode 100644
index 441b50c4..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.Downloader.downloader.Downloader.__init__.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.Downloader.downloader.Downloader.doctree b/docs/build/doctrees/generated/Processor.App.Downloader.downloader.Downloader.doctree
deleted file mode 100644
index 4a420c04..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.Downloader.downloader.Downloader.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.Downloader.downloader.Downloader.download.doctree b/docs/build/doctrees/generated/Processor.App.Downloader.downloader.Downloader.download.doctree
deleted file mode 100644
index eb5bc1eb..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.Downloader.downloader.Downloader.download.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.Downloader.downloader.DownloaderFull.__init__.doctree b/docs/build/doctrees/generated/Processor.App.Downloader.downloader.DownloaderFull.__init__.doctree
deleted file mode 100644
index 8d2c7344..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.Downloader.downloader.DownloaderFull.__init__.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.Downloader.downloader.DownloaderFull.aclose.doctree b/docs/build/doctrees/generated/Processor.App.Downloader.downloader.DownloaderFull.aclose.doctree
deleted file mode 100644
index fc8ac10d..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.Downloader.downloader.DownloaderFull.aclose.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.Downloader.downloader.DownloaderFull.aopen.doctree b/docs/build/doctrees/generated/Processor.App.Downloader.downloader.DownloaderFull.aopen.doctree
deleted file mode 100644
index 475aa448..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.Downloader.downloader.DownloaderFull.aopen.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.Downloader.downloader.DownloaderFull.doctree b/docs/build/doctrees/generated/Processor.App.Downloader.downloader.DownloaderFull.doctree
deleted file mode 100644
index dc834f76..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.Downloader.downloader.DownloaderFull.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.Downloader.downloader.DownloaderFull.download.doctree b/docs/build/doctrees/generated/Processor.App.Downloader.downloader.DownloaderFull.download.doctree
deleted file mode 100644
index 5549854e..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.Downloader.downloader.DownloaderFull.download.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.Downloader.downloader.DownloaderFull.unwrap.doctree b/docs/build/doctrees/generated/Processor.App.Downloader.downloader.DownloaderFull.unwrap.doctree
deleted file mode 100644
index 360bf494..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.Downloader.downloader.DownloaderFull.unwrap.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.Downloader.downloader.doctree b/docs/build/doctrees/generated/Processor.App.Downloader.downloader.doctree
deleted file mode 100644
index 7296ad12..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.Downloader.downloader.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.__init__.doctree b/docs/build/doctrees/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.__init__.doctree
deleted file mode 100644
index c3de8472..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.__init__.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.doctree b/docs/build/doctrees/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.doctree
deleted file mode 100644
index b1911f6c..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.download.doctree b/docs/build/doctrees/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.download.doctree
deleted file mode 100644
index 3ff8ed54..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.download.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.extract_url.doctree b/docs/build/doctrees/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.extract_url.doctree
deleted file mode 100644
index 26b61be1..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.extract_url.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.extract_year.doctree b/docs/build/doctrees/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.extract_year.doctree
deleted file mode 100644
index cfbd32f8..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.extract_year.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.mine_metadata.doctree b/docs/build/doctrees/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.mine_metadata.doctree
deleted file mode 100644
index 4ee001d2..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.mine_metadata.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.Downloader.dummy_downloader.doctree b/docs/build/doctrees/generated/Processor.App.Downloader.dummy_downloader.doctree
deleted file mode 100644
index 1756590b..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.Downloader.dummy_downloader.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.Extractor.doctree b/docs/build/doctrees/generated/Processor.App.Extractor.doctree
deleted file mode 100644
index c20dbe79..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.Extractor.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.Extractor.dummy_extractor.Extractor.__init__.doctree b/docs/build/doctrees/generated/Processor.App.Extractor.dummy_extractor.Extractor.__init__.doctree
deleted file mode 100644
index f38c1ede..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.Extractor.dummy_extractor.Extractor.__init__.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.Extractor.dummy_extractor.Extractor.doctree b/docs/build/doctrees/generated/Processor.App.Extractor.dummy_extractor.Extractor.doctree
deleted file mode 100644
index 396e935a..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.Extractor.dummy_extractor.Extractor.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.Extractor.dummy_extractor.Extractor.extract.doctree b/docs/build/doctrees/generated/Processor.App.Extractor.dummy_extractor.Extractor.extract.doctree
deleted file mode 100644
index 275321c3..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.Extractor.dummy_extractor.Extractor.extract.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.Extractor.dummy_extractor.Extractor.extract_soup.doctree b/docs/build/doctrees/generated/Processor.App.Extractor.dummy_extractor.Extractor.extract_soup.doctree
deleted file mode 100644
index 67dfacbb..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.Extractor.dummy_extractor.Extractor.extract_soup.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.Extractor.dummy_extractor.Extractor.filter_raw.doctree b/docs/build/doctrees/generated/Processor.App.Extractor.dummy_extractor.Extractor.filter_raw.doctree
deleted file mode 100644
index 6b9a0f18..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.Extractor.dummy_extractor.Extractor.filter_raw.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.Extractor.dummy_extractor.Extractor.filter_soup.doctree b/docs/build/doctrees/generated/Processor.App.Extractor.dummy_extractor.Extractor.filter_soup.doctree
deleted file mode 100644
index 9e06f2df..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.Extractor.dummy_extractor.Extractor.filter_soup.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.Extractor.dummy_extractor.Extractor.preprocess.doctree b/docs/build/doctrees/generated/Processor.App.Extractor.dummy_extractor.Extractor.preprocess.doctree
deleted file mode 100644
index 4fe1f96b..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.Extractor.dummy_extractor.Extractor.preprocess.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.Extractor.dummy_extractor.doctree b/docs/build/doctrees/generated/Processor.App.Extractor.dummy_extractor.doctree
deleted file mode 100644
index 52ea35a9..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.Extractor.dummy_extractor.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.Extractor.extractor.BaseExtractor.__init__.doctree b/docs/build/doctrees/generated/Processor.App.Extractor.extractor.BaseExtractor.__init__.doctree
deleted file mode 100644
index a4a76ccb..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.Extractor.extractor.BaseExtractor.__init__.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.Extractor.extractor.BaseExtractor.doctree b/docs/build/doctrees/generated/Processor.App.Extractor.extractor.BaseExtractor.doctree
deleted file mode 100644
index 34acbe56..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.Extractor.extractor.BaseExtractor.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.Extractor.extractor.BaseExtractor.extract.doctree b/docs/build/doctrees/generated/Processor.App.Extractor.extractor.BaseExtractor.extract.doctree
deleted file mode 100644
index 2f5eee7b..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.Extractor.extractor.BaseExtractor.extract.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.Extractor.extractor.BaseExtractor.extract_soup.doctree b/docs/build/doctrees/generated/Processor.App.Extractor.extractor.BaseExtractor.extract_soup.doctree
deleted file mode 100644
index ddb17171..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.Extractor.extractor.BaseExtractor.extract_soup.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.Extractor.extractor.BaseExtractor.filter_raw.doctree b/docs/build/doctrees/generated/Processor.App.Extractor.extractor.BaseExtractor.filter_raw.doctree
deleted file mode 100644
index 6bc85c78..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.Extractor.extractor.BaseExtractor.filter_raw.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.Extractor.extractor.BaseExtractor.filter_soup.doctree b/docs/build/doctrees/generated/Processor.App.Extractor.extractor.BaseExtractor.filter_soup.doctree
deleted file mode 100644
index a21f0a8a..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.Extractor.extractor.BaseExtractor.filter_soup.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.Extractor.extractor.BaseExtractor.preprocess.doctree b/docs/build/doctrees/generated/Processor.App.Extractor.extractor.BaseExtractor.preprocess.doctree
deleted file mode 100644
index 77c26130..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.Extractor.extractor.BaseExtractor.preprocess.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.Extractor.extractor.doctree b/docs/build/doctrees/generated/Processor.App.Extractor.extractor.doctree
deleted file mode 100644
index e1826168..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.Extractor.extractor.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.Extractor.extractor_utils.doctree b/docs/build/doctrees/generated/Processor.App.Extractor.extractor_utils.doctree
deleted file mode 100644
index fb99fee5..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.Extractor.extractor_utils.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.doctree
deleted file mode 100644
index b76245e1..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.dummy_streamer.DummyStreamer.__init__.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.dummy_streamer.DummyStreamer.__init__.doctree
deleted file mode 100644
index ffd7567d..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.dummy_streamer.DummyStreamer.__init__.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.dummy_streamer.DummyStreamer.clean_up.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.dummy_streamer.DummyStreamer.clean_up.doctree
deleted file mode 100644
index 197a910a..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.dummy_streamer.DummyStreamer.clean_up.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.dummy_streamer.DummyStreamer.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.dummy_streamer.DummyStreamer.doctree
deleted file mode 100644
index 59d1a0fd..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.dummy_streamer.DummyStreamer.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.dummy_streamer.DummyStreamer.stream.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.dummy_streamer.DummyStreamer.stream.doctree
deleted file mode 100644
index f052a232..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.dummy_streamer.DummyStreamer.stream.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.dummy_streamer.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.dummy_streamer.doctree
deleted file mode 100644
index b7505f89..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.dummy_streamer.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.outstreamer.OutStreamer.__init__.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.outstreamer.OutStreamer.__init__.doctree
deleted file mode 100644
index ff149b6d..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.outstreamer.OutStreamer.__init__.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.outstreamer.OutStreamer.clean_up.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.outstreamer.OutStreamer.clean_up.doctree
deleted file mode 100644
index ff4cd35c..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.outstreamer.OutStreamer.clean_up.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.outstreamer.OutStreamer.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.outstreamer.OutStreamer.doctree
deleted file mode 100644
index 2c73c738..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.outstreamer.OutStreamer.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.outstreamer.OutStreamer.stream.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.outstreamer.OutStreamer.stream.doctree
deleted file mode 100644
index 256048bf..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.outstreamer.OutStreamer.stream.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.outstreamer.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.outstreamer.doctree
deleted file mode 100644
index 16675dcb..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.outstreamer.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.__init__.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.__init__.doctree
deleted file mode 100644
index 47cbf438..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.__init__.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.clean_up.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.clean_up.doctree
deleted file mode 100644
index 1d6a449f..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.clean_up.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.doctree
deleted file mode 100644
index f5e24d68..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.get_file_name.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.get_file_name.doctree
deleted file mode 100644
index 041a1771..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.get_file_name.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.metadata_to_string.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.metadata_to_string.doctree
deleted file mode 100644
index 082285bc..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.metadata_to_string.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.stream.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.stream.doctree
deleted file mode 100644
index da5e7065..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.stream.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.__init__.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.__init__.doctree
deleted file mode 100644
index a96eb078..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.__init__.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.clean_up.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.clean_up.doctree
deleted file mode 100644
index 803da1c2..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.clean_up.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.doctree
deleted file mode 100644
index f98a4e89..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.get_file_name.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.get_file_name.doctree
deleted file mode 100644
index 94b85f29..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.get_file_name.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.metadata_to_string.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.metadata_to_string.doctree
deleted file mode 100644
index e235b2e7..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.metadata_to_string.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.stream.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.stream.doctree
deleted file mode 100644
index b46ce5d4..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.stream.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.__init__.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.__init__.doctree
deleted file mode 100644
index 24861d2b..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.__init__.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.clean_up.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.clean_up.doctree
deleted file mode 100644
index 84f81b9b..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.clean_up.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.doctree
deleted file mode 100644
index e6b71a0e..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.get_file_name.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.get_file_name.doctree
deleted file mode 100644
index 7b261d9e..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.get_file_name.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.metadata_to_string.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.metadata_to_string.doctree
deleted file mode 100644
index 42be412d..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.metadata_to_string.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.stream.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.stream.doctree
deleted file mode 100644
index 9810a4ff..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.stream.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.doctree b/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.doctree
deleted file mode 100644
index c0bca8f6..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.OutStreamer.stream_to_file.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.Pipeline.doctree b/docs/build/doctrees/generated/Processor.App.Pipeline.doctree
deleted file mode 100644
index 08915d93..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.Pipeline.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.Pipeline.pipeline.ProcessorPipeline.__init__.doctree b/docs/build/doctrees/generated/Processor.App.Pipeline.pipeline.ProcessorPipeline.__init__.doctree
deleted file mode 100644
index 277eb958..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.Pipeline.pipeline.ProcessorPipeline.__init__.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.Pipeline.pipeline.ProcessorPipeline.doctree b/docs/build/doctrees/generated/Processor.App.Pipeline.pipeline.ProcessorPipeline.doctree
deleted file mode 100644
index 67e3b0e6..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.Pipeline.pipeline.ProcessorPipeline.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.Pipeline.pipeline.ProcessorPipeline.process_domain_record.doctree b/docs/build/doctrees/generated/Processor.App.Pipeline.pipeline.ProcessorPipeline.process_domain_record.doctree
deleted file mode 100644
index d77a3264..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.Pipeline.pipeline.ProcessorPipeline.process_domain_record.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.Pipeline.pipeline.doctree b/docs/build/doctrees/generated/Processor.App.Pipeline.pipeline.doctree
deleted file mode 100644
index cc6a0c76..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.Pipeline.pipeline.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.Router.doctree b/docs/build/doctrees/generated/Processor.App.Router.doctree
deleted file mode 100644
index fe17917f..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.Router.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.Router.router.Route.__init__.doctree b/docs/build/doctrees/generated/Processor.App.Router.router.Route.__init__.doctree
deleted file mode 100644
index 42ea51e0..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.Router.router.Route.__init__.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.Router.router.Route.doctree b/docs/build/doctrees/generated/Processor.App.Router.router.Route.doctree
deleted file mode 100644
index dc3dcb4b..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.Router.router.Route.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.Router.router.Router.__init__.doctree b/docs/build/doctrees/generated/Processor.App.Router.router.Router.__init__.doctree
deleted file mode 100644
index 8a2b0c24..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.Router.router.Router.__init__.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.Router.router.Router.doctree b/docs/build/doctrees/generated/Processor.App.Router.router.Router.doctree
deleted file mode 100644
index bde97845..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.Router.router.Router.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.Router.router.Router.load_module.doctree b/docs/build/doctrees/generated/Processor.App.Router.router.Router.load_module.doctree
deleted file mode 100644
index b3d225d0..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.Router.router.Router.load_module.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.Router.router.Router.load_modules.doctree b/docs/build/doctrees/generated/Processor.App.Router.router.Router.load_modules.doctree
deleted file mode 100644
index 371e2087..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.Router.router.Router.load_modules.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.Router.router.Router.register_route.doctree b/docs/build/doctrees/generated/Processor.App.Router.router.Router.register_route.doctree
deleted file mode 100644
index 4a1f1184..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.Router.router.Router.register_route.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.Router.router.Router.register_routes.doctree b/docs/build/doctrees/generated/Processor.App.Router.router.Router.register_routes.doctree
deleted file mode 100644
index 87072183..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.Router.router.Router.register_routes.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.Router.router.Router.route.doctree b/docs/build/doctrees/generated/Processor.App.Router.router.Router.route.doctree
deleted file mode 100644
index 63d1e5c0..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.Router.router.Router.route.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.Router.router.doctree b/docs/build/doctrees/generated/Processor.App.Router.router.doctree
deleted file mode 100644
index 7aa3ad47..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.Router.router.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.doctree b/docs/build/doctrees/generated/Processor.App.doctree
deleted file mode 100644
index 0d9c9c84..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.processor_utils.DomainRecord.__init__.doctree b/docs/build/doctrees/generated/Processor.App.processor_utils.DomainRecord.__init__.doctree
deleted file mode 100644
index 1619f359..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.processor_utils.DomainRecord.__init__.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.processor_utils.DomainRecord.doctree b/docs/build/doctrees/generated/Processor.App.processor_utils.DomainRecord.doctree
deleted file mode 100644
index aa81adcf..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.processor_utils.DomainRecord.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.processor_utils.PipeMetadata.__init__.doctree b/docs/build/doctrees/generated/Processor.App.processor_utils.PipeMetadata.__init__.doctree
deleted file mode 100644
index 27abae50..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.processor_utils.PipeMetadata.__init__.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.processor_utils.PipeMetadata.doctree b/docs/build/doctrees/generated/Processor.App.processor_utils.PipeMetadata.doctree
deleted file mode 100644
index 4ed8a9b6..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.processor_utils.PipeMetadata.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.App.processor_utils.doctree b/docs/build/doctrees/generated/Processor.App.processor_utils.doctree
deleted file mode 100644
index 20a5deb2..00000000
Binary files a/docs/build/doctrees/generated/Processor.App.processor_utils.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.doctree b/docs/build/doctrees/generated/Processor.doctree
deleted file mode 100644
index 7829de79..00000000
Binary files a/docs/build/doctrees/generated/Processor.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.process_article.doctree b/docs/build/doctrees/generated/Processor.process_article.doctree
deleted file mode 100644
index 541faf14..00000000
Binary files a/docs/build/doctrees/generated/Processor.process_article.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.processor.Listener.__init__.doctree b/docs/build/doctrees/generated/Processor.processor.Listener.__init__.doctree
deleted file mode 100644
index 15c2ea3b..00000000
Binary files a/docs/build/doctrees/generated/Processor.processor.Listener.__init__.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.processor.Listener.doctree b/docs/build/doctrees/generated/Processor.processor.Listener.doctree
deleted file mode 100644
index 71dcd299..00000000
Binary files a/docs/build/doctrees/generated/Processor.processor.Listener.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.processor.Listener.on_before_message.doctree b/docs/build/doctrees/generated/Processor.processor.Listener.on_before_message.doctree
deleted file mode 100644
index 65480dbd..00000000
Binary files a/docs/build/doctrees/generated/Processor.processor.Listener.on_before_message.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.processor.Listener.on_connected.doctree b/docs/build/doctrees/generated/Processor.processor.Listener.on_connected.doctree
deleted file mode 100644
index 0548d732..00000000
Binary files a/docs/build/doctrees/generated/Processor.processor.Listener.on_connected.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.processor.Listener.on_connecting.doctree b/docs/build/doctrees/generated/Processor.processor.Listener.on_connecting.doctree
deleted file mode 100644
index ea14ea3a..00000000
Binary files a/docs/build/doctrees/generated/Processor.processor.Listener.on_connecting.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.processor.Listener.on_disconnected.doctree b/docs/build/doctrees/generated/Processor.processor.Listener.on_disconnected.doctree
deleted file mode 100644
index 229773a1..00000000
Binary files a/docs/build/doctrees/generated/Processor.processor.Listener.on_disconnected.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.processor.Listener.on_disconnecting.doctree b/docs/build/doctrees/generated/Processor.processor.Listener.on_disconnecting.doctree
deleted file mode 100644
index 901aed43..00000000
Binary files a/docs/build/doctrees/generated/Processor.processor.Listener.on_disconnecting.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.processor.Listener.on_error.doctree b/docs/build/doctrees/generated/Processor.processor.Listener.on_error.doctree
deleted file mode 100644
index 9a89bd63..00000000
Binary files a/docs/build/doctrees/generated/Processor.processor.Listener.on_error.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.processor.Listener.on_heartbeat.doctree b/docs/build/doctrees/generated/Processor.processor.Listener.on_heartbeat.doctree
deleted file mode 100644
index 515d6b6c..00000000
Binary files a/docs/build/doctrees/generated/Processor.processor.Listener.on_heartbeat.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.processor.Listener.on_heartbeat_timeout.doctree b/docs/build/doctrees/generated/Processor.processor.Listener.on_heartbeat_timeout.doctree
deleted file mode 100644
index 6cdf623e..00000000
Binary files a/docs/build/doctrees/generated/Processor.processor.Listener.on_heartbeat_timeout.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.processor.Listener.on_message.doctree b/docs/build/doctrees/generated/Processor.processor.Listener.on_message.doctree
deleted file mode 100644
index 2ff64cc0..00000000
Binary files a/docs/build/doctrees/generated/Processor.processor.Listener.on_message.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.processor.Listener.on_receipt.doctree b/docs/build/doctrees/generated/Processor.processor.Listener.on_receipt.doctree
deleted file mode 100644
index ea7fc41b..00000000
Binary files a/docs/build/doctrees/generated/Processor.processor.Listener.on_receipt.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.processor.Listener.on_receiver_loop_completed.doctree b/docs/build/doctrees/generated/Processor.processor.Listener.on_receiver_loop_completed.doctree
deleted file mode 100644
index 96857c8e..00000000
Binary files a/docs/build/doctrees/generated/Processor.processor.Listener.on_receiver_loop_completed.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.processor.Listener.on_send.doctree b/docs/build/doctrees/generated/Processor.processor.Listener.on_send.doctree
deleted file mode 100644
index 68ac1b4d..00000000
Binary files a/docs/build/doctrees/generated/Processor.processor.Listener.on_send.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.processor.ListnerStats.__init__.doctree b/docs/build/doctrees/generated/Processor.processor.ListnerStats.__init__.doctree
deleted file mode 100644
index 38ac0c49..00000000
Binary files a/docs/build/doctrees/generated/Processor.processor.ListnerStats.__init__.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.processor.ListnerStats.doctree b/docs/build/doctrees/generated/Processor.processor.ListnerStats.doctree
deleted file mode 100644
index e2aa4cbd..00000000
Binary files a/docs/build/doctrees/generated/Processor.processor.ListnerStats.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.processor.Message.__init__.doctree b/docs/build/doctrees/generated/Processor.processor.Message.__init__.doctree
deleted file mode 100644
index 57d734d8..00000000
Binary files a/docs/build/doctrees/generated/Processor.processor.Message.__init__.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.processor.Message.doctree b/docs/build/doctrees/generated/Processor.processor.Message.doctree
deleted file mode 100644
index a5db1259..00000000
Binary files a/docs/build/doctrees/generated/Processor.processor.Message.doctree and /dev/null differ
diff --git a/docs/build/doctrees/generated/Processor.processor.doctree b/docs/build/doctrees/generated/Processor.processor.doctree
deleted file mode 100644
index 15cf4376..00000000
Binary files a/docs/build/doctrees/generated/Processor.processor.doctree and /dev/null differ
diff --git a/docs/build/doctrees/index.doctree b/docs/build/doctrees/index.doctree
index bdb9102b..68f64384 100644
Binary files a/docs/build/doctrees/index.doctree and b/docs/build/doctrees/index.doctree differ
diff --git a/docs/build/doctrees/installation.doctree b/docs/build/doctrees/installation.doctree
deleted file mode 100644
index 234c09af..00000000
Binary files a/docs/build/doctrees/installation.doctree and /dev/null differ
diff --git a/docs/build/doctrees/quickstart/artemis-queue.doctree b/docs/build/doctrees/quickstart/artemis-queue.doctree
deleted file mode 100644
index e05e3710..00000000
Binary files a/docs/build/doctrees/quickstart/artemis-queue.doctree and /dev/null differ
diff --git a/docs/build/doctrees/quickstart/download_article.doctree b/docs/build/doctrees/quickstart/download_article.doctree
deleted file mode 100644
index a5a2b00b..00000000
Binary files a/docs/build/doctrees/quickstart/download_article.doctree and /dev/null differ
diff --git a/docs/build/doctrees/quickstart/index.doctree b/docs/build/doctrees/quickstart/index.doctree
deleted file mode 100644
index 86cb124d..00000000
Binary files a/docs/build/doctrees/quickstart/index.doctree and /dev/null differ
diff --git a/docs/build/doctrees/quickstart/installation.doctree b/docs/build/doctrees/quickstart/installation.doctree
deleted file mode 100644
index 33fe40fe..00000000
Binary files a/docs/build/doctrees/quickstart/installation.doctree and /dev/null differ
diff --git a/docs/build/doctrees/quickstart/middleware.doctree b/docs/build/doctrees/quickstart/middleware.doctree
deleted file mode 100644
index 28f501cb..00000000
Binary files a/docs/build/doctrees/quickstart/middleware.doctree and /dev/null differ
diff --git a/docs/build/doctrees/quickstart/overview.doctree b/docs/build/doctrees/quickstart/overview.doctree
deleted file mode 100644
index 34dd3019..00000000
Binary files a/docs/build/doctrees/quickstart/overview.doctree and /dev/null differ
diff --git a/docs/build/doctrees/quickstart/quick-start.doctree b/docs/build/doctrees/quickstart/quick-start.doctree
deleted file mode 100644
index 58ae2918..00000000
Binary files a/docs/build/doctrees/quickstart/quick-start.doctree and /dev/null differ
diff --git a/docs/build/html/.buildinfo b/docs/build/html/.buildinfo
index ed36e0d1..251c030d 100644
--- a/docs/build/html/.buildinfo
+++ b/docs/build/html/.buildinfo
@@ -1,4 +1,4 @@
# Sphinx build info version 1
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
-config: f83e2738440146eb7aee758ea06be062
+config: 455dcb0e361cb666a31520aa4f46b2fc
tags: 645f666f9bcd5a90fca523b33c5a78b7
diff --git a/docs/build/html/_sources/api.rst.txt b/docs/build/html/_sources/api.rst.txt
index 76c4a0e7..2ed3eeb5 100644
--- a/docs/build/html/_sources/api.rst.txt
+++ b/docs/build/html/_sources/api.rst.txt
@@ -6,8 +6,7 @@ API
:toctree: generated
- Aggregator
- Processor
+ cmoncrawl
diff --git a/docs/build/html/_sources/generated/Aggregator.App.index_query.DomainCrawl.__init__.rst.txt b/docs/build/html/_sources/generated/Aggregator.App.index_query.DomainCrawl.__init__.rst.txt
deleted file mode 100644
index 85e4d70b..00000000
--- a/docs/build/html/_sources/generated/Aggregator.App.index_query.DomainCrawl.__init__.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Aggregator.App.index\_query.DomainCrawl.\_\_init\_\_
-====================================================
-
-.. currentmodule:: Aggregator.App.index_query
-
-.. automethod:: DomainCrawl.__init__
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Aggregator.App.index_query.DomainCrawl.rst.txt b/docs/build/html/_sources/generated/Aggregator.App.index_query.DomainCrawl.rst.txt
deleted file mode 100644
index f72f8295..00000000
--- a/docs/build/html/_sources/generated/Aggregator.App.index_query.DomainCrawl.rst.txt
+++ /dev/null
@@ -1,32 +0,0 @@
-Aggregator.App.index\_query.DomainCrawl
-=======================================
-
-.. currentmodule:: Aggregator.App.index_query
-
-.. autoclass:: DomainCrawl
-
-
- .. automethod:: __init__
-
-
- .. rubric:: Methods
-
- .. autosummary::
- :toctree:
-
-
- ~DomainCrawl.__init__
-
-
-
-
-
- .. rubric:: Attributes
-
- .. autosummary::
-
- ~DomainCrawl.cdx_server
- ~DomainCrawl.domain
- ~DomainCrawl.page
-
-
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Aggregator.App.index_query.DomainRecord.__init__.rst.txt b/docs/build/html/_sources/generated/Aggregator.App.index_query.DomainRecord.__init__.rst.txt
deleted file mode 100644
index ad2f70fa..00000000
--- a/docs/build/html/_sources/generated/Aggregator.App.index_query.DomainRecord.__init__.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Aggregator.App.index\_query.DomainRecord.\_\_init\_\_
-=====================================================
-
-.. currentmodule:: Aggregator.App.index_query
-
-.. automethod:: DomainRecord.__init__
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Aggregator.App.index_query.DomainRecord.rst.txt b/docs/build/html/_sources/generated/Aggregator.App.index_query.DomainRecord.rst.txt
deleted file mode 100644
index 15196f89..00000000
--- a/docs/build/html/_sources/generated/Aggregator.App.index_query.DomainRecord.rst.txt
+++ /dev/null
@@ -1,36 +0,0 @@
-Aggregator.App.index\_query.DomainRecord
-========================================
-
-.. currentmodule:: Aggregator.App.index_query
-
-.. autoclass:: DomainRecord
-
-
- .. automethod:: __init__
-
-
- .. rubric:: Methods
-
- .. autosummary::
- :toctree:
-
-
- ~DomainRecord.__init__
-
-
-
-
-
- .. rubric:: Attributes
-
- .. autosummary::
-
- ~DomainRecord.digest
- ~DomainRecord.encoding
- ~DomainRecord.timestamp
- ~DomainRecord.filename
- ~DomainRecord.url
- ~DomainRecord.offset
- ~DomainRecord.length
-
-
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Aggregator.App.index_query.IndexAggregator.__init__.rst.txt b/docs/build/html/_sources/generated/Aggregator.App.index_query.IndexAggregator.__init__.rst.txt
deleted file mode 100644
index 9bf74300..00000000
--- a/docs/build/html/_sources/generated/Aggregator.App.index_query.IndexAggregator.__init__.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Aggregator.App.index\_query.IndexAggregator.\_\_init\_\_
-========================================================
-
-.. currentmodule:: Aggregator.App.index_query
-
-.. automethod:: IndexAggregator.__init__
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Aggregator.App.index_query.IndexAggregator.aclose.rst.txt b/docs/build/html/_sources/generated/Aggregator.App.index_query.IndexAggregator.aclose.rst.txt
deleted file mode 100644
index 3de6bcdf..00000000
--- a/docs/build/html/_sources/generated/Aggregator.App.index_query.IndexAggregator.aclose.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Aggregator.App.index\_query.IndexAggregator.aclose
-==================================================
-
-.. currentmodule:: Aggregator.App.index_query
-
-.. automethod:: IndexAggregator.aclose
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Aggregator.App.index_query.IndexAggregator.aopen.rst.txt b/docs/build/html/_sources/generated/Aggregator.App.index_query.IndexAggregator.aopen.rst.txt
deleted file mode 100644
index f62b5568..00000000
--- a/docs/build/html/_sources/generated/Aggregator.App.index_query.IndexAggregator.aopen.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Aggregator.App.index\_query.IndexAggregator.aopen
-=================================================
-
-.. currentmodule:: Aggregator.App.index_query
-
-.. automethod:: IndexAggregator.aopen
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Aggregator.App.index_query.IndexAggregator.get_all_CC_indexes.rst.txt b/docs/build/html/_sources/generated/Aggregator.App.index_query.IndexAggregator.get_all_CC_indexes.rst.txt
deleted file mode 100644
index 49c1a573..00000000
--- a/docs/build/html/_sources/generated/Aggregator.App.index_query.IndexAggregator.get_all_CC_indexes.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Aggregator.App.index\_query.IndexAggregator.get\_all\_CC\_indexes
-=================================================================
-
-.. currentmodule:: Aggregator.App.index_query
-
-.. automethod:: IndexAggregator.get_all_CC_indexes
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Aggregator.App.index_query.IndexAggregator.get_captured_responses.rst.txt b/docs/build/html/_sources/generated/Aggregator.App.index_query.IndexAggregator.get_captured_responses.rst.txt
deleted file mode 100644
index 61a404c4..00000000
--- a/docs/build/html/_sources/generated/Aggregator.App.index_query.IndexAggregator.get_captured_responses.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Aggregator.App.index\_query.IndexAggregator.get\_captured\_responses
-====================================================================
-
-.. currentmodule:: Aggregator.App.index_query
-
-.. automethod:: IndexAggregator.get_captured_responses
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Aggregator.App.index_query.IndexAggregator.get_number_of_pages.rst.txt b/docs/build/html/_sources/generated/Aggregator.App.index_query.IndexAggregator.get_number_of_pages.rst.txt
deleted file mode 100644
index 2a2427bd..00000000
--- a/docs/build/html/_sources/generated/Aggregator.App.index_query.IndexAggregator.get_number_of_pages.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Aggregator.App.index\_query.IndexAggregator.get\_number\_of\_pages
-==================================================================
-
-.. currentmodule:: Aggregator.App.index_query
-
-.. automethod:: IndexAggregator.get_number_of_pages
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Aggregator.App.index_query.IndexAggregator.rst.txt b/docs/build/html/_sources/generated/Aggregator.App.index_query.IndexAggregator.rst.txt
deleted file mode 100644
index 33ebd99b..00000000
--- a/docs/build/html/_sources/generated/Aggregator.App.index_query.IndexAggregator.rst.txt
+++ /dev/null
@@ -1,29 +0,0 @@
-Aggregator.App.index\_query.IndexAggregator
-===========================================
-
-.. currentmodule:: Aggregator.App.index_query
-
-.. autoclass:: IndexAggregator
-
-
- .. automethod:: __init__
-
-
- .. rubric:: Methods
-
- .. autosummary::
- :toctree:
-
-
- ~IndexAggregator.__init__
- ~IndexAggregator.aclose
- ~IndexAggregator.aopen
- ~IndexAggregator.get_all_CC_indexes
- ~IndexAggregator.get_captured_responses
- ~IndexAggregator.get_number_of_pages
-
-
-
-
-
-
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Aggregator.App.index_query.RetrieveResponse.__init__.rst.txt b/docs/build/html/_sources/generated/Aggregator.App.index_query.RetrieveResponse.__init__.rst.txt
deleted file mode 100644
index 90655f25..00000000
--- a/docs/build/html/_sources/generated/Aggregator.App.index_query.RetrieveResponse.__init__.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Aggregator.App.index\_query.RetrieveResponse.\_\_init\_\_
-=========================================================
-
-.. currentmodule:: Aggregator.App.index_query
-
-.. automethod:: RetrieveResponse.__init__
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Aggregator.App.index_query.RetrieveResponse.rst.txt b/docs/build/html/_sources/generated/Aggregator.App.index_query.RetrieveResponse.rst.txt
deleted file mode 100644
index 49c3fbaa..00000000
--- a/docs/build/html/_sources/generated/Aggregator.App.index_query.RetrieveResponse.rst.txt
+++ /dev/null
@@ -1,32 +0,0 @@
-Aggregator.App.index\_query.RetrieveResponse
-============================================
-
-.. currentmodule:: Aggregator.App.index_query
-
-.. autoclass:: RetrieveResponse
-
-
- .. automethod:: __init__
-
-
- .. rubric:: Methods
-
- .. autosummary::
- :toctree:
-
-
- ~RetrieveResponse.__init__
-
-
-
-
-
- .. rubric:: Attributes
-
- .. autosummary::
-
- ~RetrieveResponse.status
- ~RetrieveResponse.content
- ~RetrieveResponse.reason
-
-
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Aggregator.App.index_query.rst.txt b/docs/build/html/_sources/generated/Aggregator.App.index_query.rst.txt
deleted file mode 100644
index 21d0ce61..00000000
--- a/docs/build/html/_sources/generated/Aggregator.App.index_query.rst.txt
+++ /dev/null
@@ -1,41 +0,0 @@
-Aggregator.App.index\_query
-===========================
-
-.. automodule:: Aggregator.App.index_query
-
-
-
-
-
-
-
- .. rubric:: Functions
-
- .. autosummary::
-
- crawl_to_year
- timestamp_to_datetime
- to_timestamp_format
-
-
-
-
-
- .. rubric:: Classes
-
- .. autosummary::
- :toctree:
-
- DomainCrawl
- DomainRecord
- IndexAggregator
- RetrieveResponse
-
-
-
-
-
-
-
-
-
diff --git a/docs/build/html/_sources/generated/Aggregator.App.ndjson_decoder.Decoder.__init__.rst.txt b/docs/build/html/_sources/generated/Aggregator.App.ndjson_decoder.Decoder.__init__.rst.txt
deleted file mode 100644
index 9519a827..00000000
--- a/docs/build/html/_sources/generated/Aggregator.App.ndjson_decoder.Decoder.__init__.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Aggregator.App.ndjson\_decoder.Decoder.\_\_init\_\_
-===================================================
-
-.. currentmodule:: Aggregator.App.ndjson_decoder
-
-.. automethod:: Decoder.__init__
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Aggregator.App.ndjson_decoder.Decoder.decode.rst.txt b/docs/build/html/_sources/generated/Aggregator.App.ndjson_decoder.Decoder.decode.rst.txt
deleted file mode 100644
index 347d669d..00000000
--- a/docs/build/html/_sources/generated/Aggregator.App.ndjson_decoder.Decoder.decode.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Aggregator.App.ndjson\_decoder.Decoder.decode
-=============================================
-
-.. currentmodule:: Aggregator.App.ndjson_decoder
-
-.. automethod:: Decoder.decode
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Aggregator.App.ndjson_decoder.Decoder.raw_decode.rst.txt b/docs/build/html/_sources/generated/Aggregator.App.ndjson_decoder.Decoder.raw_decode.rst.txt
deleted file mode 100644
index d8582fe2..00000000
--- a/docs/build/html/_sources/generated/Aggregator.App.ndjson_decoder.Decoder.raw_decode.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Aggregator.App.ndjson\_decoder.Decoder.raw\_decode
-==================================================
-
-.. currentmodule:: Aggregator.App.ndjson_decoder
-
-.. automethod:: Decoder.raw_decode
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Aggregator.App.ndjson_decoder.Decoder.rst.txt b/docs/build/html/_sources/generated/Aggregator.App.ndjson_decoder.Decoder.rst.txt
deleted file mode 100644
index 38606e34..00000000
--- a/docs/build/html/_sources/generated/Aggregator.App.ndjson_decoder.Decoder.rst.txt
+++ /dev/null
@@ -1,26 +0,0 @@
-Aggregator.App.ndjson\_decoder.Decoder
-======================================
-
-.. currentmodule:: Aggregator.App.ndjson_decoder
-
-.. autoclass:: Decoder
-
-
- .. automethod:: __init__
-
-
- .. rubric:: Methods
-
- .. autosummary::
- :toctree:
-
-
- ~Decoder.__init__
- ~Decoder.decode
- ~Decoder.raw_decode
-
-
-
-
-
-
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Aggregator.App.ndjson_decoder.rst.txt b/docs/build/html/_sources/generated/Aggregator.App.ndjson_decoder.rst.txt
deleted file mode 100644
index e23af3d3..00000000
--- a/docs/build/html/_sources/generated/Aggregator.App.ndjson_decoder.rst.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-Aggregator.App.ndjson\_decoder
-==============================
-
-.. automodule:: Aggregator.App.ndjson_decoder
-
-
-
-
-
-
-
-
-
-
-
- .. rubric:: Classes
-
- .. autosummary::
- :toctree:
-
- Decoder
-
-
-
-
-
-
-
-
-
diff --git a/docs/build/html/_sources/generated/Aggregator.App.rst.txt b/docs/build/html/_sources/generated/Aggregator.App.rst.txt
deleted file mode 100644
index d895b720..00000000
--- a/docs/build/html/_sources/generated/Aggregator.App.rst.txt
+++ /dev/null
@@ -1,33 +0,0 @@
-Aggregator.App
-==============
-
-.. automodule:: Aggregator.App
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-.. rubric:: Modules
-
-.. autosummary::
- :toctree:
- :recursive:
-
- Aggregator.App.index_query
- Aggregator.App.ndjson_decoder
- Aggregator.App.utils
-
diff --git a/docs/build/html/_sources/generated/Aggregator.App.utils.rst.txt b/docs/build/html/_sources/generated/Aggregator.App.utils.rst.txt
deleted file mode 100644
index 98dabc43..00000000
--- a/docs/build/html/_sources/generated/Aggregator.App.utils.rst.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-Aggregator.App.utils
-====================
-
-.. automodule:: Aggregator.App.utils
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/docs/build/html/_sources/generated/Aggregator.aggregator.rst.txt b/docs/build/html/_sources/generated/Aggregator.aggregator.rst.txt
deleted file mode 100644
index a181dc26..00000000
--- a/docs/build/html/_sources/generated/Aggregator.aggregator.rst.txt
+++ /dev/null
@@ -1,31 +0,0 @@
-Aggregator.aggregator
-=====================
-
-.. automodule:: Aggregator.aggregator
-
-
-
-
-
-
-
- .. rubric:: Functions
-
- .. autosummary::
-
- aggregate
- init_connection
- unify_url_id
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/docs/build/html/_sources/generated/Aggregator.rst.txt b/docs/build/html/_sources/generated/Aggregator.rst.txt
deleted file mode 100644
index 32185271..00000000
--- a/docs/build/html/_sources/generated/Aggregator.rst.txt
+++ /dev/null
@@ -1,32 +0,0 @@
-Aggregator
-==========
-
-.. automodule:: Aggregator
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-.. rubric:: Modules
-
-.. autosummary::
- :toctree:
- :recursive:
-
- Aggregator.App
- Aggregator.aggregator
-
diff --git a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.__init__.rst.txt b/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.__init__.rst.txt
deleted file mode 100644
index 0249c1c4..00000000
--- a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.__init__.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.ArticleUtils.article\_extractor.ArticleExtractor.\_\_init\_\_
-===========================================================================
-
-.. currentmodule:: Processor.App.ArticleUtils.article_extractor
-
-.. automethod:: ArticleExtractor.__init__
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.article_extract.rst.txt b/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.article_extract.rst.txt
deleted file mode 100644
index 597704ce..00000000
--- a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.article_extract.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.ArticleUtils.article\_extractor.ArticleExtractor.article\_extract
-===============================================================================
-
-.. currentmodule:: Processor.App.ArticleUtils.article_extractor
-
-.. automethod:: ArticleExtractor.article_extract
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.check_required.rst.txt b/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.check_required.rst.txt
deleted file mode 100644
index c3b3734c..00000000
--- a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.check_required.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.ArticleUtils.article\_extractor.ArticleExtractor.check\_required
-==============================================================================
-
-.. currentmodule:: Processor.App.ArticleUtils.article_extractor
-
-.. automethod:: ArticleExtractor.check_required
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.custom_extract.rst.txt b/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.custom_extract.rst.txt
deleted file mode 100644
index abd8be1c..00000000
--- a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.custom_extract.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.ArticleUtils.article\_extractor.ArticleExtractor.custom\_extract
-==============================================================================
-
-.. currentmodule:: Processor.App.ArticleUtils.article_extractor
-
-.. automethod:: ArticleExtractor.custom_extract
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.custom_filter_raw.rst.txt b/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.custom_filter_raw.rst.txt
deleted file mode 100644
index ad21e89b..00000000
--- a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.custom_filter_raw.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.ArticleUtils.article\_extractor.ArticleExtractor.custom\_filter\_raw
-==================================================================================
-
-.. currentmodule:: Processor.App.ArticleUtils.article_extractor
-
-.. automethod:: ArticleExtractor.custom_filter_raw
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.custom_filter_soup.rst.txt b/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.custom_filter_soup.rst.txt
deleted file mode 100644
index 046ed0bd..00000000
--- a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.custom_filter_soup.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.ArticleUtils.article\_extractor.ArticleExtractor.custom\_filter\_soup
-===================================================================================
-
-.. currentmodule:: Processor.App.ArticleUtils.article_extractor
-
-.. automethod:: ArticleExtractor.custom_filter_soup
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.extract.rst.txt b/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.extract.rst.txt
deleted file mode 100644
index 7c40a2f9..00000000
--- a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.extract.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.ArticleUtils.article\_extractor.ArticleExtractor.extract
-======================================================================
-
-.. currentmodule:: Processor.App.ArticleUtils.article_extractor
-
-.. automethod:: ArticleExtractor.extract
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.extract_soup.rst.txt b/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.extract_soup.rst.txt
deleted file mode 100644
index 0b613f02..00000000
--- a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.extract_soup.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.ArticleUtils.article\_extractor.ArticleExtractor.extract\_soup
-============================================================================
-
-.. currentmodule:: Processor.App.ArticleUtils.article_extractor
-
-.. automethod:: ArticleExtractor.extract_soup
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.filter_raw.rst.txt b/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.filter_raw.rst.txt
deleted file mode 100644
index 932fc5f9..00000000
--- a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.filter_raw.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.ArticleUtils.article\_extractor.ArticleExtractor.filter\_raw
-==========================================================================
-
-.. currentmodule:: Processor.App.ArticleUtils.article_extractor
-
-.. automethod:: ArticleExtractor.filter_raw
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.filter_soup.rst.txt b/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.filter_soup.rst.txt
deleted file mode 100644
index e39b8e4e..00000000
--- a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.filter_soup.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.ArticleUtils.article\_extractor.ArticleExtractor.filter\_soup
-===========================================================================
-
-.. currentmodule:: Processor.App.ArticleUtils.article_extractor
-
-.. automethod:: ArticleExtractor.filter_soup
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.preprocess.rst.txt b/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.preprocess.rst.txt
deleted file mode 100644
index 19d01c84..00000000
--- a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.preprocess.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.ArticleUtils.article\_extractor.ArticleExtractor.preprocess
-=========================================================================
-
-.. currentmodule:: Processor.App.ArticleUtils.article_extractor
-
-.. automethod:: ArticleExtractor.preprocess
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.rst.txt b/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.rst.txt
deleted file mode 100644
index 835252a0..00000000
--- a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.ArticleExtractor.rst.txt
+++ /dev/null
@@ -1,42 +0,0 @@
-Processor.App.ArticleUtils.article\_extractor.ArticleExtractor
-==============================================================
-
-.. currentmodule:: Processor.App.ArticleUtils.article_extractor
-
-.. autoclass:: ArticleExtractor
-
-
- .. automethod:: __init__
-
-
- .. rubric:: Methods
-
- .. autosummary::
- :toctree:
-
-
- ~ArticleExtractor.__init__
- ~ArticleExtractor.article_extract
- ~ArticleExtractor.check_required
- ~ArticleExtractor.custom_extract
- ~ArticleExtractor.custom_filter_raw
- ~ArticleExtractor.custom_filter_soup
- ~ArticleExtractor.extract
- ~ArticleExtractor.extract_soup
- ~ArticleExtractor.filter_raw
- ~ArticleExtractor.filter_soup
- ~ArticleExtractor.preprocess
-
-
-
-
-
- .. rubric:: Attributes
-
- .. autosummary::
-
- ~ArticleExtractor.ENCODING
- ~ArticleExtractor.SINCE
- ~ArticleExtractor.TO
-
-
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.rst.txt b/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.rst.txt
deleted file mode 100644
index bb03ee57..00000000
--- a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_extractor.rst.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-Processor.App.ArticleUtils.article\_extractor
-=============================================
-
-.. automodule:: Processor.App.ArticleUtils.article_extractor
-
-
-
-
-
-
-
-
-
-
-
- .. rubric:: Classes
-
- .. autosummary::
- :toctree:
-
- ArticleExtractor
-
-
-
-
-
-
-
-
-
diff --git a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_utils.rst.txt b/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_utils.rst.txt
deleted file mode 100644
index 6b1370a8..00000000
--- a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.article_utils.rst.txt
+++ /dev/null
@@ -1,45 +0,0 @@
-Processor.App.ArticleUtils.article\_utils
-=========================================
-
-.. automodule:: Processor.App.ArticleUtils.article_utils
-
-
-
-
-
-
-
- .. rubric:: Functions
-
- .. autosummary::
-
- article_content_transform
- author_transform
- brief_transform
- category_transform
- comments_num_transform
- cz_date_transform
- date_complex_extract
- format_date_transform
- headline_transform
- iso_date_transform
- keywords_transform
- must_exist_filter
- must_not_exist_filter
- remove_day_transform
- text_unification_transform
- text_unifications_transform
- url_category_transform
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.rst.txt b/docs/build/html/_sources/generated/Processor.App.ArticleUtils.rst.txt
deleted file mode 100644
index 6a0e4336..00000000
--- a/docs/build/html/_sources/generated/Processor.App.ArticleUtils.rst.txt
+++ /dev/null
@@ -1,32 +0,0 @@
-Processor.App.ArticleUtils
-==========================
-
-.. automodule:: Processor.App.ArticleUtils
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-.. rubric:: Modules
-
-.. autosummary::
- :toctree:
- :recursive:
-
- Processor.App.ArticleUtils.article_extractor
- Processor.App.ArticleUtils.article_utils
-
diff --git a/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.Downloader.__init__.rst.txt b/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.Downloader.__init__.rst.txt
deleted file mode 100644
index ea9c52eb..00000000
--- a/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.Downloader.__init__.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.Downloader.downloader.Downloader.\_\_init\_\_
-===========================================================
-
-.. currentmodule:: Processor.App.Downloader.downloader
-
-.. automethod:: Downloader.__init__
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.Downloader.download.rst.txt b/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.Downloader.download.rst.txt
deleted file mode 100644
index 9762af81..00000000
--- a/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.Downloader.download.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.Downloader.downloader.Downloader.download
-=======================================================
-
-.. currentmodule:: Processor.App.Downloader.downloader
-
-.. automethod:: Downloader.download
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.Downloader.rst.txt b/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.Downloader.rst.txt
deleted file mode 100644
index 0e2076a2..00000000
--- a/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.Downloader.rst.txt
+++ /dev/null
@@ -1,25 +0,0 @@
-Processor.App.Downloader.downloader.Downloader
-==============================================
-
-.. currentmodule:: Processor.App.Downloader.downloader
-
-.. autoclass:: Downloader
-
-
- .. automethod:: __init__
-
-
- .. rubric:: Methods
-
- .. autosummary::
- :toctree:
-
-
- ~Downloader.__init__
- ~Downloader.download
-
-
-
-
-
-
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.DownloaderFull.__init__.rst.txt b/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.DownloaderFull.__init__.rst.txt
deleted file mode 100644
index c7c7c660..00000000
--- a/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.DownloaderFull.__init__.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.Downloader.downloader.DownloaderFull.\_\_init\_\_
-===============================================================
-
-.. currentmodule:: Processor.App.Downloader.downloader
-
-.. automethod:: DownloaderFull.__init__
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.DownloaderFull.aclose.rst.txt b/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.DownloaderFull.aclose.rst.txt
deleted file mode 100644
index 829f1b2d..00000000
--- a/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.DownloaderFull.aclose.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.Downloader.downloader.DownloaderFull.aclose
-=========================================================
-
-.. currentmodule:: Processor.App.Downloader.downloader
-
-.. automethod:: DownloaderFull.aclose
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.DownloaderFull.aopen.rst.txt b/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.DownloaderFull.aopen.rst.txt
deleted file mode 100644
index b5086136..00000000
--- a/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.DownloaderFull.aopen.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.Downloader.downloader.DownloaderFull.aopen
-========================================================
-
-.. currentmodule:: Processor.App.Downloader.downloader
-
-.. automethod:: DownloaderFull.aopen
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.DownloaderFull.download.rst.txt b/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.DownloaderFull.download.rst.txt
deleted file mode 100644
index ef0762f8..00000000
--- a/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.DownloaderFull.download.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.Downloader.downloader.DownloaderFull.download
-===========================================================
-
-.. currentmodule:: Processor.App.Downloader.downloader
-
-.. automethod:: DownloaderFull.download
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.DownloaderFull.rst.txt b/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.DownloaderFull.rst.txt
deleted file mode 100644
index e52f8a01..00000000
--- a/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.DownloaderFull.rst.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-Processor.App.Downloader.downloader.DownloaderFull
-==================================================
-
-.. currentmodule:: Processor.App.Downloader.downloader
-
-.. autoclass:: DownloaderFull
-
-
- .. automethod:: __init__
-
-
- .. rubric:: Methods
-
- .. autosummary::
- :toctree:
-
-
- ~DownloaderFull.__init__
- ~DownloaderFull.aclose
- ~DownloaderFull.aopen
- ~DownloaderFull.download
- ~DownloaderFull.unwrap
-
-
-
-
-
-
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.DownloaderFull.unwrap.rst.txt b/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.DownloaderFull.unwrap.rst.txt
deleted file mode 100644
index 3aa67a89..00000000
--- a/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.DownloaderFull.unwrap.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.Downloader.downloader.DownloaderFull.unwrap
-=========================================================
-
-.. currentmodule:: Processor.App.Downloader.downloader
-
-.. automethod:: DownloaderFull.unwrap
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.rst.txt b/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.rst.txt
deleted file mode 100644
index d6740402..00000000
--- a/docs/build/html/_sources/generated/Processor.App.Downloader.downloader.rst.txt
+++ /dev/null
@@ -1,31 +0,0 @@
-Processor.App.Downloader.downloader
-===================================
-
-.. automodule:: Processor.App.Downloader.downloader
-
-
-
-
-
-
-
-
-
-
-
- .. rubric:: Classes
-
- .. autosummary::
- :toctree:
-
- Downloader
- DownloaderFull
-
-
-
-
-
-
-
-
-
diff --git a/docs/build/html/_sources/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.__init__.rst.txt b/docs/build/html/_sources/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.__init__.rst.txt
deleted file mode 100644
index f7e15cdd..00000000
--- a/docs/build/html/_sources/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.__init__.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.Downloader.dummy\_downloader.DownloaderDummy.\_\_init\_\_
-=======================================================================
-
-.. currentmodule:: Processor.App.Downloader.dummy_downloader
-
-.. automethod:: DownloaderDummy.__init__
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.download.rst.txt b/docs/build/html/_sources/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.download.rst.txt
deleted file mode 100644
index a83e022e..00000000
--- a/docs/build/html/_sources/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.download.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.Downloader.dummy\_downloader.DownloaderDummy.download
-===================================================================
-
-.. currentmodule:: Processor.App.Downloader.dummy_downloader
-
-.. automethod:: DownloaderDummy.download
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.extract_url.rst.txt b/docs/build/html/_sources/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.extract_url.rst.txt
deleted file mode 100644
index 325a504c..00000000
--- a/docs/build/html/_sources/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.extract_url.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.Downloader.dummy\_downloader.DownloaderDummy.extract\_url
-=======================================================================
-
-.. currentmodule:: Processor.App.Downloader.dummy_downloader
-
-.. automethod:: DownloaderDummy.extract_url
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.extract_year.rst.txt b/docs/build/html/_sources/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.extract_year.rst.txt
deleted file mode 100644
index d9d7db93..00000000
--- a/docs/build/html/_sources/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.extract_year.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.Downloader.dummy\_downloader.DownloaderDummy.extract\_year
-========================================================================
-
-.. currentmodule:: Processor.App.Downloader.dummy_downloader
-
-.. automethod:: DownloaderDummy.extract_year
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.mine_metadata.rst.txt b/docs/build/html/_sources/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.mine_metadata.rst.txt
deleted file mode 100644
index 9ec464b6..00000000
--- a/docs/build/html/_sources/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.mine_metadata.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.Downloader.dummy\_downloader.DownloaderDummy.mine\_metadata
-=========================================================================
-
-.. currentmodule:: Processor.App.Downloader.dummy_downloader
-
-.. automethod:: DownloaderDummy.mine_metadata
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.rst.txt b/docs/build/html/_sources/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.rst.txt
deleted file mode 100644
index fdc29bc9..00000000
--- a/docs/build/html/_sources/generated/Processor.App.Downloader.dummy_downloader.DownloaderDummy.rst.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-Processor.App.Downloader.dummy\_downloader.DownloaderDummy
-==========================================================
-
-.. currentmodule:: Processor.App.Downloader.dummy_downloader
-
-.. autoclass:: DownloaderDummy
-
-
- .. automethod:: __init__
-
-
- .. rubric:: Methods
-
- .. autosummary::
- :toctree:
-
-
- ~DownloaderDummy.__init__
- ~DownloaderDummy.download
- ~DownloaderDummy.extract_url
- ~DownloaderDummy.extract_year
- ~DownloaderDummy.mine_metadata
-
-
-
-
-
-
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.Downloader.dummy_downloader.rst.txt b/docs/build/html/_sources/generated/Processor.App.Downloader.dummy_downloader.rst.txt
deleted file mode 100644
index 05782b33..00000000
--- a/docs/build/html/_sources/generated/Processor.App.Downloader.dummy_downloader.rst.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-Processor.App.Downloader.dummy\_downloader
-==========================================
-
-.. automodule:: Processor.App.Downloader.dummy_downloader
-
-
-
-
-
-
-
-
-
-
-
- .. rubric:: Classes
-
- .. autosummary::
- :toctree:
-
- DownloaderDummy
-
-
-
-
-
-
-
-
-
diff --git a/docs/build/html/_sources/generated/Processor.App.Downloader.rst.txt b/docs/build/html/_sources/generated/Processor.App.Downloader.rst.txt
deleted file mode 100644
index f3f6bf57..00000000
--- a/docs/build/html/_sources/generated/Processor.App.Downloader.rst.txt
+++ /dev/null
@@ -1,32 +0,0 @@
-Processor.App.Downloader
-========================
-
-.. automodule:: Processor.App.Downloader
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-.. rubric:: Modules
-
-.. autosummary::
- :toctree:
- :recursive:
-
- Processor.App.Downloader.downloader
- Processor.App.Downloader.dummy_downloader
-
diff --git a/docs/build/html/_sources/generated/Processor.App.Extractor.dummy_extractor.Extractor.__init__.rst.txt b/docs/build/html/_sources/generated/Processor.App.Extractor.dummy_extractor.Extractor.__init__.rst.txt
deleted file mode 100644
index 722a74df..00000000
--- a/docs/build/html/_sources/generated/Processor.App.Extractor.dummy_extractor.Extractor.__init__.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.Extractor.dummy\_extractor.Extractor.\_\_init\_\_
-===============================================================
-
-.. currentmodule:: Processor.App.Extractor.dummy_extractor
-
-.. automethod:: Extractor.__init__
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.Extractor.dummy_extractor.Extractor.extract.rst.txt b/docs/build/html/_sources/generated/Processor.App.Extractor.dummy_extractor.Extractor.extract.rst.txt
deleted file mode 100644
index 020a42ef..00000000
--- a/docs/build/html/_sources/generated/Processor.App.Extractor.dummy_extractor.Extractor.extract.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.Extractor.dummy\_extractor.Extractor.extract
-==========================================================
-
-.. currentmodule:: Processor.App.Extractor.dummy_extractor
-
-.. automethod:: Extractor.extract
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.Extractor.dummy_extractor.Extractor.extract_soup.rst.txt b/docs/build/html/_sources/generated/Processor.App.Extractor.dummy_extractor.Extractor.extract_soup.rst.txt
deleted file mode 100644
index d63c6eae..00000000
--- a/docs/build/html/_sources/generated/Processor.App.Extractor.dummy_extractor.Extractor.extract_soup.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.Extractor.dummy\_extractor.Extractor.extract\_soup
-================================================================
-
-.. currentmodule:: Processor.App.Extractor.dummy_extractor
-
-.. automethod:: Extractor.extract_soup
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.Extractor.dummy_extractor.Extractor.filter_raw.rst.txt b/docs/build/html/_sources/generated/Processor.App.Extractor.dummy_extractor.Extractor.filter_raw.rst.txt
deleted file mode 100644
index 9dee323f..00000000
--- a/docs/build/html/_sources/generated/Processor.App.Extractor.dummy_extractor.Extractor.filter_raw.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.Extractor.dummy\_extractor.Extractor.filter\_raw
-==============================================================
-
-.. currentmodule:: Processor.App.Extractor.dummy_extractor
-
-.. automethod:: Extractor.filter_raw
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.Extractor.dummy_extractor.Extractor.filter_soup.rst.txt b/docs/build/html/_sources/generated/Processor.App.Extractor.dummy_extractor.Extractor.filter_soup.rst.txt
deleted file mode 100644
index 45f63d7f..00000000
--- a/docs/build/html/_sources/generated/Processor.App.Extractor.dummy_extractor.Extractor.filter_soup.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.Extractor.dummy\_extractor.Extractor.filter\_soup
-===============================================================
-
-.. currentmodule:: Processor.App.Extractor.dummy_extractor
-
-.. automethod:: Extractor.filter_soup
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.Extractor.dummy_extractor.Extractor.preprocess.rst.txt b/docs/build/html/_sources/generated/Processor.App.Extractor.dummy_extractor.Extractor.preprocess.rst.txt
deleted file mode 100644
index 75d1ef77..00000000
--- a/docs/build/html/_sources/generated/Processor.App.Extractor.dummy_extractor.Extractor.preprocess.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.Extractor.dummy\_extractor.Extractor.preprocess
-=============================================================
-
-.. currentmodule:: Processor.App.Extractor.dummy_extractor
-
-.. automethod:: Extractor.preprocess
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.Extractor.dummy_extractor.Extractor.rst.txt b/docs/build/html/_sources/generated/Processor.App.Extractor.dummy_extractor.Extractor.rst.txt
deleted file mode 100644
index ed37ef1b..00000000
--- a/docs/build/html/_sources/generated/Processor.App.Extractor.dummy_extractor.Extractor.rst.txt
+++ /dev/null
@@ -1,37 +0,0 @@
-Processor.App.Extractor.dummy\_extractor.Extractor
-==================================================
-
-.. currentmodule:: Processor.App.Extractor.dummy_extractor
-
-.. autoclass:: Extractor
-
-
- .. automethod:: __init__
-
-
- .. rubric:: Methods
-
- .. autosummary::
- :toctree:
-
-
- ~Extractor.__init__
- ~Extractor.extract
- ~Extractor.extract_soup
- ~Extractor.filter_raw
- ~Extractor.filter_soup
- ~Extractor.preprocess
-
-
-
-
-
- .. rubric:: Attributes
-
- .. autosummary::
-
- ~Extractor.ENCODING
- ~Extractor.SINCE
- ~Extractor.TO
-
-
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.Extractor.dummy_extractor.rst.txt b/docs/build/html/_sources/generated/Processor.App.Extractor.dummy_extractor.rst.txt
deleted file mode 100644
index 9d0a2a94..00000000
--- a/docs/build/html/_sources/generated/Processor.App.Extractor.dummy_extractor.rst.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-Processor.App.Extractor.dummy\_extractor
-========================================
-
-.. automodule:: Processor.App.Extractor.dummy_extractor
-
-
-
-
-
-
-
-
-
-
-
- .. rubric:: Classes
-
- .. autosummary::
- :toctree:
-
- Extractor
-
-
-
-
-
-
-
-
-
diff --git a/docs/build/html/_sources/generated/Processor.App.Extractor.extractor.BaseExtractor.__init__.rst.txt b/docs/build/html/_sources/generated/Processor.App.Extractor.extractor.BaseExtractor.__init__.rst.txt
deleted file mode 100644
index e26ae44c..00000000
--- a/docs/build/html/_sources/generated/Processor.App.Extractor.extractor.BaseExtractor.__init__.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.Extractor.extractor.BaseExtractor.\_\_init\_\_
-============================================================
-
-.. currentmodule:: Processor.App.Extractor.extractor
-
-.. automethod:: BaseExtractor.__init__
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.Extractor.extractor.BaseExtractor.extract.rst.txt b/docs/build/html/_sources/generated/Processor.App.Extractor.extractor.BaseExtractor.extract.rst.txt
deleted file mode 100644
index 498e1eef..00000000
--- a/docs/build/html/_sources/generated/Processor.App.Extractor.extractor.BaseExtractor.extract.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.Extractor.extractor.BaseExtractor.extract
-=======================================================
-
-.. currentmodule:: Processor.App.Extractor.extractor
-
-.. automethod:: BaseExtractor.extract
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.Extractor.extractor.BaseExtractor.extract_soup.rst.txt b/docs/build/html/_sources/generated/Processor.App.Extractor.extractor.BaseExtractor.extract_soup.rst.txt
deleted file mode 100644
index f843eff5..00000000
--- a/docs/build/html/_sources/generated/Processor.App.Extractor.extractor.BaseExtractor.extract_soup.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.Extractor.extractor.BaseExtractor.extract\_soup
-=============================================================
-
-.. currentmodule:: Processor.App.Extractor.extractor
-
-.. automethod:: BaseExtractor.extract_soup
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.Extractor.extractor.BaseExtractor.filter_raw.rst.txt b/docs/build/html/_sources/generated/Processor.App.Extractor.extractor.BaseExtractor.filter_raw.rst.txt
deleted file mode 100644
index c211b6d0..00000000
--- a/docs/build/html/_sources/generated/Processor.App.Extractor.extractor.BaseExtractor.filter_raw.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.Extractor.extractor.BaseExtractor.filter\_raw
-===========================================================
-
-.. currentmodule:: Processor.App.Extractor.extractor
-
-.. automethod:: BaseExtractor.filter_raw
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.Extractor.extractor.BaseExtractor.filter_soup.rst.txt b/docs/build/html/_sources/generated/Processor.App.Extractor.extractor.BaseExtractor.filter_soup.rst.txt
deleted file mode 100644
index 5d0cb513..00000000
--- a/docs/build/html/_sources/generated/Processor.App.Extractor.extractor.BaseExtractor.filter_soup.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.Extractor.extractor.BaseExtractor.filter\_soup
-============================================================
-
-.. currentmodule:: Processor.App.Extractor.extractor
-
-.. automethod:: BaseExtractor.filter_soup
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.Extractor.extractor.BaseExtractor.preprocess.rst.txt b/docs/build/html/_sources/generated/Processor.App.Extractor.extractor.BaseExtractor.preprocess.rst.txt
deleted file mode 100644
index bf213dc6..00000000
--- a/docs/build/html/_sources/generated/Processor.App.Extractor.extractor.BaseExtractor.preprocess.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.Extractor.extractor.BaseExtractor.preprocess
-==========================================================
-
-.. currentmodule:: Processor.App.Extractor.extractor
-
-.. automethod:: BaseExtractor.preprocess
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.Extractor.extractor.BaseExtractor.rst.txt b/docs/build/html/_sources/generated/Processor.App.Extractor.extractor.BaseExtractor.rst.txt
deleted file mode 100644
index 61c49d40..00000000
--- a/docs/build/html/_sources/generated/Processor.App.Extractor.extractor.BaseExtractor.rst.txt
+++ /dev/null
@@ -1,37 +0,0 @@
-Processor.App.Extractor.extractor.BaseExtractor
-===============================================
-
-.. currentmodule:: Processor.App.Extractor.extractor
-
-.. autoclass:: BaseExtractor
-
-
- .. automethod:: __init__
-
-
- .. rubric:: Methods
-
- .. autosummary::
- :toctree:
-
-
- ~BaseExtractor.__init__
- ~BaseExtractor.extract
- ~BaseExtractor.extract_soup
- ~BaseExtractor.filter_raw
- ~BaseExtractor.filter_soup
- ~BaseExtractor.preprocess
-
-
-
-
-
- .. rubric:: Attributes
-
- .. autosummary::
-
- ~BaseExtractor.ENCODING
- ~BaseExtractor.SINCE
- ~BaseExtractor.TO
-
-
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.Extractor.extractor.rst.txt b/docs/build/html/_sources/generated/Processor.App.Extractor.extractor.rst.txt
deleted file mode 100644
index 96733184..00000000
--- a/docs/build/html/_sources/generated/Processor.App.Extractor.extractor.rst.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-Processor.App.Extractor.extractor
-=================================
-
-.. automodule:: Processor.App.Extractor.extractor
-
-
-
-
-
-
-
-
-
-
-
- .. rubric:: Classes
-
- .. autosummary::
- :toctree:
-
- BaseExtractor
-
-
-
-
-
-
-
-
-
diff --git a/docs/build/html/_sources/generated/Processor.App.Extractor.extractor_utils.rst.txt b/docs/build/html/_sources/generated/Processor.App.Extractor.extractor_utils.rst.txt
deleted file mode 100644
index 636bbbfe..00000000
--- a/docs/build/html/_sources/generated/Processor.App.Extractor.extractor_utils.rst.txt
+++ /dev/null
@@ -1,37 +0,0 @@
-Processor.App.Extractor.extractor\_utils
-========================================
-
-.. automodule:: Processor.App.Extractor.extractor_utils
-
-
-
-
-
-
-
- .. rubric:: Functions
-
- .. autosummary::
-
- all_same_transform
- combine_dicts
- extract_transform
- get_attribute_transform
- get_tag_transform
- get_tags_transform
- get_text_list_transform
- get_text_transform
- transform
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/docs/build/html/_sources/generated/Processor.App.Extractor.rst.txt b/docs/build/html/_sources/generated/Processor.App.Extractor.rst.txt
deleted file mode 100644
index 27e0561d..00000000
--- a/docs/build/html/_sources/generated/Processor.App.Extractor.rst.txt
+++ /dev/null
@@ -1,33 +0,0 @@
-Processor.App.Extractor
-=======================
-
-.. automodule:: Processor.App.Extractor
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-.. rubric:: Modules
-
-.. autosummary::
- :toctree:
- :recursive:
-
- Processor.App.Extractor.dummy_extractor
- Processor.App.Extractor.extractor
- Processor.App.Extractor.extractor_utils
-
diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.dummy_streamer.DummyStreamer.__init__.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.dummy_streamer.DummyStreamer.__init__.rst.txt
deleted file mode 100644
index 965c0790..00000000
--- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.dummy_streamer.DummyStreamer.__init__.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.OutStreamer.dummy\_streamer.DummyStreamer.\_\_init\_\_
-====================================================================
-
-.. currentmodule:: Processor.App.OutStreamer.dummy_streamer
-
-.. automethod:: DummyStreamer.__init__
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.dummy_streamer.DummyStreamer.clean_up.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.dummy_streamer.DummyStreamer.clean_up.rst.txt
deleted file mode 100644
index ddb9fc08..00000000
--- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.dummy_streamer.DummyStreamer.clean_up.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.OutStreamer.dummy\_streamer.DummyStreamer.clean\_up
-=================================================================
-
-.. currentmodule:: Processor.App.OutStreamer.dummy_streamer
-
-.. automethod:: DummyStreamer.clean_up
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.dummy_streamer.DummyStreamer.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.dummy_streamer.DummyStreamer.rst.txt
deleted file mode 100644
index ad575bea..00000000
--- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.dummy_streamer.DummyStreamer.rst.txt
+++ /dev/null
@@ -1,26 +0,0 @@
-Processor.App.OutStreamer.dummy\_streamer.DummyStreamer
-=======================================================
-
-.. currentmodule:: Processor.App.OutStreamer.dummy_streamer
-
-.. autoclass:: DummyStreamer
-
-
- .. automethod:: __init__
-
-
- .. rubric:: Methods
-
- .. autosummary::
- :toctree:
-
-
- ~DummyStreamer.__init__
- ~DummyStreamer.clean_up
- ~DummyStreamer.stream
-
-
-
-
-
-
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.dummy_streamer.DummyStreamer.stream.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.dummy_streamer.DummyStreamer.stream.rst.txt
deleted file mode 100644
index 7b1278c6..00000000
--- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.dummy_streamer.DummyStreamer.stream.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.OutStreamer.dummy\_streamer.DummyStreamer.stream
-==============================================================
-
-.. currentmodule:: Processor.App.OutStreamer.dummy_streamer
-
-.. automethod:: DummyStreamer.stream
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.dummy_streamer.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.dummy_streamer.rst.txt
deleted file mode 100644
index d724469a..00000000
--- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.dummy_streamer.rst.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-Processor.App.OutStreamer.dummy\_streamer
-=========================================
-
-.. automodule:: Processor.App.OutStreamer.dummy_streamer
-
-
-
-
-
-
-
-
-
-
-
- .. rubric:: Classes
-
- .. autosummary::
- :toctree:
-
- DummyStreamer
-
-
-
-
-
-
-
-
-
diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.outstreamer.OutStreamer.__init__.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.outstreamer.OutStreamer.__init__.rst.txt
deleted file mode 100644
index ba586959..00000000
--- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.outstreamer.OutStreamer.__init__.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.OutStreamer.outstreamer.OutStreamer.\_\_init\_\_
-==============================================================
-
-.. currentmodule:: Processor.App.OutStreamer.outstreamer
-
-.. automethod:: OutStreamer.__init__
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.outstreamer.OutStreamer.clean_up.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.outstreamer.OutStreamer.clean_up.rst.txt
deleted file mode 100644
index 57076dc4..00000000
--- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.outstreamer.OutStreamer.clean_up.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.OutStreamer.outstreamer.OutStreamer.clean\_up
-===========================================================
-
-.. currentmodule:: Processor.App.OutStreamer.outstreamer
-
-.. automethod:: OutStreamer.clean_up
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.outstreamer.OutStreamer.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.outstreamer.OutStreamer.rst.txt
deleted file mode 100644
index f4ed6b19..00000000
--- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.outstreamer.OutStreamer.rst.txt
+++ /dev/null
@@ -1,26 +0,0 @@
-Processor.App.OutStreamer.outstreamer.OutStreamer
-=================================================
-
-.. currentmodule:: Processor.App.OutStreamer.outstreamer
-
-.. autoclass:: OutStreamer
-
-
- .. automethod:: __init__
-
-
- .. rubric:: Methods
-
- .. autosummary::
- :toctree:
-
-
- ~OutStreamer.__init__
- ~OutStreamer.clean_up
- ~OutStreamer.stream
-
-
-
-
-
-
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.outstreamer.OutStreamer.stream.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.outstreamer.OutStreamer.stream.rst.txt
deleted file mode 100644
index 2b776c8f..00000000
--- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.outstreamer.OutStreamer.stream.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.OutStreamer.outstreamer.OutStreamer.stream
-========================================================
-
-.. currentmodule:: Processor.App.OutStreamer.outstreamer
-
-.. automethod:: OutStreamer.stream
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.outstreamer.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.outstreamer.rst.txt
deleted file mode 100644
index c96dca60..00000000
--- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.outstreamer.rst.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-Processor.App.OutStreamer.outstreamer
-=====================================
-
-.. automodule:: Processor.App.OutStreamer.outstreamer
-
-
-
-
-
-
-
-
-
-
-
- .. rubric:: Classes
-
- .. autosummary::
- :toctree:
-
- OutStreamer
-
-
-
-
-
-
-
-
-
diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.rst.txt
deleted file mode 100644
index f554c110..00000000
--- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.rst.txt
+++ /dev/null
@@ -1,33 +0,0 @@
-Processor.App.OutStreamer
-=========================
-
-.. automodule:: Processor.App.OutStreamer
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-.. rubric:: Modules
-
-.. autosummary::
- :toctree:
- :recursive:
-
- Processor.App.OutStreamer.dummy_streamer
- Processor.App.OutStreamer.outstreamer
- Processor.App.OutStreamer.stream_to_file
-
diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.__init__.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.__init__.rst.txt
deleted file mode 100644
index 3e71a599..00000000
--- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.__init__.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.OutStreamer.stream\_to\_file.OutStreamerFileDefault.\_\_init\_\_
-==============================================================================
-
-.. currentmodule:: Processor.App.OutStreamer.stream_to_file
-
-.. automethod:: OutStreamerFileDefault.__init__
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.clean_up.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.clean_up.rst.txt
deleted file mode 100644
index 96db9ed7..00000000
--- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.clean_up.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.OutStreamer.stream\_to\_file.OutStreamerFileDefault.clean\_up
-===========================================================================
-
-.. currentmodule:: Processor.App.OutStreamer.stream_to_file
-
-.. automethod:: OutStreamerFileDefault.clean_up
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.get_file_name.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.get_file_name.rst.txt
deleted file mode 100644
index 2b06059f..00000000
--- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.get_file_name.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.OutStreamer.stream\_to\_file.OutStreamerFileDefault.get\_file\_name
-=================================================================================
-
-.. currentmodule:: Processor.App.OutStreamer.stream_to_file
-
-.. automethod:: OutStreamerFileDefault.get_file_name
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.metadata_to_string.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.metadata_to_string.rst.txt
deleted file mode 100644
index 6c994f6b..00000000
--- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.metadata_to_string.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.OutStreamer.stream\_to\_file.OutStreamerFileDefault.metadata\_to\_string
-======================================================================================
-
-.. currentmodule:: Processor.App.OutStreamer.stream_to_file
-
-.. automethod:: OutStreamerFileDefault.metadata_to_string
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.rst.txt
deleted file mode 100644
index 81e7a152..00000000
--- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.rst.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-Processor.App.OutStreamer.stream\_to\_file.OutStreamerFileDefault
-=================================================================
-
-.. currentmodule:: Processor.App.OutStreamer.stream_to_file
-
-.. autoclass:: OutStreamerFileDefault
-
-
- .. automethod:: __init__
-
-
- .. rubric:: Methods
-
- .. autosummary::
- :toctree:
-
-
- ~OutStreamerFileDefault.__init__
- ~OutStreamerFileDefault.clean_up
- ~OutStreamerFileDefault.get_file_name
- ~OutStreamerFileDefault.metadata_to_string
- ~OutStreamerFileDefault.stream
-
-
-
-
-
-
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.stream.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.stream.rst.txt
deleted file mode 100644
index 61876e12..00000000
--- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileDefault.stream.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.OutStreamer.stream\_to\_file.OutStreamerFileDefault.stream
-========================================================================
-
-.. currentmodule:: Processor.App.OutStreamer.stream_to_file
-
-.. automethod:: OutStreamerFileDefault.stream
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.__init__.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.__init__.rst.txt
deleted file mode 100644
index 492ac309..00000000
--- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.__init__.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.OutStreamer.stream\_to\_file.OutStreamerFileHTMLContent.\_\_init\_\_
-==================================================================================
-
-.. currentmodule:: Processor.App.OutStreamer.stream_to_file
-
-.. automethod:: OutStreamerFileHTMLContent.__init__
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.clean_up.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.clean_up.rst.txt
deleted file mode 100644
index 5ee2f762..00000000
--- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.clean_up.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.OutStreamer.stream\_to\_file.OutStreamerFileHTMLContent.clean\_up
-===============================================================================
-
-.. currentmodule:: Processor.App.OutStreamer.stream_to_file
-
-.. automethod:: OutStreamerFileHTMLContent.clean_up
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.get_file_name.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.get_file_name.rst.txt
deleted file mode 100644
index e8fc71a1..00000000
--- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.get_file_name.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.OutStreamer.stream\_to\_file.OutStreamerFileHTMLContent.get\_file\_name
-=====================================================================================
-
-.. currentmodule:: Processor.App.OutStreamer.stream_to_file
-
-.. automethod:: OutStreamerFileHTMLContent.get_file_name
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.metadata_to_string.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.metadata_to_string.rst.txt
deleted file mode 100644
index 5041edba..00000000
--- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.metadata_to_string.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.OutStreamer.stream\_to\_file.OutStreamerFileHTMLContent.metadata\_to\_string
-==========================================================================================
-
-.. currentmodule:: Processor.App.OutStreamer.stream_to_file
-
-.. automethod:: OutStreamerFileHTMLContent.metadata_to_string
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.rst.txt
deleted file mode 100644
index 6c387be1..00000000
--- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.rst.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-Processor.App.OutStreamer.stream\_to\_file.OutStreamerFileHTMLContent
-=====================================================================
-
-.. currentmodule:: Processor.App.OutStreamer.stream_to_file
-
-.. autoclass:: OutStreamerFileHTMLContent
-
-
- .. automethod:: __init__
-
-
- .. rubric:: Methods
-
- .. autosummary::
- :toctree:
-
-
- ~OutStreamerFileHTMLContent.__init__
- ~OutStreamerFileHTMLContent.clean_up
- ~OutStreamerFileHTMLContent.get_file_name
- ~OutStreamerFileHTMLContent.metadata_to_string
- ~OutStreamerFileHTMLContent.stream
-
-
-
-
-
-
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.stream.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.stream.rst.txt
deleted file mode 100644
index 76b773a4..00000000
--- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileHTMLContent.stream.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.OutStreamer.stream\_to\_file.OutStreamerFileHTMLContent.stream
-============================================================================
-
-.. currentmodule:: Processor.App.OutStreamer.stream_to_file
-
-.. automethod:: OutStreamerFileHTMLContent.stream
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.__init__.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.__init__.rst.txt
deleted file mode 100644
index fb03c5ca..00000000
--- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.__init__.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.OutStreamer.stream\_to\_file.OutStreamerFileJSON.\_\_init\_\_
-===========================================================================
-
-.. currentmodule:: Processor.App.OutStreamer.stream_to_file
-
-.. automethod:: OutStreamerFileJSON.__init__
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.clean_up.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.clean_up.rst.txt
deleted file mode 100644
index f769b5df..00000000
--- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.clean_up.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.OutStreamer.stream\_to\_file.OutStreamerFileJSON.clean\_up
-========================================================================
-
-.. currentmodule:: Processor.App.OutStreamer.stream_to_file
-
-.. automethod:: OutStreamerFileJSON.clean_up
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.get_file_name.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.get_file_name.rst.txt
deleted file mode 100644
index be184ebe..00000000
--- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.get_file_name.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.OutStreamer.stream\_to\_file.OutStreamerFileJSON.get\_file\_name
-==============================================================================
-
-.. currentmodule:: Processor.App.OutStreamer.stream_to_file
-
-.. automethod:: OutStreamerFileJSON.get_file_name
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.metadata_to_string.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.metadata_to_string.rst.txt
deleted file mode 100644
index 332848a9..00000000
--- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.metadata_to_string.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.OutStreamer.stream\_to\_file.OutStreamerFileJSON.metadata\_to\_string
-===================================================================================
-
-.. currentmodule:: Processor.App.OutStreamer.stream_to_file
-
-.. automethod:: OutStreamerFileJSON.metadata_to_string
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.rst.txt
deleted file mode 100644
index f5663dfc..00000000
--- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.rst.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-Processor.App.OutStreamer.stream\_to\_file.OutStreamerFileJSON
-==============================================================
-
-.. currentmodule:: Processor.App.OutStreamer.stream_to_file
-
-.. autoclass:: OutStreamerFileJSON
-
-
- .. automethod:: __init__
-
-
- .. rubric:: Methods
-
- .. autosummary::
- :toctree:
-
-
- ~OutStreamerFileJSON.__init__
- ~OutStreamerFileJSON.clean_up
- ~OutStreamerFileJSON.get_file_name
- ~OutStreamerFileJSON.metadata_to_string
- ~OutStreamerFileJSON.stream
-
-
-
-
-
-
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.stream.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.stream.rst.txt
deleted file mode 100644
index 5f6ef1f9..00000000
--- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.OutStreamerFileJSON.stream.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.OutStreamer.stream\_to\_file.OutStreamerFileJSON.stream
-=====================================================================
-
-.. currentmodule:: Processor.App.OutStreamer.stream_to_file
-
-.. automethod:: OutStreamerFileJSON.stream
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.rst.txt b/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.rst.txt
deleted file mode 100644
index 8a3725d9..00000000
--- a/docs/build/html/_sources/generated/Processor.App.OutStreamer.stream_to_file.rst.txt
+++ /dev/null
@@ -1,32 +0,0 @@
-Processor.App.OutStreamer.stream\_to\_file
-==========================================
-
-.. automodule:: Processor.App.OutStreamer.stream_to_file
-
-
-
-
-
-
-
-
-
-
-
- .. rubric:: Classes
-
- .. autosummary::
- :toctree:
-
- OutStreamerFileDefault
- OutStreamerFileHTMLContent
- OutStreamerFileJSON
-
-
-
-
-
-
-
-
-
diff --git a/docs/build/html/_sources/generated/Processor.App.Pipeline.pipeline.ProcessorPipeline.__init__.rst.txt b/docs/build/html/_sources/generated/Processor.App.Pipeline.pipeline.ProcessorPipeline.__init__.rst.txt
deleted file mode 100644
index 77b2fd3f..00000000
--- a/docs/build/html/_sources/generated/Processor.App.Pipeline.pipeline.ProcessorPipeline.__init__.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.Pipeline.pipeline.ProcessorPipeline.\_\_init\_\_
-==============================================================
-
-.. currentmodule:: Processor.App.Pipeline.pipeline
-
-.. automethod:: ProcessorPipeline.__init__
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.Pipeline.pipeline.ProcessorPipeline.process_domain_record.rst.txt b/docs/build/html/_sources/generated/Processor.App.Pipeline.pipeline.ProcessorPipeline.process_domain_record.rst.txt
deleted file mode 100644
index 598ebbad..00000000
--- a/docs/build/html/_sources/generated/Processor.App.Pipeline.pipeline.ProcessorPipeline.process_domain_record.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.Pipeline.pipeline.ProcessorPipeline.process\_domain\_record
-=========================================================================
-
-.. currentmodule:: Processor.App.Pipeline.pipeline
-
-.. automethod:: ProcessorPipeline.process_domain_record
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.Pipeline.pipeline.ProcessorPipeline.rst.txt b/docs/build/html/_sources/generated/Processor.App.Pipeline.pipeline.ProcessorPipeline.rst.txt
deleted file mode 100644
index 02b3c1cb..00000000
--- a/docs/build/html/_sources/generated/Processor.App.Pipeline.pipeline.ProcessorPipeline.rst.txt
+++ /dev/null
@@ -1,25 +0,0 @@
-Processor.App.Pipeline.pipeline.ProcessorPipeline
-=================================================
-
-.. currentmodule:: Processor.App.Pipeline.pipeline
-
-.. autoclass:: ProcessorPipeline
-
-
- .. automethod:: __init__
-
-
- .. rubric:: Methods
-
- .. autosummary::
- :toctree:
-
-
- ~ProcessorPipeline.__init__
- ~ProcessorPipeline.process_domain_record
-
-
-
-
-
-
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.Pipeline.pipeline.rst.txt b/docs/build/html/_sources/generated/Processor.App.Pipeline.pipeline.rst.txt
deleted file mode 100644
index a3c12dc7..00000000
--- a/docs/build/html/_sources/generated/Processor.App.Pipeline.pipeline.rst.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-Processor.App.Pipeline.pipeline
-===============================
-
-.. automodule:: Processor.App.Pipeline.pipeline
-
-
-
-
-
-
-
-
-
-
-
- .. rubric:: Classes
-
- .. autosummary::
- :toctree:
-
- ProcessorPipeline
-
-
-
-
-
-
-
-
-
diff --git a/docs/build/html/_sources/generated/Processor.App.Pipeline.rst.txt b/docs/build/html/_sources/generated/Processor.App.Pipeline.rst.txt
deleted file mode 100644
index da6f2f8c..00000000
--- a/docs/build/html/_sources/generated/Processor.App.Pipeline.rst.txt
+++ /dev/null
@@ -1,31 +0,0 @@
-Processor.App.Pipeline
-======================
-
-.. automodule:: Processor.App.Pipeline
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-.. rubric:: Modules
-
-.. autosummary::
- :toctree:
- :recursive:
-
- Processor.App.Pipeline.pipeline
-
diff --git a/docs/build/html/_sources/generated/Processor.App.Router.router.Route.__init__.rst.txt b/docs/build/html/_sources/generated/Processor.App.Router.router.Route.__init__.rst.txt
deleted file mode 100644
index 8e322777..00000000
--- a/docs/build/html/_sources/generated/Processor.App.Router.router.Route.__init__.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.Router.router.Route.\_\_init\_\_
-==============================================
-
-.. currentmodule:: Processor.App.Router.router
-
-.. automethod:: Route.__init__
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.Router.router.Route.rst.txt b/docs/build/html/_sources/generated/Processor.App.Router.router.Route.rst.txt
deleted file mode 100644
index d5d789f0..00000000
--- a/docs/build/html/_sources/generated/Processor.App.Router.router.Route.rst.txt
+++ /dev/null
@@ -1,33 +0,0 @@
-Processor.App.Router.router.Route
-=================================
-
-.. currentmodule:: Processor.App.Router.router
-
-.. autoclass:: Route
-
-
- .. automethod:: __init__
-
-
- .. rubric:: Methods
-
- .. autosummary::
- :toctree:
-
-
- ~Route.__init__
-
-
-
-
-
- .. rubric:: Attributes
-
- .. autosummary::
-
- ~Route.name
- ~Route.regexes
- ~Route.since
- ~Route.to
-
-
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.Router.router.Router.__init__.rst.txt b/docs/build/html/_sources/generated/Processor.App.Router.router.Router.__init__.rst.txt
deleted file mode 100644
index b83aa285..00000000
--- a/docs/build/html/_sources/generated/Processor.App.Router.router.Router.__init__.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.Router.router.Router.\_\_init\_\_
-===============================================
-
-.. currentmodule:: Processor.App.Router.router
-
-.. automethod:: Router.__init__
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.Router.router.Router.load_module.rst.txt b/docs/build/html/_sources/generated/Processor.App.Router.router.Router.load_module.rst.txt
deleted file mode 100644
index 4732d19f..00000000
--- a/docs/build/html/_sources/generated/Processor.App.Router.router.Router.load_module.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.Router.router.Router.load\_module
-===============================================
-
-.. currentmodule:: Processor.App.Router.router
-
-.. automethod:: Router.load_module
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.Router.router.Router.load_modules.rst.txt b/docs/build/html/_sources/generated/Processor.App.Router.router.Router.load_modules.rst.txt
deleted file mode 100644
index 59d81478..00000000
--- a/docs/build/html/_sources/generated/Processor.App.Router.router.Router.load_modules.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.Router.router.Router.load\_modules
-================================================
-
-.. currentmodule:: Processor.App.Router.router
-
-.. automethod:: Router.load_modules
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.Router.router.Router.register_route.rst.txt b/docs/build/html/_sources/generated/Processor.App.Router.router.Router.register_route.rst.txt
deleted file mode 100644
index 20952e26..00000000
--- a/docs/build/html/_sources/generated/Processor.App.Router.router.Router.register_route.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.Router.router.Router.register\_route
-==================================================
-
-.. currentmodule:: Processor.App.Router.router
-
-.. automethod:: Router.register_route
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.Router.router.Router.register_routes.rst.txt b/docs/build/html/_sources/generated/Processor.App.Router.router.Router.register_routes.rst.txt
deleted file mode 100644
index ce3eb617..00000000
--- a/docs/build/html/_sources/generated/Processor.App.Router.router.Router.register_routes.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.Router.router.Router.register\_routes
-===================================================
-
-.. currentmodule:: Processor.App.Router.router
-
-.. automethod:: Router.register_routes
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.Router.router.Router.route.rst.txt b/docs/build/html/_sources/generated/Processor.App.Router.router.Router.route.rst.txt
deleted file mode 100644
index 012c88e8..00000000
--- a/docs/build/html/_sources/generated/Processor.App.Router.router.Router.route.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.Router.router.Router.route
-========================================
-
-.. currentmodule:: Processor.App.Router.router
-
-.. automethod:: Router.route
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.Router.router.Router.rst.txt b/docs/build/html/_sources/generated/Processor.App.Router.router.Router.rst.txt
deleted file mode 100644
index 3425cf67..00000000
--- a/docs/build/html/_sources/generated/Processor.App.Router.router.Router.rst.txt
+++ /dev/null
@@ -1,29 +0,0 @@
-Processor.App.Router.router.Router
-==================================
-
-.. currentmodule:: Processor.App.Router.router
-
-.. autoclass:: Router
-
-
- .. automethod:: __init__
-
-
- .. rubric:: Methods
-
- .. autosummary::
- :toctree:
-
-
- ~Router.__init__
- ~Router.load_module
- ~Router.load_modules
- ~Router.register_route
- ~Router.register_routes
- ~Router.route
-
-
-
-
-
-
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.Router.router.rst.txt b/docs/build/html/_sources/generated/Processor.App.Router.router.rst.txt
deleted file mode 100644
index ff8944bf..00000000
--- a/docs/build/html/_sources/generated/Processor.App.Router.router.rst.txt
+++ /dev/null
@@ -1,31 +0,0 @@
-Processor.App.Router.router
-===========================
-
-.. automodule:: Processor.App.Router.router
-
-
-
-
-
-
-
-
-
-
-
- .. rubric:: Classes
-
- .. autosummary::
- :toctree:
-
- Route
- Router
-
-
-
-
-
-
-
-
-
diff --git a/docs/build/html/_sources/generated/Processor.App.Router.rst.txt b/docs/build/html/_sources/generated/Processor.App.Router.rst.txt
deleted file mode 100644
index 794bc0ca..00000000
--- a/docs/build/html/_sources/generated/Processor.App.Router.rst.txt
+++ /dev/null
@@ -1,31 +0,0 @@
-Processor.App.Router
-====================
-
-.. automodule:: Processor.App.Router
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-.. rubric:: Modules
-
-.. autosummary::
- :toctree:
- :recursive:
-
- Processor.App.Router.router
-
diff --git a/docs/build/html/_sources/generated/Processor.App.processor_utils.DomainRecord.__init__.rst.txt b/docs/build/html/_sources/generated/Processor.App.processor_utils.DomainRecord.__init__.rst.txt
deleted file mode 100644
index 9ec73e9d..00000000
--- a/docs/build/html/_sources/generated/Processor.App.processor_utils.DomainRecord.__init__.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.processor\_utils.DomainRecord.\_\_init\_\_
-========================================================
-
-.. currentmodule:: Processor.App.processor_utils
-
-.. automethod:: DomainRecord.__init__
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.processor_utils.DomainRecord.rst.txt b/docs/build/html/_sources/generated/Processor.App.processor_utils.DomainRecord.rst.txt
deleted file mode 100644
index 90d82a77..00000000
--- a/docs/build/html/_sources/generated/Processor.App.processor_utils.DomainRecord.rst.txt
+++ /dev/null
@@ -1,36 +0,0 @@
-Processor.App.processor\_utils.DomainRecord
-===========================================
-
-.. currentmodule:: Processor.App.processor_utils
-
-.. autoclass:: DomainRecord
-
-
- .. automethod:: __init__
-
-
- .. rubric:: Methods
-
- .. autosummary::
- :toctree:
-
-
- ~DomainRecord.__init__
-
-
-
-
-
- .. rubric:: Attributes
-
- .. autosummary::
-
- ~DomainRecord.digest
- ~DomainRecord.encoding
- ~DomainRecord.timestamp
- ~DomainRecord.filename
- ~DomainRecord.url
- ~DomainRecord.offset
- ~DomainRecord.length
-
-
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.processor_utils.PipeMetadata.__init__.rst.txt b/docs/build/html/_sources/generated/Processor.App.processor_utils.PipeMetadata.__init__.rst.txt
deleted file mode 100644
index 24a6df00..00000000
--- a/docs/build/html/_sources/generated/Processor.App.processor_utils.PipeMetadata.__init__.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.App.processor\_utils.PipeMetadata.\_\_init\_\_
-========================================================
-
-.. currentmodule:: Processor.App.processor_utils
-
-.. automethod:: PipeMetadata.__init__
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.processor_utils.PipeMetadata.rst.txt b/docs/build/html/_sources/generated/Processor.App.processor_utils.PipeMetadata.rst.txt
deleted file mode 100644
index 3b77b49f..00000000
--- a/docs/build/html/_sources/generated/Processor.App.processor_utils.PipeMetadata.rst.txt
+++ /dev/null
@@ -1,35 +0,0 @@
-Processor.App.processor\_utils.PipeMetadata
-===========================================
-
-.. currentmodule:: Processor.App.processor_utils
-
-.. autoclass:: PipeMetadata
-
-
- .. automethod:: __init__
-
-
- .. rubric:: Methods
-
- .. autosummary::
- :toctree:
-
-
- ~PipeMetadata.__init__
-
-
-
-
-
- .. rubric:: Attributes
-
- .. autosummary::
-
- ~PipeMetadata.encoding
- ~PipeMetadata.name
- ~PipeMetadata.domain_record
- ~PipeMetadata.article_data
- ~PipeMetadata.warc_header
- ~PipeMetadata.http_header
-
-
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.App.processor_utils.rst.txt b/docs/build/html/_sources/generated/Processor.App.processor_utils.rst.txt
deleted file mode 100644
index a93397d5..00000000
--- a/docs/build/html/_sources/generated/Processor.App.processor_utils.rst.txt
+++ /dev/null
@@ -1,31 +0,0 @@
-Processor.App.processor\_utils
-==============================
-
-.. automodule:: Processor.App.processor_utils
-
-
-
-
-
-
-
-
-
-
-
- .. rubric:: Classes
-
- .. autosummary::
- :toctree:
-
- DomainRecord
- PipeMetadata
-
-
-
-
-
-
-
-
-
diff --git a/docs/build/html/_sources/generated/Processor.App.rst.txt b/docs/build/html/_sources/generated/Processor.App.rst.txt
deleted file mode 100644
index fb937e59..00000000
--- a/docs/build/html/_sources/generated/Processor.App.rst.txt
+++ /dev/null
@@ -1,37 +0,0 @@
-Processor.App
-=============
-
-.. automodule:: Processor.App
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-.. rubric:: Modules
-
-.. autosummary::
- :toctree:
- :recursive:
-
- Processor.App.Downloader
- Processor.App.Extractor
- Processor.App.OutStreamer
- Processor.App.Pipeline
- Processor.App.Router
- Processor.App.processor_utils
- Processor.App.ArticleUtils
-
diff --git a/docs/build/html/_sources/generated/Processor.process_article.rst.txt b/docs/build/html/_sources/generated/Processor.process_article.rst.txt
deleted file mode 100644
index a11fa785..00000000
--- a/docs/build/html/_sources/generated/Processor.process_article.rst.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-Processor.process\_article
-==========================
-
-.. automodule:: Processor.process_article
-
-
-
-
-
-
-
- .. rubric:: Functions
-
- .. autosummary::
-
- article_process
- main
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/docs/build/html/_sources/generated/Processor.processor.Listener.__init__.rst.txt b/docs/build/html/_sources/generated/Processor.processor.Listener.__init__.rst.txt
deleted file mode 100644
index 260c0116..00000000
--- a/docs/build/html/_sources/generated/Processor.processor.Listener.__init__.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.processor.Listener.\_\_init\_\_
-=========================================
-
-.. currentmodule:: Processor.processor
-
-.. automethod:: Listener.__init__
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.processor.Listener.on_before_message.rst.txt b/docs/build/html/_sources/generated/Processor.processor.Listener.on_before_message.rst.txt
deleted file mode 100644
index c8886fcc..00000000
--- a/docs/build/html/_sources/generated/Processor.processor.Listener.on_before_message.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.processor.Listener.on\_before\_message
-================================================
-
-.. currentmodule:: Processor.processor
-
-.. automethod:: Listener.on_before_message
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.processor.Listener.on_connected.rst.txt b/docs/build/html/_sources/generated/Processor.processor.Listener.on_connected.rst.txt
deleted file mode 100644
index 103399f6..00000000
--- a/docs/build/html/_sources/generated/Processor.processor.Listener.on_connected.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.processor.Listener.on\_connected
-==========================================
-
-.. currentmodule:: Processor.processor
-
-.. automethod:: Listener.on_connected
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.processor.Listener.on_connecting.rst.txt b/docs/build/html/_sources/generated/Processor.processor.Listener.on_connecting.rst.txt
deleted file mode 100644
index d67c467e..00000000
--- a/docs/build/html/_sources/generated/Processor.processor.Listener.on_connecting.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.processor.Listener.on\_connecting
-===========================================
-
-.. currentmodule:: Processor.processor
-
-.. automethod:: Listener.on_connecting
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.processor.Listener.on_disconnected.rst.txt b/docs/build/html/_sources/generated/Processor.processor.Listener.on_disconnected.rst.txt
deleted file mode 100644
index 8a4de99f..00000000
--- a/docs/build/html/_sources/generated/Processor.processor.Listener.on_disconnected.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.processor.Listener.on\_disconnected
-=============================================
-
-.. currentmodule:: Processor.processor
-
-.. automethod:: Listener.on_disconnected
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.processor.Listener.on_disconnecting.rst.txt b/docs/build/html/_sources/generated/Processor.processor.Listener.on_disconnecting.rst.txt
deleted file mode 100644
index 732d2212..00000000
--- a/docs/build/html/_sources/generated/Processor.processor.Listener.on_disconnecting.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.processor.Listener.on\_disconnecting
-==============================================
-
-.. currentmodule:: Processor.processor
-
-.. automethod:: Listener.on_disconnecting
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.processor.Listener.on_error.rst.txt b/docs/build/html/_sources/generated/Processor.processor.Listener.on_error.rst.txt
deleted file mode 100644
index 878082af..00000000
--- a/docs/build/html/_sources/generated/Processor.processor.Listener.on_error.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.processor.Listener.on\_error
-======================================
-
-.. currentmodule:: Processor.processor
-
-.. automethod:: Listener.on_error
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.processor.Listener.on_heartbeat.rst.txt b/docs/build/html/_sources/generated/Processor.processor.Listener.on_heartbeat.rst.txt
deleted file mode 100644
index a3b6d6c3..00000000
--- a/docs/build/html/_sources/generated/Processor.processor.Listener.on_heartbeat.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.processor.Listener.on\_heartbeat
-==========================================
-
-.. currentmodule:: Processor.processor
-
-.. automethod:: Listener.on_heartbeat
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.processor.Listener.on_heartbeat_timeout.rst.txt b/docs/build/html/_sources/generated/Processor.processor.Listener.on_heartbeat_timeout.rst.txt
deleted file mode 100644
index d5cbc54e..00000000
--- a/docs/build/html/_sources/generated/Processor.processor.Listener.on_heartbeat_timeout.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.processor.Listener.on\_heartbeat\_timeout
-===================================================
-
-.. currentmodule:: Processor.processor
-
-.. automethod:: Listener.on_heartbeat_timeout
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.processor.Listener.on_message.rst.txt b/docs/build/html/_sources/generated/Processor.processor.Listener.on_message.rst.txt
deleted file mode 100644
index 5132b722..00000000
--- a/docs/build/html/_sources/generated/Processor.processor.Listener.on_message.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.processor.Listener.on\_message
-========================================
-
-.. currentmodule:: Processor.processor
-
-.. automethod:: Listener.on_message
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.processor.Listener.on_receipt.rst.txt b/docs/build/html/_sources/generated/Processor.processor.Listener.on_receipt.rst.txt
deleted file mode 100644
index 4d19d384..00000000
--- a/docs/build/html/_sources/generated/Processor.processor.Listener.on_receipt.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.processor.Listener.on\_receipt
-========================================
-
-.. currentmodule:: Processor.processor
-
-.. automethod:: Listener.on_receipt
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.processor.Listener.on_receiver_loop_completed.rst.txt b/docs/build/html/_sources/generated/Processor.processor.Listener.on_receiver_loop_completed.rst.txt
deleted file mode 100644
index 54925ad9..00000000
--- a/docs/build/html/_sources/generated/Processor.processor.Listener.on_receiver_loop_completed.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.processor.Listener.on\_receiver\_loop\_completed
-==========================================================
-
-.. currentmodule:: Processor.processor
-
-.. automethod:: Listener.on_receiver_loop_completed
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.processor.Listener.on_send.rst.txt b/docs/build/html/_sources/generated/Processor.processor.Listener.on_send.rst.txt
deleted file mode 100644
index 33268c0b..00000000
--- a/docs/build/html/_sources/generated/Processor.processor.Listener.on_send.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.processor.Listener.on\_send
-=====================================
-
-.. currentmodule:: Processor.processor
-
-.. automethod:: Listener.on_send
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.processor.Listener.rst.txt b/docs/build/html/_sources/generated/Processor.processor.Listener.rst.txt
deleted file mode 100644
index 78923da9..00000000
--- a/docs/build/html/_sources/generated/Processor.processor.Listener.rst.txt
+++ /dev/null
@@ -1,36 +0,0 @@
-Processor.processor.Listener
-============================
-
-.. currentmodule:: Processor.processor
-
-.. autoclass:: Listener
-
-
- .. automethod:: __init__
-
-
- .. rubric:: Methods
-
- .. autosummary::
- :toctree:
-
-
- ~Listener.__init__
- ~Listener.on_before_message
- ~Listener.on_connected
- ~Listener.on_connecting
- ~Listener.on_disconnected
- ~Listener.on_disconnecting
- ~Listener.on_error
- ~Listener.on_heartbeat
- ~Listener.on_heartbeat_timeout
- ~Listener.on_message
- ~Listener.on_receipt
- ~Listener.on_receiver_loop_completed
- ~Listener.on_send
-
-
-
-
-
-
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.processor.ListnerStats.__init__.rst.txt b/docs/build/html/_sources/generated/Processor.processor.ListnerStats.__init__.rst.txt
deleted file mode 100644
index 3db79398..00000000
--- a/docs/build/html/_sources/generated/Processor.processor.ListnerStats.__init__.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.processor.ListnerStats.\_\_init\_\_
-=============================================
-
-.. currentmodule:: Processor.processor
-
-.. automethod:: ListnerStats.__init__
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.processor.ListnerStats.rst.txt b/docs/build/html/_sources/generated/Processor.processor.ListnerStats.rst.txt
deleted file mode 100644
index 415cdbd2..00000000
--- a/docs/build/html/_sources/generated/Processor.processor.ListnerStats.rst.txt
+++ /dev/null
@@ -1,31 +0,0 @@
-Processor.processor.ListnerStats
-================================
-
-.. currentmodule:: Processor.processor
-
-.. autoclass:: ListnerStats
-
-
- .. automethod:: __init__
-
-
- .. rubric:: Methods
-
- .. autosummary::
- :toctree:
-
-
- ~ListnerStats.__init__
-
-
-
-
-
- .. rubric:: Attributes
-
- .. autosummary::
-
- ~ListnerStats.last_message_time
- ~ListnerStats.messages
-
-
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.processor.Message.__init__.rst.txt b/docs/build/html/_sources/generated/Processor.processor.Message.__init__.rst.txt
deleted file mode 100644
index 8ae337eb..00000000
--- a/docs/build/html/_sources/generated/Processor.processor.Message.__init__.rst.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Processor.processor.Message.\_\_init\_\_
-========================================
-
-.. currentmodule:: Processor.processor
-
-.. automethod:: Message.__init__
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.processor.Message.rst.txt b/docs/build/html/_sources/generated/Processor.processor.Message.rst.txt
deleted file mode 100644
index 33a1bd62..00000000
--- a/docs/build/html/_sources/generated/Processor.processor.Message.rst.txt
+++ /dev/null
@@ -1,31 +0,0 @@
-Processor.processor.Message
-===========================
-
-.. currentmodule:: Processor.processor
-
-.. autoclass:: Message
-
-
- .. automethod:: __init__
-
-
- .. rubric:: Methods
-
- .. autosummary::
- :toctree:
-
-
- ~Message.__init__
-
-
-
-
-
- .. rubric:: Attributes
-
- .. autosummary::
-
- ~Message.dr
- ~Message.headers
-
-
\ No newline at end of file
diff --git a/docs/build/html/_sources/generated/Processor.processor.rst.txt b/docs/build/html/_sources/generated/Processor.processor.rst.txt
deleted file mode 100644
index 39334ada..00000000
--- a/docs/build/html/_sources/generated/Processor.processor.rst.txt
+++ /dev/null
@@ -1,42 +0,0 @@
-Processor.processor
-===================
-
-.. automodule:: Processor.processor
-
-
-
-
-
-
-
- .. rubric:: Functions
-
- .. autosummary::
-
- call_pipeline_with_ack
- get_hostname_output_path
- init_connection
- init_pipeline
- processor
-
-
-
-
-
- .. rubric:: Classes
-
- .. autosummary::
- :toctree:
-
- Listener
- ListnerStats
- Message
-
-
-
-
-
-
-
-
-
diff --git a/docs/build/html/_sources/generated/Processor.rst.txt b/docs/build/html/_sources/generated/Processor.rst.txt
deleted file mode 100644
index e7964123..00000000
--- a/docs/build/html/_sources/generated/Processor.rst.txt
+++ /dev/null
@@ -1,33 +0,0 @@
-Processor
-=========
-
-.. automodule:: Processor
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-.. rubric:: Modules
-
-.. autosummary::
- :toctree:
- :recursive:
-
- Processor.App
- Processor.process_article
- Processor.processor
-
diff --git a/docs/build/html/_sources/index.rst.txt b/docs/build/html/_sources/index.rst.txt
index ec503ff9..ff1fa92a 100644
--- a/docs/build/html/_sources/index.rst.txt
+++ b/docs/build/html/_sources/index.rst.txt
@@ -10,15 +10,15 @@ Welcome to CommonCrawl Extractor's documentation!
:maxdepth: 3
:caption: Contents:
- installation
- quickstart/index
+ usage
+ cli/index
+ extraction/index
+ prog_guide/index
+ misc/index
api
-
-
-
Indices and tables
==================
diff --git a/docs/build/html/_sources/installation.rst.txt b/docs/build/html/_sources/installation.rst.txt
deleted file mode 100644
index b97c2f7e..00000000
--- a/docs/build/html/_sources/installation.rst.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-Installation
-============
-
-
-The project was developed using Python 3.10. It's not recommended to use oldered versions of Python as dataclass and type hints are used.
-To install packages required to run the project you can use either pip or conda.
-
-1. Install packages using pip: `$ pip install -r requirements.txt`
-
-2. Install packages using conda: `$ conda env create -f environment.yml` -> `$ conda activate extractor`
-
-======
-Docker
-======
-While it's possible to run the project without Docker it's recommended to use it as it will make the process of running the project much easier.
-Please install docker and docker-compose before running the project.
-
diff --git a/docs/build/html/_sources/quickstart/artemis-queue.rst.txt b/docs/build/html/_sources/quickstart/artemis-queue.rst.txt
deleted file mode 100644
index b3b266c5..00000000
--- a/docs/build/html/_sources/quickstart/artemis-queue.rst.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-Artemis Queue
-=============
-
-`Artemis tags(Usually you want to be more precise with this be we assume this for simplicity).
-
-.. warning:: Always make sure that the tags you found are unique. Cross-check this with other articles fetched.
-
-============================
-Extracting (Transformations)
-============================
-
-
-With this information we can write the extractor.
-
-.. code-block:: python
-
-
- from Processor.App.ArticleUtils.article_utils import aritcle_content_transform, headline_transform, get_text_transform
- REQUIRED_FIELDS = {
- "title": False,
- "content": True
- }
-
- def content_transform(soup):
- return [p.text for p in soup.find_all("p", recursive=True)]
-
-
- def __init__(self):
- super().__init__(
- article_css_dict={
- "title": "h1#content",
- "content": "main[role=main]",
- },
- # Here we define how to transform the content of the tag into a string.
- article_extract_dict= {
- "title": [get_text_transform, headline_transform],
- "content": [content_transform, text_unifications_transform, lambda lines : "\n".join(lines)]
- },
-
-
- # Here we define how to bind a tag that containt all fields we will use in article_css_dict
- # If you don't know just use body
- article_css_selector="body",
- required_fields=REQUIRED_FIELDS,
- non_empty = True
- )
-
-
-`REQUIRED_FIELDS` is a dictionary that defines which fields must be extracted (Must be contained in resulting dictionary).
-This is useful if you write multiple extractors and you want to make sure that all of them contain the same fields.
-As you can see we have set the title to False this means that it's value can be None. We have set article to False which
-means that is must not be None value. Because we have set non_empty to True the title also cannot be empty string or empty list.
-
-`article_css_dict` define where to find the title and content.
-`article_extract_dict` defines how to extract the title and content from the tag.
-We have used some predfedined function from :py:mod:`Processor.App.ArticleUtils.article_utils` to help us with this.
-Please look to the :py:mod:`Processor.App.ArticleUtils.article_utils` to check what exactly the transformations do! Should be clear from the code.
-For content we created our transform which returns a list of text in p.
-
-
-`article_css_selector` simply defines where to start looking for the tags defined in `article_css_dict`.
-
-.. note:: `header_css_dict` and `header_extract_dict` can also be set in constructor for extracting from html CommonCrawl Extractor 1.0 documentation
+ CmonCrawl 0.9.3 documentation