Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/_nerqnacjmdy3obvevyol7qhazkwkv57dwqvye5v46k5bcujtfa6sduad'
Browse files Browse the repository at this point in the history
  • Loading branch information
msmannan00 committed Feb 8, 2025
2 parents c35a6ed + e81e1cd commit 7b8b498
Showing 1 changed file with 13 additions and 44 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,80 +16,42 @@ class _nerqnacjmdy3obvevyol7qhazkwkv57dwqvye5v46k5bcujtfa6sduad(leak_extractor_i
_instance = None

def __init__(self):
    """
    Initialize the singleton extractor instance.

    Guards on the ``_initialized`` flag set by ``__new__`` so that
    re-instantiating the singleton does not wipe previously collected
    card data or build a second ``redis_controller``.  (Without the
    guard, ``__init__`` runs on every ``ClassName()`` call even though
    ``__new__`` always returns the same cached object.)
    """
    if self._initialized:
        return
    self._card_data = []                      # accumulated card_extraction_model results
    self.soup = None                          # last parsed BeautifulSoup document, if any
    self._redis_instance = redis_controller() # shared Redis access for invoke_db
    self._initialized = True

def __new__(cls):
    """
    Return the process-wide singleton instance of this extractor.

    The first call allocates the object and marks it uninitialized;
    every subsequent call hands back the same cached instance.
    """
    if cls._instance is None:
        instance = super().__new__(cls)
        instance._initialized = False
        cls._instance = instance
    return cls._instance

@property
def seed_url(self) -> str:
    """
    Seed URL — the starting page for the extraction crawl.

    Fix: the block contained two consecutive ``return`` statements
    (merge residue); the second was unreachable dead code.  Kept the
    current (trailing-slash-free) form introduced by this commit.
    """
    return "http://nerqnacjmdy3obvevyol7qhazkwkv57dwqvye5v46k5bcujtfa6sduad.onion"

@property
def base_url(self) -> str:
    """
    Base URL used to resolve relative links during parsing.

    Fix: the block contained two consecutive ``return`` statements
    (merge residue); the second was unreachable dead code.  Kept the
    current (trailing-slash-free) form introduced by this commit.
    NOTE(review): without the trailing slash, naive string-concatenation
    of relative paths would need a separator — confirm against callers.
    """
    return "http://nerqnacjmdy3obvevyol7qhazkwkv57dwqvye5v46k5bcujtfa6sduad.onion"

@property
def rule_config(self) -> RuleModel:
    """
    Fetching rules for this source: route traffic through the TOR
    proxy and drive pages with Selenium.
    """
    return RuleModel(
        m_fetch_proxy=FetchProxy.TOR,
        m_fetch_config=FetchConfig.SELENIUM,
    )

@property
def card_data(self) -> List[card_extraction_model]:
    """
    All card models collected so far by the leak-extraction run.

    Returns the live internal list (not a copy); it grows as
    ``parse_leak_data`` appends new entries.
    """
    return self._card_data

def invoke_db(self, command: REDIS_COMMANDS, key: CUSTOM_SCRIPT_REDIS_KEYS, default_value):
    """
    Execute a Redis command against a key namespaced by this class.

    Args:
        command (REDIS_COMMANDS): Redis command to execute (e.g. GET, SET).
        key (CUSTOM_SCRIPT_REDIS_KEYS): base key; the class name is appended
            so each extractor subclass gets its own namespace.
        default_value: fallback value passed through to the trigger when
            the key is absent.

    Returns:
        Whatever ``redis_controller.invoke_trigger`` returns for the command.
        (Fix: the previous ``-> None`` annotation and "Returns: None"
        docstring were wrong — the result is propagated to the caller.)
    """
    namespaced_key = key.value + self.__class__.__name__
    return self._redis_instance.invoke_trigger(command, [namespaced_key, default_value])

def contact_page(self) -> str:
    """
    Return the contact point for this data source.

    NOTE: despite the method name, the value is a contact *email
    address* (a mailto target), not a page URL.
    """
    return "[email protected]"

def parse_leak_data(self, page: Page):
visited_pages = set()
visited_cards = set()

while True:
current_url = page.url
Expand All @@ -102,6 +64,12 @@ def parse_leak_data(self, page: Page):
card_links = page.locator('.card').all()

for card in card_links:
card_text = card.inner_text()

if card_text in visited_cards:
continue

visited_cards.add(card_text)
card.click()

page.wait_for_selector('.text-block', timeout=5000)
Expand All @@ -110,9 +78,7 @@ def parse_leak_data(self, page: Page):
detail_soup = BeautifulSoup(detail_html, 'html.parser')

title = detail_soup.select_one('.title').text.strip() if detail_soup.select_one('.title') else "N/A"

content = detail_soup.select_one('.desc').text.strip() if detail_soup.select_one('.desc') else "N/A"

website_elem = detail_soup.select_one('.desc a')
website = website_elem['href'].strip() if website_elem else "N/A"

Expand Down Expand Up @@ -144,6 +110,8 @@ def parse_leak_data(self, page: Page):

date_time = detail_soup.select_one('.date').text.strip() if detail_soup.select_one('.date') else "N/A"

dumplinks = [a['href'].strip() for a in detail_soup.find_all('a', href=True) if ".onion" in a['href']]

self._card_data.append(card_extraction_model(
m_title=title,
m_content=content,
Expand All @@ -158,7 +126,7 @@ def parse_leak_data(self, page: Page):
m_company_name=title,
m_network=helper_method.get_network_type(self.base_url).value,
m_important_content=content,
m_dumplink=[],
m_dumplink=dumplinks,
m_email_addresses=helper_method.extract_emails(detail_soup.text),
m_industry=industry,
m_content_type="Leak",
Expand All @@ -174,3 +142,4 @@ def parse_leak_data(self, page: Page):
page.wait_for_selector('.card', timeout=5000)
else:
break

0 comments on commit 7b8b498

Please sign in to comment.