Skip to content

Commit

Permalink
Update _darkfeed.py
Browse files — browse the repository at this point in the history
  • Loading branch information
alphadeveloper12 committed Jan 20, 2025
1 parent ee45561 commit 009f139
Showing 1 changed file with 7 additions and 1 deletion.
8 changes (7 additions & 1 deletion) in shared_collector/scripts/_darkfeed.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -33,8 +33,14 @@ def parse_leak_data(self, html_content: str, p_data_url: str) -> Tuple[leak_data
data_model = leak_data_model(cards_data=[], contact_link=self.contact_page(), base_url=self.base_url, content_type=["leak"])

today_date = datetime.today().strftime('%Y-%m-%d')
allowed_tags = { "tag-data-breach", "tag-ransomware-intelligence"}

for article in self.soup.find_all("article", class_="elementor-post"):
classes = set(article.get("class", [])) # Convert class list to set for easy comparison

if not allowed_tags.intersection(classes):
continue

title_link = article.find("h3", class_="elementor-post__title").find("a")
url = title_link['href'] if title_link else None
title = title_link.get_text(strip=True) if title_link else None
Expand All @@ -48,7 +54,7 @@ def parse_leak_data(self, html_content: str, p_data_url: str) -> Tuple[leak_data
if url and title and posted_date:
content_message = f"{title}, To visit or explore more visit the website: {url}"

card = card_extraction_model(m_title=title, m_url=url, m_base_url=self.base_url, m_content=content_message, m_content_type="leak", m_logo_or_images=[image_url] if image_url else [], m_last_updated=today_date)
card = card_extraction_model(m_title=title, m_url=url, m_base_url=self.base_url, m_content=content_message, m_content_type="leak", m_logo_or_images=[image_url] if image_url else [], m_last_updated=today_date, )
self.extracted_data.append(card)
data_model.cards_data.append(card)

Expand Down

0 comments on commit 009f139

Please sign in to comment.