Skip to content

Commit

Permalink
Update _in_the_wild.py
Browse files · Browse the repository at this point in the history
  • Loading branch information
MHassanAr committed Feb 18, 2025
1 parent c1597d0 commit 449cc66
Showing 1 changed file with 9 additions and 16 deletions.
25 changes: 9 additions & 16 deletions shared_collector/scripts/_in_the_wild.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,8 @@ def parse_leak_data(self, page: Page):
processed_urls.add(vuln_url)

page.wait_for_timeout(200)

with page.expect_navigation(wait_until="domcontentloaded"):
vuln_link.click()

page.wait_for_timeout(200)

reference_element = page.query_selector(
Expand All @@ -114,19 +112,15 @@ def parse_leak_data(self, page: Page):
last_update_date = last_update_element.inner_text().strip() if last_update_element else "Unknown"

report_cards = page.query_selector_all(".css-tbubqa")
website, social_media_profile = None, None

if len(report_cards) > 0:
first_card_link = report_cards[0].query_selector(".chakra-link")
if first_card_link:
website = first_card_link.get_attribute("href")
website = report_cards[0].query_selector(".chakra-link").get_attribute(
"href") if report_cards else None
social_media_profile = None

if len(report_cards) > 1:
second_card_link = report_cards[1].query_selector(".chakra-link")
if second_card_link:
second_card_url = second_card_link.get_attribute("href")
if second_card_url and second_card_url.startswith("https://github.com"):
social_media_profile = second_card_url
second_card_url = second_card_link.get_attribute("href") if second_card_link else None
if second_card_url and "github.com" in second_card_url:

Check failure

Code scanning / CodeQL

Incomplete URL substring sanitization — High

The substring "github.com" may appear at an arbitrary position in the checked URL, so this test can be bypassed (e.g. "https://evil.com/github.com" or "https://github.com.evil.com" both pass). Parse the URL and compare its hostname against "github.com" instead (e.g. with urllib.parse.urlparse).
social_media_profile = second_card_url

page.wait_for_timeout(200)

Expand All @@ -149,7 +143,6 @@ def parse_leak_data(self, page: Page):
)

page.wait_for_timeout(500)

page.go_back()
page.wait_for_load_state("domcontentloaded")
page.wait_for_selector("table tbody tr", timeout=10000)
Expand All @@ -158,9 +151,8 @@ def parse_leak_data(self, page: Page):
print({e})
continue

for _ in range(3):
page.evaluate("window.scrollBy(0, document.body.scrollHeight)")
page.wait_for_timeout(1000)
page.evaluate("window.scrollBy(0, document.body.scrollHeight)")
page.wait_for_timeout(1000)

except Exception as e:
print({e})
Expand All @@ -176,3 +168,4 @@ def parse_leak_data(self, page: Page):




0 comments on commit 449cc66

Please sign in to comment.