Merge pull request #18 from 4ARMED/seleniumwire
Seleniumwire
marcwickenden authored Oct 17, 2024
2 parents b66c91a + c1c9629 commit be240d1
Showing 4 changed files with 23 additions and 11 deletions.
pyproject.toml (3 changes: 2 additions & 1 deletion)
@@ -11,9 +11,10 @@ urls = {Homepage = "https://github.com/4armed/sri-check"}
 requires-python = ">=3.6"
 dependencies = [
     "beautifulsoup4>=4.0",
+    "blinker==1.7.0", # Required for Seleniumwire
     "lxml>=4.8",
     "requests>=2.0",
-    "selenium>=4.10",
+    "selenium>=4.25",
 ]
 
 [project.readme]
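A note on the blinker pin: selenium-wire 5.x imports private blinker internals that were removed in blinker 1.8, so with an unpinned blinker the seleniumwire import fails at startup. A minimal guard illustrating the failure mode (a sketch for context, not part of this commit):

    # Illustrative only: why pyproject.toml pins blinker==1.7.0.
    try:
        from seleniumwire import webdriver  # noqa: F401
    except ImportError as exc:
        # With blinker >= 1.8 this typically reports a missing 'blinker._saferef'.
        raise SystemExit(f"selenium-wire import failed; check the blinker pin: {exc}")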
requirements.txt (2 changes: 1 addition & 1 deletion)
@@ -1,4 +1,4 @@
 beautifulsoup4>=4.0
 lxml>=4.8
 requests>=2.0
-selenium>=4.10
+selenium>=4.25
sricheck/__init__.py (2 changes: 1 addition & 1 deletion)
@@ -1 +1 @@
-__version__ = "1.10.0"
+__version__ = "1.12.11"
sricheck/sricheck.py (27 changes: 19 additions & 8 deletions)
@@ -3,6 +3,7 @@
 import argparse
 import base64
 import hashlib
+import os
 import re
 import sys
 import requests
@@ -91,10 +92,9 @@ def is_allowlisted(self, netloc):
 
     def get_html(self):
         if self.browser:
-            from selenium import webdriver
-            from selenium.webdriver.chrome.options import Options
+            from seleniumwire import webdriver
 
-            chrome_options = Options()
+            chrome_options = webdriver.ChromeOptions()
             chrome_options.add_argument("--headless")
             chrome_options.add_argument("--no-sandbox")
             chrome_options.add_argument("--disable-dev-shm-usage")
@@ -104,20 +104,31 @@ def get_html(self):
                 }
             }
 
-            browser = webdriver.Chrome(options=chrome_options)
+            browser = webdriver.Chrome(
+                options=chrome_options,
+                seleniumwire_options={
+                    'proxy': {
+                        'http': os.environ.get("http_proxy"),
+                        'https': os.environ.get("https_proxy"),
+                    }
+                }
+            )
 
             def interceptor(request):
-                request.headers.update(self.headers)
+                for key, value in self.headers.items():
+                    del request.headers[key]
+                    request.headers[key] = value
 
             browser.request_interceptor = interceptor
             browser.get(self.url)
-            return browser.execute_script("return document.documentElement.outerHTML;")
+            content = browser.execute_script("return document.documentElement.outerHTML;")
+
+            browser.quit()
+            return content
         else:
             # file deepcode ignore Ssrf: The purpose of the script is to parse remote URLs from the CLI
 
             return requests.get(self.url, headers=self.headers).content
 
 
     def get_remote_resource_tags(self, html):
         soup = BeautifulSoup(html, 'lxml')
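For reference, a minimal standalone sketch of the selenium-wire pattern this commit adopts, assuming selenium-wire 5.x and a local headless Chrome; the URL and header values are illustrative, not taken from sri-check:

    # Sketch of the pattern above (illustrative values, not part of the commit).
    import os

    from seleniumwire import webdriver  # drop-in replacement for selenium's webdriver

    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument("--headless")

    browser = webdriver.Chrome(
        options=chrome_options,
        # selenium-wire captures traffic through its own local proxy; these
        # options chain it to an upstream proxy taken from the environment,
        # which is how the commit makes http_proxy/https_proxy take effect.
        seleniumwire_options={
            "proxy": {
                "http": os.environ.get("http_proxy"),
                "https": os.environ.get("https_proxy"),
            }
        },
    )

    custom_headers = {"User-Agent": "sri-check-example"}  # illustrative headers

    def interceptor(request):
        # selenium-wire header objects allow duplicate keys, and plain
        # assignment appends rather than replacing, so delete before setting.
        for key, value in custom_headers.items():
            del request.headers[key]  # no-op when the header is absent
            request.headers[key] = value

    browser.request_interceptor = interceptor
    try:
        browser.get("https://example.com")  # illustrative URL
        html = browser.execute_script("return document.documentElement.outerHTML;")
    finally:
        browser.quit()  # explicit quit so headless Chrome does not linger

The delete-then-set loop in the interceptor follows selenium-wire's own documentation, which recommends deleting a header before re-adding it so the new value replaces any existing one instead of being appended as a duplicate; that is why the commit drops the earlier request.headers.update(self.headers) call.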
