Unable to Fetch XHR Response Body with CDP #2731
-
I'm attempting to combine Wire Mode and UC Mode, but they're incompatible. So I'm exploring using CDP to retrieve XHR response bodies in a standard Selenium WebDriver setup, with plans to apply the same approach in SeleniumBase later. I'm also generating a log.txt file with comprehensive information. My goal is to find 'Learn More' in the output, but it's not present in `logs_raw`. Here's the code. @mdmintz
from selenium import webdriver
import json
import time
# Launch Chrome with the "performance" log enabled, load the page, then read
# the log back and decode each entry.
options = webdriver.ChromeOptions()
# Extra arguments for the driver service (presumably verbose output written
# to log.txt — confirm against the ChromeDriver documentation).
service = webdriver.ChromeService(service_args=["--verbose", "--log-path=log.txt"])
url = 'https://www.facebook.com/ads/library/?id=2567767530063004'
# url = 'https://weatherstack.com/' #<--this url works as expected
# Request every "performance" log entry so get_log("performance") below has
# something to return.
options.set_capability(
"goog:loggingPrefs", {"performance": "ALL"}
)
driver = webdriver.Chrome(options=options,service=service)
driver.implicitly_wait(15)
time.sleep(5)
driver.get(url)
# Fixed pause before reading the log (presumably to let the page finish
# issuing its requests).
time.sleep(30)
# extract requests from logs
logs_raw = driver.get_log("performance")
# Each raw entry carries a JSON string under "message"; decode it and keep
# the inner "message" object, which is what log_filter inspects.
logs = [json.loads(lr["message"])["message"] for lr in logs_raw]
def log_filter(log_):
    """Return True when *log_* is a received response with a JSON MIME type.

    Args:
        log_: A decoded performance-log message (a dict with "method" and,
            for network events, "params").

    Returns:
        True only if the event's method is ``Network.responseReceived`` and
        the response's ``mimeType`` contains ``"json"``. Events that lack any
        of the expected keys are treated as non-matching instead of raising
        ``KeyError``.
    """
    # is an actual response
    if log_.get("method") != "Network.responseReceived":
        return False
    # and json
    response = log_.get("params", {}).get("response", {})
    return "json" in response.get("mimeType", "")
from selenium.common.exceptions import WebDriverException

# For every JSON response seen in the performance log, ask the browser for
# the response body through CDP and print it.
for log in filter(log_filter, logs):
    request_id = log["params"]["requestId"]
    resp_url = log["params"]["response"]["url"]
    print(f"Caught {resp_url}")
    try:
        body = driver.execute_cdp_cmd(
            "Network.getResponseBody", {"requestId": request_id}
        )
    except WebDriverException as exc:
        # The browser may no longer hold the body for this request; report
        # it and keep going so one missing body does not abort the loop.
        print(f"No response body available for {resp_url}: {exc.msg}")
        continue
    print(body)
Beta Was this translation helpful? Give feedback.
Replies: 4 comments 11 replies
-
Examples of fetching responses via CDP:
from rich.pretty import pprint
from seleniumbase import Driver
# Example 1: open the page in UC Mode with CDP logging enabled, click the
# element inside the iframe, then pretty-print the "performance" log.
driver = Driver(uc=True, log_cdp=True)
try:
    url = "seleniumbase.io/apps/turnstile"
    driver.uc_open_with_reconnect(url, 2)
    driver.switch_to_frame("iframe")
    driver.uc_click("span.mark")
    driver.sleep(3)
    pprint(driver.get_log("performance"))
finally:
    # Always close the browser, even if a step above raised.
    driver.quit()

from rich.pretty import pprint
from seleniumbase import BaseCase
# NOTE(review): presumably lets this file be run directly with `python`,
# passing the listed flags on to the test runner — confirm against the
# SeleniumBase documentation.
BaseCase.main(__name__, __file__, "--uc", "--uc-cdp", "-s")
class CDPTests(BaseCase):
    """Example test that prints CDP network events while using the Turnstile demo page."""

    def add_cdp_listener(self):
        """Register a listener that pretty-prints ``Network.requestWillBeSentExtraInfo`` events."""
        # (To print everything, use "*". Otherwise select specific headers.)
        # self.driver.add_cdp_listener("*", lambda data: print(pformat(data)))
        self.driver.add_cdp_listener(
            "Network.requestWillBeSentExtraInfo",
            lambda data: pprint(data)
        )

    def click_turnstile_and_verify(sb):
        """Click the checkbox inside the iframe and assert the success image appears.

        ``sb`` is the test instance (this method is called as
        ``self.click_turnstile_and_verify()``).
        """
        sb.switch_to_frame("iframe")
        sb.driver.uc_click("span.mark")
        sb.assert_element("img#captcha-success", timeout=3)
        sb.highlight("img#captcha-success", loops=8)

    def test_display_cdp_events(self):
        """Open the demo page, attach the CDP listener, click through, then refresh."""
        # Switch to a driver with UC Mode and CDP events enabled when the
        # test was not already started that way.
        if not (self.undetectable and self.uc_cdp_events):
            self.get_new_driver(undetectable=True, uc_cdp_events=True)
        url = "seleniumbase.io/apps/turnstile"
        self.driver.uc_open_with_reconnect(url, 2)
        self.add_cdp_listener()
        self.click_turnstile_and_verify()
        self.sleep(1)
        self.refresh()
        self.sleep(0.5)

# If you don't need UC Mode, you can use Wire Mode: #2145
Beta Was this translation helpful? Give feedback.
-
I have already tried both approaches with SeleniumBase, but I am not getting any output.
Beta Was this translation helpful? Give feedback.
-
from rich.pretty import pprint
from seleniumbase import Driver
import time

url = "https://www.facebook.com/ads/library/?id=2567767530063004"

# Open the page in UC Mode with CDP logging enabled, reload it, wait, then
# dump the "performance" log to the console and to Adlog.txt.
driver = Driver(uc=True, log_cdp=True)
try:
    # url = "weatherstack.com"
    driver.uc_open_with_reconnect(url, 2)
    driver.refresh()
    time.sleep(10)
    log = driver.get_log("performance")
    pprint(log)
    # Explicit UTF-8 so non-ASCII text in the log cannot raise
    # UnicodeEncodeError on platforms whose default encoding is not UTF-8.
    with open('Adlog.txt', 'w', encoding='utf-8') as f:
        f.write(str(log))
finally:
    # Always close the browser, even if a step above raised.
    driver.quit()

# I am trying to scrape the ad data from the URL, but the string 'Learn more'
# is missing from Adlog.txt. Most probably the data is logged as bytes, since
# intercepting it with plain selenium-wire yields bytes that have to be
# converted to a string. The 'content-encoding' used is either 'br' or, most
# likely, 'zstd'.
from seleniumwire.utils import decode
body = decode(byte_data, 'zstd')
I can't decode the byte data to a string. Any help is appreciated.
Beta Was this translation helpful? Give feedback.
-
@AnirbanPatragithub did you find a way to get the response body? I can't figure out how to do it.
Beta Was this translation helpful? Give feedback.
Examples of fetching responses via CDP: