diff --git a/README.md b/README.md
index c4c2f8b..bdc301a 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# UForAll
+## UForAll
 **UForAll is a fast url crawler this tool crawl all URLs number of different sources**
 ### Sources
@@ -9,61 +9,48 @@
 ## Installation
 ```
-go install -v github.com/tomnomnom/anew@latest
-git clone https://github.com/rix4uni/UForAll.git
-cd UForAll
-pip3 install -r requirements.txt
-chmod +x uforall
-mkdir -p ~/bin
-if ! grep -qxF 'export PATH="$HOME/bin/UForAll:$PATH"' ~/.bashrc ; then echo -e '\nexport PATH="$HOME/bin/UForAll:$PATH"' >> ~/.bashrc ; fi
-cd .. && mv UForAll ~/bin && source ~/.bashrc
+go install github.com/rix4uni/uforall@latest
 ```
-## Setup Api Key `Important` if you not setup api maybe the tool not work properly
-```
-# https://urlscan.io/user/signup (Paid/Free)
-# open urlscan.py add your api keys
-```
-
-## Usage
-Single URL:
+## Download prebuilt binaries
 ```
-uforall -d testphp.vulnweb.com -t 100 | anew
+wget https://github.com/rix4uni/uforall/releases/download/v0.0.2/uforall-linux-amd64-0.0.2.tgz
+tar -xvzf uforall-linux-amd64-0.0.2.tgz
+rm -rf uforall-linux-amd64-0.0.2.tgz
+mv uforall ~/go/bin/uforall
 ```
+Or download a [binary release](https://github.com/rix4uni/uforall/releases) for your platform.
 
-Multiple URLs:
+## Compile from source
 ```
-uforall -l interesting_subs.txt -t 100 | anew
+git clone --depth 1 https://github.com/rix4uni/UForAll.git
+cd UForAll; go install
 ```
+## Usage
+```
+Usage of uforall:
+  -silent
+        silent mode.
+  -t string
+        Comma-separated list of tools to run: 'otx', 'archive', 'urlscan', 'commoncrawl', or 'all' (default "all")
+  -version
+        Print the version of the tool and exit.
 ```
- __ __ ______ ______ ______ ______ __ __
- /\ \/\ \ /\ ___\ /\ __ \ /\ == \ /\ __ \ /\ \ /\ \
- \ \ \_\ \ \ \ __\ \ \ \/\ \ \ \ __< \ \ __ \ \ \ \____ \ \ \____
- \ \_____\ \ \_\ \ \_____\ \ \_\ \_\ \ \_\ \_\ \ \_____\ \ \_____\
- \/_____/ \/_/ \/_____/ \/_/ /_/ \/_/\/_/ \/_____/ \/_____/
- coded by @rix4uni in INDIA
-OPTIONS:
-  -d, --domain Single Target domain (domain.com)
-  -l, --list Multiple Target domain (interesting_subs.txt)
-  -t, --threads number of threads to use (default 50)
-  -h, --help Help - Show this help
-
-USAGE EXAMPLES:
-  uforall -d domain.com -t 100
-  uforall -l interesting_subs.txt -t 100
+## Usage Examples
 
----If you want to use only one service---
 Single URL:
-  echo "testphp.vulnweb.com" | python3 ~/bin/UForAll/archive.py -t 100
-  echo "testphp.vulnweb.com" | python3 ~/bin/UForAll/otx.py
-  echo "testphp.vulnweb.com" | python3 ~/bin/UForAll/urlscan.py -t 100
-  echo "testphp.vulnweb.com" | python3 ~/bin/UForAll/commoncrawl.py -t 100
+```
+echo "testphp.vulnweb.com" | uforall
+```
 
 Multiple URLs:
-  cat interesting_subs.txt | python3 ~/bin/UForAll/archive.py -t 100
-  cat interesting_subs.txt | python3 ~/bin/UForAll/otx.py
-  cat interesting_subs.txt | python3 ~/bin/UForAll/urlscan.py -t 100
-  cat interesting_subs.txt | python3 ~/bin/UForAll/commoncrawl.py -t 100
 ```
+cat subs.txt | uforall
+```
+
+Run specific tools:
+```
+cat subs.txt | uforall -t otx,urlscan
+```
\ No newline at end of file
diff --git a/archive.py b/archive.py
deleted file mode 100644
index bcdb432..0000000
--- a/archive.py
+++ /dev/null
@@ -1,37 +0,0 @@
-import sys
-import requests
-import argparse
-import concurrent.futures
-
-def process_domain(domain):
-    url = f"http://web.archive.org/cdx/search/cdx?url=*.{domain}/*&output=text&fl=original&collapse=urlkey"
-
-    headers = {
-        "Content-Type": "application/json",
-        "User-Agent" : "Mozilla/5.0 (Windows 
NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3" - } - - response = requests.request("GET", url, headers=headers) - - response_lines = response.text.split() - - # process the response lines - for line in response_lines: - print(line) - -def main(): - # parse the command-line arguments - parser = argparse.ArgumentParser() - parser.add_argument("--threads", "-t", type=int, default=50, help="Number of threads to use") - args = parser.parse_args() - - # read input from stdin - domains = [line.strip() for line in sys.stdin.readlines()] - - # process the input domains using a thread pool - with concurrent.futures.ThreadPoolExecutor(max_workers=args.threads) as executor: - futures = [executor.submit(process_domain, domain) for domain in domains] - concurrent.futures.wait(futures) - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/commoncrawl.py b/commoncrawl.py deleted file mode 100644 index eaaf16d..0000000 --- a/commoncrawl.py +++ /dev/null @@ -1,58 +0,0 @@ -import requests -import json -import regex -import sys -import argparse -from concurrent.futures import ThreadPoolExecutor, as_completed - -def get_urls(url, domain): - try: - param = f"?url=*.{domain}&fl=url&output=json&filter=!=status:404" - index_url = url + param - - # Send the request to the index - response = requests.get(index_url) - - # Extract the URLs from the response text - url_pattern = regex.compile(r'"url": "([^"]+)"') - urls = url_pattern.findall(response.text) - - return urls - except requests.exceptions.ConnectionError as e: - print(f"ConnectionError: {e}") - return [] - -if __name__ == "__main__": - # Use argparse to specify the number of threads to use - parser = argparse.ArgumentParser() - parser.add_argument("--threads", "-t", type=int, default=50, help="Number of threads to use") - args = parser.parse_args() - - num_threads = args.threads - - # Read the domain name from stdin - for line in sys.stdin: - domain = line.strip() - - try: - # Perform an HTTP GET request to the URL - response = requests.get("https://index.commoncrawl.org/collinfo.json") - - # Parse the JSON data from the response - data = json.loads(response.text) - - # Create a ThreadPoolExecutor with the specified number of threads - with ThreadPoolExecutor(max_workers=num_threads) as executor: - # Create a list of tasks to submit to the executor - tasks = [] - for item in data: - url = item['cdx-api'] - task = executor.submit(get_urls, url, domain) - tasks.append(task) - - # Iterate over the completed tasks and print the results - for task in as_completed(tasks): - for url in task.result(): - print(url) - except requests.exceptions.RequestException as e: - print(f"RequestException: {e}") diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..59dd4a4 --- /dev/null +++ b/go.mod @@ -0,0 +1,3 @@ +module github.com/rix4uni/UForAll + +go 1.23.0 diff --git a/otx-test/otx.py b/otx-test/otx.py deleted file mode 100644 index c67a124..0000000 --- a/otx-test/otx.py +++ /dev/null @@ -1,46 +0,0 @@ -import sys -import json -import requests -import argparse -from concurrent.futures import ThreadPoolExecutor - -# Parse the command-line arguments -parser = argparse.ArgumentParser() -parser.add_argument('--threads', "-t", type=int, default=50, help='Number of threads to use') -args = parser.parse_args() - -# Read the domain from sys.stdin -domains = [line.strip() for line in sys.stdin.readlines()] - -# Define a function to process a single domain -def process_domain(domain): - site = 
f"https://otx.alienvault.com/api/v1/indicators/domain/{domain}/url_list?limit=500" - - # Set the headers - headers = { - "Content-Type": "application/json", - "User-Agent" : "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3" - } - - # Send the GET request and get the response - response = requests.get(site, headers=headers) - - # Parse the response as JSON - data = response.json() - - # Extract the page and task URLs from the results - otx_urls = [result['url'] for result in data['url_list']] - - # Merge the page and task URLs and sort them - urls = list(set(otx_urls)) - urls.sort() - - # Print the URLs one per line - for url in urls: - print(url) - -# Create a ThreadPoolExecutor with the specified number of threads -with ThreadPoolExecutor(max_workers=args.threads) as executor: - # Submit the tasks to the executor - for domain in domains: - executor.submit(process_domain, domain) diff --git a/otx-test/otx.sh b/otx-test/otx.sh deleted file mode 100644 index 671beb4..0000000 --- a/otx-test/otx.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env bash - -domain=$1 - -total_urls=$(curl -s "https://otx.alienvault.com/api/v1/indicators/domain/$domain/url_list?limit=500&page=1" | jq -r '.full_size') -total_pages=$(expr $total_urls / 500 + 1) - -for ((i=1; i<=total_pages; i++)) -do - curl -s "https://otx.alienvault.com/api/v1/indicators/domain/$domain/url_list?limit=500&page=$i" | jq -r '.url_list[].url' -done \ No newline at end of file diff --git a/otx-test/subs.txt b/otx-test/subs.txt deleted file mode 100644 index 31d3e67..0000000 --- a/otx-test/subs.txt +++ /dev/null @@ -1,2 +0,0 @@ -grab.com -bugcrowd.com diff --git a/otx.py b/otx.py deleted file mode 100644 index 164c260..0000000 --- a/otx.py +++ /dev/null @@ -1,29 +0,0 @@ -import requests -import json -import sys - -def get_urls(domain): - url = f"https://otx.alienvault.com/api/v1/indicators/domain/{domain}/url_list" - response = requests.get(url, params={"limit": 500, "page": 1}) - - try: - response.raise_for_status() - data = response.json() - total_urls = data["full_size"] - total_pages = total_urls // 500 + 1 - - for i in range(1, total_pages + 1): - response = requests.get(url, params={"limit": 500, "page": i}) - response.raise_for_status() - data = response.json() - url_list = data["url_list"] - for url_data in url_list: - print(url_data["url"]) - - except (requests.HTTPError, requests.ConnectionError, json.JSONDecodeError) as e: - pass - -if __name__ == "__main__": - for line in sys.stdin: - domain = line.strip() - get_urls(domain) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 925dea9..0000000 --- a/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -regex==2022.10.31 -requests==2.25.1 diff --git a/uforall b/uforall deleted file mode 100644 index c9f9d64..0000000 --- a/uforall +++ /dev/null @@ -1,83 +0,0 @@ -#!/usr/bin/env bash - -# COLORS -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[0;33m' -BLUE='\033[0;34m' -RESET='\033[0m' - -banner(){ - echo -e "${BLUE} -\t\t __ __ ______ ______ ______ ______ __ __ -\t\t/\ \/\ \ /\ ___\ /\ __ \ /\ == \ /\ __ \ /\ \ /\ \ -\t\t\ \ \_\ \ \ \ __\ \ \ \/\ \ \ \ __< \ \ __ \ \ \ \____ \ \ \____ -\t\t \ \_____\ \ \_\ \ \_____\ \ \_\ \_\ \ \_\ \_\ \ \_____\ \ \_____\ -\t\t \/_____/ \/_/ \/_____/ \/_/ /_/ \/_/\/_/ \/_____/ \/_____/ -\t\t coded by ${YELLOW}@rix4uni${RED} in INDIA${RESET}" -} - -showhelp(){ - banner - echo -e "${GREEN}OPTIONS:${RESET}" - echo -e " -d, --domain 
Single Target domain (domain.com)" - echo -e " -l, --list Multiple Target domain (interesting_subs.txt)" - echo -e " -t, --threads number of threads to use (default 50)" - echo -e " -h, --help Help - Show this help" - echo -e "" - echo -e "${GREEN}USAGE EXAMPLES:${RESET}" - echo -e " uforall -d domain.com -t 100" - echo -e " uforall -l interesting_subs.txt -t 100" - echo -e "" - echo -e "${GREEN}---If you want to use only one service---${RESET}" - echo -e "${GREEN}Single URL:${RESET}" - echo -e ' echo "testphp.vulnweb.com" | python3 ~/bin/UForAll/archive.py -t 100' - echo -e ' echo "testphp.vulnweb.com" | python3 ~/bin/UForAll/otx.py' - echo -e ' echo "testphp.vulnweb.com" | python3 ~/bin/UForAll/urlscan.py -t 100' - echo -e ' echo "testphp.vulnweb.com" | python3 ~/bin/UForAll/commoncrawl.py -t 100' - echo -e "" - echo -e "${GREEN}Multiple URLs:${RESET}" - echo -e " cat interesting_subs.txt | python3 ~/bin/UForAll/archive.py -t 100" - echo -e " cat interesting_subs.txt | python3 ~/bin/UForAll/otx.py" - echo -e " cat interesting_subs.txt | python3 ~/bin/UForAll/urlscan.py -t 100" - echo -e " cat interesting_subs.txt | python3 ~/bin/UForAll/commoncrawl.py -t 100" - echo -e "" -} - -# Run the domain through the various scripts and write the output to the specified file -SINGLE_DOMAIN(){ - echo "$domain" | python3 ~/bin/UForAll/archive.py $threads | anew - echo "$domain" | python3 ~/bin/UForAll/otx.py $threads | anew - echo "$domain" | python3 ~/bin/UForAll/urlscan.py $threads | anew - echo "$domain" | python3 ~/bin/UForAll/commoncrawl.py $threads | anew -} - -# Run the interesting subdomains through the scripts and append the output to the file -MULTIPLE_DOMAIN(){ - cat "$list" | python3 ~/bin/UForAll/archive.py $threads | anew - cat "$list" | python3 ~/bin/UForAll/otx.py $threads | anew - cat "$list" | python3 ~/bin/UForAll/urlscan.py $threads | anew - cat "$list" | python3 ~/bin/UForAll/commoncrawl.py $threads | anew -} - -while [ -n "$1" ]; do - case $1 in - -d|--domain) - domain=$2 - SINGLE_DOMAIN - exit 1 ;; - - -l|--list) - list=$2 - MULTIPLE_DOMAIN - exit 1 ;; - - -t|--threads) - threads=$3 - exit 1 ;; - - -h|--help) - showhelp - exit 1 ;; - esac -done diff --git a/uforall.go b/uforall.go new file mode 100644 index 0000000..6ee1068 --- /dev/null +++ b/uforall.go @@ -0,0 +1,312 @@ +package main + +import ( + "bufio" + "flag" + "encoding/json" + "fmt" + "io/ioutil" + "net/http" + "os" + "regexp" + "strings" + "sort" +) + +// prints the version message +const version = "0.0.2" + +func printVersion() { + fmt.Printf("Current uforall version %s\n", version) +} + +// Prints the Colorful banner +func printBanner() { + banner := ` + ____ __ __ + __ __ / __/____ _____ ____ _ / // / + / / / // /_ / __ \ / ___// __ // // / +/ /_/ // __// /_/ // / / /_/ // // / +\__,_//_/ \____//_/ \__,_//_//_/ +` +fmt.Printf("%s\n%50s\n\n", banner, "Current uforall version "+version) + +} + +func getArchiveUrls(domain string) { + url := fmt.Sprintf("http://web.archive.org/cdx/search/cdx?url=*.%s/*&output=text&fl=original&collapse=urlkey", domain) + + req, err := http.NewRequest("GET", url, nil) + if err != nil { + fmt.Fprintln(os.Stderr, "Error creating request:", err) + return + } + + req.Header.Set("Content-Type", "application/json") + req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3") + + client := &http.Client{} + resp, err := client.Do(req) + if err != nil { + fmt.Fprintln(os.Stderr, "Error making request:", err) + 
return + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + fmt.Fprintln(os.Stderr, "HTTP error:", resp.Status) + return + } + + scanner := bufio.NewScanner(resp.Body) + for scanner.Scan() { + line := scanner.Text() + fmt.Println(line) + } + + if err := scanner.Err(); err != nil { + fmt.Fprintln(os.Stderr, "Error reading response body:", err) + } +} + + +type OtxUrlData struct { + Url string `json:"url"` +} + +type OtxResponseData struct { + FullSize int `json:"full_size"` + UrlList []OtxUrlData `json:"url_list"` +} + +func getOtxUrls(domain string) { + url := fmt.Sprintf("https://otx.alienvault.com/api/v1/indicators/domain/%s/url_list", domain) + client := &http.Client{} + + // Initial request to get total pages + resp, err := client.Get(url + "?limit=500&page=1") + if err != nil { + fmt.Fprintln(os.Stderr, "Error:", err) + return + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + fmt.Fprintln(os.Stderr, "HTTP error:", resp.Status) + return + } + + var data OtxResponseData + if err := json.NewDecoder(resp.Body).Decode(&data); err != nil { + fmt.Fprintln(os.Stderr, "JSON decode error:", err) + return + } + + totalUrls := data.FullSize + totalPages := (totalUrls + 499) / 500 // Using ceiling to calculate total pages + + for i := 1; i <= totalPages; i++ { + resp, err := client.Get(fmt.Sprintf("%s?limit=500&page=%d", url, i)) + if err != nil { + fmt.Fprintln(os.Stderr, "Error:", err) + return + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + fmt.Fprintln(os.Stderr, "HTTP error:", resp.Status) + return + } + + if err := json.NewDecoder(resp.Body).Decode(&data); err != nil { + fmt.Fprintln(os.Stderr, "JSON decode error:", err) + return + } + + for _, urlData := range data.UrlList { + fmt.Println(urlData.Url) + } + } +} + + +type UrlscanResult struct { + Page struct { + URL string `json:"url"` + } `json:"page"` + Task struct { + URL string `json:"url"` + } `json:"task"` +} + +type UrlscanResponseData struct { + Results []UrlscanResult `json:"results"` +} + +func getUrlscanUrls(domain string) { + url := fmt.Sprintf("https://urlscan.io/api/v1/search/?q=domain:%s&size=10000", domain) + + req, err := http.NewRequest("GET", url, nil) + if err != nil { + fmt.Fprintln(os.Stderr, "Error creating request:", err) + return + } + + req.Header.Set("Content-Type", "application/json") + req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3") + + client := &http.Client{} + resp, err := client.Do(req) + if err != nil { + fmt.Fprintln(os.Stderr, "Error making request:", err) + return + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + fmt.Fprintln(os.Stderr, "HTTP error:", resp.Status) + return + } + + var data UrlscanResponseData + if err := json.NewDecoder(resp.Body).Decode(&data); err != nil { + fmt.Fprintln(os.Stderr, "JSON decode error:", err) + return + } + + // Extract URLs + var urls []string + for _, result := range data.Results { + if result.Page.URL != "" { + urls = append(urls, result.Page.URL) + } + if result.Task.URL != "" { + urls = append(urls, result.Task.URL) + } + } + + // Remove duplicates and sort URLs + urlSet := make(map[string]struct{}) + for _, url := range urls { + urlSet[url] = struct{}{} + } + + var uniqueUrls []string + for url := range urlSet { + uniqueUrls = append(uniqueUrls, url) + } + sort.Strings(uniqueUrls) + + // Print URLs + for _, url := range uniqueUrls { + fmt.Println(url) + } +} + + +// Fetch 
URLs from Common Crawl +type CdxApi struct { + CDXAPI string `json:"cdx-api"` +} + +func getCommonCrawlUrls(domain string) { + resp, err := http.Get("https://index.commoncrawl.org/collinfo.json") + if err != nil { + fmt.Fprintf(os.Stderr, "RequestException: %v\n", err) + return + } + defer resp.Body.Close() + + body, err := ioutil.ReadAll(resp.Body) + if err != nil { + fmt.Fprintf(os.Stderr, "Error reading response body: %v\n", err) + return + } + + var data []CdxApi + if err := json.Unmarshal(body, &data); err != nil { + fmt.Fprintf(os.Stderr, "JSON decode error: %v\n", err) + return + } + + // Process each URL sequentially + for _, item := range data { + apiUrl := item.CDXAPI + urls := fetchCommonCrawlUrls(apiUrl, domain) + for _, url := range urls { + fmt.Println(url) + } + } +} + +func fetchCommonCrawlUrls(apiUrl, domain string) []string { + var urls []string + param := fmt.Sprintf("?url=*.%s&fl=url&output=json", domain) + indexUrl := apiUrl + param + + resp, err := http.Get(indexUrl) + if err != nil { + fmt.Fprintf(os.Stderr, "ConnectionError: %v\n", err) + return urls + } + defer resp.Body.Close() + + body, err := ioutil.ReadAll(resp.Body) + if err != nil { + fmt.Fprintf(os.Stderr, "Error reading response body: %v\n", err) + return urls + } + + urlPattern := regexp.MustCompile(`"url": "([^"]+)"`) + matches := urlPattern.FindAllStringSubmatch(string(body), -1) + for _, match := range matches { + if len(match) > 1 { + urls = append(urls, match[1]) + } + } + + return urls +} + + +func main() { + toolFlag := flag.String("t", "all", "Comma-separated list of tools to run: 'otx', 'archive', 'urlscan', 'commoncrawl', or 'all'") + version := flag.Bool("version", false, "Print the version of the tool and exit.") + silent := flag.Bool("silent", false, "silent mode.") + flag.Parse() + + // Print version and exit if -version flag is provided + if *version { + printBanner() + printVersion() + return + } + + // Don't Print banner if -silnet flag is provided + if !*silent { + printBanner() + } + + scanner := bufio.NewScanner(os.Stdin) + for scanner.Scan() { + domain := strings.TrimSpace(scanner.Text()) + + // Handle each tool based on the flag + if *toolFlag == "all" || strings.Contains(*toolFlag, "archive") { + getArchiveUrls(domain) + } + if *toolFlag == "all" || strings.Contains(*toolFlag, "otx") { + getOtxUrls(domain) + } + if *toolFlag == "all" || strings.Contains(*toolFlag, "urlscan") { + getUrlscanUrls(domain) + } + if *toolFlag == "all" || strings.Contains(*toolFlag, "commoncrawl") { + getCommonCrawlUrls(domain) + } + } + + if err := scanner.Err(); err != nil { + fmt.Fprintln(os.Stderr, "Error reading input:", err) + } +} diff --git a/urlscan.py b/urlscan.py deleted file mode 100644 index 259c488..0000000 --- a/urlscan.py +++ /dev/null @@ -1,50 +0,0 @@ -import json -import requests -import random -import sys -import argparse -from concurrent.futures import ThreadPoolExecutor - -# Read the domain from sys.stdin -domains = [line.strip() for line in sys.stdin.readlines()] - -# Define a function to process a domain -def process_domain(domain): - site = f"https://urlscan.io/api/v1/search/?q=domain:{domain}&size=10000" - - # Set the API endpoint and API key - api_keys = {'key': ['829e49ac-d524-4464-af9b-53a73a859693', 'a6fc703b-b297-4cdb-a383-c12b211a82ba',]} - api = random.choice(api_keys['key']) - # Set the headers - headers = { - "Content-Type": "application/json", - "API-Key": f"{api}", - "User-Agent" : "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) 
Chrome/58.0.3029.110 Safari/537.3" - } - - # Send the GET request and get the response - response = requests.get(site, headers=headers) - - # Parse the response as JSON - data = response.json() - - # Extract the page and task URLs from the results - page_urls = [result['page']['url'] for result in data['results']] - task_urls = [result['task']['url'] for result in data['results']] - - # Merge the page and task URLs and sort them - urls = list(set(page_urls + task_urls)) - urls.sort() - - # Print the URLs one per line - for url in urls: - print(url) - -# Use argparse to parse the number of threads as an argument -parser = argparse.ArgumentParser() -parser.add_argument("--threads", "-t", type=int, default=50, help="Number of threads to use") -args = parser.parse_args() - -# Use a ThreadPoolExecutor to process the domains concurrently -with ThreadPoolExecutor(max_workers=args.threads) as executor: - executor.map(process_domain, domains)
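
Two short sketches follow; neither is part of the diff above, and the function and flag names they reuse come from uforall.go as added in this patch.

First, the new `main()` selects tools with `strings.Contains` on the raw `-t` value, so a stray space in the value or an unknown name that happens to contain a known one ("urlscan2", for example) changes which fetchers run. A minimal sketch of stricter matching, assuming the same flag and tool names, splits the comma-separated value and checks exact names:

```go
// Sketch only, not part of the diff: exact matching for the -t flag value.
// selectedTools("otx,urlscan") returns a set containing "otx" and "urlscan".
func selectedTools(toolFlag string) map[string]bool {
	selected := make(map[string]bool)
	for _, name := range strings.Split(toolFlag, ",") {
		name = strings.TrimSpace(name) // tolerate "otx, urlscan"
		if name != "" {
			selected[name] = true
		}
	}
	return selected
}
```

`main()` could then compute `sel := selectedTools(*toolFlag)` once per run and call `getOtxUrls(domain)` when `sel["all"] || sel["otx"]`, and likewise for the other three fetchers.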
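
Second, the deleted Python helpers fetched each source through a ThreadPoolExecutor, while the Go rewrite calls the four fetchers one after another for every domain. A minimal sketch of fanning them out per domain, assuming the getArchiveUrls, getOtxUrls, getUrlscanUrls, and getCommonCrawlUrls functions from uforall.go plus an added "sync" import, could look like this:

```go
// Sketch only, not part of the diff: run the four sources concurrently for
// one domain. Each fetcher prints its own URLs, so lines from different
// sources will interleave on stdout.
func runSourcesConcurrently(domain string) {
	var wg sync.WaitGroup
	fetchers := []func(string){getArchiveUrls, getOtxUrls, getUrlscanUrls, getCommonCrawlUrls}
	for _, fetch := range fetchers {
		wg.Add(1)
		go func(f func(string)) {
			defer wg.Done()
			f(domain)
		}(fetch)
	}
	wg.Wait() // wait for every source before moving on to the next domain
}
```

Deduplication across sources (the old README piped everything through anew) would still have to happen outside the tool, or behind a shared set guarded by a mutex.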