From 0a6e72f8593b6bc2c6b8e7b3489ea392fad5afd1 Mon Sep 17 00:00:00 2001 From: Przemek Wiejak Date: Mon, 26 Dec 2022 01:16:33 -0600 Subject: [PATCH 1/2] updates --- README.md | 4 ++-- changelog.txt | 8 ++++++++ python-sitemap-generator.py | 13 +++++++++---- screenshot.png | Bin 4 files changed, 19 insertions(+), 6 deletions(-) mode change 100644 => 100755 README.md mode change 100644 => 100755 changelog.txt mode change 100644 => 100755 screenshot.png diff --git a/README.md b/README.md old mode 100644 new mode 100755 index 54cca6a..33a37b0 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # Python Sitemap Generator -- Version: 0.4.1 -- Update: 2021/12/30 +- Version: 0.4.2 +- Update: 2022/12/26 - Author: Przemek Wiejak @ przemek@wiejak.app Python Site Map Generator uses python multi-threaded approach to read all links accessible through the Web site and generate proper sitemap for SEO purposes. diff --git a/changelog.txt b/changelog.txt old mode 100644 new mode 100755 index f4d77cd..030560f --- a/changelog.txt +++ b/changelog.txt @@ -1,3 +1,11 @@ +Date: 2022/12/26 +Version: 0.4.2 + +- Add MIT license (thank you https://github.com/kylebarney) +- Fixed timeout issues throwing URLError and handling exceptions properly + +~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ + Date: 2021/12/30 Version: 0.4.1 diff --git a/python-sitemap-generator.py b/python-sitemap-generator.py index 25258a5..d7ebf0b 100755 --- a/python-sitemap-generator.py +++ b/python-sitemap-generator.py @@ -11,6 +11,7 @@ import sys from urllib.request import urlopen from urllib.request import Request +from urllib.error import URLError from urllib.request import HTTPError from urllib.parse import urljoin from urllib.parse import urlparse @@ -36,10 +37,10 @@ # adjust to your liking # keep values low to prevent firewalls blocking you for flooding -MaxThreads = 5 +MaxThreads = 20 -# DWFINE YOUR URL - CUSTOM URL! -InitialURL = 'HTTPS://SOME_URL.TEST/' +# DEFINE YOUR URL - CUSTOM URL! +InitialURL = 'https://hublist.pwiam.com' InitialURLInfo = urlparse(InitialURL) InitialURLLen = len(InitialURL.split('/')) @@ -231,9 +232,13 @@ def run(self): #var_dump(temp_content) pass - + except URLError as e: + print ('URLError: ', self.obj['url']) + temp_status = 000 + pass except HTTPError as e: + print ('HTTPError: ', self.obj['url']) temp_status = e.code pass diff --git a/screenshot.png b/screenshot.png old mode 100644 new mode 100755 From 6b17218ff8bb5ae984544aed7ac5c1662272624b Mon Sep 17 00:00:00 2001 From: Przemek Wiejak Date: Mon, 26 Dec 2022 01:17:40 -0600 Subject: [PATCH 2/2] updates --- python-sitemap-generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python-sitemap-generator.py b/python-sitemap-generator.py index d7ebf0b..418713a 100755 --- a/python-sitemap-generator.py +++ b/python-sitemap-generator.py @@ -40,7 +40,7 @@ MaxThreads = 20 # DEFINE YOUR URL - CUSTOM URL! -InitialURL = 'https://hublist.pwiam.com' +InitialURL = 'HTTPS://SOME_URL.TEST/' InitialURLInfo = urlparse(InitialURL) InitialURLLen = len(InitialURL.split('/'))