From 310cc90e9270ba7a5e81806c4ec73c8ed0caf3c8 Mon Sep 17 00:00:00 2001 From: pietrocapece <99724882+pietrocapece@users.noreply.github.com> Date: Thu, 28 Nov 2024 17:12:12 +0000 Subject: [PATCH] Added Beautifulsoup parsing and cleaned up code --- .../threatmatch/src/requirements.txt | 1 + .../threatmatch/src/threatmatch.py | 29 ++++--------------- 2 files changed, 6 insertions(+), 24 deletions(-) diff --git a/external-import/threatmatch/src/requirements.txt b/external-import/threatmatch/src/requirements.txt index edfe950025..d436fb28e4 100644 --- a/external-import/threatmatch/src/requirements.txt +++ b/external-import/threatmatch/src/requirements.txt @@ -1 +1,2 @@ pycti==6.4.2 +beautifulsoup4==4.12.3 \ No newline at end of file diff --git a/external-import/threatmatch/src/threatmatch.py b/external-import/threatmatch/src/threatmatch.py index 2bd827271b..5a83aa4726 100644 --- a/external-import/threatmatch/src/threatmatch.py +++ b/external-import/threatmatch/src/threatmatch.py @@ -7,6 +7,7 @@ import requests import yaml +from bs4 import BeautifulSoup from pycti import OpenCTIConnectorHelper, get_config_variable @@ -50,13 +51,6 @@ def __init__(self): False, True, ) - # self.threatmatch_import_reports = get_config_variable( - # "THREATMATCH_IMPORT_REPORTS", - # ["threatmatch", "import_reports"], - # config, - # False, - # True, - # ) self.threatmatch_import_iocs = get_config_variable( "THREATMATCH_IMPORT_IOCS", ["threatmatch", "import_iocs"], @@ -103,10 +97,12 @@ def _get_item(self, token, type, item_id): if r.status_code != 200: self.helper.log_error(str(r.text)) return [] - # if 'error' in r.json(): - # return [] if r.status_code == 200: data = r.json()["objects"] + for object in data: + object["description"] = BeautifulSoup( + object["description"], "html.parser" + ).get_text() return data def _process_list(self, work_id, token, type, list): @@ -215,21 +211,6 @@ def run(self): self._process_list( work_id, token, "alerts", data.get("list") ) - # if self.threatmatch_import_reports: - # r = requests.get( - # self.threatmatch_url + "/api/reports/all", - # headers=headers, - # json={ - # "mode": "compact", - # "date_since": import_from_date, - # }, - # ) - # if r.status_code != 200: - # self.helper.log_error(str(r.text)) - # data = r.json() - # self._process_list( - # work_id, token, "reports", data.get("list") - # ) if self.threatmatch_import_iocs: response = requests.get( self.threatmatch_url + "/api/taxii/groups",