Make datasets private that aren't in API
b-j-mills committed Oct 3, 2024
1 parent 6d262e7 commit e4dc9c3
Showing 2 changed files with 29 additions and 1 deletion.
peacesecurity.py: 13 additions & 0 deletions
@@ -26,6 +26,7 @@ def __init__(self, configuration, retriever, folder, errors):
         self.errors = errors
         self.dataset_data = {}
         self.metadata = {}
+        self.dataset_ids = []
 
     def get_data(self, state, datasets=None):
         base_url = self.configuration["base_url"]
@@ -34,6 +35,9 @@ def get_data(self, state, datasets=None):
 
         for meta_json in meta_jsons:
             dataset_id = meta_json["Dataset ID"]
+            hdx_dataset_id = self.configuration["dataset_names"].get(dataset_id, dataset_id)
+            hdx_dataset_id = slugify(hdx_dataset_id)
+            self.dataset_ids.append(hdx_dataset_id)
             if datasets and dataset_id not in datasets:
                 continue
             last_update_date = meta_json["Last Update Date"]
@@ -54,6 +58,15 @@
 
         return [{"name": dataset_name} for dataset_name in sorted(self.dataset_data)]
 
+    def check_hdx_datasets(self):
+        datasets = Dataset.search_in_hdx(fq="organization:unpeacesecurity")
+        private_datasets = []
+        for dataset in datasets:
+            if dataset["name"] not in self.dataset_ids and not dataset["private"]:
+                dataset["private"] = True
+                private_datasets.append(dataset)
+        return private_datasets
+
     def generate_dataset_and_showcase(self, dataset_name):
         rows = self.dataset_data[dataset_name]
         metadata = self.metadata[dataset_name]
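For context, a minimal read-only sketch (not part of this commit) of the hdx-python-api calls the new check_hdx_datasets method relies on: Dataset.search_in_hdx with an fq organization filter returns dict-like Dataset objects whose "name" and "private" fields can be read by key. The Configuration.create arguments, the import path (which differs in older library versions), and the api_dataset_ids list are illustrative assumptions, not taken from this repository.

# Sketch only: list org datasets that would be flagged private because they
# no longer appear in the source API.
from hdx.api.configuration import Configuration  # import path for recent hdx-python-api versions
from hdx.data.dataset import Dataset

# Assumed read-only setup; the real script configures HDX through its own facade.
Configuration.create(hdx_site="prod", user_agent="peacesecurity-review", hdx_read_only=True)

api_dataset_ids = ["example-dataset-id"]  # stand-in for self.dataset_ids built in get_data()

datasets = Dataset.search_in_hdx(fq="organization:unpeacesecurity")
for dataset in datasets:
    # Dataset behaves like a dict, so fields are accessed by key.
    if dataset["name"] not in api_dataset_ids and not dataset["private"]:
        print(f"Would set private: {dataset['name']}")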
run.py: 16 additions & 1 deletion
@@ -46,8 +46,23 @@ def main(save: bool = False, use_saved: bool = False) -> None:
             dataset_names = peacesecurity.get_data(
                 state_dict,
             )
-            logger.info(f"Number of datasets to upload: {len(dataset_names)}")
+            private_datasets = peacesecurity.check_hdx_datasets()
+            logger.info(f"Number of datasets to set private: {len(private_datasets)}")
+            for dataset in private_datasets:
+                try:
+                    dataset.update_in_hdx(
+                        update_resources=False,
+                        hxl_update=False,
+                        operation="patch",
+                        batch_mode="KEEP_OLD",
+                        updated_by_script=updated_by_script,
+                        ignore_fields=["resource:description", "extras"],
+                    )
+                except HDXError:
+                    errors.add(f"Could not make {dataset['name']} private")
+                    continue
 
+            logger.info(f"Number of datasets to upload: {len(dataset_names)}")
             for _, nextdict in progress_storing_folder(info, dataset_names, "name"):
                 dataset_name = nextdict["name"]
                 dataset, showcase = peacesecurity.generate_dataset_and_showcase(dataset_name)
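As a companion to the update loop above, a standalone sketch of patching one dataset private. It assumes HDX is already configured and uses a hypothetical dataset slug and script name; the keyword arguments mirror those used in run.py, with comments reflecting my understanding of their effect in hdx-python-api.

# Sketch only: flip a single dataset to private with a metadata patch.
from hdx.data.dataset import Dataset
from hdx.data.hdxobject import HDXError

dataset = Dataset.read_from_hdx("hypothetical-unps-dataset")  # hypothetical slug
if dataset is not None and not dataset["private"]:
    dataset["private"] = True
    try:
        dataset.update_in_hdx(
            update_resources=False,  # leave resources untouched
            hxl_update=False,  # do not trigger HXL re-tagging
            operation="patch",  # send only the changed metadata
            batch_mode="KEEP_OLD",  # keep the dataset's existing update batch
            updated_by_script="hdx-scraper-peacesecurity",  # assumed script label
            ignore_fields=["resource:description", "extras"],
        )
    except HDXError as ex:
        print(f"Could not make {dataset['name']} private: {ex}")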