-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsood_c_dataset_creation.sh
executable file
·48 lines (36 loc) · 1.71 KB
/
sood_c_dataset_creation.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#!/bin/bash
#
# Classification dataset creation pipeline.
#
# Runs the eight Python steps below in order. The script aborts on the first
# step that exits non-zero, because each step is run as part of one sequential
# pipeline and continuing after a failure would report "All steps completed."
# for a run that did not actually complete.
#
# Usage:
#   sood_c_dataset_creation.sh <data_id> <root_imagenet> <batch_size>
#
# Arguments:
#   data_id        forwarded as --data_id to every step that accepts it
#   root_imagenet  forwarded as --root_imagenet (steps 2, 6 and 8)
#   batch_size     forwarded as --batch_size (step 6)
#
# Exit status:
#   0  every step succeeded
#   1  fewer than three arguments were given
#   otherwise, the exit status of the first failing step
#
# Notes:
#   * The Python scripts are referenced by bare file name, so this script must
#     be started from the directory that contains them.
#   * Steps whose banner contains [HUMAN INTERACTION REQUIRED] are announced
#     as needing an operator; do not run this script unattended.

# -e: stop at the first failing step; -u: reject unset variables;
# pipefail: a failure in any stage of a pipeline counts as a failure.
set -euo pipefail

# Report which command failed before `set -e` terminates the script.
trap 'echo "Error: command failed (line ${LINENO}): ${BASH_COMMAND}" >&2' ERR

# Check if all required parameters are provided
if [ "$#" -lt 3 ]; then
  # Usage is a diagnostic, so it goes to stderr.
  echo "Usage: $0 <data_id> <root_imagenet> <batch_size>" >&2
  exit 1
fi

# Assign command-line arguments to variables
DATA_ID=$1
ROOT_IMAGENET=$2
BATCH_SIZE=$3

# Passed to both step 5 and step 6; kept in one place so the two steps cannot
# silently drift apart.
readonly MIN_NUM_SUBCLASSES=10

# Classification Dataset Creation

# Step 1: Cluster ImageNet using WordNet and Sentence Transformer
echo "Running cluster_imagenet.py..."
python cluster_imagenet.py

# Step 2: Build VLM Superclass by associating sub-classes to the proper super-class
echo "Running vlm_superclass_building.py... [HUMAN INTERACTION REQUIRED]"
python vlm_superclass_building.py \
  --data_id "$DATA_ID" \
  --root_imagenet "$ROOT_IMAGENET"

# Step 3: Perform human checks on the images
echo "Running human_check_tool.py... [HUMAN INTERACTION REQUIRED]"
python human_check_tool.py --data_id "$DATA_ID"

# Step 4: Check for replicated sub-classes in each super-class
echo "Running check_replicas.py... [HUMAN INTERACTION REQUIRED]"
python check_replicas.py --data_id "$DATA_ID"

# Step 5: Filter super-classes with few sub-classes
echo "Running check_scores.py..."
python check_scores.py \
  --data_id "$DATA_ID" \
  --min_num_subclasses "$MIN_NUM_SUBCLASSES"

# Step 6: Compute correlation scores with CLIP
echo "Running clip_score_generation.py..."
python clip_score_generation.py \
  --data_id "$DATA_ID" \
  --root_imagenet "$ROOT_IMAGENET" \
  --batch_size "$BATCH_SIZE" \
  --min_num_subclasses "$MIN_NUM_SUBCLASSES"

# Step 7: Detect outliers in the score distribution
echo "Running outliers_detection.py..."
python outliers_detection.py

# Step 8: Split the dataset into IID (train), test easy OOD, and test hard OOD
echo "Running dataset_split.py..."
python dataset_split.py \
  --root_imagenet "$ROOT_IMAGENET" \
  --p_value_1 40 \
  --p_value_2 20 \
  --data_id "$DATA_ID"

# Only reached when every step above exited with status 0.
echo "All steps completed."