-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpreproc_pipeline.sh
53 lines (46 loc) · 2.11 KB
/
preproc_pipeline.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#!/bin/sh
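# Shell script for automating data preprocessing.
#
# Usage: sh ./preproc_pipeline.sh <train|validation>
#
# Before running, set the following variables in the matching .env file
# (./env_vars/train_preproc_pipeline.env or ./env_vars/val_preproc_pipeline.env):
#   JSON_DATA_CONFIG_DIR - full path to your data configuration .json file
#   DATA_DIR             - full path to your dataset
#   CSV_LABELS_CROP_PATH - passed to the pipeline as --csv-labels-crop-path
#   OUTPUT_CROP_DIR      - passed to the pipeline as --crop-dir
#   AUGMENTED_DATA_DIR   - full path where your augmented data is going to be stored after preprocessing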
# Select and load the environment configuration for the requested dataset.
#
# Arguments:
#   $1 - dataset mode, either 'train' or 'validation'
# Globals written:
#   MODE - the requested mode (later passed to the pipeline as --dataset-type)
#   FILE - path of the .env file that was loaded
#   plus every variable defined by the sourced .env file
# Exit status:
#   1 if the mode is invalid or the matching .env file does not exist.
#
# Only POSIX constructs are used here because the script runs under /bin/sh:
# '==' inside '[ ]' and 'source' are bashisms that fail on dash-like shells.
MODE=${1:-}

case "$MODE" in
  train)
    FILE=./env_vars/train_preproc_pipeline.env
    dataset_label=training
    ;;
  validation)
    FILE=./env_vars/val_preproc_pipeline.env
    dataset_label=validation
    ;;
  *)
    # printf is used instead of echo: echo's handling of '\n' differs between shells.
    printf '%s\n' \
      "Invalid MODE parameter provided: should be either 'train' or 'validation'." \
      "'train' means, it will load .env configuration for the training dataset," \
      "provided under 'env_vars/train_preproc_pipeline.env' path." \
      "'validation' means, it will load .env configuration for validation dataset," \
      "provided under 'env_vars/val_preproc_pipeline.env' path." \
      "You can use only these options, nothing else." \
      "For more details check 'docs/data_management/DATA_PIPELINE.md'." \
      "Example:" \
      "sh ./preproc_pipeline.sh 'train' -------> indicates we are running preprocessing on training data." \
      "sh ./preproc_pipeline.sh 'validation' -----> indicates we are running preprocessing on validation data." \
      >&2
    exit 1
    ;;
esac

# Abort when the configuration is missing; otherwise the pipeline below would
# be launched with every path variable unset.
if [ ! -f "$FILE" ]; then
  printf '%s\n' \
    "$FILE does not exist." \
    "Shell script loads configuration for $dataset_label dataset from this file." \
    "Before usage, you need to create this file and set up configuration for your $dataset_label data." \
    "For more details, see 'docs/data_management/DATA_PIPELINE.md'." \
    >&2
  exit 1
fi

# '.' is the portable spelling of 'source'; the path contains a slash, so no
# PATH lookup takes place.
# shellcheck source=/dev/null
. "$FILE"
python3 -u -m src.pipelines.preproc_pipeline \
--json-data-config-path $JSON_DATA_CONFIG_DIR \
--data-dir $DATA_DIR \
--csv-labels-crop-path $CSV_LABELS_CROP_PATH \
--crop-dir $OUTPUT_CROP_DIR \
--dataset-type "$MODE" \