-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathintegrated_pipeline.sh
53 lines (44 loc) · 1.6 KB
/
integrated_pipeline.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#!/bin/bash
# Step 1: Download data
# Read LOCAL_10X_2018_DATA_DIR from ~/.research_config. The file is sourced
# inside a command substitution (a subshell), so only the printed value reaches
# this script. The expansion is quoted so that a path containing spaces or
# glob characters is passed through unchanged.
MAIN_DIR=$(source ~/.research_config; printf '%s\n' "$LOCAL_10X_2018_DATA_DIR")
# Abort when no data directory could be determined
if [[ -z "$MAIN_DIR" ]]; then
  echo "Error: MAIN_DIR is not set. Check .research_config file." >&2
  exit 1
fi
# Sub-paths are built by plain concatenation, so make sure MAIN_DIR ends in a
# slash whether or not the configured value had one.
MAIN_DIR="${MAIN_DIR%/}/"
RAW_DIR="${MAIN_DIR}raw/"
if ! mkdir -p "$RAW_DIR"; then
  echo "Error: could not create ${RAW_DIR}" >&2
  exit 1
fi
# Step 1.1: Download FastQ files
# Each archive is downloaded only when it is not already present, so a rerun
# does not fetch it again under a ".1" name. The download goes to a ".part"
# file that is renamed on success, so an interrupted transfer is never mistaken
# for a complete archive. Any failure stops the script.
cd "$RAW_DIR" || exit 1
for FASTQ_URL in \
  https://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_1k_v3/pbmc_1k_v3_fastqs.tar \
  https://s3-us-west-2.amazonaws.com/10x.files/samples/cell-exp/3.0.0/pbmc_10k_v3/pbmc_10k_v3_fastqs.tar; do
  # Archive file name is the last path component of the URL
  FASTQ_TAR="${FASTQ_URL##*/}"
  if [[ ! -f "$FASTQ_TAR" ]]; then
    if ! wget -O "${FASTQ_TAR}.part" "$FASTQ_URL"; then
      echo "Error: failed to download ${FASTQ_URL}" >&2
      exit 1
    fi
    mv -- "${FASTQ_TAR}.part" "$FASTQ_TAR" || exit 1
  fi
  if ! tar -xvf "$FASTQ_TAR"; then
    echo "Error: failed to extract ${FASTQ_TAR}" >&2
    exit 1
  fi
done
# Step 1.2: Download and extract the GRCh38 transcriptome database
# The archive is skipped when already present and extraction is skipped when
# the extracted directory already exists. The download goes to a ".part" file
# that is renamed on success, so a truncated archive cannot satisfy the
# "already present" check on a later run.
cd "$RAW_DIR" || exit 1
REFDATA_NAME="refdata-gex-GRCh38-2020-A"
if [[ ! -f "${REFDATA_NAME}.tar.gz" ]]; then
  if ! wget -O "${REFDATA_NAME}.tar.gz.part" \
    "https://cf.10xgenomics.com/supp/cell-exp/${REFDATA_NAME}.tar.gz"; then
    echo "Error: failed to download ${REFDATA_NAME}.tar.gz" >&2
    exit 1
  fi
  mv -- "${REFDATA_NAME}.tar.gz.part" "${REFDATA_NAME}.tar.gz" || exit 1
fi
if [[ ! -d "$REFDATA_NAME" ]]; then
  if ! tar -zxvf "${REFDATA_NAME}.tar.gz"; then
    # A partial directory would make the check above skip extraction next time
    echo "Error: failed to extract ${REFDATA_NAME}.tar.gz; remove any partial ${REFDATA_NAME}/ directory before re-running." >&2
    exit 1
  fi
fi
REFDATA_DIR="${RAW_DIR}${REFDATA_NAME}/"
# Step 2: Process data
# Strip any trailing slash from MAIN_DIR before appending, so the path is
# correct whether or not MAIN_DIR ends in one.
PROCESSED_DIR="${MAIN_DIR%/}/processed/"
if ! mkdir -p "$PROCESSED_DIR"; then
  echo "Error: could not create ${PROCESSED_DIR}" >&2
  exit 1
fi
# cellranger is run from the current directory below, so stop rather than
# run it from wherever the script happens to be if this cd fails.
if ! cd "$PROCESSED_DIR"; then
  echo "Error: could not enter ${PROCESSED_DIR}" >&2
  exit 1
fi
# Check if Cell Ranger is installed
if ! command -v cellranger &> /dev/null; then
  echo "Error: cellranger command not found." >&2
  exit 1
fi
# Step 2.1: Run Cell Ranger count for each sample
# The script stops at the first failing sample, so the success message below
# is only printed when every cellranger run returned zero.
for ID in pbmc_1k_v3 pbmc_10k_v3; do
  # ${RAW_DIR%/} drops a trailing slash so the FastQ path has no "//" in it
  if ! cellranger count --id="${ID}" \
    --transcriptome="$REFDATA_DIR" \
    --fastqs="${RAW_DIR%/}/${ID}_fastqs" \
    --sample="${ID}" \
    --create-bam=true; then
    echo "Error: cellranger count failed for ${ID}." >&2
    exit 1
  fi
done
echo "All tasks completed successfully!"