-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrunBcl2fastq.pbs
executable file
·103 lines (83 loc) · 3.89 KB
/
runBcl2fastq.pbs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#!/bin/bash
#PBS -q condo
#PBS -N django_bcl2fastq
#PBS -l nodes=1:ppn=16
#PBS -l walltime=2:00:00
#PBS -V
#PBS -m abe
#PBS -A epigen-group
# Demultiplex one sequencing run with bcl2fastq and report progress to the
# tracking VM over ssh.
#
# Job variables (pass with `qsub -v name=value,...`):
#   flowcell_id  required; identifier handed to the update*.py scripts (-f)
#   run_dir      required; run folder that is searched for SampleSheet.csv
#   split        optional; "true" drops --no-lane-splitting
#   idx          optional; "true" adds --create-fastq-for-index-reads
############################################################
## update status to job submitted @VM
############################################################
# Fail fast: without a flowcell id the remote script would be called with a
# dangling `-f`.
: "${flowcell_id:?flowcell_id must be passed to the job (qsub -v flowcell_id=...)}"

# Shell-escape the id because it is interpolated into a command line that is
# parsed again by the remote shell. For plain alphanumeric ids the escaped
# form is identical to the raw value.
printf -v flowcell_arg '%q' "$flowcell_id"

# \$(which ...) is escaped so it is resolved on the VM (after the django
# environment is activated), not on the compute node.
cmd="source activate django;python \$(which updateRunStatus.py) -s '1' -f $flowcell_arg"

# Quoted so the command string reaches ssh as one argument, without local
# word splitting or glob expansion.
ssh zhc268@epigenomics.sdsc.edu "$cmd"
############################################################
## run bcl2fastq @TSCC
############################################################
## prepare variables & directories

# run_dir is mandatory: with an empty value the find below would walk "/" and
# the read-count file would be created under /Data/Fastqs.
: "${run_dir:?run_dir must be passed to the job (qsub -v run_dir=...)}"

# Extra bcl2fastq flags. Lane splitting is disabled unless split=true.
s="--no-lane-splitting"
[[ "${split:-}" == true ]] && s=""

# Initialise c explicitly: the job is submitted with `#PBS -V`, which imports
# the submitter's environment, so an unrelated exported $c would otherwise end
# up on the bcl2fastq command line.
c=""
[[ "${idx:-}" == true ]] && c="--create-fastq-for-index-reads"

# Collect every SampleSheet.csv below the run folder. NUL-delimited so that
# paths containing whitespace stay intact as single array elements.
samplesheets=()
while IFS= read -r -d '' sheet_path; do
  samplesheets+=("$sheet_path")
done < <(find "${run_dir}/" -name "SampleSheet.csv" -print0)

# Per-run table of "<library>\t<read count>"; start from an empty file.
reads_cnt_file="${run_dir}/Data/Fastqs/reads_cnt.tsv"
mkdir -p -- "${reads_cnt_file%/*}"
: > "$reads_cnt_file"
# count for mix 1 and 2 primers
#
# For every SampleSheet.csv found under the run folder:
#   1. demultiplex with bcl2fastq (using SampleSheet_expand.csv when present),
#   2. symlink each library's FASTQs into the seqdata directory under
#      simplified names,
#   3. for 10x libraries (barcode name starting with "SI-NA") concatenate the
#      per-index FASTQs <lib>_[0-3] into a single file,
#   4. append "<library>\t<read count>" to $reads_cnt_file (in background).
# Uses variables set earlier in the script: run_dir, samplesheets, s, c,
# reads_cnt_file.

# Directory that receives the FASTQ symlinks and the merged 10x files.
seqdata_dir=/projects/ps-epigen/seqdata

#######################################
# Translate a bcl2fastq FASTQ file name into the name used in seqdata:
# the "_S<n>" sample counter and the "_001" suffix are dropped, and for
# single-end runs the "_R1" tag is dropped as well.
#   LIB_S3_R1_001.fastq.gz       -> LIB_R1.fastq.gz   (paired-end)
#   LIB_S3_R1_001.fastq.gz       -> LIB.fastq.gz      (single-end)
#   LIB_S3_L001_R1_001.fastq.gz  -> LIB_L001_R1.fastq.gz
# Only the trailing "_S<n>..." part is rewritten, so library names that
# themselves contain "_S2_" or "_R1" are left intact.
# Arguments: $1 - file name (no directory), $2 - "true" for single-end
# Outputs:   the new name on stdout; unrecognised names are echoed unchanged
#######################################
seqdata_name() {
  local base=$1 single_end=${2:-false}
  local re='^(.*)_S[0-9]+((_L[0-9]+)?_[RI][0-9]+)_001(\.fastq\.gz)$'
  if [[ $base =~ $re ]]; then
    local stem=${BASH_REMATCH[1]} tag=${BASH_REMATCH[2]} ext=${BASH_REMATCH[4]}
    if [[ $single_end == true ]]; then
      tag=${tag%_R1}
    fi
    printf '%s%s%s\n' "$stem" "$tag" "$ext"
  else
    printf '%s\n' "$base"
  fi
}

#######################################
# Read the rows that follow the [Data] section header (and its column-title
# line) of a sample sheet.
# Globals:   libs (written)          - column 1 of each row
#            barcode_names (written) - column 5 of each row, same index as libs
# Arguments: $1 - path to the sample sheet
# Returns:   1 when the sheet has no [Data] line, 0 otherwise
# Rows with an empty first column are skipped; empty barcode fields are kept so
# the two arrays stay aligned. A trailing CR (Windows line ending) is removed.
#######################################
read_sample_rows() {
  local sheet=$1 data_line row lib barcode
  libs=()
  barcode_names=()
  # First line containing the literal "[Data]"; a bare "Data" elsewhere in the
  # sheet (e.g. in a description) must not be picked up.
  data_line=$(grep -n -m 1 -F '[Data]' "$sheet" | cut -d: -f1)
  [[ -n $data_line ]] || return 1
  while IFS= read -r row || [[ -n $row ]]; do
    row=${row%$'\r'}
    IFS=, read -r lib _ _ _ barcode _ <<< "$row"
    [[ -n $lib ]] || continue
    libs+=("$lib")
    barcode_names+=("$barcode")
  done < <(sed -n "$((data_line + 2)),\$p" "$sheet")
  return 0
}

#######################################
# Succeeds when the current directory holds at least one Undetermined*R2*
# file, which the script uses as the marker for a paired-end run. Works for
# any number of matches (lane splitting produces one file per lane).
#######################################
has_read2() {
  local f
  for f in Undetermined*R2*; do
    [[ -f $f ]] && return 0
  done
  return 1
}

#######################################
# Symlink every "<lib>*.gz" below the current directory into seqdata_dir,
# keeping any sub-directory part and renaming the file via seqdata_name.
# Arguments: $1 - library name, $2 - "true" for single-end
#######################################
link_fastqs() {
  local lib=$1 single_end=$2 fq rel sub
  while IFS= read -r -d '' fq; do
    rel=${fq#./}
    sub=""
    [[ $rel == */* ]] && sub=${rel%/*}/
    cp -pfs -- "$PWD/$rel" \
      "$seqdata_dir/$sub$(seqdata_name "${rel##*/}" "$single_end")"
  done < <(find . -name "${lib}*.gz" -print0)
}

#######################################
# Append "<lib>\t<reads>" to $reads_cnt_file, where reads = lines / 4.
# Arguments: $1 - library name, $2 - gzipped FASTQ to count
#######################################
count_reads() {
  local lib=$1 fastq=$2 n_lines
  n_lines=$(zcat "$fastq" | wc -l) &&
    printf '%s\t%d\n' "$lib" "$((n_lines / 4))" >> "$reads_cnt_file"
}

for samplesheet in "${samplesheets[@]}"; do
  cd "$run_dir" || { printf 'cannot cd to run_dir: %s\n' "$run_dir" >&2; exit 1; }
  out_dir=${samplesheet%/*}

  ## run bcl2fastq
  # Prefer the sheet with expanded 10x barcodes when it exists.
  samplesheet_exp=${out_dir}/SampleSheet_expand.csv
  demux_sheet=$samplesheet
  [[ -f $samplesheet_exp ]] && demux_sheet=$samplesheet_exp
  # ${s:+...}/${c:+...} drop the optional flags entirely when they are empty.
  bcl2fastq --runfolder-dir "$run_dir" --output-dir "$out_dir" \
    ${s:+"$s"} ${c:+"$c"} -p 16 --min-log-level TRACE \
    --sample-sheet "$demux_sheet" ||
    printf 'warning: bcl2fastq exited non-zero for %s\n' "$demux_sheet" >&2

  ## simlink fastqs & count reads number
  # Library names always come from the original (non-expanded) sheet.
  if ! read_sample_rows "$samplesheet"; then
    printf 'warning: no [Data] section in %s; skipping\n' "$samplesheet" >&2
    continue
  fi
  cd "$out_dir" || { printf 'cannot cd to %s; skipping\n' "$out_dir" >&2; continue; }

  # Paired/single-end is a property of the whole output dir: decide once.
  single_end=true
  has_read2 && single_end=false

  i=0
  for l in "${libs[@]}"; do
    link_fastqs "$l" "$single_end"

    if [[ $single_end == false ]]; then
      # paired ended
      # contain 10x barcodes
      if [[ ${barcode_names[i]} == "SI-NA"* ]]; then
        echo -e "cat paired-ended 10x lib $l"
        cat "$seqdata_dir/${l}"_[0-3]_R1.fastq.gz > "$seqdata_dir/${l}_R1.fastq.gz" &
        pid_r1=$!
        sleep 1
        cat "$seqdata_dir/${l}"_[0-3]_R2.fastq.gz > "$seqdata_dir/${l}_R2.fastq.gz" &
        pid_r2=$!
        sleep 1
        # Remove the per-index parts only when both merges succeeded.
        merged=true
        wait "$pid_r1" || merged=false
        wait "$pid_r2" || merged=false
        if [[ $merged == true ]]; then
          rm -- "$seqdata_dir/${l}"_[0-3]_R[1-2].fastq.gz
        else
          printf 'warning: merging 10x parts failed for %s; parts kept\n' "$l" >&2
        fi
      fi
      ## count the reads (background; collected by the final wait)
      count_reads "$l" "$seqdata_dir/${l}_R1.fastq.gz" &
      sleep 1
    else
      # single-ended
      # contain 10x barcodes
      if [[ ${barcode_names[i]} == "SI-NA"* ]]; then
        echo -e "cat single-ended 10x lib $l"
        if cat "$seqdata_dir/${l}"_[0-3].fastq.gz > "$seqdata_dir/${l}.fastq.gz"; then
          rm -- "$seqdata_dir/${l}"_[0-3].fastq.gz
        else
          printf 'warning: merging 10x parts failed for %s; parts kept\n' "$l" >&2
        fi
      fi
      ## count the reads (background; collected by the final wait)
      count_reads "$l" "$seqdata_dir/${l}.fastq.gz" &
      sleep 1
    fi

    ## mv index
    i=$((i + 1))
  done
done
# Barrier: all background read counts must be written before the file is used.
wait
############################################################
# update status to finish or warning @ VM
############################################################
# Both values are interpolated into a command line that the remote shell
# parses again, so escape them first. Plain alphanumeric ids and ordinary
# paths are unchanged by %q; an empty value becomes '' instead of vanishing
# and letting `-f` swallow the following option.
# NOTE(review): the VM is handed the same $reads_cnt_file path that was
# written here - presumably a filesystem shared by both hosts; confirm.
printf -v flowcell_arg '%q' "$flowcell_id"
printf -v reads_cnt_arg '%q' "$reads_cnt_file"

# \$(which ...) is escaped so it is resolved on the VM after the django
# environment has been activated.
cmd="source activate django; python \$(which updateReadsNumberPerRun.py) -f $flowcell_arg -i $reads_cnt_arg; python \$(which updateRunReads.py) -f $flowcell_arg"

# Quoted so the command string reaches ssh as a single argument, without
# local word splitting or glob expansion.
ssh zhc268@epigenomics.sdsc.edu "$cmd"