-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMTandrRNA_filter.txt
27 lines (18 loc) · 2.02 KB
/
MTandrRNA_filter.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
##for mitochondrial gene depletion
##step 1: keep every annotation record whose chromosome (column 1) is "MT"
##(output file names keep their original "mitochrondrial" spelling so later steps still find them)
awk -F'\t' '$1 == "MT"' Rattus_norvegicus.mRatBN7.2.108.gtf > mitochrondrial_genes.gtf
##step 2: one line per "gene" record: gene_id <TAB> gene_name
##attributes are looked up by key in column 9 rather than by position, so a gene
##without a gene_name gets an empty name instead of the value of the next attribute
##(an earlier gene_id-only variant wrote to this same file and was always overwritten;
## if only ids are needed use: cut -f1 mitochrondrial_genes_ids.txt)
awk -F'\t' -v OFS='\t' '
  $3 == "gene" {
    id = ""; name = ""
    # RSTART/RLENGTH cover the whole match; the offsets strip the key, the space and both quotes
    if (match($9, /gene_id "[^"]*"/))   id   = substr($9, RSTART + 9,  RLENGTH - 10)
    if (match($9, /gene_name "[^"]*"/)) name = substr($9, RSTART + 11, RLENGTH - 12)
    print id, name
  }
' mitochrondrial_genes.gtf > mitochrondrial_genes_ids.txt
##for rRNA looking for "Biotypes"
##step 1: keep "gene" records whose attributes (column 9) carry gene_biotype "rRNA"
##the biotype is matched by key, not by its quote-delimited position: the position shifts
##when a gene has no gene_name, which made the positional test skip those genes
##(the closing quote in the pattern keeps other biotypes such as "rRNA_pseudogene" or "Mt_rRNA" out)
awk -F'\t' '$3 == "gene" && $9 ~ /gene_biotype "rRNA"/' Rattus_norvegicus.mRatBN7.2.108.gtf > ribosomal_RNA.gtf
##optional sanity check of the selection: head ribosomal_RNA.gtf
##step 2: one line per selected gene: Ensembl gene_id <TAB> gene_name (empty when the gene has no name)
awk -F'\t' -v OFS='\t' '
  {
    id = ""; name = ""
    # RSTART/RLENGTH cover the whole match; the offsets strip the key, the space and both quotes
    if (match($9, /gene_id "[^"]*"/))   id   = substr($9, RSTART + 9,  RLENGTH - 10)
    if (match($9, /gene_name "[^"]*"/)) name = substr($9, RSTART + 11, RLENGTH - 12)
    print id, name
  }
' ribosomal_RNA.gtf > ribosomal_genes_ids.txt
##and for 7SK SRP although may not use to filter
##each pipeline below prints the value of the third attribute of "gene" records, quotes included
##NOTE(review): assumes the third attribute is gene_name (gene_id; gene_version; gene_name; ...) -- confirm for unnamed genes
##preview (first 10 lines) for GTF lines mentioning snRNA
grep snRNA Rattus_norvegicus.mRatBN7.2.108.gtf | awk -F'\t' '$3 == "gene" {print $9}' | awk -F'; ' '{print $3}' | awk '{print $2}' | head
##distinct values for GTF lines mentioning "misc"
##(this command was pasted together with its interactive prompt "[eak37@htc-1024-n1 Rnor_7.0]$", which the shell cannot run)
grep misc Rattus_norvegicus.mRatBN7.2.108.gtf | awk -F'\t' '$3 == "gene" {print $9}' | awk -F'; ' '{print $3}' | awk '{print $2}' | sort -u
##output recorded for that command:
##"Metazoa_SRP"
##"Rn7sl1"
##same extraction restricted to the two names found above
##the original line ended in a bare ">" with no file name (a shell syntax error), so the result
##goes to stdout -- TODO: append "> <file>" once an output name is chosen
grep -E "Metazoa_SRP|Rn7sl1" Rattus_norvegicus.mRatBN7.2.108.gtf | awk -F'\t' '$3 == "gene" {print $9}' | awk -F'; ' '{print $3}' | awk '{print $2}'
##next use the mt and rRNA gene lists to filter reads for each RNA seq sample and set counts = 0.