Skip to content

Commit

Permalink
Update
Browse files Browse the repository at this point in the history
  • Loading branch information
Rendrick27 committed Apr 23, 2024
1 parent daa01b0 commit 9834a79
Show file tree
Hide file tree
Showing 12 changed files with 302 additions and 0 deletions.
Binary file added ASCN/18S_RNA.fasta
Binary file not shown.
File renamed without changes.
Binary file added ASCN/28S_rRNA.fasta
Binary file not shown.
File renamed without changes.
74 changes: 74 additions & 0 deletions ASCN/ASCN_Download.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
from Bio import Entrez
import sys


def fetch_sequences(database, accession_numbers):
    """
    Fetch sequences from an NCBI database and write them to stdout as FASTA.

    All accession numbers are sent in a single ``Entrez.efetch`` request
    (``rettype="fasta"``, ``retmode="text"``) and the response text is
    copied to standard output unchanged.

    Args:
        database (str): Database name to fetch sequences from.
        accession_numbers (list): List of accession numbers.

    Returns:
        None

    Raises:
        SystemExit: With status 1 if the request or the read fails.
    """
    # NOTE(review): placeholder contact address -- presumably this should be
    # a real e-mail before heavy use against NCBI; confirm.
    Entrez.email = "fakeemail@exemplo.com"
    Entrez.tool = "fetch_sequences.py"

    # Join the accession numbers into a comma-separated string
    accession_list = ",".join(accession_numbers)

    try:
        fetch_results = Entrez.efetch(
            db=database,
            id=accession_list,
            rettype="fasta",
            retmode="text",
        )
        try:
            sys.stdout.write(fetch_results.read())
        finally:
            # Release the handle even when reading or writing fails.
            fetch_results.close()
    except Exception as e:
        # stdout carries the FASTA data (callers redirect it to a file), so
        # diagnostics must go to stderr to keep that output clean.
        print(f"Error fetching sequences: {e}", file=sys.stderr)
        sys.exit(1)


def read_accession_numbers(file_path):
    """
    Read accession numbers from a text file, one per line.

    Leading/trailing whitespace is stripped from every line and lines that
    are empty after stripping are dropped. Every remaining line is returned
    verbatim: title or header lines are NOT filtered out.

    Args:
        file_path (str): Path to the file containing accession numbers.

    Returns:
        list: List of accession numbers, in file order.

    Raises:
        SystemExit: With status 1 if the file does not exist.
    """
    try:
        with open(file_path) as file:
            stripped_lines = (line.strip() for line in file)
            return [entry for entry in stripped_lines if entry]
    except FileNotFoundError:
        # Report on stderr: stdout is reserved for the FASTA output and is
        # typically redirected to a file, where this message would be lost
        # (and would corrupt the result).
        print(f"Error: File '{file_path}' not found.", file=sys.stderr)
        sys.exit(1)


def main():
    """
    Command-line entry point.

    Expects exactly two arguments: the Entrez database name and the path to
    a file of accession numbers. Reads the accession numbers and writes the
    fetched FASTA records to stdout. All diagnostics go to stderr so that
    stdout can be redirected straight into a FASTA file.

    Raises:
        SystemExit: With status 1 on wrong usage or when the accession file
            yields no accession numbers.
    """
    if len(sys.argv) != 3:
        # Show the name the script was actually invoked with.
        if sys.argv and sys.argv[0]:
            script_name = sys.argv[0]
        else:
            script_name = "ASCN_Download.py"
        print(
            f"Usage: python {script_name} [database] "
            "[file_path_to_accession_numbers]",
            file=sys.stderr,
        )
        sys.exit(1)

    database, file_path = sys.argv[1], sys.argv[2]

    accession_numbers = read_accession_numbers(file_path)

    if not accession_numbers:
        print("Error: No accession numbers found in the file.", file=sys.stderr)
        sys.exit(1)

    fetch_sequences(database, accession_numbers)


# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()
Binary file added ASCN/COI.fasta
Binary file not shown.
52 changes: 52 additions & 0 deletions ASCN/COI.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
COI
OP394113

OP394114
MW593931
MK578905
FJ435802
FJ435803
ON005160
MT023412
MT023413
MT023414

MH675999
MH676000
MH676001
MH676002
MH676003
MH676004
MH676005
MH676006
MH676008

MH676009
MH676010
MH676014
MH676015
MH676016
MH676017
MK041020
MH675998
MK041019
MH676007
MH676013
MK041015
MN097837
MH676011
MT260372
MT260373
MF568534
LC637242
MK040994
OK662995
MK040996
OK662996
MH676018
MN888322
MN888328
MN888329
MN888330
KX810042
MN444827
Binary file added ASCN/ITS–2.fasta
Binary file not shown.
52 changes: 52 additions & 0 deletions ASCN/ITS–2.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
ITS–2



MW588022
MK584657



MT024000
MT024001
MT024001
OK663216
MH666081
MH666082


MH666083

MH666084

MH666086
MH666087
MH666088
MH666089
MH666093

MH666094
MH666095

MH666080

MH666085
MH666092

MN073464
MH666090


MF568535
LC649794

OK663213

OK663214
MH666096
MN888331
MN888348
MN888349
MN888350
KX810046
MN443038
45 changes: 45 additions & 0 deletions Paper Stuff/ASCN_Numbers/18S_rRNA.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
18S rRNA
OP394210
OP394211
OP394212
MW588029
MK584659
FJ435746
FJ435748
ON005189
MT023998
MT023999
MT023999
OK663227
MH664932
MH664934
MH664935
MH664936
MH664938
MH664939
MH664940
MH664944
MH664945
MH664933
MK041032
MH664931
MK041031
MH664937
MH664943
MK041030
MN073468
MH664941
MT261913
MF568532
LC637243
MK041023
OK663224
MK041024
OK663225
MH664946
MN888371
MN888376
MN888377
MN888378
KX810045
MN443040
52 changes: 52 additions & 0 deletions Paper Stuff/ASCN_Numbers/28S_rRNA.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
28S rRNA

OP394209

MW588034
MK584658
FJ435760
FJ435761
ON005195
MT024041
MT024042
MT024043
OK663238
MH664949
MH664952


MH664953

MH664954

MH664956

MH664957
MH664958
MH664961

MH664962
MH664951

MH664948

MH664955
MH664960

MN073465
MH664950
MT261904

MF568533
LC649795

OK663236

OK663236
MH664963
MN888357
MN888361
MN888362
MN888363
KX810049
MN443035
27 changes: 27 additions & 0 deletions Snakefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import glob
import os

import Bio

# Dataset: one sample per FASTA file in dataset/, named after the file
# without its extension. Sorted so the target list does not depend on the
# arbitrary order in which glob returns files.
sample_names = sorted(
    os.path.splitext(os.path.basename(file))[0]
    for file in glob.glob("dataset/*.fasta")
)

# Define rule to generate all desired outputs
# Default target: for every (aligner, sample) pair, request the tree SVG
# under results/ and the log file under iqtree/.
rule all:
    input:
        expand(
            "results/{aligner}/{sample}/tree/{sample}Tree.svg",
            aligner=["mafft", "clustal_omega", "Muscle"],
            sample=sample_names,
        ),
        expand(
            "iqtree/{sample}/{aligner}/{aligner}.log",
            aligner=["mafft", "clustal_omega", "Muscle"],
            sample=sample_names,
        )

#Alignment rules
# Download the sequences listed in ASCN/{sample}.txt from NCBI as FASTA.
# NOTE(review): the output path says "Aligned" and sits under results/mafft,
# but this rule only downloads raw sequences -- confirm the intended
# destination.
rule ASCN_Download:
    input:
        ASCN="ASCN/{sample}.txt"
    output:
        fasta="results/mafft/{sample}/{sample}fileAligned.fasta"
    params:
        # Entrez database name; must be a string, not a bare identifier.
        library="nuccore"
    shell:
        # Pass the actual accession file and write to the declared output;
        # :q shell-quotes the substituted values.
        """
        python3 ASCN/ASCN_Download.py {params.library:q} {input.ASCN:q} > {output.fasta:q}
        """

0 comments on commit 9834a79

Please sign in to comment.