-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
daa01b0
commit 9834a79
Showing
12 changed files
with
302 additions
and
0 deletions.
There are no files selected for viewing
Binary file not shown.
File renamed without changes.
Binary file not shown.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
from Bio import Entrez | ||
import sys | ||
|
||
|
||
def fetch_sequences(database, accession_numbers):
    """
    Fetch sequences from an NCBI database and write them to stdout as FASTA.

    Args:
        database (str): Database name to fetch sequences from (passed to
            ``Entrez.efetch`` as ``db``).
        accession_numbers (list): List of accession numbers; they are joined
            into a single comma-separated ``id`` value for one request.

    Returns:
        None. The FASTA text returned by the service is written to
        ``sys.stdout``.

    Raises:
        SystemExit: With exit status 1 if the request, the read, or the write
            fails. The error message is written to ``sys.stderr``.
    """
    # NOTE(review): this looks like a placeholder address; confirm whether a
    # real contact email should be configured here.
    Entrez.email = "fakeemail@exemplo.com"
    Entrez.tool = "fetch_sequences.py"

    # Join the accession numbers into a comma-separated string
    accession_list = ",".join(accession_numbers)

    try:
        # Fetch the sequences
        fetch_results = Entrez.efetch(
            db=database, id=accession_list, rettype="fasta", retmode="text"
        )
        try:
            sys.stdout.write(fetch_results.read())
        finally:
            # Always release the handle, even if read() or write() raises.
            fetch_results.close()
    except Exception as e:
        # Report on stderr: stdout carries the FASTA payload and is typically
        # redirected to a file, so an error message there would corrupt it.
        print(f"Error fetching sequences: {e}", file=sys.stderr)
        sys.exit(1)
|
||
|
||
def read_accession_numbers(file_path):
    """
    Read accession numbers from a file, one per line.

    Every non-blank line is returned with surrounding whitespace stripped.
    No other filtering is done, so header or comment lines in the file are
    returned as if they were accession numbers.

    Args:
        file_path (str): Path to the file containing accession numbers.

    Returns:
        list: List of accession numbers, in file order.

    Raises:
        SystemExit: With exit status 1 if the file does not exist or cannot
            be opened/read. The error message is written to ``sys.stderr``.
    """
    try:
        with open(file_path) as file:
            # Strip each line once and keep only the non-empty results
            stripped_lines = (line.strip() for line in file)
            return [accession for accession in stripped_lines if accession]
    except FileNotFoundError:
        # Errors go to stderr so they never end up in redirected stdout output.
        print(f"Error: File '{file_path}' not found.", file=sys.stderr)
        sys.exit(1)
    except OSError as e:
        # Covers directories, permission problems and other I/O failures that
        # would otherwise surface as an unhandled traceback.
        print(f"Error: Could not read file '{file_path}': {e}", file=sys.stderr)
        sys.exit(1)
|
||
|
||
def main():
    """
    Command-line entry point.

    Expects exactly two arguments in ``sys.argv``: the database name and the
    path to a file of accession numbers. Reads the accession numbers and
    passes them to ``fetch_sequences``.

    Raises:
        SystemExit: With exit status 1 if the argument count is wrong or the
            file yields no accession numbers. Messages go to ``sys.stderr``
            because stdout is reserved for the fetched sequences.
    """
    if len(sys.argv) != 3:
        print("Usage: python fetch_sequences.py [database] "
              "[file_path_to_accession_numbers]", file=sys.stderr)
        sys.exit(1)

    database = sys.argv[1]
    file_path = sys.argv[2]

    accession_numbers = read_accession_numbers(file_path)

    if not accession_numbers:
        print("Error: No accession numbers found in the file.", file=sys.stderr)
        sys.exit(1)

    fetch_sequences(database, accession_numbers)
|
||
|
||
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
COI | ||
OP394113 | ||
|
||
OP394114 | ||
MW593931 | ||
MK578905 | ||
FJ435802 | ||
FJ435803 | ||
ON005160 | ||
MT023412 | ||
MT023413 | ||
MT023414 | ||
|
||
MH675999 | ||
MH676000 | ||
MH676001 | ||
MH676002 | ||
MH676003 | ||
MH676004 | ||
MH676005 | ||
MH676006 | ||
MH676008 | ||
|
||
MH676009 | ||
MH676010 | ||
MH676014 | ||
MH676015 | ||
MH676016 | ||
MH676017 | ||
MK041020 | ||
MH675998 | ||
MK041019 | ||
MH676007 | ||
MH676013 | ||
MK041015 | ||
MN097837 | ||
MH676011 | ||
MT260372 | ||
MT260373 | ||
MF568534 | ||
LC637242 | ||
MK040994 | ||
OK662995 | ||
MK040996 | ||
OK662996 | ||
MH676018 | ||
MN888322 | ||
MN888328 | ||
MN888329 | ||
MN888330 | ||
KX810042 | ||
MN444827 |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
ITS–2 | ||
|
||
|
||
|
||
MW588022 | ||
MK584657 | ||
|
||
|
||
|
||
MT024000 | ||
MT024001 | ||
MT024001 | ||
OK663216 | ||
MH666081 | ||
MH666082 | ||
|
||
|
||
MH666083 | ||
|
||
MH666084 | ||
|
||
MH666086 | ||
MH666087 | ||
MH666088 | ||
MH666089 | ||
MH666093 | ||
|
||
MH666094 | ||
MH666095 | ||
|
||
MH666080 | ||
|
||
MH666085 | ||
MH666092 | ||
|
||
MN073464 | ||
MH666090 | ||
|
||
|
||
MF568535 | ||
LC649794 | ||
|
||
OK663213 | ||
|
||
OK663214 | ||
MH666096 | ||
MN888331 | ||
MN888348 | ||
MN888349 | ||
MN888350 | ||
KX810046 | ||
MN443038 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
18S rRNA | ||
OP394210 | ||
OP394211 | ||
OP394212 | ||
MW588029 | ||
MK584659 | ||
FJ435746 | ||
FJ435748 | ||
ON005189 | ||
MT023998 | ||
MT023999 | ||
MT023999 | ||
OK663227 | ||
MH664932 | ||
MH664934 | ||
MH664935 | ||
MH664936 | ||
MH664938 | ||
MH664939 | ||
MH664940 | ||
MH664944 | ||
MH664945 | ||
MH664933 | ||
MK041032 | ||
MH664931 | ||
MK041031 | ||
MH664937 | ||
MH664943 | ||
MK041030 | ||
MN073468 | ||
MH664941 | ||
MT261913 | ||
MF568532 | ||
LC637243 | ||
MK041023 | ||
OK663224 | ||
MK041024 | ||
OK663225 | ||
MH664946 | ||
MN888371 | ||
MN888376 | ||
MN888377 | ||
MN888378 | ||
KX810045 | ||
MN443040 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
28S rRNA | ||
|
||
OP394209 | ||
|
||
MW588034 | ||
MK584658 | ||
FJ435760 | ||
FJ435761 | ||
ON005195 | ||
MT024041 | ||
MT024042 | ||
MT024043 | ||
OK663238 | ||
MH664949 | ||
MH664952 | ||
|
||
|
||
MH664953 | ||
|
||
MH664954 | ||
|
||
MH664956 | ||
|
||
MH664957 | ||
MH664958 | ||
MH664961 | ||
|
||
MH664962 | ||
MH664951 | ||
|
||
MH664948 | ||
|
||
MH664955 | ||
MH664960 | ||
|
||
MN073465 | ||
MH664950 | ||
MT261904 | ||
|
||
MF568533 | ||
LC649795 | ||
|
||
OK663236 | ||
|
||
OK663236 | ||
MH664963 | ||
MN888357 | ||
MN888361 | ||
MN888362 | ||
MN888363 | ||
KX810049 | ||
MN443035 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
import glob
import os

import Bio
|
||
# Dataset: one sample per FASTA file in dataset/, named by the file's base
# name with the extension removed. Sorted because glob.glob() returns matches
# in arbitrary order, which would otherwise make the sample order vary.
sample_names = sorted(
    os.path.splitext(os.path.basename(fasta_path))[0]
    for fasta_path in glob.glob("dataset/*.fasta")
)
|
||
# Define rule to generate all desired outputs | ||
# Aggregate target rule: it has no outputs or commands of its own and only
# lists, for every entry in sample_names, the files the workflow should build.
rule all:
    input:
        # Tree images, one per sample under each of the three results/ subdirectories
        expand("results/mafft/{sample}/tree/{sample}Tree.svg", sample=sample_names),
        expand("results/clustal_omega/{sample}/tree/{sample}Tree.svg", sample=sample_names),
        expand("results/Muscle/{sample}/tree/{sample}Tree.svg", sample=sample_names),
        # Log files under iqtree/, one per sample for each of the same three names
        expand("iqtree/{sample}/mafft/mafft.log", sample=sample_names),
        expand("iqtree/{sample}/clustal_omega/clustal_omega.log", sample=sample_names),
        expand("iqtree/{sample}/Muscle/Muscle.log", sample=sample_names)
|
||
#Alignment rules | ||
# Download the sequences listed in ASCN/{sample}.txt by running
# ASCN_Download.py, which prints the fetched FASTA text on stdout.
rule ASCN_Download:
    input:
        ASCN="ASCN/{sample}.txt"
    output:
        # NOTE(review): this path looks like it belongs to an alignment rule
        # rather than a raw download; confirm the intended output location.
        fasta="results/mafft/{sample}/{sample}fileAligned.fasta"
    params:
        # Database name handed to the script; it must be a string literal,
        # since a bare name here would be evaluated as an undefined variable.
        library="nuccore"
    shell:
        # Use the rule's own placeholders so the script receives the real
        # accession file and the redirect creates the declared output file.
        """
        python3 ASCN_Download.py {params.library} {input.ASCN} > {output.fasta}
        """