-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlist_to_seq.pl
executable file
·107 lines (90 loc) · 3.07 KB
/
list_to_seq.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#!/usr/bin/perl
#Edited by Jen 05/08/2015 to change sequence identifier to database,feature id and organism
use lib $ENV{SCRIPTS};
use ENV;
use DBI;
use Getopt::Std;
use Bio::SeqIO;
use Data::Dumper qw(Dumper);
&getopts('D:i:n:f:o:');
my $host = $ENV{DBSERVER} ? $ENV{DBSERVER} : 'localhost';
my $dbh = DBI->connect("dbi:mysql:host=$host;db=$opt_D", 'access', 'access');
my $listfile = $opt_f;
my $setid = $opt_i;
my $setname = $opt_n;
my @fids;
open my $LIST, $listfile;
while (my $line=<$LIST>) {
chomp $line;
push @fids, $line;
}
my $outfh;
if ($opt_o) {
$outfh = Bio::SeqIO->new(-file => ">$opt_o",
-format => 'fasta');
} else {
$outfh = Bio::SeqIO->new(-fh => \*STDOUT,
-format => 'fasta');
}
#my $setref = &get_seq_sets($dbh);
if ($setname && !$setid) { $setid = &set_name_to_id($dbh, $setname); }
if (! $setid) { die "Can't get setid\n"; }
my $protref = &get_features_by_feature_id($dbh, @fids);
my $seqids = &set_id_to_seq_ids($dbh, $setid);
our $SEQ = &get_sequence_by_seq_id($dbh, @$seqids);
foreach my $fid (sort {
$protref->{$a}->{'location'}->{'seq_id'} <=> $protref->{$b}->{'location'}->{'seq_id'} ||
$protref->{$a}->{'location'}->{'feat_min'} <=> $protref->{$b}->{'location'}->{'feat_min'}
}
keys %$protref) {
# my $acc = join("|", values %{$protref->{$fid}->{'accessions'}});
my $locus = $protref->{$fid}->{'accessions'}->{'locus_tag'};
my $db = $opt_D;
my $acc = "$db|$fid|$locus";
my ($seq_id, $min, $max, $strand) = ($protref->{$fid}->{'location'}->{$setid}->{'seq_id'},
$protref->{$fid}->{'location'}->{$setid}->{'feat_min'},
$protref->{$fid}->{'location'}->{$setid}->{'feat_max'},
$protref->{$fid}->{'location'}->{$setid}->{'strand'});
if (!($min && $max) || ($min >= $max)) { warn "Why $min/$max for $fid, $acc?\n";}
my $seq = &get_subseq($seq_id, $min, $max, $strand);
# if (length($protref->{$fid}->{'product'}) == 0 ||
# !defined($protref->{$fid}->{'product'})) {
# warn "No product string for feature $fid ($acc). Skipping...";
# next;
# }
my $setdesc = &get_set_desc($dbh, $setid);
my $desc = "[$setdesc]";
# while (my ($qual,$vref) = each %{$protref->{$fid}->{'annotation'}}) {
# my ($k) = sort {$a<=>$b} keys %$vref;
# $desc .= "$qual=$vref->{$k}->[0]->{value};";
#}
my $seqo = Bio::Seq->new(-display_id => $acc,
-desc => $desc,
-seq => $seq);
$outfh->write_seq($seqo);
}
sub get_set_desc {
my $dbh = shift;
my $setid = shift;
my $ssq = "SELECT description FROM sequence_sets WHERE set_id=$setid";
return $dbh->selectcol_arrayref($ssq)->[0];
}
sub get_mainroles {
my $dbh = shift;
my $mrq = "select * from egad.mainrole";
return $dbh->selectall_hashref($mrq, 'id');
}
sub get_subroles {
my $dbh = shift;
my $mrq = "select * from egad.subrole";
return $dbh->selectall_hashref($mrq, 'id');
}
sub get_subseq {
my ($seq_id, $min, $max, $strand) = @_;
my $subseq = $SEQ->{$seq_id}->trunc($min, $max);
if ($strand < 0 || $strand eq "-") {
return $subseq->revcom->seq;
} else {
return $subseq->seq;
}
}