-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathextractinformativesites.m
71 lines (60 loc) · 1.84 KB
/
extractinformativesites.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
function [aln2,sites]=extractinformativesites(aln,variants)
%EXTRACTINFORMATIVESITES - Extract informative sites
%Sites returns the indices of columns with at least 2 bases appearing at least
%twice each. These are the informative sites of the method of maximum parsimony.
%
% Syntax: [aln2]=extractinformativesites(aln)
%
% Inputs:
% aln - Alignment structure
%
% Outputs:
% aln2 - New alignment including informative sites only
%
% See also: EXTRACTSEGREGATINGSITES
% Molecular Biology and Evolution Toolbox (MBEToolbox)
% Author: James Cai
% Email: jcai@tamu.edu
% Website: http://bioinformatics.org/mbetoolbox/
%
% $LastChangedDate: 2013-01-05 12:04:29 -0600 (Sat, 05 Jan 2013) $
% $LastChangedRevision: 327 $
% $LastChangedBy: jcai $
if (nargin<2), variants=2; end
if (variants<2), variants=2; end
%if ~(isvalidaln(aln,'CODING')|isvalidaln(aln,'NONCODING'))
% error ('ERROR: Not nucleotide seq')
%end
% if (hasgap(aln))
% error ('ERROR: Sequences cannot contain gaps. Gaps can be removed by using REMOVEGAPS.')
% end
seqarray=aln.seq;
[m,n]=size(seqarray);
if m<4
disp('Too few sequences.')
end
aln2=aln;
%aln2.seqtype = aln.seqtype;
%aln2.geneticcode = 0;
%aln2.seqnames = aln.seqnames;
% informative.m
%
% usage: sites=informative(seqarray)
%
% Each row of seqarray should be a sequence in A,G,C, and T,
% with one row per taxa;
% sites returns the indices of columns with at least 2 bases
% appearing at least twice each. These are the informative
% sites of the method of maximum parsimony.
%
% 8/2/03
%if variants=2, two variants;
%if variants=3, three variants;
%if variants=4, four variants;
variants=variants-1;
As=( (sum(seqarray==1)) > variants ); % A
Gs=( (sum(seqarray==3)) > variants ); % G
Cs=( (sum(seqarray==2)) > variants ); % C
Ts=( (sum(seqarray==4)) > variants ); % T
sites=find( (As+Gs+Cs+Ts) > 1 );
aln2.seq=aln.seq(:,sites);