-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSequence.cpp
113 lines (106 loc) · 2.22 KB
/
Sequence.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#include"Sequence.h"
// The four base letters. The position of each letter in this table is the
// same number getIndex() returns for it (A=0, C=1, T=2, G=3); the fifth
// element is the string terminator.
char Base[5]="ACTG";
// Builds one table node.
//
// baseString : the string this node's children extend; Heads[i] becomes
//              baseString followed by the i-th letter of Base.
// com        : stored in `common` (callers in this file pass the length of
//              the string the node represents).
//
// The match counter starts at zero. The child pointers and the start-position
// array are assigned later by the code that creates the node, so they are
// nulled here rather than left indeterminate.
Sequence::Table::Table(string baseString,int com){
    common=com;
    cMatchs=0;
    cStarts=0;                          // no position array owned yet
    for(int i=0;i<4;i++){
        Heads[i]=baseString+Base[i];
        Sons[i]=0;                      // no child yet
    }
}
// Constructs a Sequence from the file named `filename`.
//
// Zeroes the per-base counters (Num, Start, Max), creates one root bucket per
// base under Ancestor with room for MAX start positions, opens the file and
// reads it through ReadFile().
//
// If the file cannot be opened a message is printed to standard output and
// the object is left with no sequence data; nothing is read from the dead
// stream.
Sequence::Sequence(string filename):last(0),Ancestor(new Table("",0)){
    for(int i=0;i<4;i++){
        Num[i]=0;
        Start[i]=0;
        Max[i]=0;
        // Root bucket i collects the positions of every occurrence of Base[i].
        Ancestor->Sons[i]=new Table(Ancestor->Heads[i],1);
        Ancestor->Sons[i]->cStarts=new int[MAX];
    }
    fin.open(filename.c_str());
    if(!fin.is_open()){
        cout<<"Fail to open file:"<<filename<<endl;
        return;     // nothing to read from a stream that failed to open
    }
    ReadFile();
}
// Reads `fin` to end of file, feeding every base letter to getX() and then
// appending it to `s` (getX relies on s.size() still being the position of
// the letter it is given).
//
// Only the letters A, C, T and G are accepted; line breaks and any other
// character are skipped, because getIndex() has no mapping for them.
//
// After the last letter, getX('p') is called once so that the runs still
// pending inside getX are committed to Max.
void Sequence::ReadFile(){
    // get() returns an int so that EOF can be told apart from every real
    // character; storing it in a char first breaks that comparison.
    int c;
    while((c=fin.get())!=EOF){
        const char p=static_cast<char>(c);
        if(p!='A'&&p!='C'&&p!='T'&&p!='G')
            continue;
        getX(p);
        s+=p;
    }
    getX('p');
}
// Maps a base letter to its slot number: 'A'->0, 'C'->1, 'T'->2, 'G'->3.
// Returns -1 for any other character, so callers can detect input that has
// no slot instead of receiving an indeterminate value.
int getIndex(char base){
    switch(base){
        case 'A':return 0;
        case 'C':return 1;
        case 'T':return 2;
        case 'G':return 3;
        default:return -1;
    }
}
// Consumes one base letter of the sequence, or the flush marker 'p'.
//
// For a base letter (expected to be called BEFORE the letter is appended to
// `s`, so that s.size() is the letter's position):
//   * increments Num for that base,
//   * records the position in the matching root bucket under Ancestor,
//   * extends the current run if the letter equals `last`, otherwise starts a
//     new run for that base. Before a new run overwrites the pending one of
//     the same base, the pending run is committed to Max/Start if it is
//     longer than the committed one.
//
// For 'p': commits every pending run that is longer than the committed one.
// Calling it repeatedly is harmless.
//
// Characters for which getIndex() yields no slot are ignored.
//
// NOTE(review): the root buckets' cStarts arrays are sized MAX by the Sequence
// constructor and the write below is not bounds-checked — confirm MAX covers
// the largest expected input.
void Sequence::getX(char base){
    // Pending (not yet committed) run per base: where it starts and how long
    // it is. These are function-local statics and therefore shared by every
    // Sequence object.
    static int tmpStart[4]={};
    static int tmpMax[4]={};

    // An object whose `s` is still empty has consumed nothing, so whatever is
    // pending was left behind by a different object: discard it.
    if(s.empty()){
        for(int i=0;i<4;i++){
            tmpStart[i]=0;
            tmpMax[i]=0;
        }
    }

    if(base=='p'){
        for(int i=0;i<4;i++){
            if(tmpMax[i]>Max[i]){
                Start[i]=tmpStart[i];   // keep Start in step with Max
                Max[i]=tmpMax[i];
            }
        }
        return;
    }

    const int index=getIndex(base);
    if(index<0||index>3)
        return;                         // not a base letter

    Num[index]++;
    Ancestor->Sons[index]->cStarts[Ancestor->Sons[index]->cMatchs++]=s.size();

    if(last==base){
        tmpMax[index]++;                // same letter again: run grows
        return;
    }

    // A different letter: a new run of `base` begins here.
    last=base;
    if(tmpMax[index]>Max[index]){
        Start[index]=tmpStart[index];
        Max[index]=tmpMax[index];
    }
    tmpMax[index]=1;
    tmpStart[index]=s.size();
}
// Returns the longest run of a single repeated base, as a string of that
// base repeated Max[i] times. When several bases share the greatest run
// length, the run with the smallest Start wins. Returns an empty string when
// every Max entry is zero.
//
// Only runs already committed to Max/Start are considered; a run still
// pending inside getX() is not visible until getX('p') has committed it.
string Sequence::longestConsecutive(){
    int bestIndex=0;
    int bestLen=0;
    long bestStart=0;
    for(int i=0;i<4;i++){
        const bool longer=Max[i]>bestLen;
        const bool earlierTie=Max[i]==bestLen&&Start[i]<bestStart;
        if(longer||earlierTie){
            bestIndex=i;
            bestLen=Max[i];
            bestStart=Start[i];         // remember it so later ties compare against it
        }
    }
    return string(Max[bestIndex],Base[bestIndex]);
}
// Refines one table node by one more character and recurses.
//
// `tmp` holds tmp->cMatchs start positions in tmp->cStarts at which the same
// string of length tmp->common occurs in `s`. Each occurrence is handed to
// the child matching the character that follows it. Every child that ends up
// with more than one occurrence is a repeated string one character longer:
// it replaces BestString if it is longer than the current one, and is then
// refined recursively. The recursion therefore goes one level deeper per
// character of a repeated string.
//
// On return all four children have been destroyed (their Sons slots are
// null), and tmp's own position array has been released: tmp->cStarts is null
// and tmp->cMatchs is 0, so running Filter on the same node again is harmless.
void Sequence::Filter(Sequence::Table* tmp){
    for(int i=0;i<4;i++){
        tmp->Sons[i]=new Table(tmp->Heads[i],tmp->common+1);
        // A child can receive at most all of the parent's occurrences.
        tmp->Sons[i]->cStarts=new int[tmp->cMatchs];
    }

    const int length=static_cast<int>(s.size());
    for(int i=0;i<tmp->cMatchs;i++){
        const int next=tmp->cStarts[i]+tmp->common;
        if(next>=length)
            continue;                   // occurrence ends at the end of s: nothing follows it
        const int mIndex=getIndex(s[next]);
        if(mIndex<0||mIndex>3)
            continue;                   // following character is not a base letter
        tmp->Sons[mIndex]->cStarts[tmp->Sons[mIndex]->cMatchs++]=tmp->cStarts[i];
    }

    // The parent's positions are fully distributed; release them before
    // recursing so they are not held for the whole depth of the recursion.
    delete[] tmp->cStarts;              // allocated with new[]
    tmp->cStarts=0;
    tmp->cMatchs=0;

    for(int i=0;i<4;i++){
        Table*& Stmp=tmp->Sons[i];
        if(Stmp->cMatchs>1){
            if(Stmp->common>static_cast<int>(BestString.size()))
                BestString=tmp->Heads[i];
            Filter(Stmp);
        }
        // Filter(Stmp) leaves cStarts null; a child that was not refined
        // still owns its array, so release it here in either case.
        delete[] Stmp->cStarts;
        Stmp->cStarts=0;
        delete Stmp;
        Stmp=0;                         // do not leave a dangling child pointer
    }
}
// Returns BestString after running Filter() on each of the four root buckets
// under Ancestor.
//
// Filter() releases the position array of the bucket it is given, so a bucket
// is processed only once: after the pass its cStarts is nulled and its
// cMatchs set to 0, and later calls skip it and return the BestString that
// was already computed.
//
// NOTE(review): BestString is only ever assigned inside Filter(), which
// records strings of at least two characters; a base that repeats only as a
// single character yields an empty result. Confirm that is the intended
// definition.
string Sequence::longestRepeated(){
    for(int i=0;i<4;i++){
        if(Ancestor->Sons[i]->cStarts==0)
            continue;                   // consumed by an earlier call
        Filter(Ancestor->Sons[i]);
        Ancestor->Sons[i]->cStarts=0;
        Ancestor->Sons[i]->cMatchs=0;
    }
    return BestString;
}