-
Notifications
You must be signed in to change notification settings - Fork 130
/
Copy pathfusionByLda.sh
executable file
·131 lines (97 loc) · 4.47 KB
/
fusionByLda.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
#!/bin/bash
# Copyright xmuspeech (Author:Snowdar 2018/09/28)
# Option defaults. They are defined before subtools/parse_options.sh is
# sourced; presumably that script lets callers override each one with
# "--<name> <value>" on the command line -- TODO confirm against parse_options.sh.
#
# train_normalize: --normalize value given to ivector-normalize-length when
# the LDA matrix is trained.
train_normalize=true
form_normalize=false # --normalize value given to ivector-normalize-length when the matrix is applied; a tunable setting
keep_mat=true # if false, the trained matrix file is removed at the end (only when eval=false)
# computeCavg: if true, subtools/computeCavg.py is also run on the fused scores.
computeCavg=false
printlog=true # if false, this script's progress messages are suppressed; to hide the C++ tools' logs as well, append 2>/dev/null when calling this script
equal_fusion=true # if true, an equal-weight fusion is scored too and its EER is printed alongside
eval=false # if true, apply an existing matrix (see mat) instead of training one
# mat: path of the transform matrix; must be provided when eval=true.
mat=
. subtools/parse_options.sh
# NOTE(review): path.sh presumably puts the Kaldi binaries used below on PATH -- confirm.
. subtools/path.sh
# From here on, stop at the first command that fails.
set -e

# The script takes exactly three positional arguments.
if (( $# != 3 )); then
  echo "[exit] Num of parameters is not equal to 3"
  echo "usage:$0 <train/test-trials> <train/test-score-scp|train/test-score-list> <out-fusion-score>"
  exit 1
fi

trials=$1
scorescp=$2
outscore=$3

# The trials file and the score list must both be regular, non-empty files.
for input in $trials $scorescp; do
  if ! [[ -f $input && -s $input ]]; then
    echo "No such file $input or the file $input is empty"
    exit 1
  fi
done

# Eval mode needs a ready-made matrix and never runs the equal-weight fusion.
if [[ "$eval" == "true" ]]; then
  if [[ -z "$mat" ]]; then
    echo "[exit] --eval mod is used but you have not provided a transform-mat."
    exit 1
  fi
  equal_fusion=false
  echo "Now,it's test mod..."
fi
#######################################
# Print the file name of a path without its directory and without its last
# extension.
# The directory is removed BEFORE the extension, so a dot inside a directory
# name ("./list", "exp/v1.0/list") can never be mistaken for the extension.
# Arguments: $1 - path of a file
# Outputs:   the stem on stdout
#######################################
_scp_stem() {
  local base=${1##*/}
  local stem=${base%.*}
  # A name such as ".hidden" would strip down to nothing; keep it whole then.
  printf '%s\n' "${stem:-$base}"
}

# dir:  directory of the score list.
# name: stem of the score list; used as the base name of files derived from it.
dir=$(dirname "$scorescp")
name=$(_scp_stem "$scorescp")
# Collect the score files: the last column of every line of $scorescp that
# does not start with '#'. Blank lines are skipped (NF is 0 for them) so they
# cannot inflate $num, which is used further down to validate score vectors.
list=$(sed '/^#/d' "$scorescp" | awk 'NF{print $NF}')

# Without this check an empty list would still be counted as one score.
if [[ -z "$list" ]]; then
  echo "[exit] No score file is listed in $scorescp"
  exit 1
fi

# Each score file may be listed only once.
duplicate=$(echo "$list" | sort | uniq -d)
if [[ -n "$duplicate" ]]; then
  # $duplicate stays unquoted on purpose: all names are printed on one line.
  echo "[exit] Exist duplicated scores "$duplicate
  exit 1
fi

# $list is split on whitespace here, so score paths must not contain blanks.
for x in $list; do
  if [[ ! -f $x || ! -s $x ]]; then
    echo "No such file $x or the file $x is empty"
    exit 1
  fi
done

# Number of score files, i.e. the number of values per score vector.
num=$(echo "$list" | wc -l)

if [[ "$printlog" == "true" ]]; then
  echo "Num of valid scores is $num"
  echo "Get score vector to be computed by lda matrix... "
fi
# Write one text vector per trial to $dir/$name.ark:
#   <col1>-<col2> [ s1 s2 ... sN ]
# The trials file is passed twice: the first pass opens a vector for every
# pair, each score file then appends its third column, and the second pass
# over the trials closes and prints the vectors in trial order.
# The trailing filter keeps only lines with exactly $num scores
# (key + "[" + num values + "]" = num+3 fields).
awk '
  { pair = $1 "-" $2 }
  NR == FNR                  { vec[pair] = pair " ["; next }
  FILENAME == ARGV[ARGC - 1] { print vec[pair] " ]"; next }
                             { vec[pair] = vec[pair] " " $3 }
' $trials $list $trials |
  awk -v num=$num 'NF == num + 3' >$dir/$name.ark

# Every trial must have produced a complete vector.
trialsnum=$(wc -l $trials | awk '{print $1}')
arknum=$(wc -l $dir/$name.ark | awk '{print $1}')
if [ $trialsnum != $arknum ]; then
  echo "[exit] $trialsnum != $arknum: Num of trials is not equal to num of score-pairs. Maybe you miss some scores in foo score file and you should provide the intersection-trials with enough score-pairs which should appear in every score file."
  exit 1
fi
#######################################
# Rewrite a one-row text matrix into the single-line form "[ v1 ... vK 0 ]".
# Line 2 of the input is taken to be the matrix row followed by a closing "]";
# every other line only contributes an opening "[".
#   * The last numeric column is forced to 0.
#   * If at least half of the remaining columns are <= 0, every negative
#     entry is replaced by its absolute value; otherwise the row is kept.
# Arguments: $1 - path of the text matrix
# Outputs:   the rewritten matrix on stdout
#######################################
_normalize_lda_mat() {
  awk '
    NR != 2 { printf "["; next }
    {
      $(NF - 1) = 0
      nonpos = 0
      for (i = 1; i < NF - 1; i++) {
        if ($i <= 0) nonpos++
      }
      flip = (nonpos >= (NF - 2) / 2.0)
      for (i = 1; i < NF; i++) {
        if (flip && $i < 0) $i = -$i
        # Values are printed through %s so they are never used as a format.
        printf " %s", $i
      }
      print " ]"
    }
  ' "$1"
}

# Training mode: learn the fusion weights from the labelled trials.
if [[ "$eval" == "false" ]]; then
  if [[ "$printlog" == "true" ]]; then
    echo "Get vector lable for lda matrix training..."
  fi
  # One "<col1>-<col2> <col3>" line per trial; the key matches the ark keys.
  awk '{print $1"-"$2,$3}' "$trials" > "$dir/$name.lable"

  if [[ "$printlog" == "true" ]]; then
    echo "Train lda matrix..."
  fi
  ivector-compute-lda --binary=false --dim=1 --total-covariance-factor=0.1 \
    "ark:ivector-normalize-length --normalize=$train_normalize ark:$dir/$name.ark ark:- |" \
    "ark:$dir/$name.lable" \
    "$dir/$name.mat"

  if [[ "$printlog" == "true" ]]; then
    echo "Check LDA matrix to make sure that mean of target sets is always greater than nontarget sets..."
  fi
  # Rewrite through a temporary file, then replace the matrix in one step.
  _normalize_lda_mat "$dir/$name.mat" > "$dir/$name.mat.tmp"
  mv -f "$dir/$name.mat.tmp" "$dir/$name.mat"
  mat=$dir/$name.mat
fi
if [ "$printlog" == "true" ]; then
  echo "Do fusion..."
fi
# Apply the matrix to every score vector; the result is written in text form.
ivector-transform \
  $mat \
  "ark:ivector-normalize-length --normalize=$form_normalize ark:$dir/$name.ark ark:- |" \
  ark,t:$outscore.tmp

if [ "$printlog" == "true" ]; then
  echo "Write fusion score to $outscore..."
fi
# Output line: first two trial columns, a tab, then the third field of the
# transformed vector (the fused score).
paste <(awk '{print $1,$2}' $trials) <(awk '{print $3}' $outscore.tmp) > $outscore
# Optional baseline: fuse again with every weight set to 1.
if [ "$equal_fusion" == "true" ]; then
  if [ "$printlog" == "true" ]; then
    echo "Do extra equal fusion..."
  fi
  # Same width as $mat: NF-3 ones followed by a final 0 column.
  awk '{
    row = "["
    for (i = 1; i < NF - 2; i++) row = row " 1"
    print row " 0 ]"
  }' $mat > $mat.equal

  ivector-transform \
    $mat.equal \
    "ark:ivector-normalize-length --normalize=$form_normalize ark:$dir/$name.ark ark:- |" \
    ark,t:$outscore.tmp

  paste <(awk '{print $1,$2}' $trials) <(awk '{print $3}' $outscore.tmp) > $outscore.equal
fi
# Remove the intermediate files, one at a time and in a fixed order.
for leftover in $dir/$name.ark $dir/$name.lable $outscore.tmp; do
  rm -f $leftover
done
# Final report.
#   Training mode: print the weights, the fusion EER and, optionally, the
#   equal-weight EER and Cavg.
#   Eval mode: only say where the fused scores were written.
if [[ "$eval" == "false" ]]; then
  echo "Fusion weights are as follows:"
  cat "$mat"
  # Only training mode reaches this point, so keep_mat alone decides.
  if [[ "$keep_mat" == "false" ]]; then
    rm -f "$mat"
  fi

  echo "Get fusion eer:"
  subtools/computeEER.sh "$trials" "$outscore" 2>/dev/null

  if [[ "$equal_fusion" == "true" ]]; then
    # The substitution is left unquoted on purpose: the EER output is
    # flattened onto the same line as the surrounding text.
    echo "( fuision with equal weight :" $(subtools/computeEER.sh "$trials" "$outscore.equal" 2>/dev/null) ")"
    rm -f "$mat.equal"
  fi

  if [[ "$computeCavg" == "true" ]]; then
    echo "Get fusion Cavg:"
    subtools/computeCavg.py -pairs "$trials" "$outscore"
  fi
else
  echo -e "\n[Fusion done] Your fusion score is $outscore."
fi

# This is the last command of the script, so its status is the script's exit
# status. An `if` with a false condition returns 0, whereas a
# `[ ... ] && echo` list would make a successful run exit with 1 whenever
# printlog is not "true".
if [[ "$printlog" == "true" ]]; then
  echo "All done."
fi