softmax_gaussian_mixture.py (forked from wty9391/GMM)
import numpy as np
import sys
import random

from sklearn.preprocessing import normalize

from gaussian_mixture import Gaussian, GaussianMixture
from softmax import SoftmaxClassifier

epsilon = sys.float_info.epsilon


class SoftmaxGaussianMixture(GaussianMixture):
    def __init__(self, feature_dimension, label_dimension, mean=[1, 5], variance=[5, 10]):
        # self.multinoulli is redundant here, since the softmax supplies the
        # (input-dependent) mixing weights
        GaussianMixture.__init__(self, label_dimension, [], mean, variance)
        self.softmax = SoftmaxClassifier(feature_dimension=feature_dimension, label_dimension=label_dimension)
    def e_step(self, z, x):
        # z is an m x 1 matrix; each row is a market price
        # x is an m x feature_dimension matrix; each row is a bid request
        # returns the responsibilities (posterior probabilities) for each bid request
        (m, _) = np.shape(z)
        likelihood = np.zeros(shape=(m, self.num))
        # likelihood of each market price under each Gaussian component
        for i in range(self.num):
            gaussian = Gaussian(self.mean[i], np.sqrt(self.variance[i]))
            likelihood[:, i] = gaussian.pdf(z[:, 0])
        # element-wise product of mixing weights and likelihoods, normalized so each row sums to 1
        return normalize(np.multiply(self.softmax.predict(x), likelihood), norm='l1', axis=1)
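    # For reference, the responsibility computed above is the standard mixture
    # E-step, with the softmax output pi_k(x) acting as an input-dependent
    # mixing weight (a sketch of the formula, not additional behavior):
    #     r_ik = pi_k(x_i) * N(z_i | mu_k, sigma_k^2)
    #            / sum_j pi_j(x_i) * N(z_i | mu_j, sigma_j^2)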

    def m_step(self, z, x, rs, batch_size=512, epoch=10, eta=2e-2, labda=0.0, verbose=1):
        # z is an m x 1 matrix; each row is a market price
        # x is an m x feature_dimension matrix; each row is a bid request
        # rs is an m x label_dimension matrix; each row is the posterior for that bid request
        # returns nothing, but updates the model's parameters
        # update the softmax (mixing weights); "labda" is the regularization
        # strength, spelled to avoid the Python keyword lambda
        self.softmax.fit(rs, x, batch_size=batch_size, epoch=epoch, eta=eta, labda=labda, verbose=verbose)
        rs_sum = rs.sum(axis=0)
        for i in range(self.num):
            # update the mean first, then the variance with the updated mean
            self.mean[i] = (rs[:, i] * z[:, 0]).sum() / rs_sum[i]
            self.variance[i] = (rs[:, i] * (z[:, 0] - self.mean[i]) ** 2).sum() / rs_sum[i]
            if self.variance[i] < epsilon:
                # fix the singularity problem (a component's variance collapsing
                # to 0) by re-initializing the component
                print("Singularity problem encountered: mix index:{0:d}, mean:{1:.5f}, variance:{2:.5f}".format(
                    i, self.mean[i], self.variance[i]))
                self.variance[i] = random.randint(10, 100)
                self.mean[i] = random.randint(10, 100)
        if verbose == 1:
            print(self)
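    # The closed-form updates above are the weighted maximum-likelihood
    # estimates of the Gaussian mixture M-step, with the E-step
    # responsibilities r_ik as weights:
    #     mu_k      = sum_i r_ik * z_i / sum_i r_ik
    #     sigma_k^2 = sum_i r_ik * (z_i - mu_k)^2 / sum_i r_ik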

    def fit(self, z, x, sample_rate=1.0, epoch=10, softmax_epoch=10, batch_size=512, eta=2e-2, labda=0.0, verbose=1):
        # z is an m x 1 matrix; each row is a market price
        # x is an m x feature_dimension matrix; each row is a bid request
        (m, _) = np.shape(z)
        print("Start to fit, sample_rate:{0:.2f}, epoch:{1:d}, softmax_epoch:{2:d}, "
              "batch_size:{3:d}, eta:{4:.4f}, labda:{5:.4f}".format(
                  sample_rate, epoch, softmax_epoch, batch_size, eta, labda
              ))
        for i in range(epoch):
            # sample a random subset of records for this EM epoch
            mask = np.random.choice([False, True], m, p=[1 - sample_rate, sample_rate])
            if verbose == 1:
                print("============== E-M epoch: {} ==============".format(i))
                print("{0:d} records have been sampled".format(z[mask, :].shape[0]))
            # one EM iteration; the softmax learning rate decays as eta / sqrt(i + 1)
            self.m_step(z[mask, :], x[mask, :], self.e_step(z[mask, :], x[mask, :]),
                        batch_size=batch_size, epoch=softmax_epoch, eta=eta / np.sqrt(i + 1),
                        labda=labda, verbose=verbose)
            # heuristic: shrink the variances between EM epochs
            if i < epoch - 1:
                self.variance = [v / 5 for v in self.variance]

    def pdf_overall(self, z, x):
        # z is an n x 1 matrix; each row is a market price
        # x is an m x feature_dimension matrix of bid requests to be marginalized out
        # returns an n x 1 matrix; each row is the marginal probability of the corresponding z
        (m, _) = np.shape(x)
        # average the softmax outputs over all x to obtain marginal mixing weights
        self.multinoulli = (self.softmax.predict(x).sum(axis=0) / m).tolist()
        print(self)
        return super().pdf(z)
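    # In other words, pdf_overall approximates p(z) = (1/m) * sum_i p(z | x_i)
    # by averaging the mixing weights rather than the per-x densities; the two
    # are equivalent because p(z | x) is linear in the mixing weights.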

    def pdf(self, z, x):
        # z is an n x 1 matrix; each row is a market price
        # x is an m x feature_dimension matrix; each row is a bid request
        # returns an m x n matrix; entry (i, j) is the density of price z_j for bid request x_i
        (n, _) = np.shape(z)
        (m, _) = np.shape(x)
        gaussian_pdf = np.zeros(shape=(m, n))
        multinoulli = self.softmax.predict(x)  # m x label_dimension
        for i in range(m):
            self.multinoulli = multinoulli[i, :].tolist()
            gaussian_pdf[i, :] = super().pdf(z[:, 0])
        return gaussian_pdf

    def cdf_overall(self, z, x):
        # z is an n x 1 matrix; each row is a market price
        # x is an m x feature_dimension matrix of bid requests to be marginalized out
        # returns an n x 1 matrix; each row is the marginal cumulative probability of the corresponding z
        (m, _) = np.shape(x)
        self.multinoulli = (self.softmax.predict(x).sum(axis=0) / m).tolist()
        print(self)
        return super().cdf(z)

    def cdf(self, z, x):
        # z is an n x 1 matrix; each row is a market price
        # x is an m x feature_dimension matrix; each row is a bid request
        # returns an m x n matrix; entry (i, j) is the cumulative probability of price z_j for bid request x_i
        (n, _) = np.shape(z)
        (m, _) = np.shape(x)
        gaussian_cdf = np.zeros(shape=(m, n))
        multinoulli = self.softmax.predict(x)
        for i in range(m):
            self.multinoulli = multinoulli[i, :].tolist()
            gaussian_cdf[i, :] = super().cdf(z[:, 0])
        return gaussian_cdf

    def predict_z(self, x, min_z=1, max_z=300):
        # x is an m x feature_dimension matrix; each row is a bid request
        # min_z and max_z are the lower and upper bounds of the integration grid
        # returns an m x 1 matrix; each row is the predicted market price
        zs = np.asmatrix(np.arange(min_z, max_z + 1, 1)).transpose()
        return self.pdf(zs, x) @ zs
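    # predict_z approximates the expectation E[z | x] = integral of z * p(z | x) dz
    # with a unit-step sum over the integer price grid:
    #     E[z | x] ~ sum_{z=min_z}^{max_z} z * p(z | x)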

    def ANLP(self, z, x):
        # z is an m x 1 matrix; each row is a market price
        # x is an m x feature_dimension matrix; each row is a bid request
        # returns a scalar: the average negative log probability (ANLP)
        (m, _) = np.shape(x)
        nlp = np.zeros(shape=(m, 1))
        multinoulli = self.softmax.predict(x)  # m x label_dimension
        for i in range(m):
            self.multinoulli = multinoulli[i, :].tolist()
            # epsilon guards against log(0) when a price has vanishing density
            nlp[i, 0] = -np.log(super().pdf([z[i, 0]]) + epsilon)
        return np.sum(nlp) / m
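

# A minimal usage sketch on synthetic data (hypothetical shapes and
# hyperparameters; assumes gaussian_mixture.py and softmax.py from this
# repository are importable and accept dense 2-D numpy arrays as below):
if __name__ == "__main__":
    m, d, k = 1000, 16, 2
    x = np.random.rand(m, d)                                   # m bid requests with d features
    z = np.random.randint(1, 300, size=(m, 1)).astype(float)   # observed market prices, m x 1
    model = SoftmaxGaussianMixture(feature_dimension=d, label_dimension=k)
    model.fit(z, x, epoch=5, softmax_epoch=5, batch_size=256, verbose=0)
    z_hat = model.predict_z(x)                                 # m x 1 expected market prices
    print("ANLP on training data:", model.ANLP(z, x))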