Commit

check in the book & sourcecode
Gelei Chen committed Oct 30, 2017
0 parents commit 6169fa1
Showing 48 changed files with 19,805 additions and 0 deletions.
Binary file added aat-ebook-20170711.pdf
Binary file not shown.
26 changes: 26 additions & 0 deletions chapter-bayesian-statistics-binomial-proportion/beta_plot.py
@@ -0,0 +1,26 @@
# beta_plot.py

import numpy as np
from scipy.stats import beta
import matplotlib.pyplot as plt
import seaborn as sns


if __name__ == "__main__":
    sns.set_palette("deep", desat=.6)
    sns.set_context(rc={"figure.figsize": (8, 4)})
    x = np.linspace(0, 1, 100)
    # (0.5, 0.5) is the Jeffreys prior and (1, 1) the flat
    # uniform prior; the rest are typical unimodal shapes
    params = [
        (0.5, 0.5),
        (1, 1),
        (4, 3),
        (2, 5),
        (6, 6)
    ]
    for p in params:
        y = beta.pdf(x, p[0], p[1])
        plt.plot(x, y, label="$\\alpha=%s$, $\\beta=%s$" % p)
    plt.xlabel("$\\theta$, Fairness")
    plt.ylabel("Density")
    plt.legend(title="Parameters")
    plt.show()
@@ -0,0 +1,48 @@
# beta_binomial.py

import numpy as np
from scipy import stats
from matplotlib import pyplot as plt


if __name__ == "__main__":
    # Create a list of the number of coin tosses ("Bernoulli trials")
    number_of_trials = [0, 2, 10, 20, 50, 500]

    # Conduct 500 coin tosses and output into a list of 0s and 1s
    # where 0 represents a tail and 1 represents a head
    data = stats.bernoulli.rvs(0.5, size=number_of_trials[-1])

    # Discretise the x-axis into 100 separate plotting points
    x = np.linspace(0, 1, 100)

    # Loop over the number_of_trials list to continually add
    # more coin toss data. For each new set of data, we update
    # our (current) prior belief to be a new posterior. This is
    # carried out using what is known as the Beta-Binomial model.
    # For the time being, we won't worry about this too much.
    for i, N in enumerate(number_of_trials):
        # Accumulate the total number of heads for this
        # particular Bayesian update
        heads = data[:N].sum()

        # Create an axes subplot for each update
        # (integer division so that subplot receives an int)
        ax = plt.subplot(len(number_of_trials) // 2, 2, i + 1)
        ax.set_title("%s trials, %s heads" % (N, heads))

        # Add labels to both axes and hide labels on y-axis
        plt.xlabel("$P(H)$, Probability of Heads")
        plt.ylabel("Density")
        if i == 0:
            plt.ylim([0.0, 2.0])
        plt.setp(ax.get_yticklabels(), visible=False)

        # Create and plot a Beta distribution to represent the
        # posterior belief in fairness of the coin.
        y = stats.beta.pdf(x, 1 + heads, 1 + N - heads)
        plt.plot(x, y, label="observe %d tosses,\n %d heads" % (N, heads))
        plt.fill_between(x, 0, y, color="#aaaadd", alpha=0.5)

    # Expand plot to cover full width/height and show it
    plt.tight_layout()
    plt.show()
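
The update above is just Beta-Binomial conjugacy: starting from the uniform Beta(1, 1) prior, observing h heads in N tosses gives a Beta(1 + h, 1 + N - h) posterior. A minimal sketch of that bookkeeping (illustrative only, not part of the original commit):

# conjugate update sketch (assumes scipy is available)
from scipy.stats import beta

alpha_prior, beta_prior = 1, 1  # uniform prior over the coin's fairness
N, h = 50, 10                   # 50 tosses, 10 heads observed

alpha_post = alpha_prior + h      # 11
beta_post = beta_prior + (N - h)  # 41

# The posterior mean moves from the prior mean 0.5 towards h/N = 0.2
print(beta.mean(alpha_prior, beta_prior))  # 0.5
print(beta.mean(alpha_post, beta_post))    # ~0.212
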
106 changes: 106 additions & 0 deletions chapter-bayesian-statistics-linear-regression/bayes-linear-reg-sim.py
@@ -0,0 +1,106 @@
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymc3 as pm
import seaborn as sns


sns.set(style="darkgrid", palette="muted")


def simulate_linear_data(N, beta_0, beta_1, eps_sigma_sq):
    """
    Simulate a random dataset using a noisy
    linear process.

    N: Number of data points to simulate
    beta_0: Intercept
    beta_1: Slope of univariate predictor, X
    eps_sigma_sq: Scale (standard deviation) of the Gaussian error term
    """
    # Create a pandas DataFrame with column 'x' containing a
    # random permutation of the values 0.00, 0.01, ..., (N-1)/100
    df = pd.DataFrame(
        {"x":
            np.random.RandomState(42).choice(
                list(map(
                    lambda x: float(x)/100.0,
                    np.arange(N)
                )), N, replace=False
            )
        }
    )

    # Use a linear model (y ~ beta_0 + beta_1*x + epsilon) to
    # generate a column 'y' of responses based on 'x'
    eps_mean = 0.0
    df["y"] = beta_0 + beta_1*df["x"] + np.random.RandomState(42).normal(
        eps_mean, eps_sigma_sq, N
    )

    return df


def glm_mcmc_inference(df, iterations=5000):
    """
    Calculates the Markov Chain Monte Carlo trace of
    a Generalised Linear Model Bayesian linear regression
    model on supplied data.

    df: DataFrame containing the data
    iterations: Number of iterations to carry out MCMC for
    """
    # Use PyMC3 to construct a model context
    basic_model = pm.Model()
    with basic_model:
        # Create the glm using the Patsy model syntax.
        # We use a Normal distribution for the likelihood.
        pm.glm.glm("y ~ x", df, family=pm.glm.families.Normal())

        # Use Maximum A Posteriori (MAP) optimisation
        # as the initial value for MCMC
        start = pm.find_MAP()

        # Use the No-U-Turn Sampler
        step = pm.NUTS()

        # Calculate the trace
        trace = pm.sample(
            iterations, step, start,
            random_seed=42, progressbar=True
        )

    return trace


if __name__ == "__main__":
    # These are our "true" parameters
    beta_0 = 1.0  # Intercept
    beta_1 = 2.0  # Slope

    # Simulate 200 data points with a noise standard deviation of 0.5
    N = 200
    eps_sigma_sq = 0.5

    # Simulate the "linear" data using the above parameters
    df = simulate_linear_data(N, beta_0, beta_1, eps_sigma_sq)

    # Plot the data, and a frequentist linear regression fit
    # using the seaborn package
    sns.lmplot(x="x", y="y", data=df, size=10)
    plt.xlim(0.0, 1.0)

    trace = glm_mcmc_inference(df, iterations=5000)
    pm.traceplot(trace[500:])
    plt.show()

    # Plot a sample of posterior regression lines
    sns.lmplot(x="x", y="y", data=df, size=10, fit_reg=False)
    plt.xlim(0.0, 1.0)
    plt.ylim(0.0, 4.0)
    pm.glm.plot_posterior_predictive(trace, samples=100)
    x = np.linspace(0, 1, N)
    y = beta_0 + beta_1*x
    plt.plot(x, y, label="True Regression Line", lw=3., c="green")
    plt.legend(loc=0)
    plt.show()
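
If the sampler has mixed well, the posterior should concentrate near the "true" parameters. A quick check (a sketch, not part of the original file; it assumes pm.glm names the coefficients "Intercept" and "x" after the Patsy formula terms):

    # Illustrative posterior summary, discarding the first 500 samples as burn-in
    print("beta_0 estimate:", np.mean(trace["Intercept"][500:]))  # expect ~1.0
    print("beta_1 estimate:", np.mean(trace["x"][500:]))          # expect ~2.0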

@@ -0,0 +1,66 @@
import matplotlib.pyplot as plt
import numpy as np
import pymc3
import scipy.stats as stats

plt.style.use("ggplot")

# Parameter values for prior and analytic posterior
n = 50
z = 10
alpha = 12
beta = 12
alpha_post = 22
beta_post = 52
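# alpha_post and beta_post follow from Beta-Binomial conjugacy:
# alpha_post = alpha + z = 22, beta_post = beta + (n - z) = 52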

# How many samples to carry out for MCMC
iterations = 100000

# Use PyMC3 to construct a model context
basic_model = pymc3.Model()
with basic_model:
    # Define our prior belief about the fairness
    # of the coin using a Beta distribution
    theta = pymc3.Beta("theta", alpha=alpha, beta=beta)

    # Define the Binomial likelihood function
    y = pymc3.Binomial("y", n=n, p=theta, observed=z)

    # Carry out the MCMC analysis using the Metropolis algorithm
    # Use Maximum A Posteriori (MAP) optimisation as initial value for MCMC
    start = pymc3.find_MAP()

    # Use the Metropolis algorithm (as opposed to NUTS or HMC, etc.)
    step = pymc3.Metropolis()

    # Calculate the trace
    trace = pymc3.sample(iterations, step, start, random_seed=1, progressbar=True)

# Plot the posterior histogram from MCMC analysis
bins = 50
plt.hist(
    trace["theta"], bins,
    histtype="step", normed=True,
    label="Posterior (MCMC)", color="red"
)

# Plot the analytic prior and posterior beta distributions
x = np.linspace(0, 1, 100)
plt.plot(
    x, stats.beta.pdf(x, alpha, beta),
    "--", label="Prior", color="blue"
)
plt.plot(
    x, stats.beta.pdf(x, alpha_post, beta_post),
    label="Posterior (Analytic)", color="green"
)

# Update the graph labels
plt.legend(title="Parameters", loc="best")
plt.xlabel("$\\theta$, Fairness")
plt.ylabel("Density")
plt.show()

# Show the trace plot
pymc3.traceplot(trace)
plt.show()
@@ -0,0 +1,19 @@
# exponential_plot.py

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


if __name__ == "__main__":
    sns.set_palette("deep", desat=.6)
    sns.set_context(rc={"figure.figsize": (8, 4)})
    x = np.linspace(0.0, 5.0, 100)
    lambdas = [0.5, 1.0, 2.0]
    for lam in lambdas:
        y = lam*np.exp(-lam*x)
        plt.plot(x, y, label="$\\lambda=%s$" % lam)
    plt.xlabel("x")
    plt.ylabel("P(x)")
    plt.legend(title="Parameters")
    plt.show()
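
The density above is computed by hand as lambda * exp(-lambda * x). As a quick cross-check (illustrative only, not part of the original file), scipy's expon gives the same curve when scale = 1/lambda:

# cross-check against scipy (assumes scipy is available)
import numpy as np
from scipy.stats import expon

lam = 2.0
xs = np.linspace(0.0, 5.0, 100)
assert np.allclose(lam*np.exp(-lam*xs), expon.pdf(xs, scale=1.0/lam))
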
@@ -0,0 +1,85 @@
# pymc3_bayes_stochastic_vol.py

import datetime
import pprint

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pandas_datareader as pdr
import pymc3 as pm
from pymc3.distributions.timeseries import GaussianRandomWalk
import seaborn as sns


def obtain_plot_amazon_prices_dataframe(start_date, end_date):
"""
Download, calculate and plot the AMZN logarithmic returns.
"""
print("Downloading and plotting AMZN log returns...")
amzn = pdr.get_data_yahoo("AMZN", start_date, end_date)
amzn["returns"] = amzn["Adj Close"]/amzn["Adj Close"].shift(1)
amzn.dropna(inplace=True)
amzn["log_returns"] = np.log(amzn["returns"])
amzn["log_returns"].plot(linewidth=0.5)
plt.ylabel("AMZN daily percentage returns")
plt.show()
return amzn


def configure_sample_stoch_vol_model(log_returns, samples):
"""
Configure the stochastic volatility model using PyMC3
in a 'with' context. Then sample from the model using
the No-U-Turn-Sampler (NUTS).
Plot the logarithmic volatility process and then the
absolute returns overlaid with the estimated vol.
"""
print("Configuring stochastic volatility with PyMC3...")
model = pm.Model()
with model:
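        # Generative model implied by the lines below:
        #   sigma ~ Exponential(50)                step scale of the log-vol walk
        #   nu    ~ Exponential(0.1)               Student-t degrees of freedom
        #   s_t   ~ Normal(s_{t-1}, sigma^2)       latent log-volatility
        #   r_t   ~ StudentT(nu, lam=exp(-2 s_t)), i.e. scale exp(s_t)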
        sigma = pm.Exponential('sigma', 50.0, testval=0.1)
        nu = pm.Exponential('nu', 0.1)
        s = GaussianRandomWalk('s', sigma**-2, shape=len(log_returns))
        logrets = pm.StudentT(
            'logrets', nu,
            lam=pm.math.exp(-2.0*s),
            observed=log_returns
        )

    print("Fitting the stochastic volatility model...")
    with model:
        trace = pm.sample(samples)
    pm.traceplot(trace, model.vars[:-1])
    plt.show()

    print("Plotting the log volatility...")
    k = 10
    opacity = 0.03
    plt.plot(trace[s][::k].T, 'b', alpha=opacity)
    plt.xlabel('Time')
    plt.ylabel('Log Volatility')
    plt.show()

    print("Plotting the absolute returns overlaid with vol...")
    plt.plot(np.abs(np.exp(log_returns) - 1.0), linewidth=0.5)
    plt.plot(np.exp(trace[s][::k].T), 'r', alpha=opacity)
    plt.xlabel("Trading Days")
    plt.ylabel("Absolute Returns/Volatility")
    plt.show()


if __name__ == "__main__":
    # State the starting and ending dates of the AMZN returns
    start_date = datetime.datetime(2006, 1, 1)
    end_date = datetime.datetime(2015, 12, 31)

    # Obtain and plot the logarithmic returns of Amazon prices
    amzn_df = obtain_plot_amazon_prices_dataframe(start_date, end_date)
    log_returns = np.array(amzn_df["log_returns"])

    # Configure the stochastic volatility model and carry out
    # MCMC sampling using NUTS, plotting the trace
    samples = 2000
    configure_sample_stoch_vol_model(log_returns, samples)
@@ -0,0 +1,20 @@
# student_t_plot.py

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import t
import seaborn as sns


if __name__ == "__main__":
    sns.set_palette("deep", desat=.6)
    sns.set_context(rc={"figure.figsize": (8, 4)})
    x = np.linspace(-5.0, 5.0, 100)
    nus = [1.0, 2.0, 5.0, 50.0]
    for nu in nus:
        y = t.pdf(x, nu)
        plt.plot(x, y, label="$\\nu=%s$" % nu)
    plt.xlabel("x")
    plt.ylabel("P(x)")
    plt.legend(title="Parameters")
    plt.show()
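
As nu grows, the Student-t converges to the standard normal, which is why the nu = 50.0 curve already looks Gaussian in the plot. A quick numerical check (illustrative only, not part of the original file):

# large-nu limit check (assumes scipy is available)
import numpy as np
from scipy.stats import norm, t

xs = np.linspace(-5.0, 5.0, 100)
assert np.allclose(t.pdf(xs, 1e6), norm.pdf(xs), atol=1e-4)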