-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Gelei Chen
committed
Oct 30, 2017
0 parents
commit 6169fa1
Showing
48 changed files
with
19,805 additions
and
0 deletions.
There are no files selected for viewing
Binary file not shown.
26 changes: 26 additions & 0 deletions
26
chapter-bayesian-statistics-binomial-proportion/beta_plot.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
# beta_plot.py | ||
|
||
import numpy as np | ||
from scipy.stats import beta | ||
import matplotlib.pyplot as plt | ||
import seaborn as sns | ||
|
||
|
||
if __name__ == "__main__": | ||
sns.set_palette("deep", desat=.6) | ||
sns.set_context(rc={"figure.figsize": (8, 4)}) | ||
x = np.linspace(0, 1, 100) | ||
params = [ | ||
(0.5, 0.5), | ||
(1, 1), | ||
(4, 3), | ||
(2, 5), | ||
(6, 6) | ||
] | ||
for p in params: | ||
y = beta.pdf(x, p[0], p[1]) | ||
plt.plot(x, y, label="$\\alpha=%s$, $\\beta=%s$" % p) | ||
plt.xlabel("$\\theta$, Fairness") | ||
plt.ylabel("Density") | ||
plt.legend(title="Parameters") | ||
plt.show() |
48 changes: 48 additions & 0 deletions
48
chapter-bayesian-statistics-intro-to-bayesian-methods/beta_binomial.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
# beta_binomial.py | ||
|
||
import numpy as np | ||
from scipy import stats | ||
from matplotlib import pyplot as plt | ||
|
||
|
||
if __name__ == "__main__": | ||
# Create a list of the number of coin tosses ("Bernoulli trials") | ||
number_of_trials = [0, 2, 10, 20, 50, 500] | ||
|
||
# Conduct 500 coin tosses and output into a list of 0s and 1s | ||
# where 0 represents a tail and 1 represents a head | ||
data = stats.bernoulli.rvs(0.5, size=number_of_trials[-1]) | ||
|
||
# Discretise the x-axis into 100 separate plotting points | ||
x = np.linspace(0, 1, 100) | ||
|
||
# Loops over the number_of_trials list to continually add | ||
# more coin toss data. For each new set of data, we update | ||
# our (current) prior belief to be a new posterior. This is | ||
# carried out using what is known as the Beta-Binomial model. | ||
# For the time being, we won't worry about this too much. | ||
for i, N in enumerate(number_of_trials): | ||
# Accumulate the total number of heads for this | ||
# particular Bayesian update | ||
heads = data[:N].sum() | ||
|
||
# Create an axes subplot for each update | ||
ax = plt.subplot(len(number_of_trials) / 2, 2, i + 1) | ||
ax.set_title("%s trials, %s heads" % (N, heads)) | ||
|
||
# Add labels to both axes and hide labels on y-axis | ||
plt.xlabel("$P(H)$, Probability of Heads") | ||
plt.ylabel("Density") | ||
if i == 0: | ||
plt.ylim([0.0, 2.0]) | ||
plt.setp(ax.get_yticklabels(), visible=False) | ||
|
||
# Create and plot a Beta distribution to represent the | ||
# posterior belief in fairness of the coin. | ||
y = stats.beta.pdf(x, 1 + heads, 1 + N - heads) | ||
plt.plot(x, y, label="observe %d tosses,\n %d heads" % (N, heads)) | ||
plt.fill_between(x, 0, y, color="#aaaadd", alpha=0.5) | ||
|
||
# Expand plot to cover full width/height and show it | ||
plt.tight_layout() | ||
plt.show() |
106 changes: 106 additions & 0 deletions
106
chapter-bayesian-statistics-linear-regression/bayes-linear-reg-sim.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
import matplotlib.pyplot as plt | ||
import numpy as np | ||
import pandas as pd | ||
import pymc3 as pm | ||
import seaborn as sns | ||
|
||
|
||
sns.set(style="darkgrid", palette="muted") | ||
|
||
|
||
def simulate_linear_data(N, beta_0, beta_1, eps_sigma_sq): | ||
""" | ||
Simulate a random dataset using a noisy | ||
linear process. | ||
N: Number of data points to simulate | ||
beta_0: Intercept | ||
beta_1: Slope of univariate predictor, X | ||
""" | ||
# Create a pandas DataFrame with column 'x' containing | ||
# N uniformly sampled values between 0.0 and 1.0 | ||
df = pd.DataFrame( | ||
{"x": | ||
np.random.RandomState(42).choice( | ||
map( | ||
lambda x: float(x)/100.0, | ||
np.arange(N) | ||
), N, replace=False | ||
) | ||
} | ||
) | ||
|
||
# Use a linear model (y ~ beta_0 + beta_1*x + epsilon) to | ||
# generate a column 'y' of responses based on 'x' | ||
eps_mean = 0.0 | ||
df["y"] = beta_0 + beta_1*df["x"] + np.random.RandomState(42).normal( | ||
eps_mean, eps_sigma_sq, N | ||
) | ||
|
||
return df | ||
|
||
|
||
def glm_mcmc_inference(df, iterations=5000): | ||
""" | ||
Calculates the Markov Chain Monte Carlo trace of | ||
a Generalised Linear Model Bayesian linear regression | ||
model on supplied data. | ||
df: DataFrame containing the data | ||
iterations: Number of iterations to carry out MCMC for | ||
""" | ||
# Use PyMC3 to construct a model context | ||
basic_model = pm.Model() | ||
with basic_model: | ||
# Create the glm using the Patsy model syntax | ||
# We use a Normal distribution for the likelihood | ||
pm.glm.glm("y ~ x", df, family=pm.glm.families.Normal()) | ||
|
||
# Use Maximum A Posteriori (MAP) optimisation | ||
# as initial value for MCMC | ||
start = pm.find_MAP() | ||
|
||
# Use the No-U-Turn Sampler | ||
step = pm.NUTS() | ||
|
||
# Calculate the trace | ||
trace = pm.sample( | ||
iterations, step, start, | ||
random_seed=42, progressbar=True | ||
) | ||
|
||
return trace | ||
|
||
|
||
if __name__ == "__main__": | ||
# These are our "true" parameters | ||
beta_0 = 1.0 # Intercept | ||
beta_1 = 2.0 # Slope | ||
|
||
# Simulate 100 data points, with a variance of 0.5 | ||
N = 200 | ||
eps_sigma_sq = 0.5 | ||
|
||
# Simulate the "linear" data using the above parameters | ||
df = simulate_linear_data(N, beta_0, beta_1, eps_sigma_sq) | ||
|
||
# Plot the data, and a frequentist linear regression fit | ||
# using the seaborn package | ||
sns.lmplot(x="x", y="y", data=df, size=10) | ||
plt.xlim(0.0, 1.0) | ||
|
||
trace = glm_mcmc_inference(df, iterations=5000) | ||
pm.traceplot(trace[500:]) | ||
plt.show() | ||
|
||
# Plot a sample of posterior regression lines | ||
sns.lmplot(x="x", y="y", data=df, size=10, fit_reg=False) | ||
plt.xlim(0.0, 1.0) | ||
plt.ylim(0.0, 4.0) | ||
pm.glm.plot_posterior_predictive(trace, samples=100) | ||
x = np.linspace(0, 1, N) | ||
y = beta_0 + beta_1*x | ||
plt.plot(x, y, label="True Regression Line", lw=3., c="green") | ||
plt.legend(loc=0) | ||
plt.show() | ||
|
66 changes: 66 additions & 0 deletions
66
chapter-bayesian-statistics-markov-chain-monte-carlo/bayes-binomial-mcmc.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
import matplotlib.pyplot as plt | ||
import numpy as np | ||
import pymc3 | ||
import scipy.stats as stats | ||
|
||
plt.style.use("ggplot") | ||
|
||
# Parameter values for prior and analytic posterior | ||
n = 50 | ||
z = 10 | ||
alpha = 12 | ||
beta = 12 | ||
alpha_post = 22 | ||
beta_post = 52 | ||
|
||
# How many samples to carry out for MCMC | ||
iterations = 100000 | ||
|
||
# Use PyMC3 to construct a model context | ||
basic_model = pymc3.Model() | ||
with basic_model: | ||
# Define our prior belief about the fairness | ||
# of the coin using a Beta distribution | ||
theta = pymc3.Beta("theta", alpha=alpha, beta=beta) | ||
|
||
# Define the Bernoulli likelihood function | ||
y = pymc3.Binomial("y", n=n, p=theta, observed=z) | ||
|
||
# Carry out the MCMC analysis using the Metropolis algorithm | ||
# Use Maximum A Posteriori (MAP) optimisation as initial value for MCMC | ||
start = pymc3.find_MAP() | ||
|
||
# Use the Metropolis algorithm (as opposed to NUTS or HMC, etc.) | ||
step = pymc3.Metropolis() | ||
|
||
# Calculate the trace | ||
trace = pymc3.sample(iterations, step, start, random_seed=1, progressbar=True) | ||
|
||
# Plot the posterior histogram from MCMC analysis | ||
bins=50 | ||
plt.hist( | ||
trace["theta"], bins, | ||
histtype="step", normed=True, | ||
label="Posterior (MCMC)", color="red" | ||
) | ||
|
||
# Plot the analytic prior and posterior beta distributions | ||
x = np.linspace(0, 1, 100) | ||
plt.plot( | ||
x, stats.beta.pdf(x, alpha, beta), | ||
"--", label="Prior", color="blue" | ||
) | ||
plt.plot( | ||
x, stats.beta.pdf(x, alpha_post, beta_post), | ||
label='Posterior (Analytic)', color="green" | ||
) | ||
|
||
# Update the graph labels | ||
plt.legend(title="Parameters", loc="best") | ||
plt.xlabel("$\\theta$, Fairness") | ||
plt.ylabel("Density") | ||
plt.show() | ||
|
||
# Show the trace plot | ||
pymc3.traceplot(trace) | ||
plt.show() |
19 changes: 19 additions & 0 deletions
19
chapter-bayesian-statistics-stochastic-volatility-model/exponential_plot.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# exponential_plot.py | ||
|
||
import numpy as np | ||
import matplotlib.pyplot as plt | ||
import seaborn as sns | ||
|
||
|
||
if __name__ == "__main__": | ||
sns.set_palette("deep", desat=.6) | ||
sns.set_context(rc={"figure.figsize": (8, 4)}) | ||
x = np.linspace(0.0, 5.0, 100) | ||
lambdas = [0.5, 1.0, 2.0] | ||
for lam in lambdas: | ||
y = lam*np.exp(-lam*x) | ||
ax = plt.plot(x, y, label="$\\lambda=%s$" % lam) | ||
plt.xlabel("x") | ||
plt.ylabel("P(x)") | ||
plt.legend(title="Parameters") | ||
plt.show() |
85 changes: 85 additions & 0 deletions
85
chapter-bayesian-statistics-stochastic-volatility-model/pymc3_bayes_stochastic_vol.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
# pymc3_bayes_stochastic_vol.py | ||
|
||
import datetime | ||
import pprint | ||
|
||
import matplotlib.pyplot as plt | ||
import numpy as np | ||
import pandas as pd | ||
import pandas_datareader as pdr | ||
import pymc3 as pm | ||
from pymc3.distributions.timeseries import GaussianRandomWalk | ||
import seaborn as sns | ||
|
||
|
||
def obtain_plot_amazon_prices_dataframe(start_date, end_date): | ||
""" | ||
Download, calculate and plot the AMZN logarithmic returns. | ||
""" | ||
print("Downloading and plotting AMZN log returns...") | ||
amzn = pdr.get_data_yahoo("AMZN", start_date, end_date) | ||
amzn["returns"] = amzn["Adj Close"]/amzn["Adj Close"].shift(1) | ||
amzn.dropna(inplace=True) | ||
amzn["log_returns"] = np.log(amzn["returns"]) | ||
amzn["log_returns"].plot(linewidth=0.5) | ||
plt.ylabel("AMZN daily percentage returns") | ||
plt.show() | ||
return amzn | ||
|
||
|
||
def configure_sample_stoch_vol_model(log_returns, samples): | ||
""" | ||
Configure the stochastic volatility model using PyMC3 | ||
in a 'with' context. Then sample from the model using | ||
the No-U-Turn-Sampler (NUTS). | ||
Plot the logarithmic volatility process and then the | ||
absolute returns overlaid with the estimated vol. | ||
""" | ||
print("Configuring stochastic volatility with PyMC3...") | ||
model = pm.Model() | ||
with model: | ||
sigma = pm.Exponential('sigma', 50.0, testval=0.1) | ||
nu = pm.Exponential('nu', 0.1) | ||
s = GaussianRandomWalk('s', sigma**-2, shape=len(log_returns)) | ||
logrets = pm.StudentT( | ||
'logrets', nu, | ||
lam=pm.math.exp(-2.0*s), | ||
observed=log_returns | ||
) | ||
|
||
print("Fitting the stochastic volatility model...") | ||
with model: | ||
trace = pm.sample(samples) | ||
pm.traceplot(trace, model.vars[:-1]) | ||
plt.show() | ||
|
||
print("Plotting the log volatility...") | ||
k = 10 | ||
opacity = 0.03 | ||
plt.plot(trace[s][::k].T, 'b', alpha=opacity) | ||
plt.xlabel('Time') | ||
plt.ylabel('Log Volatility') | ||
plt.show() | ||
|
||
print("Plotting the absolute returns overlaid with vol...") | ||
plt.plot(np.abs(np.exp(log_returns))-1.0, linewidth=0.5) | ||
plt.plot(np.exp(trace[s][::k].T), 'r', alpha=opacity) | ||
plt.xlabel("Trading Days") | ||
plt.ylabel("Absolute Returns/Volatility") | ||
plt.show() | ||
|
||
|
||
if __name__ == "__main__": | ||
# State the starting and ending dates of the AMZN returns | ||
start_date = datetime.datetime(2006, 1, 1) | ||
end_date = datetime.datetime(2015, 12, 31) | ||
|
||
# Obtain and plot the logarithmic returns of Amazon prices | ||
amzn_df = obtain_plot_amazon_prices_dataframe(start_date, end_date) | ||
log_returns = np.array(amzn_df["log_returns"]) | ||
|
||
# Configure the stochastic volatility model and carry out | ||
# MCMC sampling using NUTS, plotting the trace | ||
samples = 2000 | ||
configure_sample_stoch_vol_model(log_returns, samples) |
20 changes: 20 additions & 0 deletions
20
chapter-bayesian-statistics-stochastic-volatility-model/student_t_plot.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# student_t_plot.py | ||
|
||
import numpy as np | ||
import matplotlib.pyplot as plt | ||
from scipy.stats import t | ||
import seaborn as sns | ||
|
||
|
||
if __name__ == "__main__": | ||
sns.set_palette("deep", desat=.6) | ||
sns.set_context(rc={"figure.figsize": (8, 4)}) | ||
x = np.linspace(-5.0, 5.0, 100) | ||
nus = [1.0, 2.0, 5.0, 50.0] | ||
for nu in nus: | ||
y = t.pdf(x, nu) | ||
ax = plt.plot(x, y, label="$\\nu=%s$" % nu) | ||
plt.xlabel("x") | ||
plt.ylabel("P(x)") | ||
plt.legend(title="Parameters") | ||
plt.show() |
Oops, something went wrong.