A study of some techniques for Monte Carlo Sampling

We have so far seen several examples of Bayesian modeling using PyMC. The output in each case is a set of posterior Monte Carlo samples, which can be used to approximate the actual underlying posterior distributions. We shall now look closely at two standard techniques for generating posterior samples:

  1. Rejection Sampling
  2. Gibbs Sampling

Rejection Sampling

Rejection sampling is a very old method for obtaining Monte Carlo Samples. Suppose the goal is to obtain samples from a density $f_{\text{Target}}(u)$. Rejection sampling provides an algorithm for generating samples from $f_{\text{Target}}(u)$ using another density $f_{\text{Proposal}}(u)$. This proposal density should satisfy the following two properties:

  1. Obtaining samples from $f_{\text{Proposal}}(u)$ should be easy.
  2. There must exist a positive real number $M$ such that $$f_{\text{Target}}(u) \leq M f_{\text{Proposal}}(u)$$ for all values of $u$. Although the method will work, in principle, for any positive $M$ satisfying the above condition, it becomes extremely inefficient if $M$ gets large as will be clear below. So we want $M$ to satisfy the above condition and also to be not too large.
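When both densities can be evaluated, one rough way to find a workable $M$ numerically (a sketch; in the Beta examples below, $M$ is instead read off from the maximum of the target density) is to maximize the ratio $f_{\text{Target}}/f_{\text{Proposal}}$ over a fine grid:

import numpy as np
from scipy.stats import beta, uniform

grid = np.linspace(1e-6, 1 - 1e-6, 10000)
ratio = beta.pdf(grid, 4, 1) / uniform.pdf(grid)   #f_Target / f_Proposal evaluated on the grid
M = 1.01 * ratio.max()                             #small safety margin above the grid maximum
print(M)                                           #close to 4 for the Beta(4, 1) example below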

Rejection Sampling is based on the following very simple (Bayesian) fact. Consider the following Bayesian model: \begin{align*} \text{Prior}: \Theta \sim f_{\text{Proposal}} ~~~ \text{ and } ~~~ \text{Likelihood}: Y \mid \Theta = u \sim \text{Bernoulli} \left(\frac{f_{\text{Target}}(u)}{M f_{\text{Proposal}}(u)} \right) \end{align*} So in this model, the unknown parameter $\Theta$ has the prior density $f_{\text{Proposal}}$. The data is binary and the likelihood is given by the Bernoulli distribution with parameter $\frac{f_{\text{Target}}(u)}{M f_{\text{Proposal}}(u)}.$ Note that the Bernoulli parameter should be always between 0 and 1 which is why we need the condition $f_{\text{Target}}(u) \leq M f_{\text{Proposal}}(u)$ for all values of $u$. The key fact underlying Rejection Sampling is that the posterior density of $\Theta$ in this model given $Y = 1$ is exactly $f_{\text{Target}}$: \begin{align*} \Theta \mid Y = 1 ~~ \sim f_{\text{Target}} \end{align*} This is proved by a simple application of Bayes rule as follows: \begin{align*} f_{\Theta \mid Y = 1}(u) &= \frac{f_{\Theta}(u) \mathbb{P}\{Y = 1 \mid \Theta = u\}}{\int f_{\Theta}(v) \mathbb{P}\{Y = 1 \mid \Theta = v\} dv} \\ &= \frac{f_{\text{Proposal}}(u) \frac{f_{\text{Target}}(u)}{M f_{\text{Proposal}}(u)}}{\int f_{\text{Proposal}}(v) \frac{f_{\text{Target}}(v)}{M f_{\text{Proposal}}(v)} dv} \\ &= \frac{\frac{f_{\text{Target}}(u)}{M}}{\int \frac{f_{\text{Target}}(v)}{M } dv} = f_{\text{Target}}(u). \end{align*}

Based on this idea, the algorithm for Rejection Sampling goes as follows.

  1. Generate lots of samples $(\Theta^{(i)}, Y^{(i)})$ from the probability model described above. In other words, take $\Theta^{(i)} \sim f_{\text{Proposal}}$ and $Y^{(i)} \mid \Theta = \Theta^{(i)} \sim \text{Bernoulli} \left(\frac{f_{\text{Target}}(\Theta^{(i)})}{M f_{\text{Proposal}}(\Theta^{(i)})} \right)$.
  2. Based on the samples, approximate the distribution of $\Theta \mid Y = 1$. This is done by discarding (or rejecting) the samples for which $Y^{(i)} = 0$ and keeping only the ones with $Y^{(i)} = 1$. The $\Theta^{(i)}$'s in the remaining samples approximate $f_{\text{Target}}$.

Here is how this algorithm works for sampling from Beta distributions: $f_{\text{Target}}$ is a Beta density and $f_{\text{Proposal}}$ is the $\text{Uniform}[0, 1]$ density.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
In [2]:
#Rejection sampling for generating samples from Beta(4, 1):
#Our proposal distribution will be Uniform[0, 1]
#Since the proposal density equals 1 on [0, 1], M can be taken to be the largest value of the Beta(4, 1) density (which is 4, attained at u = 1).
M = 4
N = 20000 #this is the number of proposal samples that we will generate
prior_samples = np.random.rand(N)
#Bernoulli parameter: f_Target(u) / (M * f_Proposal(u)) = 4 u^3 / (4 * 1) = u^3
p_prior_samples = prior_samples ** 3
Y_samples = np.random.binomial(n = 1, p = p_prior_samples)
posterior_samples = prior_samples[Y_samples == 1]
print(len(posterior_samples))
plt.hist(posterior_samples, bins = 500, density = True, alpha = 0.6, label = 'Rejection Sampling Samples from Beta(4, 1)') 
x = np.linspace(0, 1, 1000)
from scipy.stats import beta
pdf_values = beta.pdf(x, 4, 1)
plt.plot(x, pdf_values, 'r-', label = 'Beta(4, 1) Density')
plt.xlabel('Value')
plt.ylabel('Density')
plt.legend()
plt.title('Histogram of Samples from Rejection Sampling and Beta(4, 1) density')
plt.show()
#The match between the histogram and the true density is not bad but we are only 
#getting about 1/4 of the total samples (others are rejected because of Y = 0).
5017
In [3]:
#Rejection sampling for generating samples from Beta(20, 2):
#Our proposal distribution will be Uniform[0, 1]
#M can be taken to be anything at least as large as the largest value of the Beta(20, 2) density (about 7.93, attained at u = 0.95); we use M = 8.
M = 8
N = 20000 #this is the number of proposal samples that we will generate
prior_samples = np.random.rand(N)
#Bernoulli parameter: f_Target(u) / (M * f_Proposal(u)) = 420 u^19 (1 - u) / 8
p_prior_samples = 420 * (prior_samples ** 19) * (1 - prior_samples) * (1/8)
Y_samples = np.random.binomial(n = 1, p = p_prior_samples)
posterior_samples = prior_samples[Y_samples == 1]
print(len(posterior_samples))
plt.hist(posterior_samples, bins = 500, density = True, alpha = 0.6, label = 'Rejection Sampling Samples from Beta(20, 2)') 
x = np.linspace(0, 1, 1000)
from scipy.stats import beta
pdf_values = beta.pdf(x, 20, 2)
plt.plot(x, pdf_values, 'r-', label = 'Beta(20, 2) Density')
plt.xlabel('Value')
plt.ylabel('Density')
plt.legend()
plt.title('Histogram of Samples from Rejection Sampling and Beta(20, 2) density')
plt.show()
#The match between the histogram and the true density is not bad but we are only 
#getting about 1/8 of the total samples (others are rejected because of Y = 0).
2510

In general, the marginal probability of $Y = 1$ in the rejection sampling Bayesian model equals $1/M$: \begin{align*} \mathbb{P} \{Y = 1\} &= \int f_{\Theta}(v) \mathbb{P}\{Y = 1 \mid \Theta = v\} dv \\ &= \int f_{\text{Proposal}}(v) \frac{f_{\text{Target}}(v)}{M f_{\text{Proposal}}(v)} dv = \frac{1}{M} \int f_{\text{Target}}(v) dv = \frac{1}{M} \end{align*} So if $N$ samples $(\Theta^{(i)}, Y^{(i)})$ are originally generated from the prior (proposal density), then only about $N/M$ will have $Y^{(i)} = 1$. Thus we can expect to have about $N/M$ samples from the posterior (target density). Thus if $M$ is large, we will have very few samples from the target density. The main trick in Rejection Sampling therefore is to choose the proposal density so that $M$ is not too large (this, of course, may not be always possible because the proposal density also needs to be such that samples can be generated from it easily). This is the reason why Rejection Sampling is inefficient in most practical instances.
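To summarize the recipe in code, here is a generic sketch (the function and argument names are ours, not from any library); applied to the Beta(4, 1) example above, it prints an acceptance fraction close to $1/M$:

import numpy as np
from scipy.stats import beta, uniform

def rejection_sample(f_target, f_proposal, proposal_sampler, M, N, rng=None):
    rng = np.random.default_rng() if rng is None else rng
    theta = proposal_sampler(N, rng)                        #Theta^(i) ~ f_Proposal
    p_accept = f_target(theta) / (M * f_proposal(theta))    #Bernoulli parameter for Y^(i)
    y = rng.binomial(1, p_accept)                           #Y^(i) given Theta^(i)
    return theta[y == 1]                                    #keep only the samples with Y = 1

#Beta(4, 1) target with a Uniform[0, 1] proposal and M = 4, as in the cells above
samples = rejection_sample(lambda u: beta.pdf(u, 4, 1),
                           lambda u: uniform.pdf(u),
                           lambda size, rng: rng.uniform(size=size),
                           M=4, N=20000)
print(len(samples) / 20000)   #acceptance fraction, close to 1/M = 0.25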

Gibbs Sampling

The Gibbs Sampler is a very standard method for drawing posterior samples. The goal is to generate samples from a target density $f_{\text{Target}}(u)$. The Gibbs sampler is applicable when $u$ is a multivariate vector. To start with, let us assume that $u$ is bivariate consisting of two components $u_1$ and $u_2$ i.e., $u = (u_1, u_2)$, and that $f_{\text{Target}}(u_1, u_2)$ is the joint density of two random variables $U_1$ and $U_2$. The Gibbs sampler will produce samples of the form: \begin{align*} (u_1^{(0)}, u_2^{(0)}) \rightarrow (u_1^{(1)}, u_2^{(1)}) \rightarrow (u_1^{(2)}, u_2^{(2)}) \rightarrow \dots \rightarrow (u_1^{(T)}, u_2^{(T)}) \end{align*} for a large $T$, by going through the following steps:

  1. Initialize at arbitrary $u_1^{(0)}$ and $u_2^{(0)}$. Repeat the following for $t = 0, 1, 2, \dots$
  2. Given the current sample $(u_1^{(t)}, u_2^{(t)})$,
    1. draw $u_1^{(t+1)}$ randomly from the conditional distribution of $U_1$ given $U_2 = u_2^{(t)}$. In other words $$u_1^{(t+1)} \sim \text{density of } U_1 \mid U_2 = u_2^{(t)}.$$
    2. draw $u_2^{(t+1)}$ randomly from the conditional distribution of $U_2$ given $U_1 = u_1^{(t+1)}$. In other words $$u_2^{(t+1)} \sim \text{density of } U_2 \mid U_1 = u_1^{(t+1)}.$$
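In code, this bivariate update might look like the following sketch (the two conditional-sampler arguments are hypothetical placeholders to be supplied for the specific problem):

import numpy as np

def gibbs_bivariate(sample_u1_given_u2, sample_u2_given_u1, u1_init, u2_init, T, rng=None):
    rng = np.random.default_rng() if rng is None else rng
    samples = [(u1_init, u2_init)]
    for _ in range(T):
        u2_prev = samples[-1][1]
        u1_new = sample_u1_given_u2(u2_prev, rng)   #draw from U1 | U2 = most recent u2
        u2_new = sample_u2_given_u1(u1_new, rng)    #draw from U2 | U1 = freshly drawn u1
        samples.append((u1_new, u2_new))
    return samples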

The Gibbs sampler is a very simple and intuitive algorithm. It works by repeatedly sampling from the conditional distributions $U_1 \mid U_2 = u_2$ and $U_2 \mid U_1 = u_1$ by using the most recent values for $u_1$ and $u_2$. Let us illustrate how the Gibbs sampler works by solving the problem on normal mean estimation from last lecture.

Here is the normal mean estimation problem from last lecture. A simple experiment to measure the length of an object led to the following 15 measurements: $17.62, 17.61, 17.61, 17.62, 17.62, 17.615, 17.615, 17.625, 17.61, 17.62, 17.62, 17.605, 17.61, 17.62, 17.61.$ What is the best estimate for the length of this object?

The usual analysis of this problem first assumes that $$Y_1, \dots, Y_n \overset{\text{i.i.d}}{\sim} N(\theta, \sigma^2).$$ One then estimates $\theta$ by the sample mean: $\bar{Y} = \frac{Y_1 + \dots + Y_n}{n}$. After this, one uses \begin{align*} \bar{Y} \sim N\left(\theta, \frac{\sigma^2}{n} \right) \text{ or, equivalently } \frac{\bar{Y} - \theta}{\sqrt{\frac{\sigma^2}{n}}} = \frac{\sqrt{n}(\bar{Y} - \theta)}{\sigma}\sim N(0, 1). \end{align*} The above distributional statement cannot be used for constructing confidence intervals for $\theta$ because $\sigma$ is unknown. One then estimates $\sigma$ as \begin{align*} \hat{\sigma} := \sqrt{\frac{1}{n-1} \sum_{i=1}^n (Y_i - \bar{Y})^2}, \end{align*} and uses the following fact: \begin{align*} \frac{\sqrt{n}(\bar{Y} - \theta)}{\hat{\sigma}}\sim \mathbf{t}_{n-1} \end{align*} where $\mathbf{t}_{n-1}$ is the Student $t$-distribution with $n-1$ degrees of freedom. From this, one can construct a $100(1-\alpha)\%$ confidence interval for $\theta$ as: \begin{align*} \left[\bar{Y} - \frac{\hat{\sigma}}{\sqrt{n}} \mathbf{t}_{n-1, \alpha/2}, \bar{Y} + \frac{\hat{\sigma}}{\sqrt{n}} \mathbf{t}_{n-1, \alpha/2} \right] \end{align*} where $\mathbf{t}_{n-1, \alpha/2}$ is the point on the positive real line to the right of which the $\mathbf{t}_{n-1}$ distribution assigns probability $\alpha/2$.

In [4]:
import numpy as np
n = 15
y_obs = np.array([17.62, 17.61, 17.61, 17.62, 17.62, 17.615, 17.615, 17.625, 17.61, 17.62, 17.62, 17.605, 17.61, 17.62, 17.61])
ybar = np.mean(y_obs)
sighat = np.std(y_obs, ddof = 1)
display(ybar, sighat)
alpha = 0.05
from scipy.stats import t
t_critical_value = t.ppf(1 - alpha/2, n-1)
print(t_critical_value)
#95% confidence interval
ci_lower = ybar - sighat*(1/np.sqrt(n))*t_critical_value
ci_upper = ybar + sighat*(1/np.sqrt(n))*t_critical_value
display([ci_lower, ci_upper])
17.615333333333336
0.005814595756329882
2.1447866879169273
[17.61211331753182, 17.618553349134853]

In our Bayesian analysis of this problem, there were two parameters: $\theta$ which is the unknown length of the object, and $\sigma$ which represents the uncertainty in each individual measurement. We used the following prior and likelihood: \begin{align*} \text{Prior: } \theta \sim \text{Uniform}[-80, 80] ~~ \text{ and } ~~ \log \sigma \sim \text{Uniform}[-10, 10] \end{align*} \begin{align*} \text{Likelihood: } y_1, \dots, y_n \mid \theta, \sigma \overset{\text{i.i.d}}{\sim} N(\theta, \sigma^2) \end{align*} We then specified this model in PyMC and obtained posterior samples from PyMC.

In [5]:
import pymc as pm
import arviz as az
n = 15
y_obs = np.array([17.62, 17.61, 17.61, 17.62, 17.62, 17.615, 17.615, 17.625, 17.61, 17.62, 17.62, 17.605, 17.61, 17.62, 17.61])
measurement_model = pm.Model()
with measurement_model:
    theta = pm.Uniform("theta", lower = -80, upper = 80)
    log_sigma = pm.Uniform("log_sigma", lower = -10, upper = 10)
    sigma = pm.Deterministic("sigma", pm.math.exp(log_sigma))
    Y = pm.Normal("Y", mu = theta, sigma = sigma, observed=y_obs)
    #Sample from posterior:
    idata = pm.sample(2000, chains = 2, return_inferencedata = True) 
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 4 jobs)
NUTS: [theta, log_sigma]
100.00% [6000/6000 00:03<00:00 Sampling 2 chains, 0 divergences]
Sampling 2 chains for 1_000 tune and 2_000 draw iterations (2_000 + 4_000 draws total) took 4 seconds.
We recommend running at least 4 chains for robust computation of convergence diagnostics
In [6]:
#The posterior samples can be used to estimate theta (and sigma) along with uncertainty quantification as follows: 
theta_samples = idata.posterior['theta'].values.flatten()
log_sigma_samples = idata.posterior['log_sigma'].values.flatten()
sigma_samples = np.exp(log_sigma_samples)
#Our best estimate of the unknown length can be taken to be the mean of the posterior samples for theta:
thetamean = np.mean(theta_samples)
display(thetamean)
#A 95% interval for theta based on the posterior samples is computed as follows:
lower_limit_theta = np.percentile(theta_samples, 2.5)
upper_limit_theta = np.percentile(theta_samples, 97.5)
display([lower_limit_theta, upper_limit_theta])
#This interval should be very similar to the confidence interval for theta derived previously
17.615329695222517
[17.612139833124715, 17.618756119947683]

We now obtain posterior samples for $U_1 := \theta$ and $U_2 := \sigma$ by the Gibbs sampling algorithm (instead of using PyMC). In other words, we need to sample from the target density $f_{\theta, \sigma \mid \text{data}}(t, s)$. Note that, by Bayes rule, this posterior density equals: \begin{align*} f_{\theta, \sigma \mid \text{data}}(t, s) = \frac{f_{\theta, \sigma}(t, s) f_{\text{data} \mid \theta = t, \sigma = s}(\text{data})}{\int \int f_{\theta, \sigma}(t, s) f_{\text{data} \mid \theta = t, \sigma = s}(\text{data}) dt ds} \end{align*} The numerator above is simply $\text{Prior} \times \text{Likelihood}$ and the denominator is the normalizing constant which makes the density (over $t, s$) integrate to one. One often ignores the denominator and writes the above relation using the "proportional to" sign ($\propto$) as: \begin{align*} f_{\theta, \sigma \mid \text{data}}(t, s) \propto \text{Prior} \times \text{Likelihood} = f_{\theta, \sigma}(t, s) f_{\text{data} \mid \theta = t, \sigma = s}(\text{data}) \end{align*} The prior is given by \begin{align*} f_{\theta, \sigma}(t, s) &= f_{\theta}(t) \times f_{\sigma}(s), ~~\text{assuming prior independence between } \theta ~ \text{ and } ~\sigma \\ &= \frac{I\{-80 \leq t \leq 80\}}{160} \times f_{\log \sigma}(\log s) \frac{d \log s}{ds}, ~~ \text{ where the identity } f_{\sigma}(s) = f_{\log \sigma}(\log s) \frac{d \log s}{ds} ~ \text{ follows from the Jacobian formula} \\ &= \frac{I\{-80 \leq t \leq 80\}}{160} \times \frac{I\{-10 \leq \log s \leq 10\}}{20} \frac{1}{s} \\ &= \frac{I\{-80 \leq t \leq 80\}}{160} \times \frac{I\{\exp(-10) \leq s \leq \exp(10)\}}{20} \frac{1}{s} \\ &\propto I\{-80 \leq t \leq 80\} I\{\exp(-10) \leq s \leq \exp(10)\} \frac{1}{s} \end{align*} where, in the last step, we ignored the constant factors $160$ and $20$ by replacing the equality sign with the proportional sign.

The Likelihood is given by \begin{align*} f_{\text{data} \mid \theta = t, \sigma = s}(\text{data}) &= f_{y_1, \dots, y_n \mid \theta = t, \sigma = s}(y_1, \dots, y_n) \\ &= f_{y_1 \mid \theta = t, \sigma = s}(y_1) \times f_{y_2 \mid \theta = t, \sigma = s}(y_2) \times \dots \times f_{y_n \mid \theta = t, \sigma = s}(y_n) \\ &= \frac{1}{s\sqrt{2\pi}} \exp \left(-\frac{(y_1 - t)^2}{2s^2} \right) \times \frac{1}{s\sqrt{2\pi}} \exp \left(-\frac{(y_2 - t)^2}{2s^2} \right) \times \dots \times \frac{1}{s\sqrt{2\pi}} \exp \left(-\frac{(y_n - t)^2}{2s^2} \right) \\ &= \frac{1}{s^n (\sqrt{2 \pi})^n} \exp \left(-\frac{(y_1 - t)^2 + (y_2 - t)^2 + \dots + (y_n - t)^2}{2s^2} \right). \end{align*} The constant factor $\frac{1}{(\sqrt{2\pi})^n}$ does not depend on the parameters $t, s$ and so it can be ignored in proportionality leading to \begin{align*} f_{\text{data} \mid \theta = t, \sigma = s}(\text{data}) \propto \frac{1}{s^n} \exp \left(-\frac{(y_1 - t)^2 + (y_2 - t)^2 + \dots + (y_n - t)^2}{2s^2} \right). \end{align*} Combining the above derived formulae for the prior and likelihood, we obtain the posterior as: \begin{align*} & f_{\theta, \sigma \mid \text{data}}(t, s) \\ &\propto \text{Prior} \times \text{Likelihood} \\ &\propto I\{-80 \leq t \leq 80\} I\{\exp(-10) \leq s \leq \exp(10)\} \frac{1}{s} \times \frac{1}{s^n} \exp \left(-\frac{(y_1 - t)^2 + (y_2 - t)^2 + \dots + (y_n - t)^2}{2s^2} \right) \\ &= I\{-80 \leq t \leq 80\} I\{\exp(-10) \leq s \leq \exp(10)\} \frac{1}{s^{n+1}} \exp \left(-\frac{(y_1 - t)^2 + (y_2 - t)^2 + \dots + (y_n - t)^2}{2s^2} \right). \end{align*} The above relation is true up to proportionality. If we want to have equality, we have to normalize the right hand side leading to: \begin{align*} f_{\theta, \sigma \mid \text{data}}(t, s) = \frac{I\{-80 \leq t \leq 80\} I\{\exp(-10) \leq s \leq \exp(10)\} \frac{1}{s^{n+1}} \exp \left(-\frac{(y_1 - t)^2 + (y_2 - t)^2 + \dots + (y_n - t)^2}{2s^2} \right)}{\int_{-80}^{80} \int_{e^{-10}}^{e^{10}} \frac{1}{s^{n+1}} \exp \left(-\frac{(y_1 - t)^2 + (y_2 - t)^2 + \dots + (y_n - t)^2}{2s^2} \right) dt ds} \end{align*} We now use the Gibbs sampler to generate samples $(t^{(0)}, s^{(0)})$, $(t^{(1)}, s^{(1)})$, $\dots$, $(t^{(T)}, s^{(T)})$ from this somewhat complicated looking bivariate density. The main thing to figure out for Gibbs are the conditional distributions $\theta \mid \sigma = s, \text{data}$ and $\sigma \mid \theta = t, \text{data}$. For $f_{\theta \mid \sigma = s, \text{data}}(t)$, write \begin{align*} f_{\theta \mid \sigma = s, \text{data}}(t) &\propto f_{\theta, \sigma \mid \text{data}}(t, s) \\ &\propto I\{-80 \leq t \leq 80\} I\{\exp(-10) \leq s \leq \exp(10)\} \frac{1}{s^{n+1}} \exp \left(-\frac{(y_1 - t)^2 + (y_2 - t)^2 + \dots + (y_n - t)^2}{2s^2} \right). \end{align*} $f_{\theta \mid \sigma = s, \text{data}}(t)$ is a density in the variable $t$ which means that any multiplicative factor not depending on $t$ can be ignored in proportionality. We thus get \begin{align*} f_{\theta \mid \sigma = s, \text{data}}(t) &\propto f_{\theta, \sigma \mid \text{data}}(t, s) \\ &\propto I\{-80 \leq t \leq 80\} \exp \left(-\frac{(y_1 - t)^2 + (y_2 - t)^2 + \dots + (y_n - t)^2}{2s^2} \right). 
\end{align*} Using the elementary equality \begin{align*} (y_1 - t)^2 + (y_2 - t)^2 + \dots + (y_n - t)^2 = (y_1 - \bar{y})^2 + (y_2 - \bar{y})^2 + \dots + (y_n - \bar{y})^2 + n(\bar{y} - t)^2, \end{align*} we get \begin{align*} f_{\theta \mid \sigma = s, \text{data}}(t) &\propto I\{-80 \leq t \leq 80\} \exp \left(-\frac{(y_1 - \bar{y})^2 + (y_2 - \bar{y})^2 + \dots + (y_n - \bar{y})^2 + n(\bar{y} - t)^2}{2s^2} \right) \\ &= I\{-80 \leq t \leq 80\} \exp \left(-\frac{(y_1 - \bar{y})^2 + (y_2 - \bar{y})^2 + \dots + (y_n - \bar{y})^2}{2 s^2} \right) \exp \left(-\frac{n(\bar{y} - t)^2}{2s^2} \right) \end{align*} The multiplicative factor $\exp \left(-\frac{(y_1 - \bar{y})^2 + (y_2 - \bar{y})^2 + \dots + (y_n - \bar{y})^2}{2 s^2} \right)$ does not depend on $t$ and can be ignored in proportionality leading to the simpler expression: \begin{align*} f_{\theta \mid \sigma = s, \text{data}}(t) \propto I\{-80 \leq t \leq 80\} \exp \left(-\frac{n(\bar{y} - t)^2}{2s^2} \right) = I\{-80 \leq t \leq 80\} \exp \left(-\frac{(t - \bar{y})^2}{2\frac{s^2}{n}} \right) \end{align*} In other words, $\theta \mid \sigma = s, \text{data}$ is the normal distribution $N(\bar{y}, \frac{s^2}{n})$ conditioned to lie between $[-80, 80]$. Generating a random sample from this distribution is easy: sample from $N(\bar{y}, \frac{s^2}{n})$. Keep the sample unless it lies outside $[-80, 80]$. If it lies outside $[-80, 80]$, just throw this sample away and repeat. Note that, in our dataset, $\bar{y} = 17.615$ and $s^2/n$ will be generally small, so the samples from $N(\bar{y}, s^2/n)$ will almost always lie between $-80$ and $80$.
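In code, this accept/redraw step might look like the following small sketch (the helper name is ours):

import numpy as np

def sample_theta_given_sigma(ybar, s, n, lower=-80.0, upper=80.0, rng=None):
    rng = np.random.default_rng() if rng is None else rng
    while True:
        draw = rng.normal(ybar, np.sqrt(s**2 / n))   #proposal from N(ybar, s^2/n)
        if lower <= draw <= upper:                   #accept only draws inside the prior range
            return draw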

For the other conditional $\sigma \mid \theta = t, \text{data}$, write \begin{align*} f_{\sigma \mid \theta = t, \text{data}}(s) &\propto f_{\theta, \sigma \mid \text{data}}(t, s) \\ &\propto I\{-80 \leq t \leq 80\} I\{\exp(-10) \leq s \leq \exp(10)\} \frac{1}{s^{n+1}} \exp \left(-\frac{(y_1 - t)^2 + (y_2 - t)^2 + \dots + (y_n - t)^2}{2s^2} \right). \end{align*} Now we can ignore the factor $I\{-80 \leq t \leq 80\}$ which does not depend on $s$. We then get \begin{align*} f_{\sigma \mid \theta = t, \text{data}}(s) &\propto I\{\exp(-10) \leq s \leq \exp(10)\} \frac{1}{s^{n+1}} \exp \left(-\frac{(y_1 - t)^2 + (y_2 - t)^2 + \dots + (y_n - t)^2}{2s^2} \right). \end{align*} To parse this density, first of all, we can replace the somewhat complicated indicator function by the simpler $I\{s > 0\}$ because $\exp(-10) \approx 0$ and $\exp(10) \approx \infty$. The rest of the terms involve a power term and an exponential term resembling a Gamma density. However, this seems like a Gamma density in $1/s^2$ as the exponent involves $1/s^2$. We can convert it back to a regular Gamma density by calculating the density of $1/\sigma^2$ using the Jacobian formula: \begin{align*} f_{\frac{1}{\sigma^2} \mid \theta = t, \text{data}}(u) = f_{\sigma \mid \theta = t, \text{data}}\left(\sqrt{\frac{1}{u}} \right)\frac{1}{2u^{3/2}} \propto I\{u > 0\} u^{(n-2)/2}\exp \left(-\frac{(y_1 - t)^2 + (y_2 - t)^2 + \dots + (y_n - t)^2}{2} u \right) \end{align*} We thus get \begin{align*} \frac{1}{\sigma^2} \mid \theta = t, \text{data} \sim \text{Gamma} \left(\text{shape} = \frac{n}{2}, \text{rate} = \frac{(y_1 - t)^2 + (y_2 - t)^2 + \dots + (y_n - t)^2}{2} \right) \end{align*}

In [7]:
import numpy as np
n = 15
y_obs = np.array([17.62, 17.61, 17.61, 17.62, 17.62, 17.615, 17.615, 17.625, 17.61, 17.62, 17.62, 17.605, 17.61, 17.62, 17.61])
T = 4000

#Initialize
theta_0 = np.mean(y_obs)
sigma_0 = np.std(y_obs, ddof = 1)

#Create a list to store pairs (theta, sigma)
theta_sigma_pairs = [(theta_0, sigma_0)]

#Run the Gibbs steps
for t in range(1, T+1):
    s = theta_sigma_pairs[t-1][1]
    #Draw theta from its conditional N(ybar, s^2/n); the truncation to [-80, 80] is essentially never active here
    theta_t = np.random.normal(np.mean(y_obs), np.sqrt(s**2/n))
    deviations = [(y_i - theta_t)**2 for y_i in y_obs]
    #Draw 1/sigma^2 from Gamma(shape = n/2, rate = sum(deviations)/2); numpy's gamma takes scale = 1/rate
    u_t = np.random.gamma(n/2, 2/sum(deviations))
    sigma_t = 1/np.sqrt(u_t)
    theta_sigma_pairs.append((theta_t, sigma_t))

theta_values = [pair[0] for pair in theta_sigma_pairs]
sigma_values = [pair[1] for pair in theta_sigma_pairs]
theta_Gibbs = np.array(theta_values)
sigma_Gibbs = np.array(sigma_values)
In [8]:
#Our best estimate of theta can be taken to be the mean of the posterior samples for theta:
thetamean_Gibbs = np.mean(theta_Gibbs)
display(thetamean_Gibbs)
#A 95% interval for theta based on the posterior samples is computed as follows:
lower_limit_theta = np.percentile(theta_Gibbs, 2.5)
upper_limit_theta = np.percentile(theta_Gibbs, 97.5)
display([lower_limit_theta, upper_limit_theta])
17.615333303630138
[17.611948168740746, 17.618647759495197]
In [9]:
#Compare the posterior samples from the Gibbs sampler with the PyMC posterior samples obtained earlier
import matplotlib.pyplot as plt
plt.hist(theta_Gibbs, bins = 500, color = 'blue', alpha = 0.6, label = 'Gibbs samples')
plt.hist(theta_samples, bins = 500, color = 'red', alpha = 0.6, label = 'PyMC samples')
plt.xlabel('Values of theta')
plt.ylabel('Frequency')
plt.title('Posterior theta samples: Gibbs vs PyMC')
plt.legend()
plt.show();

plt.hist(sigma_Gibbs, bins = 500, color = 'blue', alpha = 0.6, label = 'Gibbs samples')
plt.hist(sigma_samples, bins = 500, color = 'red', alpha = 0.6, label = 'PyMC samples')
plt.xlabel('Values of sigma')
plt.ylabel('Frequency')
plt.title('Posterior sigma samples: Gibbs vs PyMC')
plt.legend()
plt.show();

An important fact in the above example is that with the priors $\theta \sim \text{Uniform}[-C, C]$ and $\log \sigma \sim \text{Uniform}[-C, C]$, as $C \rightarrow \infty$, the following is true: \begin{align*} \frac{\sqrt{n}(\bar{y} - \theta)}{\hat{\sigma}} \bigg \vert \text{data} \sim \mathbf{t}_{n-1} \end{align*}

The proof of this result is technical and omitted (it is not difficult to prove however; one just needs to integrate the joint posterior of $\theta$ and $\sigma$ with respect to $\sigma$, and match the integrated density with the density of $\mathbf{t}_{n-1}$). This result means that the posterior distribution for $\theta$ (without any conditioning on $\sigma$) after centering by $\bar{y}$ and scaling by $\hat{\sigma}/\sqrt{n}$ is the Student $\mathbf{t}_{n-1}$ distribution. This is similar to the result on which the usual frequentist inference is based. Thus frequentist and Bayesian inference with the flat priors coincide in this problem. Of course, if we change the priors by, say, making them informative, Bayes analysis will give answers different from the frequentist analysis.
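One quick numerical check of this fact (a sketch reusing theta_Gibbs, y_obs and n from the cells above): studentize the posterior draws of $\theta$ and compare their quantiles with those of $\mathbf{t}_{n-1}$.

import numpy as np
from scipy.stats import t

z = np.sqrt(n) * (np.mean(y_obs) - theta_Gibbs) / np.std(y_obs, ddof = 1)
print(np.percentile(z, [2.5, 97.5]))      #empirical 2.5% and 97.5% quantiles of the studentized draws
print(t.ppf([0.025, 0.975], df = n - 1))  #the corresponding t_{n-1} quantiles; these should be close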

Flat Priors in PyMC

In our Bayesian analysis, we have been using uninformative priors such as $\text{Uniform}[-80, 80]$ and $\text{Uniform}[-10, 10]$. One difficulty with such priors is that suitable ranges like $[-80, 80]$ and $[-10, 10]$ have to be chosen afresh in every problem. In PyMC, there is an alternative specification called the Flat distribution. The Flat distribution should be thought of as $\text{Uniform}[-C, C]$ with $C \rightarrow \infty$, and it avoids the need to specify ranges for the uniform priors. Here is a solution of the normal mean estimation problem in PyMC using flat priors for $\theta$ and $\log \sigma$.

In [10]:
import pymc as pm
import arviz as az
n = 15
y_obs = np.array([17.62, 17.61, 17.61, 17.62, 17.62, 17.615, 17.615, 17.625, 17.61, 17.62, 17.62, 17.605, 17.61, 17.62, 17.61])
measurement_model = pm.Model()
with measurement_model:
    theta = pm.Flat("theta") #now no range needs to be specified
    log_sigma = pm.Flat("log_sigma")
    sigma = pm.Deterministic("sigma", pm.math.exp(log_sigma))
    Y = pm.Normal("Y", mu = theta, sigma = sigma, observed=y_obs)
    #Sample from posterior:
    idata = pm.sample(2000, chains = 2, return_inferencedata = True) 

#The posterior samples can be used to estimate theta (and sigma) along with uncertainty quantification as follows: 
theta_samples = idata.posterior['theta'].values.flatten()
log_sigma_samples = idata.posterior['log_sigma'].values.flatten()
sigma_samples = np.exp(log_sigma_samples)
#Our best estimate of the unknown length can be taken to be the mean of the posterior samples for theta:
thetamean = np.mean(theta_samples)
display(thetamean)
#A 95% interval for theta based on the posterior samples is computed as follows:
lower_limit_theta = np.percentile(theta_samples, 2.5)
upper_limit_theta = np.percentile(theta_samples, 97.5)
display([lower_limit_theta, upper_limit_theta])
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 4 jobs)
NUTS: [theta, log_sigma]
100.00% [6000/6000 00:02<00:00 Sampling 2 chains, 0 divergences]
Sampling 2 chains for 1_000 tune and 2_000 draw iterations (2_000 + 4_000 draws total) took 3 seconds.
We recommend running at least 4 chains for robust computation of convergence diagnostics
17.615384516213563
[17.61223398903628, 17.618566649045455]

Gibbs Sampler in the more general Multivariate Case

The Gibbs sampler in the multivariate case also works similarly. Consider the problem of generating samples from a target density $f_{\text{Target}}(u)$ where $u$ is a multivariate vector. We decompose $u$ as \begin{align*} u = (u_{(1)}, \dots, u_{(k)}) \end{align*} for $k$ subvectors $u_{(1)}, \dots, u_{(k)}$. Assume that $f_{\text{Target}}(u)$ is the joint density of $U_{(1)}, U_{(2)}, \dots, U_{(k)}$. The Gibbs sampler will produce samples of the form: \begin{align*} (u_{(1)}^{(0)}, \dots, u_{(k)}^{(0)}) \rightarrow (u_{(1)}^{(1)}, \dots, u_{(k)}^{(1)}) \rightarrow (u_{(1)}^{(2)}, \dots, u_{(k)}^{(2)}) \rightarrow \dots \rightarrow (u_{(1)}^{(T)}, \dots, u_{(k)}^{(T)}) \end{align*} for a large $T$, by going through the following steps:

  1. Initialize at arbitrary $u_{(1)}^{(0)}, u_{(2)}^{(0)}, \dots, u_{(k)}^{(0)}$. Repeat the following for $t = 0, 1, 2, \dots$
  2. Given the current sample $(u_{(1)}^{(t)}, u_{(2)}^{(t)}, \dots, u_{(k)}^{(t)})$, for $j = 1, \dots, k$ in order, draw $u_{(j)}^{(t+1)}$ from the conditional distribution of $U_{(j)}$ given $U_{(1)} = u_{(1)}^{(t+1)}, \dots, U_{(j-1)} = u_{(j-1)}^{(t+1)}, U_{(j+1)} = u_{(j+1)}^{(t)}, \dots, U_{(k)} = u_{(k)}^{(t)}$.

The components $U_{(1)}, \dots, U_{(k)}$ of $U$ are chosen so that the conditional distributions are easy to sample from. For a somewhat challenging exercise in Gibbs sampling, try to write down the Gibbs sampler for the Gaussian Mixture Models from last lecture.
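For concreteness, here is a generic sketch of the sweep described above (the list `sample_conditional` of component-wise sampler functions is a hypothetical placeholder to be supplied for the problem at hand):

import numpy as np

def gibbs_sweep(sample_conditional, u_init, T, rng=None):
    #sample_conditional[j](current, rng) should return a draw of U_(j) given the other
    #components held at their values in the list `current`
    rng = np.random.default_rng() if rng is None else rng
    samples = [list(u_init)]
    for _ in range(T):
        current = list(samples[-1])                            #start the sweep from the most recent sample
        for j in range(len(current)):
            current[j] = sample_conditional[j](current, rng)   #update component j in place
        samples.append(current)
    return samples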

The Gibbs Sampler is an example of a Markov Chain Monte Carlo algorithm. Markov Chain here refers to the fact that the samples $(u_{(1)}^{(t)}, \dots, u_{(k)}^{(t)})$ generated by the Gibbs Sampler form a Markov Chain. The Gibbs Sampler can be seen as a special case of a more general class of algorithms called Metropolis-Hastings.

Linear Regression

We shall next study regression models including linear regression, logistic regression and other generalized linear models (GLMs). Today's focus will be on linear regression.

The goal of regression is to understand the relationship between a response (or output or dependent) variable $y$ and $m$ covariates (or predictors or features or explanatory variables or inputs or independent variables) $x_1, \dots, x_m$. We observe data $(y_i, x_{i1}, \dots, x_{im})$ on the variables for each of $n$ units for $i = 1, \dots, n$. In linear regression, we assume the model: \begin{align*} y_i = \beta_0 + \beta_1 x_{i1} + \beta_2 x_{i2} + \dots + \beta_m x_{im} + \epsilon_i \end{align*} for $i = 1, \dots, n$. Here $\beta_0, \beta_1, \dots, \beta_m$ are parameters of the model. $\beta_0$ is the intercept and $\beta_j$ is the regression coefficient for the $j^{th}$ variable $x_j$. It should be interpreted as the change in response ($y$) for a unit change in $x_j$ where all other variables are kept constant. The $\epsilon_i$ term represents error. In standard regression analysis, we assume $\epsilon_i \overset{\text{i.i.d}}{\sim} N(0, \sigma^2)$ for an unknown $\sigma$. So $\sigma$ will also be a parameter in the model. We thus have $m+2$ total parameters: $\beta_0, \beta_1, \dots, \beta_m, \sigma$.

Standard Frequentist Linear Regression Analysis

The coefficients $\beta_0, \dots, \beta_m$ are estimated by the method of least squares. More precisely, one finds $\hat{\beta}_0, \dots, \hat{\beta}_m$ which minimize \begin{align*} S(\beta_0, \dots, \beta_m) = \sum_{i=1}^n \left(y_i - \beta_0 - \beta_1 x_{i1} - \dots - \beta_m x_{im} \right)^2 \end{align*} over all values of $\beta_0, \dots, \beta_m$. The formula for the least squares estimator can be written down neatly using matrix notation. Let \begin{align*} Y = \begin{pmatrix} y_1 \\ \cdot \\ \cdot \\ \cdot \\ y_n \end{pmatrix} ~~\text{ and }~~ X = \begin{pmatrix} 1 & x_{11} & \cdot & \cdot & \cdot & x_{1m} \\ \cdot & \cdot & \cdot & \cdot & \cdot & \cdot \\ \cdot & \cdot & \cdot & \cdot & \cdot & \cdot \\ 1 & x_{i1} & \cdot & \cdot & \cdot & x_{im} \\ \cdot & \cdot & \cdot & \cdot & \cdot & \cdot \\ \cdot & \cdot & \cdot & \cdot & \cdot & \cdot\\ 1 & x_{n1} & \cdot & \cdot & \cdot & x_{nm} \end{pmatrix} ~~ \text{ and }~~ \beta := \begin{pmatrix} \beta_0 \\ \beta_1 \\ \cdot \\ \cdot \\ \cdot \\ \beta_m \end{pmatrix} ~~\text{ and }~~\hat{\beta} := \begin{pmatrix} \hat{\beta}_0 \\ \hat{\beta}_1 \\ \cdot \\ \cdot \\ \cdot \\ \hat{\beta}_m \end{pmatrix}. \end{align*} Then the least squares estimator is given by \begin{equation*} \hat{\beta} = (X^T X)^{-1} X^T Y. \end{equation*} In practice, this is computed by some library (such as statsmodels or scikit-learn). For quantifying the uncertainty in estimation, one uses the fact: \begin{align*} \hat{\beta} \sim N(\beta, \sigma^2 (X^T X)^{-1}) \end{align*} where the normal distribution on the right hand side is multivariate normal. This implies that each individual component (for $j = 0, 1, \dots, m$) of $\hat{\beta}$ satisfies \begin{align*} \hat{\beta}_j \sim N\left(\beta_j, \sigma^2 (X^T X)^{j+1, j+1} \right) \end{align*} where $(X^T X)^{j+1, j+1}$ denotes the $(j+1)^{th}$ diagonal entry of $(X^T X)^{-1}$. When $\sigma$ is unknown, the above relation cannot be used for inference on the beta-coefficients. In that case, one first estimates $\sigma$ by: \begin{align*} \hat{\sigma} = \sqrt{\frac{\text{RSS}}{n-\text{rank}(X)}} ~~ \text{ where } ~~ \text{RSS} := \sum_{i=1}^n \left(y_i - \hat{\beta}_0 - \hat{\beta}_1 x_{i1} - \dots - \hat{\beta}_m x_{im} \right)^2 \end{align*} RSS stands for Residual Sum of Squares. This estimate $\hat{\sigma}$ of $\sigma$ is known as the Residual Standard Error. Inference on $\beta_j$ can then be carried out via: \begin{align*} \frac{\hat{\beta}_j - \beta_j}{\hat{\sigma} \sqrt{(X^T X)^{j+1, j+1}}} \sim \mathbf{t}_{n-\text{rank}(X)} \end{align*} Here is an illustration of linear regression in a standard dataset on heights.
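As a small simulated check (all names below are ours, not from the heights dataset used next), the matrix formula $\hat{\beta} = (X^T X)^{-1} X^T Y$ can be verified against statsmodels:

import numpy as np
import statsmodels.api as sm

rng = np.random.default_rng(0)
n_sim, m_sim = 50, 2
X_sim = sm.add_constant(rng.normal(size = (n_sim, m_sim)))    #design matrix with an intercept column
beta_true = np.array([1.0, 2.0, -0.5])
y_sim = X_sim @ beta_true + rng.normal(scale = 0.3, size = n_sim)

beta_hat = np.linalg.inv(X_sim.T @ X_sim) @ X_sim.T @ y_sim   #(X^T X)^{-1} X^T Y
print(beta_hat)
print(sm.OLS(y_sim, X_sim).fit().params)                      #least squares estimates from statsmodels; should match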

In [11]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import chi2, multivariate_normal

#Pearson Height Data
height = pd.read_table("PearsonHeightData.txt")
print(height.shape)
n = len(height)
print(height.head(10))

#Plot the Data:
plt.scatter(height['Father'], height['Son'], label = 'Data', s = 7, alpha = 1)
plt.xlabel('Father Height')
plt.ylabel('Son Height')
plt.title("Pearson's data on the heights of fathers and their sons")
plt.legend()
plt.show()
(1078, 2)
   Father   Son
0    65.0  59.8
1    63.3  63.2
2    65.0  63.3
3    65.8  62.8
4    61.1  64.3
5    63.0  64.2
6    65.4  64.1
7    64.7  64.0
8    66.1  64.6
9    67.0  64.0
In [12]:
#Linear Regression using the library statsmodels
import statsmodels.api as sm
X = sm.add_constant(height['Father'])
print(X.head(10))
model = sm.OLS(height['Son'], X).fit()
print(model.summary())
#This summary gives the least squares estimates
#The standard error corresponding to each estimate of beta_j is simply \hat{\sigma} \sqrt{(X^T X)^{j+1, j+1}}
   const  Father
0    1.0    65.0
1    1.0    63.3
2    1.0    65.0
3    1.0    65.8
4    1.0    61.1
5    1.0    63.0
6    1.0    65.4
7    1.0    64.7
8    1.0    66.1
9    1.0    67.0
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                    Son   R-squared:                       0.251
Model:                            OLS   Adj. R-squared:                  0.250
Method:                 Least Squares   F-statistic:                     360.9
Date:                Wed, 27 Sep 2023   Prob (F-statistic):           1.27e-69
Time:                        07:45:29   Log-Likelihood:                -2489.4
No. Observations:                1078   AIC:                             4983.
Df Residuals:                    1076   BIC:                             4993.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         33.8928      1.833     18.491      0.000      30.296      37.489
Father         0.5140      0.027     18.997      0.000       0.461       0.567
==============================================================================
Omnibus:                       17.527   Durbin-Watson:                   0.765
Prob(Omnibus):                  0.000   Jarque-Bera (JB):               30.642
Skew:                          -0.052   Prob(JB):                     2.22e-07
Kurtosis:                       3.819   Cond. No.                     1.67e+03
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.67e+03. This might indicate that there are
strong multicollinearity or other numerical problems.
In [13]:
#Plotting the regression line on the scatterplot of the data:
plt.figure(figsize = (13, 9))
plt.scatter(height['Father'], height['Son'], label = 'Data', s = 7, alpha = 1)
plt.plot(height['Father'], model.predict(X), color = 'red', label = 'Regression Line', linewidth = 0.6)
plt.xlabel('Father Height')
plt.ylabel('Son Height')
plt.title("Pearson's data on the heights of fathers and their sons")
plt.legend()
plt.show()
In [14]:
#Residual Standard Error:
residuals = height['Son'] - model.predict(X)
sighat = np.sqrt(np.sum(residuals**2) / (n - 2))
print(sighat)
2.4381343793524275
In [15]:
#Standard Errors of Regression Coefficient Estimates: 
#As mentioned, the standard error corresponding to each estimate of beta_j is 
#simply \hat{\sigma} \sqrt{(X^T X)^{j+1, j+1}}. We verify this here
X_matrix = np.column_stack([np.ones(n), height['Father']])
Sigma_mat = (sighat**2) * np.linalg.inv(X_matrix.T @ X_matrix)
stderrs = np.sqrt(np.diag(Sigma_mat))
print("Standard Errors:", stderrs)
Standard Errors: [1.83289458 0.02705681]

Standard Bayesian Linear Regression Analysis

Standard Bayesian Analysis for Linear Regression works with Flat Priors for each $\beta_j$ as well as for $\log \sigma$. In other words, we assume \begin{align*} \beta_0, \beta_1, \dots, \beta_m, \log \sigma \overset{\text{i.i.d}}{\sim} \text{Uniform}[-C, C] \end{align*} for a large $C \rightarrow \infty$. With this prior, it can be proved mathematically that Bayesian analysis leads to exactly the same results as the standard frequentist analysis. More specifically, it can be shown that for each $j = 0, 1, \dots, m$, \begin{align*} \frac{\hat{\beta}_j - \beta_j}{\hat{\sigma} \sqrt{(X^T X)^{j+1, j+1}}} \bigg \vert \text{data}\sim \mathbf{t}_{n-\text{rank}(X)} \end{align*} We can verify that the Bayesian analysis leads to the same results as the standard frequentist analysis easily in PyMC.

In [16]:
#Bayesian Linear Regression through PyMC
linregmod = pm.Model()
with linregmod:
    # Priors for unknown model parameters
    b0 = pm.Flat("b0")
    b1 = pm.Flat("b1")
    log_sigma = pm.Flat("log_sigma")
    sigma = pm.Deterministic("sigma", pm.math.exp(log_sigma))
    # Expected value of outcome
    mu = b0 + b1 * height['Father']
    # Likelihood
    Y_obs = pm.Normal("Y_obs", mu=mu, sigma=sigma, observed=height['Son'])
    idata = pm.sample(2000, chains = 2, return_inferencedata = True)  
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 4 jobs)
NUTS: [b0, b1, log_sigma]
100.00% [6000/6000 00:32<00:00 Sampling 2 chains, 0 divergences]
Sampling 2 chains for 1_000 tune and 2_000 draw iterations (2_000 + 4_000 draws total) took 33 seconds.
We recommend running at least 4 chains for robust computation of convergence diagnostics
In [17]:
b0_samples = idata.posterior['b0'].values.flatten()
b1_samples = idata.posterior['b1'].values.flatten()
log_sigma_samples = idata.posterior['log_sigma'].values.flatten()
sigma_samples = np.exp(log_sigma_samples)

display([np.mean(b0_samples), np.std(b0_samples)])
display([np.mean(b1_samples), np.std(b1_samples)])
display([np.mean(sigma_samples)])
#These numbers should be close to the numbers reported in the regression summary i.e., the Bayesian solution gives very similar answers to the frequentist solution
[33.822828043068746, 1.8380392435492228]
[0.5150565570775588, 0.02713650826010662]
[2.438215512084639]

One advantage of the Bayesian analysis is that posterior uncertainty in the regression line can be visualized by plotting the lines corresponding to the different posterior sampled values of $\beta_0, \beta_1, \dots, \beta_m$. This is done in the code below.

In [18]:
# Visualizing posterior variability
N = 200
# Plotting the data, regression line, and posterior samples
plt.figure(figsize = (13, 9))
plt.scatter(height['Father'], height['Son'], label='Data', alpha=1, s = 7)
for k in range(N):
    plt.plot(height['Father'], b0_samples[k] + b1_samples[k]*height['Father'], color='blue', alpha=0.2)
plt.plot(height['Father'], model.predict(X), color='red', label='Regression Line', linewidth = 1)
plt.xlabel('Father Height')
plt.ylabel('Son Height')
plt.title("Height Data with Posterior Samples")
plt.legend()
plt.show()
#Generally the posterior in linear regression will be quite narrow

Next we use linear regression on another dataset of the population of the United States for each month from 1959 to 2022. We fit a line to the population as a function of time.

In [19]:
#US Population Example:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
from numpy.linalg import inv
import pymc as pm
In [20]:
# Read data
uspop_raw = pd.read_csv("POPTHM.csv")
print(uspop_raw.head(15))
print(uspop_raw.tail(15))
print(uspop_raw.shape)
#Monthly Data downloaded from FRED. 
#Data given for each month equals the average of the estimated population on the first day of the month
#and the first day of the next month. 
#The units are thousands of persons, so 200,000 actually refers to a population of 200 million. 

# Plot raw data
plt.figure(figsize=(13, 9))
plt.plot(uspop_raw['POPTHM'])
plt.ylabel('Population (in thousands)')
plt.xlabel('Months since start')
plt.title('US Population')
plt.show()
          DATE    POPTHM
0   1959-01-01  175818.0
1   1959-02-01  176044.0
2   1959-03-01  176274.0
3   1959-04-01  176503.0
4   1959-05-01  176723.0
5   1959-06-01  176954.0
6   1959-07-01  177208.0
7   1959-08-01  177479.0
8   1959-09-01  177755.0
9   1959-10-01  178026.0
10  1959-11-01  178273.0
11  1959-12-01  178504.0
12  1960-01-01  178925.0
13  1960-02-01  179326.0
14  1960-03-01  179707.0
           DATE    POPTHM
747  2021-04-01  331963.0
748  2021-05-01  332008.0
749  2021-06-01  332092.0
750  2021-07-01  332192.0
751  2021-08-01  332295.0
752  2021-09-01  332403.0
753  2021-10-01  332513.0
754  2021-11-01  332598.0
755  2021-12-01  332640.0
756  2022-01-01  332684.0
757  2022-02-01  332750.0
758  2022-03-01  332812.0
759  2022-04-01  332863.0
760  2022-05-01  332928.0
761  2022-06-01  333028.0
(762, 2)
In [21]:
#Linear Regression of Population with Time
time = np.arange(1, uspop_raw.shape[0] + 1)
X = sm.add_constant(time)
lin_model = sm.OLS(uspop_raw['POPTHM'], X).fit()
print(lin_model.summary())
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                 POPTHM   R-squared:                       0.997
Model:                            OLS   Adj. R-squared:                  0.997
Method:                 Least Squares   F-statistic:                 2.394e+05
Date:                Wed, 27 Sep 2023   Prob (F-statistic):               0.00
Time:                        07:46:05   Log-Likelihood:                -7090.6
No. Observations:                 762   AIC:                         1.419e+04
Df Residuals:                     760   BIC:                         1.419e+04
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const       1.742e+05    193.211    901.647      0.000    1.74e+05    1.75e+05
x1           214.6489      0.439    489.236      0.000     213.788     215.510
==============================================================================
Omnibus:                      183.666   Durbin-Watson:                   0.000
Prob(Omnibus):                  0.000   Jarque-Bera (JB):               69.725
Skew:                          -0.553   Prob(JB):                     7.23e-16
Kurtosis:                       2.013   Cond. No.                         882.
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
In [22]:
#Plot data with the fitted regression line:
plt.figure(figsize=(13, 9))
plt.plot(time, uspop_raw['POPTHM'], label="Data", color="black")
plt.plot(time, lin_model.fittedvalues, color="red", label="Fitted")
plt.ylabel('US Population')
plt.xlabel('Time (months)')
plt.title('Population of the United States')
plt.legend()
plt.show()
In [23]:
#Estimate of sigma
n = uspop_raw.shape[0]
sighat = np.sqrt(sum(lin_model.resid**2) / (n-2))
#Standard Errors of estimates:
stderrs = lin_model.bse

# Compute variance-covariance matrix
X_matrix = np.column_stack([np.ones(len(time)), time])
Sigma_mat = sighat**2 * inv(X_matrix.T @ X_matrix)

# Print relevant statistics
print("Residual Standard Error:", sighat)
print("Standard Errors from Model:", stderrs)
print("Variance-Covariance Matrix:", Sigma_mat)
print("Standard Errors from Matrix:", np.sqrt(np.diag(Sigma_mat)))
Residual Standard Error: 2664.107143206883
Standard Errors from Model: const    193.210975
x1         0.438743
dtype: float64
Variance-Covariance Matrix: [[ 3.73304810e+04 -7.34370117e+01]
 [-7.34370117e+01  1.92495444e-01]]
Standard Errors from Matrix: [193.21097524   0.43874303]
In [24]:
#Bayesian Inference for Regression
linregmod = pm.Model()
with linregmod:
    # Priors for unknown model parameters
    b0 = pm.Flat("b0")
    b1 = pm.Flat("b1")
    log_sigma = pm.Flat("log_sigma")
    sigma = pm.Deterministic("sigma", pm.math.exp(log_sigma))
    # Expected value of outcome
    mu = b0 + b1 * time
    # Likelihood
    Y_obs = pm.Normal("Y_obs", mu=mu, sigma=sigma, observed=uspop_raw['POPTHM'])
    idata = pm.sample(3000, chains = 2, return_inferencedata = True)  
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 4 jobs)
NUTS: [b0, b1, log_sigma]
100.00% [8000/8000 00:08<00:00 Sampling 2 chains, 0 divergences]
Sampling 2 chains for 1_000 tune and 3_000 draw iterations (2_000 + 6_000 draws total) took 9 seconds.
We recommend running at least 4 chains for robust computation of convergence diagnostics
In [25]:
b0_samples = idata.posterior['b0'].values.flatten()
b1_samples = idata.posterior['b1'].values.flatten()
log_sigma_samples = idata.posterior['log_sigma'].values.flatten()
sigma_samples = np.exp(log_sigma_samples)

display([np.mean(b0_samples), np.std(b0_samples)])
display([np.mean(b1_samples), np.std(b1_samples)])
display([np.mean(sigma_samples)])
[174207.48705076656, 193.37142939362715]
[214.64974122366178, 0.44074506729948476]
[2665.247385131357]
In [26]:
# Visualizing posterior variability
N = 200
# Plotting the data, regression line, and posterior samples
plt.figure(figsize = (13, 9))
plt.plot(time, uspop_raw['POPTHM'], label='Data', color = 'black')
for k in range(N):
    plt.plot(time, b0_samples[k] + b1_samples[k]*time, color='blue', alpha=0.2)
plt.plot(time, lin_model.predict(X), color='red', label='Regression Line', linewidth = 1)
plt.xlabel('Time')
plt.ylabel('US Population')
plt.title("Population of the United States")
plt.legend()
plt.show()

In both the above examples, the posterior variability in the regression lines is quite narrow. This turns out to be a general phenomenon: the posterior variability around the fitted regression line is typically narrow. This can happen even when the linear model is not the correct model, as illustrated in the simple simulated example below.

In [27]:
#Here is a simulated example
n = 400
x = np.arange(1, n+1)
sig = 1000
dt = 5 + 0.8 * ((x-(n/2)) ** 2) + sig * np.random.randn(n)
plt.figure(figsize = (10, 6))
plt.plot(x, dt)
plt.xlabel("x")
plt.ylabel("Data")
plt.title("A simulated data for regression")
plt.show()
In [28]:
#Fitting a line to the above data
X = sm.add_constant(x)
lin_model = sm.OLS(dt, X).fit()
print(lin_model.summary())
residuals = dt - lin_model.predict(X)
sighat = np.sqrt(np.sum(residuals**2) / (n - 2))
print(sighat)

plt.figure(figsize = (10, 6))
plt.plot(x, dt)
plt.plot(x, lin_model.fittedvalues, color = "red", label = "Linear Fit", linewidth = 0.8)
plt.xlabel("x")
plt.ylabel("Data")
plt.title("A simulated data for regression")
plt.show()
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                      y   R-squared:                       0.000
Model:                            OLS   Adj. R-squared:                 -0.002
Method:                 Least Squares   F-statistic:                   0.04003
Date:                Wed, 27 Sep 2023   Prob (F-statistic):              0.842
Time:                        07:46:17   Log-Likelihood:                -4234.9
No. Observations:                 400   AIC:                             8474.
Df Residuals:                     398   BIC:                             8482.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const       1.055e+04    962.957     10.951      0.000    8652.026    1.24e+04
x1             0.8327      4.162      0.200      0.842      -7.349       9.015
==============================================================================
Omnibus:                       66.281   Durbin-Watson:                   0.020
Prob(Omnibus):                  0.000   Jarque-Bera (JB):               39.002
Skew:                           0.627   Prob(JB):                     3.39e-09
Kurtosis:                       2.125   Cond. No.                         464.
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
9611.524939661922

Let us now fit the Bayesian linear regression model (with flat priors) and then obtain posterior samples of $\beta_0$ and $\beta_1$.

In [29]:
#Bayesian Linear Regression through PyMC
linregmod = pm.Model()
with linregmod:
    # Priors for unknown model parameters
    b0 = pm.Flat("b0")
    b1 = pm.Flat("b1")
    log_sigma = pm.Flat("log_sigma")             
    sigma = pm.Deterministic("sigma", pm.math.exp(log_sigma))
    # Expected value of outcome
    mu = b0 + b1 * x
    # Likelihood
    Y_obs = pm.Normal("Y_obs", mu=mu, sigma=sigma, observed=dt)
    idata = pm.sample(2000, chains = 2, return_inferencedata = True)  
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 4 jobs)
NUTS: [b0, b1, log_sigma]
100.00% [6000/6000 00:06<00:00 Sampling 2 chains, 0 divergences]
Sampling 2 chains for 1_000 tune and 2_000 draw iterations (2_000 + 4_000 draws total) took 6 seconds.
We recommend running at least 4 chains for robust computation of convergence diagnostics
In [30]:
b0_samples = idata.posterior['b0'].values.flatten()
b1_samples = idata.posterior['b1'].values.flatten()
N = 200
plt.figure(figsize = (13, 9))
plt.plot(x, dt)
for k in range(N):
    plt.plot(x, b0_samples[k] + b1_samples[k] * x, color='blue', alpha=0.2)
plt.plot(x, lin_model.fittedvalues, color = "red", label = "Linear Fit", linewidth = 1.2)
plt.xlabel("x")
plt.ylabel("Data")
plt.title("A simulated data for regression")
plt.show()
#Clearly the posterior lines are tightly clustered around the fitted regression line.

In this simulated example, the correct model is a quadratic function of $x$. This can also be handled by the linear regression methodology simply because a quadratic function of $x$ is of the form $\beta_0 + \beta_1 x + \beta_2 x^2$, which is a linear function of $1, x$ and $x^2$. So if we take $x$ and $x^2$ to be two covariates $x_1$ and $x_2$, we can fit this quadratic function in the same linear regression framework. This is illustrated in the code below.

In [31]:
#Now let us fit a quadratic function to this dataset:
X = sm.add_constant(np.column_stack([x, x**2]))  # Adds constant term (for intercept) and quadratic term
quad_model = sm.OLS(dt, X).fit()
print(quad_model.summary())
residuals = dt - quad_model.predict(X)
sighat = np.sqrt(np.sum(residuals**2) / (n - 3))
print(sighat)

#Plotting
plt.figure(figsize=(13, 9))
plt.plot(x, dt, label="Data", color="black")
plt.plot(x, quad_model.fittedvalues, color="red", label="Quadratic Fit")
plt.xlabel("x")
plt.ylabel("Data")
plt.title("A Simulated Dataset")
plt.legend()
plt.show()
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                      y   R-squared:                       0.990
Model:                            OLS   Adj. R-squared:                  0.990
Method:                 Least Squares   F-statistic:                 1.934e+04
Date:                Wed, 27 Sep 2023   Prob (F-statistic):               0.00
Time:                        07:46:25   Log-Likelihood:                -3317.0
No. Observations:                 400   AIC:                             6640.
Df Residuals:                     397   BIC:                             6652.
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const       3.203e+04    146.228    219.074      0.000    3.17e+04    3.23e+04
x1          -319.9079      1.684   -189.966      0.000    -323.219    -316.597
x2             0.7999      0.004    196.676      0.000       0.792       0.808
==============================================================================
Omnibus:                        1.160   Durbin-Watson:                   1.938
Prob(Omnibus):                  0.560   Jarque-Bera (JB):                0.935
Skew:                           0.091   Prob(JB):                        0.626
Kurtosis:                       3.152   Cond. No.                     2.16e+05
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 2.16e+05. This might indicate that there are
strong multicollinearity or other numerical problems.
969.985769798426
In [32]:
#Bayesian quadratic fitting:
quadregmod = pm.Model()
with quadregmod:
    # Priors for unknown model parameters
    b0 = pm.Flat("b0")
    b1 = pm.Flat("b1")
    b2 = pm.Flat("b2")
    log_sigma = pm.Flat("log_sigma")             
    sigma = pm.Deterministic("sigma", pm.math.exp(log_sigma))
    # Expected value of outcome
    mu = b0 + b1 * x + b2 * (x ** 2)
    # Likelihood
    Y_obs = pm.Normal("Y_obs", mu=mu, sigma=sigma, observed=dt)
    idata = pm.sample(2000, chains = 2, return_inferencedata = True)  
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 4 jobs)
NUTS: [b0, b1, b2, log_sigma]
100.00% [6000/6000 00:16<00:00 Sampling 2 chains, 0 divergences]
Sampling 2 chains for 1_000 tune and 2_000 draw iterations (2_000 + 4_000 draws total) took 17 seconds.
We recommend running at least 4 chains for robust computation of convergence diagnostics
In [33]:
b0_samples = idata.posterior['b0'].values.flatten()
b1_samples = idata.posterior['b1'].values.flatten()
b2_samples = idata.posterior['b2'].values.flatten()
N = 200
plt.figure(figsize = (13, 9))
plt.plot(x, dt)
for k in range(N):
    plt.plot(x, b0_samples[k] + b1_samples[k] * x + b2_samples[k] * (x ** 2), color='blue', alpha=0.2)
plt.plot(x, quad_model.fittedvalues, color = "red", label = "Quadratic Fit", linewidth = 1.2)
plt.xlabel("x")
plt.ylabel("Data")
plt.title("A simulated data for regression")
plt.show()

Next we present an example where a more complicated function can be fit to the relation between $y$ and $x$ using the linear regression methodology. In this data, $x$ represents time (in months, from January 1992 to August 2022) and $y$ represents monthly alcohol sales. The data are plotted in the code below.

In [34]:
#Another illustration of linear regression
sales_raw = pd.read_csv("MRTSSM4453USN.csv")
#This dataset gives monthly data on beer, wine and liquor sales in the US from January 1992 to August 2022
print(sales_raw.tail())

dt = sales_raw.iloc[:, 1].values

# Plot the data
time = np.arange(1, len(dt) + 1)
plt.figure(figsize=(13, 9))
plt.plot(time, dt, label="Data", color="black")
plt.xlabel("Monthly Time")
plt.ylabel("Millions of Dollars")
plt.title("Retail Sales: Beer, Wine and Liquor Stores")
plt.show()
           DATE  MRTSSM4453USN
362  2022-03-01           5590
363  2022-04-01           5710
364  2022-05-01           5919
365  2022-06-01           6038
366  2022-07-01           6230

This dataset shows an increasing trend that can perhaps be modeled by a linear or quadratic function of time. It also shows a seasonal pattern. One way to capture this relationship is to fit the model: \begin{align*} y_i &= \beta_0 + \beta_1 \cos \left(2 \pi \frac{t_i}{12} \right) + \beta_2 \sin \left(2 \pi \frac{t_i}{12} \right)\\ &+ \beta_3 \cos \left(2 \pi \frac{2t_i}{12} \right) + \beta_4 \sin \left(2 \pi \frac{2t_i}{12} \right) \\ &+ \beta_5 \cos \left(2 \pi \frac{3t_i}{12} \right) + \beta_6 \sin \left(2 \pi \frac{3t_i}{12} \right) \\ &+ \beta_7 \cos \left(2 \pi \frac{4t_i}{12} \right) + \beta_8 \sin \left(2 \pi \frac{4t_i}{12} \right) + \beta_9 t_i + \beta_{10} t_i^2 + \epsilon_i \end{align*} The cosine and sine terms capture the seasonal pattern in the dataset and the increasing pattern is captured by the quadratic term. This is again a linear regression model with $m = 10$. It can be fit in the same way as follows.

In [35]:
# Construct harmonic regression terms and quadratic terms
t = time
d = 12
v1 = np.cos(2 * np.pi * 1 * t / d)
v2 = np.sin(2 * np.pi * 1 * t / d)
v3 = np.cos(2 * np.pi * 2 * t / d)
v4 = np.sin(2 * np.pi * 2 * t / d)
v5 = np.cos(2 * np.pi * 3 * t / d)
v6 = np.sin(2 * np.pi * 3 * t / d)
v7 = np.cos(2 * np.pi * 4 * t / d)
v8 = np.sin(2 * np.pi * 4 * t / d)
v9 = t
v10 = t**2

# Fit the regression model
X = sm.add_constant(np.column_stack([v1, v2, v3, v4, v5, v6, v7, v8, v9, v10]))
lin_mod = sm.OLS(dt, X).fit()
print(lin_mod.summary())

# Plot original and fitted data
plt.figure(figsize=(13, 9))
plt.plot(t, dt, label="Data", color="black")
plt.plot(t, lin_mod.fittedvalues, color="red", label="Fitted")
plt.xlabel("Time (months)")
plt.ylabel("Sales Data")
plt.title("Retail Sales: Beer, Wine and Liquor Stores")
plt.legend()
plt.show()
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                      y   R-squared:                       0.948
Model:                            OLS   Adj. R-squared:                  0.946
Method:                 Least Squares   F-statistic:                     648.5
Date:                Wed, 27 Sep 2023   Prob (F-statistic):          1.15e-221
Time:                        07:46:44   Log-Likelihood:                -2591.1
No. Observations:                 367   AIC:                             5204.
Df Residuals:                     356   BIC:                             5247.
Df Model:                          10                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const       1797.3211     45.085     39.865      0.000    1708.655    1885.987
x1            53.8210     21.117      2.549      0.011      12.291      95.351
x2          -165.5352     21.151     -7.826      0.000    -207.133    -123.938
x3           292.5844     21.144     13.837      0.000     251.001     334.168
x4           -95.9640     21.115     -4.545      0.000    -137.490     -54.438
x5           247.3131     21.158     11.689      0.000     205.702     288.924
x6          -101.8554     21.100     -4.827      0.000    -143.352     -60.359
x7           214.7822     21.142     10.159      0.000     173.203     256.361
x8           -40.7833     21.114     -1.932      0.054     -82.307       0.740
x9             2.2312      0.566      3.943      0.000       1.118       3.344
x10            0.0228      0.001     15.325      0.000       0.020       0.026
==============================================================================
Omnibus:                       26.551   Durbin-Watson:                   2.304
Prob(Omnibus):                  0.000   Jarque-Bera (JB):               48.335
Skew:                           0.440   Prob(JB):                     3.19e-11
Kurtosis:                       4.545   Cond. No.                     1.82e+05
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.82e+05. This might indicate that there are
strong multicollinearity or other numerical problems.