import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pymc as pm
import arviz as az
np.set_printoptions(suppress=True, formatter={'float_kind':'{:f}'.format})
#this option makes Jupyter print numbers in ordinary (as opposed to scientific) notation
We shall review linear regression in the context of an applied dataset. We will also use this example to motivate Poisson Regression.
The dataset (called MROZ) comes from a 1987 Econometrica paper by Mroz and contains data on a number of variables for married women in the year 1975. This dataset is analyzed in the Econometrics book by Wooldridge (which contains many other interesting economic and political datasets). The variables included in the Mroz dataset can be seen in the output of the code below.
#Import the MROZ.csv dataset
mroz = pd.read_csv("MROZ.csv")
print(mroz.head(12))
   inlf  hours  kidslt6  kidsge6  age  educ    wage  repwage  hushrs  husage  \
0     1   1610        1        0   32    12  3.3540     2.65    2708      34
1     1   1656        0        2   30    12  1.3889     2.65    2310      30
2     1   1980        1        3   35    12  4.5455     4.04    3072      40
3     1    456        0        3   34    12  1.0965     3.25    1920      53
4     1   1568        1        2   31    14  4.5918     3.60    2000      32
5     1   2032        0        0   54    12  4.7421     4.70    1040      57
6     1   1440        0        2   37    16  8.3333     5.95    2670      37
7     1   1020        0        0   54    12  7.8431     9.98    4120      53
8     1   1458        0        2   48    12  2.1262     0.00    1995      52
9     1   1600        0        2   39    12  4.6875     4.15    2100      43
10    1   1969        0        1   33    12  4.0630     4.30    2450      34
11    1   1960        0        1   42    11  4.5918     4.58    2375      47

    ...  faminc     mtr  motheduc  fatheduc  unem  city  exper   nwifeinc  \
0   ...   16310  0.7215        12         7   5.0     0     14  10.910060
1   ...   21800  0.6615         7         7  11.0     1      5  19.499981
2   ...   21040  0.6915        12         7   5.0     0     15  12.039910
3   ...    7300  0.7815         7         7   5.0     0      6   6.799996
4   ...   27300  0.6215        12        14   9.5     1      7  20.100058
5   ...   19495  0.6915        14         7   7.5     1     33   9.859054
6   ...   21152  0.6915        14         7   5.0     0     11   9.152048
7   ...   18900  0.6915         3         3   5.0     0     35  10.900038
8   ...   20405  0.7515         7         7   3.0     0     24  17.305000
9   ...   20425  0.6915         7         7   5.0     0     21  12.925000
10  ...   32300  0.5815        12         3   5.0     0     15  24.299953
11  ...   28700  0.6215        14         7   5.0     0     14  19.700071

       lwage  expersq
0   1.210154      196
1   0.328512       25
2   1.514138      225
3   0.092123       36
4   1.524272       49
5   1.556480     1089
6   2.120260      121
7   2.059634     1225
8   0.754336      576
9   1.544899      441
10  1.401922      225
11  1.524272      196

[12 rows x 22 columns]
Several regressions can be fit on this dataset. Let us start by fitting a linear regression model with "hours" as the response variable, and "kidslt6", "kidsge6", "age", "educ", "exper", "expersq", "huswage", "huseduc", "hushrs", "motheduc" and "fatheduc" as the covariates (or predictor variables). Note that we are using both "exper" and "expersq" (the square of the variable "exper") as covariates in the regression. This is because we would expect "hours" to increase with "exper" for small values of "exper" (for women new to the workforce) but perhaps to decrease with "exper" for large values of "exper" (for example, women with very large "exper" might be closer to retirement and work less, leading to smaller "hours"). Such a relationship cannot be captured by linear functions but can be captured by quadratic functions, which is why both "exper" and "expersq" are included in the model.
Using the statsmodels package in Python, this regression can be carried out in the following way.
#Several regressions can be fit on this dataset. Let us fit one with
#hours as the response variable, and
#kidslt6, kidsge6, age, educ, exper, expersq, huswage, huseduc, hushrs, motheduc and fatheduc
#as covariates
import statsmodels.api as sm
#Define the response variable and covariates
Y = mroz['hours']
X = mroz[['kidslt6', 'kidsge6', 'age', 'educ',
'hushrs', 'huseduc', 'huswage', 'motheduc',
'fatheduc', 'exper', 'expersq']].copy()
#Add a constant (intercept) to the model
X = sm.add_constant(X)
#Fit the model:
model = sm.OLS(Y, X).fit()
print(model.summary())
                            OLS Regression Results
==============================================================================
Dep. Variable:                  hours   R-squared:                       0.273
Model:                            OLS   Adj. R-squared:                  0.262
Method:                 Least Squares   F-statistic:                     25.30
Date:                Sat, 30 Sep 2023   Prob (F-statistic):           1.05e-44
Time:                        03:18:56   Log-Likelihood:                -6045.7
No. Observations:                 753   AIC:                         1.212e+04
Df Residuals:                     741   BIC:                         1.217e+04
Df Model:                          11
Covariance Type:            nonrobust
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const       1488.5175    293.748      5.067      0.000     911.839    2065.196
kidslt6     -439.3761     58.748     -7.479      0.000    -554.709    -324.043
kidsge6      -32.6212     23.202     -1.406      0.160     -78.171      12.928
age          -30.4462      4.382     -6.948      0.000     -39.049     -21.844
educ          41.2569     16.470      2.505      0.012       8.924      73.590
hushrs        -0.0635      0.049     -1.292      0.197      -0.160       0.033
huseduc      -16.1572     12.297     -1.314      0.189     -40.298       7.983
huswage      -13.7469      7.556     -1.819      0.069     -28.580       1.086
motheduc      10.9852     10.368      1.059      0.290      -9.370      31.340
fatheduc      -4.0224      9.771     -0.412      0.681     -23.205      15.161
exper         65.9440      9.984      6.605      0.000      46.344      85.544
expersq       -0.7289      0.325     -2.241      0.025      -1.368      -0.090
==============================================================================
Omnibus:                       82.696   Durbin-Watson:                   1.389
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              120.379
Skew:                           0.786   Prob(JB):                     7.25e-27
Kurtosis:                       4.167   Cond. No.                     2.54e+04
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 2.54e+04. This might indicate that there are
strong multicollinearity or other numerical problems.
The table above gives estimates of the regression coefficients and their standard errors for each of the covariates. These estimates and standard errors are computed in the following way. The data is denoted by $(y_i, x_{i1}, x_{i2}, \dots, x_{im})$ for $i = 1, \dots, n$ (here $m$ is the number of covariates and $n$ is the number of observations). One often represents the response data ($y_1, \dots, y_n$) in an $n \times 1$ vector called $Y$, and the covariate data $(x_{i1}, \dots, x_{im})$ for $i = 1, \dots, n$ in an $n \times (m+1)$ matrix $X$ as follows: \begin{align*} Y = \begin{pmatrix} y_1 \\ \cdot \\ \cdot \\ \cdot \\ y_n \end{pmatrix} ~~\text{ and }~~ X = \begin{pmatrix} 1 & x_{11} & \cdot & \cdot & \cdot & x_{1m} \\ \cdot & \cdot & \cdot & \cdot & \cdot & \cdot \\ \cdot & \cdot & \cdot & \cdot & \cdot & \cdot \\ 1 & x_{i1} & \cdot & \cdot & \cdot & x_{im} \\ \cdot & \cdot & \cdot & \cdot & \cdot & \cdot \\ \cdot & \cdot & \cdot & \cdot & \cdot & \cdot\\ 1 & x_{n1} & \cdot & \cdot & \cdot & x_{nm} \end{pmatrix} \end{align*} Note the additional column of ones in the $X$ matrix. In this notation, the estimates of the regression coefficients are computed by solving the equation: \begin{align*} X^T X \beta = X^T Y ~~~\text{ leading to } ~~~ \hat{\beta} = (X^T X)^{-1} X^T Y. \end{align*} The standard errors are computed in the following way. First one computes the matrix: \begin{align*} \hat{\sigma}^2 (X^T X)^{-1} ~~~ \text{ where } ~~ \hat{\sigma} := \sqrt{\frac{\sum_{i=1}^n \left(y_i - \hat{\beta}_0 - \hat{\beta}_1 x_{i1} - \hat{\beta}_2 x_{i2} - \dots - \hat{\beta}_m x_{im} \right)^2}{n-m-1}} \end{align*} The standard errors are then given by the square roots of the diagonal entries of this $(m+1) \times (m+1)$ matrix $\hat{\sigma}^2 (X^T X)^{-1}$.
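To see these formulas in action, here is a minimal sketch (using only numpy and the Y, X and model objects defined above) that recomputes the coefficient estimates and standard errors by hand; the array names introduced here (Xmat_ols, Yvec_ols, etc.) are used only for this check.
#Recomputing the OLS estimates and standard errors by hand
Xmat_ols = X.values.astype(float)     #n x (m+1) design matrix (includes the constant column)
Yvec_ols = Y.values.astype(float)
n_obs, n_par = Xmat_ols.shape         #n and m+1
XtX_inv = np.linalg.inv(Xmat_ols.T @ Xmat_ols)
beta_ols = XtX_inv @ Xmat_ols.T @ Yvec_ols                 #solves X^T X beta = X^T Y
resid = Yvec_ols - Xmat_ols @ beta_ols
sigma_hat = np.sqrt(np.sum(resid**2) / (n_obs - n_par))    #denominator is n - m - 1
std_err = np.sqrt(np.diag(sigma_hat**2 * XtX_inv))
print(beta_ols)      #compare with the 'coef' column of model.summary()
print(std_err)       #compare with the 'std err' column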
Looking back at the statsmodels regression summary, it is common practice to judge the importance of each variable by the value of the estimated regression coefficient together with its standard error. The third column (titled t) of the regression output gives the ratio of each estimated regression coefficient to its standard error. Variables for which the magnitude of this ratio is large are considered important. For example, the variable "kidslt6" has a t value of -7.479, i.e., 7.479 in magnitude, which is the largest among all the covariates, so it can be considered the most important covariate. On the other hand, the variable "fatheduc" has a t value of -0.412 (i.e., 0.412 in absolute value), which is small, so it can be considered the least important. The right quantities to compare these t-values against are the quantiles of the Student t probability distribution with $n-m-1$ degrees of freedom. This comparison is carried out by the OLS function and reported in the fourth column of the regression table. This column (titled $P>|t|$) goes by the name 'p-values', and values close to zero indicate that the variable is important. For example, for 'kidslt6', this $p$-value is rounded to zero, while for 'fatheduc', it is quite large, equalling 0.681.
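For instance, here is a small sketch (assuming scipy is available) recomputing the $P>|t|$ column from the t ratios using the Student t distribution with $n - m - 1$ degrees of freedom.
#Recomputing the p-values from the t statistics
from scipy import stats
tvals = model.params / model.bse                             #same ratios as the 't' column
pvals = 2 * stats.t.sf(np.abs(tvals), df=model.df_resid)     #two-sided p-values with n-m-1 df
print(pvals)                                                 #compare with the P>|t| column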
Usually one looks at the table above and drops variables for which the $p$-value $P > |t|$ is large. In this problem, it might be reasonable to drop the variables "motheduc", "fatheduc", "hushrs", "huseduc" and "kidsge6", deeming them unimportant for explaining the response "hours". Below we carry out linear regression of "hours" on the remaining six variables "kidslt6", "age", "educ", "huswage", "exper" and "expersq".
Y = mroz['hours']
X = mroz[['kidslt6', 'age', 'educ',
'huswage', 'exper', 'expersq']].copy()
X = sm.add_constant(X) #add a constant (intercept) to the model
#Fit the model:
linmodel = sm.OLS(Y, X).fit()
print(linmodel.summary())
                            OLS Regression Results
==============================================================================
Dep. Variable:                  hours   R-squared:                       0.266
Model:                            OLS   Adj. R-squared:                  0.260
Method:                 Least Squares   F-statistic:                     44.99
Date:                Sat, 30 Sep 2023   Prob (F-statistic):           4.67e-47
Time:                        03:19:00   Log-Likelihood:                -6049.5
No. Observations:                 753   AIC:                         1.211e+04
Df Residuals:                     746   BIC:                         1.215e+04
Df Model:                           6
Covariance Type:            nonrobust
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const       1166.8778    243.738      4.787      0.000     688.384    1645.372
kidslt6     -433.1229     58.416     -7.414      0.000    -547.803    -318.443
age          -28.4308      4.067     -6.991      0.000     -36.414     -20.447
educ          32.6255     12.827      2.543      0.011       7.444      57.807
huswage      -13.9353      6.857     -2.032      0.042     -27.397      -0.474
exper         67.7980      9.896      6.851      0.000      48.371      87.225
expersq       -0.7375      0.325     -2.270      0.023      -1.375      -0.100
==============================================================================
Omnibus:                       78.707   Durbin-Watson:                   1.383
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              111.647
Skew:                           0.768   Prob(JB):                     5.70e-25
Kurtosis:                       4.095   Cond. No.                     2.76e+03
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 2.76e+03. This might indicate that there are
strong multicollinearity or other numerical problems.
Instead of using the statsmodels package (specifically the function sm.OLS), we can also fit this linear regression model via the Bayesian approach (in PyMC) with flat priors on the regression coefficients (as well as on $\log \sigma$). This is done as follows. Note that with these priors, it can be mathematically proved (by calculating the posterior explicitly) that the Bayesian approach gives results identical to the frequentist approach. This means that the implementation in PyMC should give essentially the same results as the sm.OLS summary (with perhaps slight differences due to Monte Carlo fluctuations).
#We can also take the Bayesian Approach and use PyMC:
import pymc as pm
mrozmod = pm.Model()
with mrozmod:
    # Priors for unknown model parameters
    b0 = pm.Flat("b0")
    b1 = pm.Flat("b1")
    b2 = pm.Flat("b2")
    b3 = pm.Flat("b3")
    b4 = pm.Flat("b4")
    b5 = pm.Flat("b5")
    b6 = pm.Flat("b6")
    log_sigma = pm.Flat("log_sigma")
    sigma = pm.Deterministic("sigma", pm.math.exp(log_sigma))
    # Expected value of outcome
    mu = (b0 + b1 * mroz['kidslt6'] + b2 * mroz['age'] + b3 * mroz['educ']
          + b4 * mroz['huswage'] + b5 * mroz['exper'] + b6 * mroz['expersq'])
    # Likelihood
    Y_obs = pm.Normal("Y_obs", mu=mu, sigma=sigma, observed=mroz['hours'])
    idata = pm.sample(2000, chains=2, return_inferencedata=True)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 4 jobs)
NUTS: [b0, b1, b2, b3, b4, b5, b6, log_sigma]
Sampling 2 chains for 1_000 tune and 2_000 draw iterations (2_000 + 4_000 draws total) took 31 seconds.
We recommend running at least 4 chains for robust computation of convergence diagnostics
The above PyMC code specifies the linear regression model in the following way: \begin{align*} &Y_i \sim N(\mu_i, \sigma^2) ~~ \text{ for } i = 1, \dots, n \\ &\mu_i = \beta_0 + \beta_1 x_{i1} + \beta_2 x_{i2} + \dots + \beta_m x_{im} ~~ \text{ for } i = 1, \dots, n \\ &\beta_0, \beta_1, \dots, \beta_m, \log \sigma \overset{\text{i.i.d}}{\sim} \text{Flat} = \text{Uniform}[-C, C] \text{ for very large } C \end{align*} Let us check that the Bayesian analysis leads to basically the same answers as statsmodels OLS.
b0_samples = idata.posterior['b0'].values.flatten()
b1_samples = idata.posterior['b1'].values.flatten()
b2_samples = idata.posterior['b2'].values.flatten()
b3_samples = idata.posterior['b3'].values.flatten()
b4_samples = idata.posterior['b4'].values.flatten()
b5_samples = idata.posterior['b5'].values.flatten()
b6_samples = idata.posterior['b6'].values.flatten()
allsamples = [b0_samples, b1_samples, b2_samples, b3_samples, b4_samples, b5_samples, b6_samples]
names = ['b0', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6']
print("Parameter | Mean | Std. Dev. | Least Squares | Std. Error")
print("------------|----------|----------")
for i, (name, arr) in enumerate(zip(names, allsamples)):
    print(f"{name:10}| {np.mean(arr):.6f} | {np.std(arr):.6f} | {linmodel.params.values[i]:.6f} | {linmodel.bse.values[i]:.6f}")
Parameter | Mean | Std. Dev. | Least Squares | Std. Error
------------|----------|----------
b0 | 1175.858222 | 242.523307 | 1166.877797 | 243.737876
b1 | -434.363992 | 59.045116 | -433.122873 | 58.416292
b2 | -28.572263 | 4.043531 | -28.430794 | 4.066762
b3 | 32.440296 | 13.042272 | 32.625466 | 12.827177
b4 | -13.836047 | 7.102644 | -13.935253 | 6.857217
b5 | 67.582198 | 9.766088 | 67.797967 | 9.895837
b6 | -0.727326 | 0.320257 | -0.737492 | 0.324827
The estimated regression coefficient $\hat{\beta}_1$ for "kidslt6" is about -430, which can be interpreted as follows: having a small child reduces the mean hours worked by about 430. This result is clearly meaningless for women with few working hours (e.g., if someone only works about 300 hours, what does a reduction of 430 mean?). It is also not very meaningful for women with many working hours (e.g., if someone works more than 3000 hours, a reduction of 430 due to a small child sounds too small). A much more meaningful and useful interpretation would report reductions in terms of percentages rather than absolute hours. For example, a statement such as "having a small child reduces the mean hours worked by about 40%" would apply to all working women and be much more interpretable. Such an interpretation cannot be drawn from this linear regression.
A percentage interpretation can be realized if we change our model by taking the response variable to be $\log(\text{Hours worked})$ instead of "hours". Indeed, in the linear regression model for $\log(\text{Hours})$: \begin{align*} \log(\text{Hours}) = \beta_0 + \beta_1 \text{kidslt6} + \beta_2 \text{age} + \dots, \end{align*} the coefficient $\beta_1$ gives the change in $\log(\text{Hours})$ due to a small child, which translates into a percentage change of $100 \times(\exp(\beta_1) - 1)$ in hours worked because: \begin{align*} \frac{\text{Hours}_{\text{new}} - \text{Hours}_{\text{old}}}{\text{Hours}_{\text{old}}} \times 100 = \left[\exp \left(\log \text{Hours}_{\text{new}} - \log \text{Hours}_{\text{old}} \right) - 1\right] \times 100. \end{align*} It is much preferable to use $\log(\text{Hours})$ as the response variable in these problems compared to $\text{Hours}$. However, one big problem is that, in this dataset, there are quite a few observations for which the "hours" variable equals 0, so we cannot really work with the variable $\log \text{Hours}$. In such cases, Poisson regression provides a great alternative model.
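As a quick illustration, the identity above can be checked with arbitrary numbers (1500 and 1000 below are made up for this sketch), and we can also count how many observations in the dataset report zero hours.
#Numeric check of the percentage identity, and a count of zero-hours observations
hours_old, hours_new = 1500.0, 1000.0                               #arbitrary illustrative values
print((hours_new - hours_old) / hours_old * 100)                    #exact percentage change
print((np.exp(np.log(hours_new) - np.log(hours_old)) - 1) * 100)    #same number via the log identity
print((mroz['hours'] == 0).sum())                                   #number of women with zero hours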
In Poisson regression, one modifies the linear regression model: \begin{align*} &\log(Y_i) \sim N(\mu_i, \sigma^2) ~~ \text{ for } i = 1, \dots, n \\ &\mu_i = \beta_0 + \beta_1 x_{i1} + \beta_2 x_{i2} + \dots + \beta_m x_{im} ~~ \text{ for } i = 1, \dots, n \end{align*} (which cannot be used as the variable $\log(Y_i)$ is meaningless when $Y_i = 0$) in the following way: \begin{align*} &Y_i \sim \text{Poisson}(\mu_i) ~~ \text{ for } i = 1, \dots, n \\ &\log \mu_i = \beta_0 + \beta_1 x_{i1} + \beta_2 x_{i2} + \dots + \beta_m x_{im} ~~ \text{ for } i = 1, \dots, n \end{align*} The main change is that the response variable $Y_i$ is now modelled as $\text{Poisson}(\mu_i)$. The Poisson distribution is often used to model counts. In this case, $Y_i$ is the number of hours worked in 1975 which is a count variable. The linear relationship between the response and covariates is specified through $\log \mu_i$ which enables percentage interpretation for the regression coefficients.
As in the case of linear regression, there are two ways of implementing Poisson regression: the frequentist approach (via the function sm.GLM in statsmodels) and the Bayesian approach (via PyMC).
Most practitioners use the first option above. Let us first see how it works before turning to the Bayesian PyMC approach.
#Poisson Regression through StatsModels
# Define the response variable and covariates
Y = mroz['hours']
X = mroz[['kidslt6', 'age', 'educ',
'huswage', 'exper', 'expersq']].copy()
X = sm.add_constant(X) # Add a constant (intercept) to the model
# Fit the Poisson regression model
poiregmodel = sm.GLM(Y, X, family=sm.families.Poisson()).fit()
print(poiregmodel.summary())
                 Generalized Linear Model Regression Results
==============================================================================
Dep. Variable:                  hours   No. Observations:                  753
Model:                            GLM   Df Residuals:                      746
Model Family:                 Poisson   Df Model:                            6
Link Function:                    Log   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:            -3.1563e+05
Date:                Sat, 30 Sep 2023   Deviance:                   6.2754e+05
Time:                        03:19:46   Pearson chi2:                 6.60e+05
No. Iterations:                     5   Pseudo R-squ. (CS):              1.000
Covariance Type:            nonrobust
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          6.9365      0.012    562.281      0.000       6.912       6.961
kidslt6       -0.8075      0.004   -193.217      0.000      -0.816      -0.799
age           -0.0427      0.000   -201.166      0.000      -0.043      -0.042
educ           0.0528      0.001     83.439      0.000       0.052       0.054
huswage       -0.0207      0.000    -54.548      0.000      -0.021      -0.020
exper          0.1204      0.001    219.231      0.000       0.119       0.121
expersq       -0.0018   1.63e-05   -112.090      0.000      -0.002      -0.002
==============================================================================
The output above looks very similar to the linear regression output. This model is much more interpretable than the linear regression model that we fit earlier. To see this, consider the interpretation of the coefficient -0.8075 for the "kidslt6" variable: having a small child reduces the mean hours worked by about 55%. This is clearly a much more interpretable result compared to before.
#The 55% comes from:
print((np.exp(poiregmodel.params['kidslt6']) - 1)*100)
-55.40391074218227
Let us now understand how sm.GLM obtains the estimates and standard errors for the regression coefficients. The estimates are obtained by Maximum Likelihood Estimation. The likelihood in this Poisson Regression Model is given by \begin{align*} \text{likelihood} &= \prod_{i=1}^n \frac{e^{-\mu_i} \mu_i^{y_i}}{y_i!} \\ &= \prod_{i=1}^n \frac{e^{-e^{\beta_0 + \beta_1 x_{i1} + \dots + \beta_m x_{im}}} e^{y_i(\beta_0 + \beta_1 x_{i1} + \dots + \beta_m x_{im})}}{y_i!} \end{align*} It is much easier to maximize the logarithm of the likelihood (as opposed to the likelihood directly). We can also ignore the $y_i!$ terms in the denominator as they do not involve the parameters $\beta_0, \dots, \beta_m$. Let us also replace the sum $\beta_0 + \beta_1 x_{i1} + \dots + \beta_m x_{im}$ by the simpler expression $x_i^T \beta$, where $\beta$ denotes the column vector with components $\beta_0, \dots, \beta_m$ and $x_i$ denotes the column vector with components $1, x_{i1}, x_{i2}, \dots, x_{im}$. With these changes, we obtain the log-likelihood: \begin{align*} \ell(\beta) = \sum_{i=1}^n \left(-e^{x_i^T \beta} + y_i x_i^T \beta \right) \end{align*} The goal is to maximize this log-likelihood over $\beta_0, \beta_1, \dots, \beta_m$. We can do this by taking the gradient (i.e., the derivatives of $\ell(\beta)$ with respect to $\beta_0, \dots, \beta_m$ collected in a column vector), setting the gradient to zero, and solving the resulting set of equations. The gradient of $\ell(\beta)$ is \begin{align*} \nabla \ell(\beta) = \sum_{i=1}^n \left(-e^{x_i^T \beta} x_i + y_i x_i \right) = \sum_{i=1}^n \left(y_i - e^{x_i^T \beta} \right) x_i = \sum_{i=1}^n \left(y_i - \mu_i \right) x_i = X^T(Y - \mu), \end{align*} where $\mu$ is the $n \times 1$ vector with entries $\mu_1, \dots, \mu_n$. Therefore setting the gradient equal to zero leads us to the equation: \begin{align*} X^T \mu = X^T Y. \end{align*} This gives $m+1$ equations for the $m+1$ parameters $\beta_0, \dots, \beta_m$. Note that the parameters appear in the above equation through $\mu$. It is interesting to observe that in the case of linear regression (where we have the normal distribution instead of the Poisson), if we attempt to calculate the MLE in the same way as above, we would obtain exactly the same equation: $X^T \mu = X^T Y$. However, in that case, $\mu = X \beta$, leading to the equation $X^T X \beta = X^T Y$, which can be solved as $\hat{\beta} = (X^T X)^{-1} X^T Y$. Unfortunately, in the present Poisson regression case, $\mu$, which has components $e^{x_1^T \beta}, \dots, e^{x_n^T \beta}$, depends on $\beta$ in a nonlinear fashion and, consequently, it is not possible to solve $X^T \mu = X^T Y$ for $\beta$ in closed form. One needs to resort to numerical methods to solve this equation.
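As a quick sanity check of this derivation, we can evaluate the log-likelihood (without the $y_i!$ terms) and its gradient at the estimate returned by sm.GLM: the gradient should be essentially zero there. This is a minimal sketch using the X (with constant) and poiregmodel objects defined above; the names X_np, Y_np and beta_mle are introduced here just for this check.
#Sanity check: the gradient of the log-likelihood should vanish at the MLE
X_np = X.values.astype(float)              #design matrix including the constant column
Y_np = mroz['hours'].values.astype(float)
beta_mle = poiregmodel.params.values
eta = X_np @ beta_mle                      #x_i^T beta for each observation
loglik = np.sum(-np.exp(eta) + Y_np * eta)
gradient = X_np.T @ (Y_np - np.exp(eta))   #X^T (Y - mu)
print(loglik)
print(gradient)                            #all entries should be very close to zero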
A very classical method for solving nonlinear equations is the Newton method (also known as the Newton-Raphson method): see https://en.wikipedia.org/wiki/Newton%27s_method. We apply this method to solve $\nabla \ell(\beta) = X^T Y - X^T \mu = 0$. Newton's method is iterative (it starts with a rough guess $\beta^{(0)}$ for the solution and then successively modifies it into better approximations, eventually converging to the correct solution). Newton's iteration for changing the current approximation $\beta^{(m)}$ into a better approximation $\beta^{(m+1)}$ of the solution to $\nabla \ell(\beta) = 0$ is given by: \begin{align*} \beta^{(m+1)} = \beta^{(m)} - (H\ell(\beta^{(m)}))^{-1} \nabla \ell(\beta^{(m)}), \end{align*} where $H\ell(\beta^{(m)})$ is the Hessian matrix of the function $\ell(\beta)$ evaluated at the point $\beta = \beta^{(m)}$. It may be recalled that the Hessian matrix is the matrix of second derivatives (see https://en.wikipedia.org/wiki/Hessian_matrix). To actually implement Newton's method, we need to calculate the Hessian. We can do this by further differentiating the gradient $\nabla \ell(\beta) = \sum_{i=1}^n (y_i - e^{x_i^T \beta}) x_i = X^T(Y - \mu)$ with respect to $\beta$. It can be checked that this leads to: \begin{align*} H \ell(\beta) = - \sum_{i=1}^n e^{x_i^T \beta} x_i x_i^T = - \sum_{i=1}^n \mu_i x_i x_i^T. \end{align*} We can rewrite this in matrix form as \begin{align*} H \ell(\beta) = - X^T M(\beta) X \end{align*} where $M(\beta)$ is the diagonal matrix with diagonal entries $\mu_1 = e^{x_1^T \beta}, \dots, \mu_n = e^{x_n^T \beta}$. Thus Newton's method for Poisson regression is: \begin{align*} \beta^{(m+1)} = \beta^{(m)} + \left(X^T M(\beta^{(m)}) X \right)^{-1} \left( X^T Y - X^T \mu(\beta^{(m)}) \right), \end{align*} where we wrote $\mu(\beta^{(m)})$ to emphasize that this vector $\mu$ is calculated as $e^{x_1^T \beta}, \dots, e^{x_n^T \beta}$ with $\beta = \beta^{(m)}$. This method can be implemented as follows.
#Newton's Method for Calculating MLE in Poisson Regression
beta_hat = poiregmodel.params.values
print(beta_hat)
#this is the answer computed by statsmodels and we shall show that Newton's method leads to the same answer if initialized reasonably
#Initialization for Newton's Method
m = 6
p = 7
beta_initial = [5, 0, 0, 0, 0, 0, 0]
n = mroz.shape[0]
Xmat = X.values
Yvec = mroz['hours'].values
#Newton's method for 100 iterations
num_iterations = 100
for i in range(num_iterations):
    log_muvec = np.dot(Xmat, beta_initial)
    muvec = np.exp(log_muvec)
    gradient = np.dot(Xmat.T, Yvec - muvec)
    M = np.diag(muvec)
    Hessian = -Xmat.T @ M @ Xmat
    Hessian_inv = np.linalg.inv(Hessian)
    beta_initial = beta_initial - Hessian_inv @ gradient
    print(beta_initial)
[6.936480 -0.807524 -0.042680 0.052831 -0.020714 0.120372 -0.001829]
[11.862361 -2.918359 -0.191565 0.219829 -0.093895 0.456819 -0.004969]
[10.885550 -2.863001 -0.189547 0.218271 -0.093150 0.450808 -0.004889]
[9.946436 -2.723529 -0.184228 0.214137 -0.091177 0.435017 -0.004679]
[9.097029 -2.412093 -0.170935 0.203596 -0.086180 0.395908 -0.004161]
[8.414084 -1.884261 -0.142079 0.179445 -0.074896 0.313363 -0.003085]
[7.865945 -1.335615 -0.097770 0.136900 -0.055375 0.198124 -0.001679]
[7.313642 -0.990725 -0.061477 0.090536 -0.034424 0.127770 -0.001108]
[7.011093 -0.838093 -0.045790 0.061125 -0.022797 0.113816 -0.001399]
[6.944282 -0.808540 -0.042825 0.053242 -0.020705 0.118791 -0.001758]
[6.936650 -0.807526 -0.042682 0.052832 -0.020712 0.120342 -0.001827]
[6.936480 -0.807524 -0.042681 0.052831 -0.020714 0.120372 -0.001829]
[6.936480 -0.807524 -0.042680 0.052831 -0.020714 0.120372 -0.001829]
(the converged value [6.936480 -0.807524 -0.042680 0.052831 -0.020714 0.120372 -0.001829] is repeated for each of the remaining iterations)
Because of the exponential calculation in $\mu_i = e^{x_i^T \beta}$, one needs to be careful with the initialization. For example, go back and change the initialization to $\beta^{(0)} = (0, \dots, 0)$. In this case, $\beta^{(1)}$ has some large entries, leading to $e^{x_i^T \beta^{(1)}}$ blowing up. If this can be avoided, the method converges to the correct solution.
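To see the issue concretely, here is a small sketch of a single Newton step started from the all-zeros vector (reusing the Xmat and Yvec arrays from above); the exponent $x_i^T \beta^{(1)}$ becomes so large for some observations that np.exp overflows.
#One Newton step from the all-zeros initialization
beta_zero = np.zeros(Xmat.shape[1])
mu0 = np.exp(Xmat @ beta_zero)                     #all ones at the zero initialization
grad0 = Xmat.T @ (Yvec - mu0)
H0 = -Xmat.T @ np.diag(mu0) @ Xmat
beta_one = beta_zero - np.linalg.inv(H0) @ grad0   #first Newton iterate (has large entries)
print(beta_one)
print(np.max(Xmat @ beta_one))                     #a very large exponent for some observations...
print(np.max(np.exp(Xmat @ beta_one)))             #...so the exponential overflows to inf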
Before seeing how sm.GLM calculates the standard errors, let us first look at the Bayesian approach for fitting the Poisson regression model with flat priors. For the Bayesian approach with flat priors, we have \begin{align*} \text{Prior} = 1 ~~~ \text{ and } ~~~ \text{Likelihood} = \prod_{i=1}^n \frac{e^{-e^{\beta_0 + \beta_1 x_{i1} + \dots + \beta_m x_{im}}} e^{y_i(\beta_0 + \beta_1 x_{i1} + \dots + \beta_m x_{im})}}{y_i!} \propto \exp(\ell(\beta)). \end{align*} The posterior is therefore given by \begin{align*} \text{Posterior}(\beta) \propto \exp \left(\ell(\beta) \right) = \exp \left(\sum_{i=1}^n \left(-e^{x_i^T \beta} + y_i x_i^T \beta \right) \right) \end{align*} This posterior in $\beta_0, \dots, \beta_m$ cannot be written in terms of standard distributions. One can use PyMC to generate samples from this posterior. However, PyMC can be somewhat unstable here and might give (significantly different) answers for different runs. For example, in the following code, setting random_seed = 0 gives results similar to the frequentist output while setting random_seed = 4 gives quite different results.
#We can also take the Bayesian Approach and use PyMC:
import pymc as pm
mrozpoimod = pm.Model()
with mrozpoimod:
    # Priors for unknown model parameters
    b0 = pm.Flat("b0")
    b1 = pm.Flat("b1")
    b2 = pm.Flat("b2")
    b3 = pm.Flat("b3")
    b4 = pm.Flat("b4")
    b5 = pm.Flat("b5")
    b6 = pm.Flat("b6")
    log_mu = (b0 + b1 * mroz['kidslt6'] + b2 * mroz['age'] + b3 * mroz['educ']
              + b4 * mroz['huswage'] + b5 * mroz['exper'] + b6 * mroz['expersq'])
    # Likelihood
    Y_obs = pm.Poisson("Y_obs", mu=np.exp(log_mu), observed=mroz['hours'])
    idata = pm.sample(2000, chains=2, random_seed=0, return_inferencedata=True)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 4 jobs)
NUTS: [b0, b1, b2, b3, b4, b5, b6]
Sampling 2 chains for 1_000 tune and 2_000 draw iterations (2_000 + 4_000 draws total) took 185 seconds.
We recommend running at least 4 chains for robust computation of convergence diagnostics
b0_samples = idata.posterior['b0'].values.flatten()
b1_samples = idata.posterior['b1'].values.flatten()
b2_samples = idata.posterior['b2'].values.flatten()
b3_samples = idata.posterior['b3'].values.flatten()
b4_samples = idata.posterior['b4'].values.flatten()
b5_samples = idata.posterior['b5'].values.flatten()
b6_samples = idata.posterior['b6'].values.flatten()
allsamples = [b0_samples, b1_samples, b2_samples, b3_samples, b4_samples, b5_samples, b6_samples]
names = ['b0', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6']
print("Parameter | Estimate | Std. Dev. | Frequentist | Std. Error")
print("------------|----------|----------")
for i, (name, arr) in enumerate(zip(names, allsamples)):
    print(f"{name:8}| {np.mean(arr):.6f} | {np.std(arr):.6f} | {poiregmodel.params.values[i]:.6f} | {poiregmodel.bse.values[i]:.6f}")
#These results are quite close to the frequentist output.
#However PyMC is not very reliable here. Change the random seed from 0 to 4
#and look at the results.
Parameter | Estimate | Std. Dev. | Frequentist | Std. Error
------------|----------|----------
b0 | 6.936476 | 0.012442 | 6.936480 | 0.012336
b1 | -0.807527 | 0.004131 | -0.807524 | 0.004179
b2 | -0.042681 | 0.000213 | -0.042680 | 0.000212
b3 | 0.052838 | 0.000637 | 0.052831 | 0.000633
b4 | -0.020714 | 0.000378 | -0.020714 | 0.000380
b5 | 0.120361 | 0.000549 | 0.120372 | 0.000549
b6 | -0.001828 | 0.000016 | -0.001829 | 0.000016
When the posterior cannot be written in terms of standard distributions, and when MCMC seems unreliable, an alternative option is to approximate the posterior using simple standard distributions. This idea is at the heart of the method of Variational Inference which is quite popular in Bayesian inference. In this problem, one can approximate the posterior via a (multivariate) normal distribution. This works in the following way.
Our posterior is proportional to $\exp(\ell(\beta))$. The normal distribution, on the other hand, has density proportional to $\exp \left(\text{quadratic function of } \beta \right)$. Thus, in order to obtain a normal approximation of the posterior, it makes sense to approximate $\ell(\beta)$ by a quadratic function of $\beta$. This can be done by Taylor approximation, but we need to figure out around which point the Taylor approximation should be taken. The function $\ell(\beta)$ is the log-likelihood, which is maximized at the MLE $\hat{\beta}$. It makes sense, therefore, to do the Taylor approximation around $\hat{\beta}$. The idea is that the approximation will be quite accurate for points near the MLE $\hat{\beta}$; for points far from the MLE, the posterior density is probably small anyway, so a poor approximation at those points should have an insignificant effect. Motivated by these considerations, we Taylor expand the log-likelihood $\ell(\beta)$ around $\hat{\beta}$ up to second order as follows: \begin{align*} \ell(\beta) &\approx \ell(\hat{\beta}) + \left<\nabla \ell(\hat{\beta}), \beta - \hat{\beta} \right> + \frac{1}{2} \left(\beta - \hat{\beta} \right)^T H\ell(\hat{\beta}) \left(\beta - \hat{\beta} \right) \\ &= \ell(\hat{\beta}) + \frac{1}{2} \left(\beta - \hat{\beta} \right)^T H\ell(\hat{\beta}) \left(\beta - \hat{\beta} \right) ~~ \text{ because } \nabla \ell(\hat{\beta}) = 0. \end{align*} Thus the posterior is approximated as: \begin{align*} \text{posterior}(\beta) & \propto \exp \left(\ell(\beta) \right) \\ & \approx \exp \left(\ell(\hat{\beta}) + \frac{1}{2} \left(\beta - \hat{\beta} \right)^T H\ell(\hat{\beta}) \left(\beta - \hat{\beta} \right) \right) \\ &= \exp \left(\ell(\hat{\beta}) \right) \exp \left(\frac{1}{2} \left(\beta - \hat{\beta} \right)^T H\ell(\hat{\beta}) \left(\beta - \hat{\beta} \right) \right) \\ &\propto \exp \left(\frac{1}{2} \left(\beta - \hat{\beta} \right)^T H\ell(\hat{\beta}) \left(\beta - \hat{\beta} \right) \right). \end{align*} In the last step above, we dropped the factor $\exp \left(\ell(\hat{\beta}) \right)$ from the proportionality because it does not involve the parameters $\beta$.
Now we can compare the posterior approximation to the density of a multivariate normal (with mean $\mu$ and covariance $\Sigma$), which is proportional to $\exp \left(-\frac{1}{2} (\beta - \mu)^T \Sigma^{-1} (\beta - \mu) \right)$. It is then clear that $\mu = \hat{\beta}$ and $\Sigma^{-1} = -H\ell(\hat{\beta})$. Thus the normal approximation to the posterior is given by: \begin{align*} \text{posterior} \approx N \left(\hat{\beta}, \left( - H\ell(\hat{\beta}) \right)^{-1}\right) \end{align*} This gives a very simple and practical approximation to the complicated posterior, especially because we already know how to calculate the Hessian (it was used in the Newton algorithm for computing the MLE). If one wants, posterior samples can easily be generated from $N \left(\hat{\beta}, \left( - H\ell(\hat{\beta}) \right)^{-1}\right)$. This will be much faster than using PyMC. However, PyMC aims to sample from the actual posterior, while this normal distribution is only an approximation to it.
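For instance, here is a minimal sketch (reusing Xmat and beta_hat from the Newton's method code above) of how one could draw samples from this normal approximation; their means and standard deviations should be close to the MLE and to the standard errors discussed next, up to Monte Carlo error.
#Drawing samples from the normal approximation to the posterior
rng = np.random.default_rng(0)
mu_hat = np.exp(Xmat @ beta_hat)
Hess = -Xmat.T @ np.diag(mu_hat) @ Xmat            #Hessian of the log-likelihood at the MLE
Cov_approx = np.linalg.inv(-Hess)                  #covariance of the normal approximation
normal_samples = rng.multivariate_normal(beta_hat, Cov_approx, size=4000)
print(normal_samples.mean(axis=0))                 #close to the MLE beta_hat
print(normal_samples.std(axis=0))                  #close to the sm.GLM standard errors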
The standard errors of Poisson Regression that were reported by sm.GLM are actually computed from the covariance of the posterior normal approximation. In other words, the standard errors are simply the square-roots of the diagonal entries of the matrix: \begin{align*} \left(-H\ell(\hat{\beta}) \right)^{-1} = \left(X^T M(\hat{\beta}) X \right)^{-1} \end{align*} where $\hat{\beta}$ is the MLE and $M(\beta)$ is the diagonal matrix with diagonal entries $\mu_1 = e^{x_1^T \beta}, \dots, \mu_n = e^{x_n^T \beta}$. This can be readily verified in the context of our dataset as follows.
#Standard Error Calculation:
log_muvec = np.dot(Xmat, beta_hat)
muvec = np.exp(log_muvec)
M = np.diag(muvec)
Hessian = -Xmat.T @ M @ Xmat
Hessian_inv = np.linalg.inv(Hessian)
CovMat = -Hessian_inv
print(np.sqrt(np.diag(CovMat)))
#Compare with
print(poiregmodel.bse)
[0.012336 0.004179 0.000212 0.000633 0.000380 0.000549 0.000016]
const      0.012336
kidslt6    0.004179
age        0.000212
educ       0.000633
huswage    0.000380
exper      0.000549
expersq    0.000016
dtype: float64
These standard errors can also be justified using frequentist arguments, but that justification is much more complicated (it involves asymptotics and notions such as Fisher information). The Bayesian justification is much simpler and more intuitive, being based on a normal approximation obtained via Taylor expansion.
The important takeaways are: