Two issues, 1. you're using year
variable in the plm
formula which is redundant because it's already index
ed, and 2. your Python PanelOLS
code calculates individual fixed effects so far, I can replicate the Python estimates with plm
using effect="individual"
.
library(plm)
fatalities_mod6 <- plm(fatal_rate ~ beertax + drinkage + punish + miles +
unemp + log(income), index=c("state", "year"),
model="within", effect="individual", data=Fatalities)
Furthermore Python's PanelOLS
appears to use standard errors clustered on state applying the Arellano method using heteroscedasticity-consistent standard errors type 1 ("HC1"
).
round(summary(fatalities_mod6,
vcov=vcovHC.plm(fatalities_mod6, cluster="group", type="HC1",
method="arellano"))$coe, 4)
# Estimate Std. Error t-value Pr(>|t|)
# beertax -0.3664 0.2920 -1.2550 0.2105
# drinkage -0.0378 0.0252 -1.4969 0.1355
# punishyes -0.0342 0.0951 -0.3598 0.7193
# miles 0.0000 0.0000 -0.4217 0.6736
# unemp -0.0196 0.0128 -1.5385 0.1250
# log(income) 0.6765 0.5424 1.2472 0.2134
This resembles the PanelOLS
result of Python.
Edit
For the two-way fixed effects estimator of your data with cluster-robust standard errors, the code would be,
for Python:
mod = PanelOLS(w1['fatal_rate'], w1[['beertax','drinkage','punish', 'miles' ,
'unemp','income']],
entity_effects=True, time_effects=True)
and for R:
fatalities_mod6 <- plm(fatal_rate ~ beertax + drinkage + punish + miles +
unemp + log(income), index=c("state", "year"),
model="within", effect="twoways", data=Fatalities)
summary(fatalities_mod6,
vcov=vcovHC.plm(fatalities_mod6, cluster="group", type="HC1"))
Edit 2
Here follow comparisons of Python, R, and Stata, conducted with the grunfeld
data that come with statsmodels
in Python (they're slightly different from the data(Grunfeld, package="plm")
).
PanelOLS
(Python), plm
(R), and xtreg
with vce(cluster *clustvar*)
(Stata) appear to apply slightly different methods to calculate cluster-robust standard errors (see linked docs for details).
Python:
from statsmodels.datasets import grunfeld
data = grunfeld.load_pandas().data
data = data.set_index(['firm','year'])
import pandas as pd
data.to_csv("grunfeld.csv") ## export data for R / Stata
from linearmodels import PanelOLS
mod = PanelOLS.from_formula('invest ~ value + capital + EntityEffects + TimeEffects',
data=data)
print(mod.fit())
# Parameter Std. Err. T-stat P-value Lower CI Upper CI
# ------------------------------------------------------------------------------
# value 0.1167 0.0129 9.0219 0.0000 0.0912 0.1422
# capital 0.3514 0.0210 16.696 0.0000 0.3099 0.3930
print(mod.fit(cov_type='robust'))
# value 0.1167 0.0191 6.1087 0.0000 0.0790 0.1544
# capital 0.3514 0.0529 6.6472 0.0000 0.2471 0.4557
print(mod.fit(cov_type='clustered', cluster_entity=True))
# value 0.1167 0.0113 10.368 0.0000 0.0945 0.1389
# capital 0.3514 0.0470 7.4836 0.0000 0.2588 0.4441
print(mod.fit(cov_type='clustered', cluster_entity=True, cluster_time=True))
# value 0.1167 0.0117 10.015 0.0000 0.0937 0.1397
# capital 0.3514 0.0447 7.8622 0.0000 0.2633 0.4396
R:
grunfeld <- read.csv("V:/Python/fatalities/grunfeld.csv")
library(plm)
fit <- plm(invest ~ value + capital, grunfeld, effect="twoways", model="within",
index=c("firm", "year"))
## resembles exactly py mod.fit()
round(summary(fit)$coe, 4)
# Estimate Std. Error t-value Pr(>|t|)
# value 0.1167 0.0129 9.0219 0
# capital 0.3514 0.0210 16.6964 0
## resembles approximately py mod.fit('cluster', cluster_entity=True) and Stata (below)
round(summary(fit, cluster="group",
vcov=vcovHC.plm(fit, method="arellano", type="HC2")
)$coe, 4)
# Estimate Std. Error t-value Pr(>|t|)
# value 0.1167 0.0114 10.2042 0
# capital 0.3514 0.0507 6.9364 0
## doesn't seem to change with two-way clustering
round(summary(fit, cluster=c("group", "time"),
vcov=vcovHC.plm(fit, method="arellano", type="HC2")
)$coe, 4)
# Estimate Std. Error t-value Pr(>|t|)
# value 0.1167 0.0114 10.2042 0
# capital 0.3514 0.0507 6.9364 0
fit.1 <- lm(invest ~ value + capital + factor(firm) + factor(year) + 0, grunfeld)
library(lmtest)
## resembles exactly py mod.fit(cov_type='robust')
round(coeftest(fit.1, vcov.=sandwich::vcovHC(fit.1, type="HC1"))[1:2,], 4)
# Estimate Std. Error t value Pr(>|t|)
# value 0.1167 0.0191 6.1087 0
# capital 0.3514 0.0529 6.6472 0
Stata:
import delim grunfeld.csv, clear
egen firmn = group(firm)
xtset firmn year
xtreg invest value capital i.year, fe vce(cluster firmn)
# | Robust
# invest | Coef. Std. Err. t P>|t| [95% Conf. Interval]
# -------------+----------------------------------------------------------------
# value | .1166811 .0114755 10.17 0.000 .0911122 .1422501
# capital | .3514357 .0478837 7.34 0.000 .2447441 .4581273