0

I would like to construct annualized volatility of returns for a panel data set in R. I have monthly returns (%) per month, per firm (entity), for a large dataset.

I would like to construct the five year average of annualized volatility of monthly returns - per year (t+5) and per firm.

Constructing this measure by it self is not difficult, but I would like to do it in R, so that it groups by firm & year. I am thankful for any help.

The data looks like this:

enter image description here

Phil
  • 7,287
  • 3
  • 36
  • 66
Peter
  • 1
  • 1

1 Answers1

0
library(xts)
library(PerformanceAnalytics)
library(quantmod)
library(lubridate)
library(data.table)
library(stringr)

# let's fetch some real-world panel data in a similar format to that cited by OP
symbols <- c('GOOG', 'AAPL', 'AMZN')
quantmod::getSymbols(symbols, 
                     auto.assign = TRUE, 
                     from = Sys.time() - years(20), 
                     periodicity = 'monthly')
lapply(symbols, function(x) {
  tmp <- get(x, envir = .GlobalEnv)
  tmp$Return <- CalculateReturns(Ad(tmp), method = 'discrete')
  tmp$LogReturn <- CalculateReturns(Ad(tmp), method = 'log')
  assign(x, tmp, envir = .GlobalEnv)
}) |> invisible()
panel_data_df <- lapply(symbols, function(x) {
  tmp <- get(x, envir = .GlobalEnv)
  df <- data.frame(Symbol = x,
                   Date = index(tmp),
                   Return = round(tmp$Return * 1e2, 2) |> 
                     sprintf(fmt = '%s%%') |> 
                     str_replace_all('NA%', NA_character_),
                   LogReturn = tmp$LogReturn)
  df
}) |> 
  rbindlist() |> 
  as.data.frame()
head(panel_data_df)
  Symbol       Date Return   LogReturn
1   GOOG 2004-09-01   <NA>          NA
2   GOOG 2004-10-01  47.1%  0.38593415
3   GOOG 2004-11-01 -4.54% -0.04649014
4   GOOG 2004-12-01  5.94%  0.05770476
5   GOOG 2005-01-01  1.47%  0.01457253
6   GOOG 2005-02-01  -3.9% -0.03978529
# now let's calculate the 5 year mean of annualized monthly volatility
metrics_df <- split(panel_data_df, panel_data_df$Symbol) |> 
  lapply(function(x) {
    df_xts <- xts(x$LogReturn, order.by = as.POSIXct(x$Date)) 
    stddev_1yr <- period.apply(df_xts, 
                               endpoints(df_xts, 'years', 1), 
                               StdDev.annualized)
    stddev_1yr_5yr_mean <- period.apply(stddev_1yr, 
                                        endpoints(stddev_1yr, 'years', 5), 
                                        mean)
    stddev_1yr_5yr_mean_df <- as.data.frame(stddev_1yr_5yr_mean)
    colnames(stddev_1yr_5yr_mean_df) <- 'StDevAnn5YrMean'
    stddev_1yr_5yr_mean_df$Date <- rownames(stddev_1yr_5yr_mean_df) |> 
      str_split('\\s') |> 
      sapply('[', 1)
    rownames(stddev_1yr_5yr_mean_df) <- NULL
    stddev_1yr_5yr_mean_df$Symbol <- x$Symbol[ 1 ]
    stddev_1yr_5yr_mean_df
  }) |> rbindlist() |> as.data.frame()

panel_data_df <- merge(panel_data_df, 
                       metrics_df, 
                       by = c('Symbol', 'Date'), 
                       all = TRUE)
head(panel_data_df, 50)
   Symbol       Date  Return    LogReturn StDevAnn5YrMean
1    AAPL 2002-11-01    <NA>           NA              NA
2    AAPL 2002-12-01  -7.55% -0.078484655              NA
3    AAPL 2003-01-01   0.21%  0.002089444              NA
4    AAPL 2003-02-01   4.53%  0.044272032              NA
5    AAPL 2003-03-01   -5.8% -0.059709353              NA
6    AAPL 2003-04-01   0.57%  0.005642860              NA
7    AAPL 2003-05-01  26.23%  0.232938925              NA
8    AAPL 2003-06-01   6.18%  0.060001124              NA
9    AAPL 2003-07-01   10.6%  0.100732953              NA
[ ... ]
26   AAPL 2004-12-01  -3.95% -0.040325449              NA
27   AAPL 2004-12-31    <NA>           NA       0.2947654
28   AAPL 2005-01-01  19.41%  0.177392802              NA
29   AAPL 2005-02-01  16.67%  0.154188206              NA
30   AAPL 2005-03-01  -7.11% -0.073765972              NA
[ ... ]
br00t
  • 1,440
  • 8
  • 10