1

I have an R portfolio construction code that uses daily adjusted close price data from Yahoo. I've had some problems with NA values, but the code had been working for some time, until this weekend (i.e., Feb 28, 2015).

Right now the yahoo data source seems to be completely broken when I use the tseries function get.hist.quote(). By broken I mean that it will not correctly return data for VTV and a number of other ETFs. I don't know if the Yahoo time series source is down or what.

There was a post (https://stackoverflow.com/a/3507948/2341077) which suggested that changing the URL in get.hist.quote() from chart.yahoo.com to ichart.yahoo.com would fix the problem. But this didn't change anything for me. I also made sure that I have the latest version of tseries installed.

Has anyone else been having problems with the close price time series from Yahoo? I've been wondering if I should change my code to use the quantmod function getSymbols, which, apparently, can use Google finance as a data source.

The code below is written to read hundreds of ETF symbols and return a matrix containing the ETF time series data. An attempt is made to align the data by date.

Even when Yahoo seemed to be providing data, there were still missing values, which is what the fillHoles() function is meant to address.

<pre>

#
# Fill "NA" holes in the time series.
#
# The series is first passed through na.approx(), which is limited to runs of
# at most four consecutive NAs (maxgap=4) and keeps the series length
# (na.rm=FALSE). Whatever NAs remain are then handed to na.fill() with
# fill="extend".
#
fillHoles <- function(ts.zoo) {
  na.fill(na.approx(ts.zoo, maxgap = 4, na.rm = FALSE), fill = "extend")
}
<i>
#
# The yahoo market data has problems (at least when it's fetched with get.hist.quote()) when the compression
# argument is used to fetch weekly adjusted close data.
#
# Two time series are shown below, for VXF and MINT. The weekly boundaries appear on different dates.
# 
#              VXF
# 2007-04-04 48.55
# 2007-04-09 48.98
# 2007-04-16 49.52 <==
# 2007-04-23 49.70
# 2007-04-30 50.03
# 2007-05-07 50.04 <==
# 
#            MINT
# 2007-04-04 8.03
# 2007-04-09 8.03
# 2007-04-17 7.88 <==
# 2007-04-23 8.11
# 2007-04-30 8.92
# 2007-05-08 9.14 <==
#   
# If the two time series are merged via cbind, NA values
# end up being inserted where the time series don't line up:
# 
#              VXF MINT
# 2007-04-04 48.55 8.03
# 2007-04-09 48.98 8.03
# 2007-04-16 49.52   NA
# 2007-04-23 49.70 8.11
# 2007-04-30 50.03 8.92
# 2007-05-07 50.04   NA
#
# To avoid this problem of data alignment, the function fetches daily adjusted close that can then be converted
# into weekly adjusted close.
#
# Given a vector of symbols, this function will fetch the daily adjusted close price data from 
# Yahoo. The data is aligned since not all time series will have exactly the same start and end
# dates (although with daily data, as noted above, this should be less of an issue)
#
</i>
#
# Fetch daily adjusted close prices from Yahoo (via get.hist.quote) for a vector of
# symbols and return them as one date-aligned matrix.
#
# Arguments:
#   symbols   - vector of ticker symbols to fetch
#   startDate - first date wanted (anything as.Date() accepts); moved back to the
#               nearest market date by findMarketDate()
#   endDate   - last date wanted; handled like startDate
#
# Returns a matrix with one row per date (row names are the dates as strings) and one
# column per symbol that was kept (column names are the symbols). Returns NULL when no
# symbol could be fetched.
#
# The date range is narrowed to the overlap of all series fetched so far. A symbol whose
# dates do not overlap that range is skipped with a warning.
#
getDailyCloseData = function(symbols, startDate, endDate )
{
  closeData.z = c()
  # Defined up front so the final return() works even when nothing was fetched.
  closeData.mat = NULL
  minDate = c()
  maxDate = c()
  fetchedSyms = c()
  startDate.ch = as.character( findMarketDate(as.Date(startDate)))
  endDate.ch = as.character( findMarketDate(as.Date(endDate)))
  # Iterating over the values (not 1:length) makes an empty symbol vector a no-op.
  for (sym in symbols) {
    print(sym)
    symClose.z = NULL
    timeOut = 1
    tsEndDate.ch = endDate.ch
    # Up to six attempts; each retry asks for a series ending one market day earlier.
    while ((timeOut < 7) && is.null(symClose.z)) {
      try(
        (symClose.z = get.hist.quote(instrument=sym, start=startDate.ch, end=tsEndDate.ch, quote="AdjClose",
                                     provider="yahoo", compression="d", retclass="zoo", quiet=TRUE)),
        silent = TRUE)
      tsEndDate.ch = as.character( findMarketDate( (as.Date(tsEndDate.ch) - 1)))
      timeOut = timeOut + 1
    }
    if (is.null(symClose.z) || length(symClose.z) == 0) {
      next
    }
    dateIx = index(symClose.z)
    if (is.null(closeData.z)) {
      # First successfully fetched series defines the initial date range.
      closeData.z = symClose.z
      minDate = min(dateIx)
      maxDate = max(dateIx)
      fetchedSyms = c(fetchedSyms, sym)
      next
    }
    matIx = index(closeData.z)
    newMin = max(minDate, min(dateIx))
    newMax = min(maxDate, max(dateIx))
    # First collected date on/after newMin and last collected date on/before newMax.
    # This is the date the original day-by-day scan would stop on, but it cannot
    # loop forever when no such date exists.
    startCand = which(matIx >= newMin)
    endCand = which(matIx <= newMax)
    if (length(startCand) == 0 || length(endCand) == 0 ||
        startCand[1] > endCand[length(endCand)]) {
      warning("No overlapping dates for ", sym, "; symbol skipped", call. = FALSE)
      next
    }
    startIx = startCand[1]
    endIx = endCand[length(endCand)]
    minDate = matIx[startIx]
    maxDate = matIx[endIx]
    matIxAdj = matIx[startIx:endIx]
    closeData.z = cbind(closeData.z[matIxAdj,], symClose.z[matIxAdj])
    # Recorded only once the column has really been added, so names match columns.
    fetchedSyms = c(fetchedSyms, sym)
  } # for
  if (length(closeData.z) > 0) {
    dateIx = index(closeData.z)
    # fill any NA "holes" created by daily date alignment
    closeData.mat = apply(closeData.z, 2, FUN=fillHoles)
    rownames(closeData.mat) = as.character(dateIx)
    colnames(closeData.mat) = fetchedSyms
  }
  return( closeData.mat )
} # getDailyCloseData
</pre>
Community
  • 1
  • 1
iank
  • 63
  • 9

2 Answers

0

A couple of observations and questions. You're using get.hist.quote to return a zoo time series. Have you tried using merge.zoo from the zoo package to combine the time histories from different assets? That should align on dates without any problem. Second, Google and Yahoo correct historical prices in different ways, so the prices from the two differ. Yahoo gives the historical prices for open, high, low, and close, and then the adjusted price, which is adjusted for splits, dividends, and distributions. Google adjusts all prices but only for splits, ignoring dividends and distributions. You can see this difference for 2007 data with VXF.

I don't have a problem accessing Yahoo through quantmod's getSymbols so you might use this rather than switching to Google. Finally, according to Pimco, MINT's inception date is 11/16/2009 so I don't understand how you have data for 2007.

The xts package is something of an extension of zoo, and I find it has some helpful additional functions such as to.weekly, which is used in the following. The code below is an example of using the quantmod and xts packages to provide daily and weekly prices for your ETFs. Note that the MINT data doesn't start until Nov 17, 2009, consistent with Pimco's inception date.

library(quantmod)
library(xts)

 # Fetch column 6 of the Yahoo series returned by getSymbols() for each symbol
 # between startDate and endDate, merge the columns by date, and name each
 # column after its symbol.
 getDailyCloseData <- function(symbols, startDate, endDate) {
   # Download one symbol and keep only its sixth column.
   fetch_sixth_column <- function(ticker) {
     quotes <- getSymbols(ticker, src = "yahoo", from = startDate, to = endDate,
                          auto.assign = FALSE)
     quotes[, 6]
   }

   close_daily <- fetch_sixth_column(symbols[1])
   for (ticker in symbols[-1]) {
     close_daily <- merge(close_daily, fetch_sixth_column(ticker))
   }
   colnames(close_daily) <- symbols
   close_daily
 }

 # Example run: daily prices for two ETFs from mid-March 2007 through today.
 symbols <- c("VXF", "MINT")
 startDate <- "2007-03-15"
 endDate <- Sys.Date()
 close_daily <- getDailyCloseData(symbols, startDate, endDate)

 # Convert each column to weekly on its own, then merge the weekly columns
 # by date, starting from the first column.
 close_weekly <- to.weekly(close_daily[, 1], OHLC = FALSE)
 for (sym in tail(symbols, -1)) {
   close_weekly <- merge(close_weekly,
                         to.weekly(close_daily[, sym], OHLC = FALSE))
 }
WaltS
  • 5,410
  • 2
  • 18
  • 24
  • Thanks for the post. I have followed the course that you recommend: use Yahoo data via getSymbols(). As it turns out, using adjusted close prices (which only Yahoo has) is important if you want to pick up stocks that have dividends. Even if the price doesn't move much, the adjusted price will move because of the dividend payment. – iank Mar 02 '15 at 02:07
0

I've switched to using the quantmod getSymbols() function. The issues with Yahoo data are inconsistent, so it's hard to know if this is a complete solution. But the code is cleaner than what I've posted above.

The truth is, if you're investing real money and not just doing quantitative finance homework, you should probably buy professional grade data.

#
# Find the nearest market date (moving backward in time)
#
# Starting from `date`, step back one day at a time until isBizday() accepts
# the date, using holidayNYSE() for the year of the date being tested as the
# holiday list. A date that already passes is returned unchanged.
#
findMarketDate <- function(date) {
  repeat {
    # The year is recomputed on every pass because stepping back can cross
    # into the previous year.
    yr <- as.numeric(format(date, "%Y"))
    if (isBizday(x = as.timeDate(date), holidays = holidayNYSE(yr))) {
      return(date)
    }
    date <- date - 1
  }
}

#
# Fill "NA" holes in the time series.
#
# Step 1: na.approx() on the input, limited to runs of at most four
#         consecutive NAs (maxgap=4), without dropping any NAs (na.rm=FALSE).
# Step 2: na.fill() with fill="extend" on the result of step 1.
#
fillHoles <- function(ts.zoo) {
  interpolated <- na.approx(ts.zoo, maxgap = 4, na.rm = FALSE)
  na.fill(interpolated, fill = "extend")
}


#
# Get daily equity market prices (e.g., stocks, ETFs). This code is designed to work
# with both Yahoo and Google. Yahoo is preferred because they have adjusted prices. An adjusted
# price is adjusted for splits and dividends. As a result, an ETF that doesn't move that much in price
# may still move in dividend adjusted price. Using these prices avoids omitting high-dividend assets.
#
# Arguments:
#   symbols    - vector of ticker symbols to fetch
#   startDate  - first date wanted (anything as.Date() accepts); moved back to the
#                nearest market date by findMarketDate()
#   endDate    - last date wanted; handled like startDate
#   dataSource - passed to getSymbols() as src. "yahoo" selects the "<sym>.Adjusted"
#                column; any other value uses the midpoint of "<sym>.High" and "<sym>.Low".
#
# Returns a matrix with one row per date (row names are the dates as strings) and one
# column per symbol that was kept (column names are the symbols), or NULL when no symbol
# was kept. A symbol is kept only if it could be fetched and its series has rows on both
# the adjusted start date and the adjusted end date; otherwise it is skipped with a warning.
#
getDailyPriceData = function(symbols, startDate, endDate, dataSource = "yahoo" )
{
  closeData.z = c()
  fetchedSyms = c()
  startDate.d = findMarketDate(as.Date(startDate))
  endDate.d = findMarketDate(as.Date(endDate))
  # Iterating over the values (not 1:length) makes an empty symbol vector a no-op.
  for (sym in symbols) {
    print(sym)
    close.m = NULL
    timeOut = 1
    # Up to six attempts per symbol.
    while ((timeOut < 7) && is.null(close.m)) {
      # auto.assign is forced to FALSE: the code below needs the data itself, and with
      # auto.assign=TRUE getSymbols() returns the symbol name instead.
      try(
        (close.m = getSymbols(Symbols=sym, src=dataSource, auto.assign=FALSE,
                              warnings=FALSE)),
        silent = TRUE)
      timeOut = timeOut + 1
    } # while
    if (is.null(close.m)) {
      warning("Could not fetch ", sym, " from ", dataSource, "; symbol skipped", call. = FALSE)
      next
    }
    dateIx = index(close.m)
    startIx = which(startDate.d == dateIx)
    endIx = which(endDate.d == dateIx)
    if (length(startIx) == 0 || length(endIx) == 0) {
      warning("No data for ", sym, " on ", as.character(startDate.d), " and/or ",
              as.character(endDate.d), "; symbol skipped", call. = FALSE)
      next
    }
    fetchedSyms = c(fetchedSyms, sym)
    # Use the first/last match so the slice bounds are always scalars.
    closeAdj.m = close.m[startIx[1]:endIx[length(endIx)],]

    if (dataSource == "yahoo") {
      yahooAdjCol = paste(sym, "Adjusted", sep=".")
      price.z = closeAdj.m[, yahooAdjCol]
    } else {
      # No adjusted column is used for other sources; take the daily high/low midpoint.
      highCol = paste(sym, "High", sep=".")
      lowCol = paste(sym, "Low", sep=".")
      price.z = (closeAdj.m[,highCol] + closeAdj.m[,lowCol])/2
    }
    if (is.null(closeData.z)) {
      closeData.z = price.z
    } else {
      closeData.z = cbind(closeData.z, price.z)
    }
  } # for
  closeData.m = c()
  if (length(closeData.z) > 0) {
    dateIx = index(closeData.z)
    closeData.m = coredata(closeData.z)
    numHoles = sum(is.na(closeData.m))
    if (numHoles > 0) {
      # fill any NA "holes" created by daily date alignment
      closeData.m = apply(closeData.m, 2, FUN=fillHoles)
    }
    rownames(closeData.m) = as.character(dateIx)
    colnames(closeData.m) = fetchedSyms
  }
  return( closeData.m )
} # getDailyPriceData
iank
  • 63
  • 9