1

So I've been working on grabbing climate data (specifically temperature and precipitation) from NOAA's network of GHCN weather stations. I've managed to get a list of the stations pertinent to my area (~200) and have built a loop to get a given climate variable for every station on that list for every day between a specified min and max date. Ultimately I need ~10 years' worth of data. However, my simple loop is taking forever to get this data, and I was wondering if there's a better way to optimize it. Also, I really want to access monthly data rather than daily, but rnoaa doesn't seem to have an option for GHCN monthly data, as the only available function is ghcnd_search(). If anyone knows how to retrieve monthly rather than daily data, that would be appreciated.

Station list:

# Character vector of GHCN weather-station IDs; the download code below uses
# these as `stationid` values for ghcnd_search().
df<-c("US1FLAL0048", "US1FLBK0003", "US1FLBV0002", "US1FLBV0006", 
"US1FLBV0023", "US1FLBV0040", "US1FLBW0099", "US1FLCT0012", "US1FLDV0051", 
"US1FLFR0006", "US1FLHL0003", "US1FLHN0009", "US1FLLB0001", "US1FLLE0005", 
"US1FLLK0012", "US1FLLN0004", "US1FLLN0018", "US1FLMN0013", "US1FLMR0012", 
"US1FLMR0033", "US1FLOK0017", "US1FLOR0028", "US1FLPS0002", "US1FLPS0018", 
"US1FLPT0007", "US1FLSJ0012", "US1FLSM0008", "US1FLSS0044", "US1FLST0014", 
"US1FLSW0008", "US1FLVL0035", "US1FLWK0001", "USC00080228", "USC00080236", 
"USC00080369", "USC00080414", "USC00080478", "USC00080598", "USC00080737", 
"USC00080945", "USC00080992", "USC00081163", "USC00081276", "USC00081306", 
"USC00081544", "USC00081641", "USC00081651", "USC00081978", "USC00082008", 
"USC00082046", "USC00082150", "USC00082229", "USC00082288", "USC00082298", 
"USC00082391", "USC00082418", "USC00082441", "USC00082850", "USC00082915", 
"USC00082944", "USC00083020", "USC00083153", "USC00083163", "USC00083168", 
"USC00083207", "USC00083209", "USC00083470", "USC00083874", "USC00083909", 
"USC00083956", "USC00083986", "USC00084050", "USC00084095", "USC00084210", 
"USC00084289", "USC00084320", "USC00084366", "USC00084394", "USC00084412", 
"USC00084461", "USC00084625", "USC00084662", "USC00084731", "USC00084802", 
"USC00085076", "USC00085099", "USC00085184", "USC00085275", "USC00085359", 
"USC00085377", "USC00085539", "USC00085612", "USC00085667", "USC00085879", 
"USC00085895", "USC00085973", "USC00086065", "USC00086078", "USC00086129", 
"USC00086240", "USC00086315", "USC00086406", "USC00086414", "USC00086618", 
"USC00086657", "USC00086764", "USC00086767", "USC00086828", "USC00086842", 
"USC00086999", "USC00087020", "USC00087025", "USC00087205", "USC00087228", 
"USC00087261", "USC00087304", "USC00087397", "USC00087429", "USC00087760", 
"USC00087826", "USC00087851", "USC00087869", "USC00087886", "USC00087982", 
"USC00088368", "USC00088529", "USC00088620", "USC00088756", "USC00088782", 
"USC00088824", "USC00088942", "USC00089120", "USC00089176", "USC00089219", 
"USC00089401", "USC00089430", "USC00089566", "USC00089640", "USC00089795", 
"USR0000FBLO", "USR0000FCAC", "USR0000FCEN", "USR0000FCHE", "USR0000FLSU", 
"USR0000FMER", "USR0000FMIL", "USR0000FNAV", "USR0000FOAS", "USR0000FOCH", 
"USR0000FOLU", "USR0000FRAC", "USR0000FSAN", "USR0000FSTM", "USR0000FSUM", 
"USR0000FWIL", "USW00003818", "USW00003853", "USW00012812", "USW00012815", 
"USW00012816", "USW00012818", "USW00012819", "USW00012832", "USW00012833", 
"USW00012834", "USW00012835", "USW00012836", "USW00012838", "USW00012839", 
"USW00012841", "USW00012842", "USW00012843", "USW00012844", "USW00012849", 
"USW00012850", "USW00012854", "USW00012871", "USW00012873", "USW00012876", 
"USW00012882", "USW00012885", "USW00012888", "USW00012894", "USW00012895", 
"USW00012896", "USW00012897", "USW00013884", "USW00013889", "USW00013899", 
"USW00053847", "USW00053853", "USW00053860", "USW00092805", "USW00092806", 
"USW00092809", "USW00092811", "USW00092821", "USW00093805", "USW00093837", 
"USW00093841")

Code:

library(rnoaa)
options(noaakey = "your api key")
# Download daily TMAX for every station in `df` and stack the per-station
# tables into `data`.
#
# Fixes relative to the original loop:
#   * each iteration now queries its own station (the original always passed
#     df[1], so the first station was downloaded once per element of `df`);
#   * per-station results are collected in a list and bound once at the end,
#     rather than re-copying `data` with rbind() on every iteration;
#   * lapply() over `df` replaces 1:length(df), which misbehaves when `df`
#     is empty.
station_tmax <- lapply(df, function(station_id) {
  station_data <- ghcnd_search(
    stationid = station_id,
    var = "TMAX",
    date_min = "2010-1-30",
    date_max = "2015-12-31"
  )
  # Keep only the TMAX table; a NULL here is kept by lapply() and is simply
  # ignored by rbind() below.
  station_data[["tmax"]]
})

# Single bind of all station tables (NULL when `df` is empty).
data <- do.call(rbind, station_tmax)
Leo Ohyama
  • 887
  • 1
  • 9
  • 26
  • 1
    Difficult to optimize, but growing your `data` matrix on every iteration of the loop is costly. Since you know your date range, and that the data are daily, and how many stations will be queried, you can pre-allocate an appropriately sized matrix and fill in chunks of the matrix as you go. As it stands, you are destroying and creating the data frame over and over. – jdobres Apr 10 '18 at 01:13
  • Are you sure you can get 10 years of data using `rnoaa`? IIRC they have a maximum limit of 1000 days – Tung Apr 10 '18 at 03:11

1 Answers1

2

Assuming the station IDs are stored in a vector called dat, we can use the functions from the purrr package to download the data and create a data frame.

# Load packages
library(rnoaa)
library(purrr)

# Query every station in `dat` for daily TMAX, then pull the "tmax" element
# out of each result and row-bind them into a single data frame.
dat_df <- map_dfr(
  map(dat, function(station) {
    ghcnd_search(
      station,
      var = "TMAX",
      date_min = "2010-1-30",
      date_max = "2015-12-31"
    )
  }),
  "tmax"
)

DATA

# Character vector of station IDs that the map() call above iterates over,
# passing each one to ghcnd_search().
dat<-c("US1FLAL0048", "US1FLBK0003", "US1FLBV0002", "US1FLBV0006", 
      "US1FLBV0023", "US1FLBV0040", "US1FLBW0099", "US1FLCT0012", "US1FLDV0051", 
      "US1FLFR0006", "US1FLHL0003", "US1FLHN0009", "US1FLLB0001", "US1FLLE0005", 
      "US1FLLK0012", "US1FLLN0004", "US1FLLN0018", "US1FLMN0013", "US1FLMR0012", 
      "US1FLMR0033", "US1FLOK0017", "US1FLOR0028", "US1FLPS0002", "US1FLPS0018", 
      "US1FLPT0007", "US1FLSJ0012", "US1FLSM0008", "US1FLSS0044", "US1FLST0014", 
      "US1FLSW0008", "US1FLVL0035", "US1FLWK0001", "USC00080228", "USC00080236", 
      "USC00080369", "USC00080414", "USC00080478", "USC00080598", "USC00080737", 
      "USC00080945", "USC00080992", "USC00081163", "USC00081276", "USC00081306", 
      "USC00081544", "USC00081641", "USC00081651", "USC00081978", "USC00082008", 
      "USC00082046", "USC00082150", "USC00082229", "USC00082288", "USC00082298", 
      "USC00082391", "USC00082418", "USC00082441", "USC00082850", "USC00082915", 
      "USC00082944", "USC00083020", "USC00083153", "USC00083163", "USC00083168", 
      "USC00083207", "USC00083209", "USC00083470", "USC00083874", "USC00083909", 
      "USC00083956", "USC00083986", "USC00084050", "USC00084095", "USC00084210", 
      "USC00084289", "USC00084320", "USC00084366", "USC00084394", "USC00084412", 
      "USC00084461", "USC00084625", "USC00084662", "USC00084731", "USC00084802", 
      "USC00085076", "USC00085099", "USC00085184", "USC00085275", "USC00085359", 
      "USC00085377", "USC00085539", "USC00085612", "USC00085667", "USC00085879", 
      "USC00085895", "USC00085973", "USC00086065", "USC00086078", "USC00086129", 
      "USC00086240", "USC00086315", "USC00086406", "USC00086414", "USC00086618", 
      "USC00086657", "USC00086764", "USC00086767", "USC00086828", "USC00086842", 
      "USC00086999", "USC00087020", "USC00087025", "USC00087205", "USC00087228", 
      "USC00087261", "USC00087304", "USC00087397", "USC00087429", "USC00087760", 
      "USC00087826", "USC00087851", "USC00087869", "USC00087886", "USC00087982", 
      "USC00088368", "USC00088529", "USC00088620", "USC00088756", "USC00088782", 
      "USC00088824", "USC00088942", "USC00089120", "USC00089176", "USC00089219", 
      "USC00089401", "USC00089430", "USC00089566", "USC00089640", "USC00089795", 
      "USR0000FBLO", "USR0000FCAC", "USR0000FCEN", "USR0000FCHE", "USR0000FLSU", 
      "USR0000FMER", "USR0000FMIL", "USR0000FNAV", "USR0000FOAS", "USR0000FOCH", 
      "USR0000FOLU", "USR0000FRAC", "USR0000FSAN", "USR0000FSTM", "USR0000FSUM", 
      "USR0000FWIL", "USW00003818", "USW00003853", "USW00012812", "USW00012815", 
      "USW00012816", "USW00012818", "USW00012819", "USW00012832", "USW00012833", 
      "USW00012834", "USW00012835", "USW00012836", "USW00012838", "USW00012839", 
      "USW00012841", "USW00012842", "USW00012843", "USW00012844", "USW00012849", 
      "USW00012850", "USW00012854", "USW00012871", "USW00012873", "USW00012876", 
      "USW00012882", "USW00012885", "USW00012888", "USW00012894", "USW00012895", 
      "USW00012896", "USW00012897", "USW00013884", "USW00013889", "USW00013899", 
      "USW00053847", "USW00053853", "USW00053860", "USW00092805", "USW00092806", 
      "USW00092809", "USW00092811", "USW00092821", "USW00093805", "USW00093837", 
      "USW00093841")
www
  • 38,575
  • 12
  • 48
  • 84
  • Hey @www I ended up using a different search function in rnoaa with a very different for loop format. It involves mining the GSOM dataset at NOAA. I was wondering if there was a way I could share my loop with you because I need some advice on how to incorporate it using a purrr function. Sadly, the character limit does not allow me to copy and paste it. – Leo Ohyama Apr 13 '18 at 12:03
  • @LeoOhyama I think you can ask a new question with a reproducible example about the GSOM dataset. There is no need to share the entire dataset; just part of it would be sufficient. – www Apr 13 '18 at 12:41
  • thanks! here is the new question https://stackoverflow.com/questions/49818889/better-optimization-for-rnoaa-gsom-for-loop – Leo Ohyama Apr 13 '18 at 14:06