I have 366 raster image files (MODIS satellite daily data) in TIF format containing snow data, and a CSV file containing 19,000 locations (latitudes and longitudes). I need to extract the snow value at each location from every raster file. I have tried doing this with the GDAL Python library; however, the program takes approximately 30 minutes to collect the data from a single file, which means the full run would take around 180 hours. The code I am using is below. Please suggest any way I can speed up the program, or a better way to implement the same task.
import gdal
import pandas
import numpy as np
import os,subprocess
def runCmdAndGetOutput(cmd):
    """Run *cmd* and return the first line of its stdout parsed as a float.

    Args:
        cmd: Argument list handed to ``subprocess.Popen`` (no shell).

    Returns:
        ``float`` parsed from the first stdout line, decoded as UTF-8.
        Any further output lines are collected and printed but not parsed.

    Raises:
        ValueError: If the command writes nothing to stdout, or if the
            first line is not a valid float literal.
    """
    # The context manager closes the stdout pipe and waits for the child to
    # exit. Without it, every call leaves an open handle and an unreaped
    # process behind, which adds up quickly when called once per point.
    with subprocess.Popen(cmd, stdout=subprocess.PIPE) as proc:
        # Iterating the pipe yields lines until EOF, like a readline() loop.
        outList = [line.rstrip() for line in proc.stdout]
    print(outList)
    if not outList:
        # Fail with context instead of a bare IndexError on outList[0].
        raise ValueError("command produced no output: {!r}".format(cmd))
    return float(outList[0].decode("utf-8"))
# Sample every location in the CSV against every raster in ``ndsFileLoc`` and
# write the resulting (location x day) table to ``test.csv``.
#
# File names are presumably of the form "2016001.tif" (year + 3-digit
# day-of-year); characters 4..6 are parsed as the output column index.
locs = "hkkhlocations.csv"
ndsFileLoc = r"D:\SrinivasaRao_Docs\MODIS_NDSI_V6_2016\5000000499560\out"

latLnginfo = pandas.read_csv(locs)
print(latLnginfo.columns)
print(latLnginfo.shape)

# One row per location, 370 columns indexed by the day number parsed from the
# file name; NaN marks cells that have not been sampled.
outDf = pandas.DataFrame(np.zeros([len(latLnginfo), 370]) * np.nan)
day = 1

# List the directory once and reuse the result.
datasetsList = os.listdir(ndsFileLoc)
print(datasetsList)
print(type(datasetsList))

# Pull the coordinate columns out once instead of doing a positional .iloc
# lookup per point per raster. Column 4 is passed to gdallocationinfo as x and
# column 3 as y (presumably longitude and latitude -- confirm against the CSV).
xCoords = latLnginfo.iloc[:, 4].tolist()
yCoords = latLnginfo.iloc[:, 3].tolist()

for eFile in datasetsList:
    # Skip anything that is not a raster (e.g. sidecar files in the folder).
    if not eFile.lower().endswith((".tif", ".tiff")):
        continue
    cCount = int(eFile[4:7])
    rasterPath = os.path.join(ndsFileLoc, eFile)
    # Iterate the locations table itself so the row count always matches
    # latLnginfo, rather than depending on the length of a separate file.
    for rCount, (xCoord, yCoord) in enumerate(zip(xCoords, yCoords)):
        cmdToRun = ["gdallocationinfo", "-valonly", "-wgs84", rasterPath,
                    str(xCoord), str(yCoord)]
        v = runCmdAndGetOutput(cmdToRun)
        outDf.iloc[rCount, cCount] = float(v)
        print("rowno: ", rCount + 1, "Dayno :", cCount, "SCF value: ", v)
    day = day + 1

outDf.to_csv('test.csv')
'''