-1

I am writing a Python script to handle NetCDF files, but I am running into a problem when creating variables. Here is the relevant part of the code:

# First createVariable call: name "step_number", type code "i", and a
# one-element tuple naming the dimension the variable is defined over.
stepnumber_var = ofl.createVariable("step_number", "i",("step_number",))
stepnumber_var.standard_name = "step_number"

# Second createVariable call, same pattern for "atom_number"; this is the
# line the traceback below points to.
atomNumber_var = ofl.createVariable("atom_number", "i", ("atom_number",))
atomNumber_var.standard_name = "atom__number"

But it gives me this error:

Traceback (most recent call last):
  File "sub_avg.py", line 141, in <module>
    atomNumber_var = ofl.createVariable("atom_number", "i", ("atom_number",))
IOError: netcdf: NetCDF: Invalid dimension ID or name

My question is: why is the first variable created without any problem, while the second one fails?

Thanks

Here is the full code:

from array import array 
import os
import sys
import math
import string as st
import numpy as N
from Scientific.IO.NetCDF import NetCDFFile as S

# Abort early when no trajectory file was given on the command line.
if len(sys.argv) < 2:
    sys.exit( "No input file found. \nPlease privide NetCDF trajectory input file" )
#######################
## Open NetCDF file ###
#######################
infl = S(sys.argv[1], 'r')  # input trajectory, opened read-only

file = sys.argv[1]
# Strip only the final extension. A plain split(".") unpacked into two names
# raises ValueError as soon as the path contains more (or fewer) than one dot,
# e.g. "run.1/traj.nc" or "traj.v2.nc". Note that `ext` keeps its leading dot.
title, ext = os.path.splitext(file)

# Debug helper: list the variables stored in the input file.
# for v in infl.variables:
#     print(v)

#################################################################################
# Variable "configurations" has the structure [step_number, atom_number, x y z] #
#################################################################################

# Shape of the variable, i.e. the number of elements along each dimension.
varShape = infl.variables['configuration'].shape

nSteps = varShape[0]
nAtoms = varShape[1]

# Per-frame, per-atom coordinates, one (nSteps, nAtoms) array per axis.
coordX_atom = N.zeros((nSteps, nAtoms))
coordY_atom = N.zeros((nSteps, nAtoms))
coordZ_atom = N.zeros((nSteps, nAtoms))

# Per-atom running sums of each coordinate.
sumX = [0] * nAtoms
sumY = [0] * nAtoms
sumZ = [0] * nAtoms

######################################################
# 1) Calculate the average structure from trajectory #
######################################################

# NOTE(review): every loop in this step stops at index 3, yet the sums are
# divided by nSteps below -- presumably a limit left in for quick testing;
# confirm whether they should run over range(nSteps) / range(nAtoms).
for i in range(0, 3):
    for j in range(0, 3):
        # Copy the x, y and z coordinate of atom j in frame i.
        coordX_atom[i][j] = infl.variables["configuration"][i,j,0]
        coordY_atom[i][j] = infl.variables["configuration"][i,j,1]
        coordZ_atom[i][j] = infl.variables["configuration"][i,j,2]

        # Accumulate the per-atom sums.
        sumX[j] = sumX[j] + coordX_atom[i][j]
        sumY[j] = sumY[j] + coordY_atom[i][j]
        sumZ[j] = sumZ[j] + coordZ_atom[i][j]

avgX = [0] * nAtoms
avgY = [0] * nAtoms
avgZ = [0] * nAtoms

# All three assignments sit at the same indentation level; the stray extra
# indentation on the Y and Z lines made the script fail with IndentationError.
for j in range(0, 3):
    avgX[j] = sumX[j]/nSteps
    avgY[j] = sumY[j]/nSteps
    avgZ[j] = sumZ[j]/nSteps

###################################################################
# 2) Subtract the average structure from each atom, in each frame #
###################################################################

# Only frames 0-2 and atoms 0-2 are processed here.
trajectory = infl.variables["configuration"]
for frame in range(3):
    for atom in range(3):
        coordX_atom[frame, atom] = trajectory[frame, atom, 0] - avgX[atom]
        coordY_atom[frame, atom] = trajectory[frame, atom, 1] - avgY[atom]
        coordZ_atom[frame, atom] = trajectory[frame, atom, 2] - avgZ[atom]

#######################################
# 3) Write new NetCDF trajectory file #
#######################################

# Output file, named after the input file; mode "a" is kept as in the
# original script.
ofl = S(title + "_subAVG.nc", "a")

###################################################################
# Get information of variables contained in the NetCDF input file #
###################################################################

# Names of all variables stored in the input file.
varNames = list(infl.variables)

##########################################################################
# For each variable of interest, collect its dimension names, its shape  #
# and the number of dimensions. Built directly with list()/len() so the  #
# names are defined even when a variable has no dimensions.              #
##########################################################################
boxSizeNames = list(infl.variables["box_size"].dimensions)
boxSizeShape = list(infl.variables["box_size"].shape)
boxSizeLenght = len(boxSizeNames)
print(boxSizeLenght)

stepNames = list(infl.variables["step"].dimensions)
# Uses the shape of "step" itself; the earlier version read the shape of
# "box_size" here by mistake.
stepShape = list(infl.variables["step"].shape)
stepLenght = len(stepNames)
print(stepLenght)

configurationNames = list(infl.variables["configuration"].dimensions)
configurationShape = list(infl.variables["configuration"].shape)
configurationLenght = len(configurationNames)
print(configurationLenght)

descriptionNames = list(infl.variables["description"].dimensions)
descriptionShape = list(infl.variables["description"].shape)
descriptionLenght = len(descriptionNames)
print(descriptionLenght)

timeNames = list(infl.variables["time"].dimensions)
timeShape = list(infl.variables["time"].shape)
timeLenght = len(timeNames)
print(timeLenght)

# Box size: the three components stored in the first row of "box_size".
xBox = infl.variables["box_size"][0,0]
yBox = infl.variables["box_size"][0,1]
zBox = infl.variables["box_size"][0,2]

# NOTE(review): despite its name, this holds the full contents of the
# "description" variable (a [:] read), not a length.
description_lenghtLenght = infl.variables["description"][:]

############################################################
# Create Dimensions
############################################################

# NOTE(review): despite the heading above, this section only calls
# createVariable; no createDimension call on ofl appears anywhere earlier in
# the script, although each variable below names a dimension.
# Variable "step_number", type code "i", defined over dimension "step_number".
stepnumber_var = ofl.createVariable("step_number", "i",("step_number",))
stepnumber_var.standard_name = "step_number"

# Variable "atom_number", type code "i", defined over dimension "atom_number".
# Its standard_name attribute is spelled with a double underscore.
atomNumber_var = ofl.createVariable("atom_number", "i", ("atom_number",))
atomNumber_var.standard_name = "atom__number"


#
#xyz_var = ofl.createVariable("xyz", "f",("xyz",))
#xyz_var.units = "nanometers"
#xyz_var.standard_name = "xyz"
#
#configuration_var = ofl.createVariable("configuration", "f", ("step_number", "atom_number", "xyz"))
#configuration_var.units = "nanometers"
#configuration_var.standard_name = "configuration"
#
#print configuration_var.shape
#step_var = ofl.createVariable("box_size_lenght", 3)
#configuration_var = ofl.createVariable("atom_number", nAtoms)
#description_var = ofl.createVariable("xyz", 3)
#time_var = ofl.createVariable(description_lenght, description_lenghtLenght)
#
#a = infl.variables["step_number"].dimensions.keys()
#print a

Thanks!

Marco_G
  • 1
  • 2
  • 1
    Would you be willing to post more of the code? There isn't enough information in your small snippet to go off of, other than you may not have created the atom_number dimension. Thanks! – Sean A. Jul 18 '14 at 14:41
  • Posted; you will find the lines near the end of the code. Thanks! – Marco_G Jul 18 '14 at 14:47
  • Is it possible that the dimension "atom_number" does not exist in the input file? If not, you may need to create it. Also, if you are adding new things to the netCDF file, you should open it in "w" mode, not "r" mode (around line 14). How big is the file you are trying to read? – Sean A. Jul 18 '14 at 15:03
  • Never mind, I read more of the script...let me keep looking... – Sean A. Jul 18 '14 at 15:08
  • Hello Sean, I use the input file only to read data; I then need to create a new file (called ofl in the code, opened in "a" mode). So it shouldn't matter whether the atom_number variable exists in the input file or not (by the way, it does exist), since I'm creating a new one in a new file. Am I right? – Marco_G Jul 18 '14 at 15:11
  • The input file I'm using for testing is just 100 MB, the real one would be more or less 15 GB – Marco_G Jul 18 '14 at 15:12
  • What is interesting is that I do not see where in ofl you are actually creating any dimensions. Before and after you create stepnumber_var, can you add a print statement to list the dimensions in ofl? Sorry for the long train here, but we'll get it figured out. – Sean A. Jul 18 '14 at 15:23
  • I should mention that I only started using NetCDF yesterday, which is surely part of the problem. Anyway, I was thinking that I didn't create any dimensions before creating the variables; the print command gives: []. Could this be the problem? Thank you – Marco_G Jul 18 '14 at 15:31
  • Why not move one level higher and use [`xarray`](http://xarray.pydata.org)? It takes care of so many things for you. Create a [`DataArray`](http://xarray.pydata.org/en/stable/generated/xarray.DataArray.html#xarray.DataArray) with your data, turn it into a `Dataset` and save to a well-formatted netCDF file in one line. – j08lue May 16 '17 at 07:22

2 Answers

3

This may be a case of a library trying to be "helpful" (see the end of my post for details, but I can't confirm it). To fix this, you should explicitly create dimensions for atom_number and step_number, by using the following before you create the variables (assuming I am understanding nSteps and nAtoms correctly):

# Define both dimensions before any createVariable call refers to them.
ofl.createDimension("step_number", nSteps)
ofl.createDimension("atom_number", nAtoms)

If you are new to netCDF, I might suggest looking at either the netcdf4-python package,

http://unidata.github.io/netcdf4-python/

or the netCDF package found in scipy:

http://docs.scipy.org/doc/scipy/reference/io.html

What might be going on: it looks like the issue is that when you create the variable step_number, the library is trying to be helpful by creating a step_number dimension with unlimited length. However, you can only have one unlimited dimension in a netcdf-3 file, so the helpful "trick" does not work.

Sean A.
  • 652
  • 5
  • 9
  • Thank you Sean, that was my idea too! However, every time I run the script I need to delete the previously created ofl file first; otherwise I get an error (instead of the ofl file being rewritten). – Marco_G Jul 21 '14 at 08:43
  • That's interesting...I would think the "a" flag would handle that. Are you closing the file after writing to it by using ofl.close()? If so, you may want to submit a bug report to the developers at https://bitbucket.org/khinsen/scientificpython/issues/new – Sean A. Jul 21 '14 at 15:15
  • Hi Sean, yes I thought about that and yes I am closing the file, I don't know why but I need to delete ofl file in order to make the program work. I will send a bug report! – Marco_G Jul 22 '14 at 11:26
0

atomNumber_var.standard_name = "atom__number"

The string "atom__number" contains two underscores ("__") instead of one ("_"). I am not sure if this is your problem, but it may be something to look at.

I would also suggest making the steps that build your netCDF file clearer. I like to break them down into three steps; the example below uses scientific data, an ocean sea-surface temperature (SST) field. Note that your script has a section headed "Create Dimensions", but it never actually creates any; it is more accurately a "create variables" section.

  1. Create Dimensions

  2. Create Variable

  3. Fill the variable

    from netCDF4 import Dataset

    # latdata, londata and sst are assumed to exist already: 1-D latitude and
    # longitude coordinate arrays and a 2-D (latitude, longitude) field.
    ncfile = Dataset('temp.nc', 'w')
    latsdim = len(latdata)    # dimension lengths must be integers, not shape tuples
    lonsdim = len(londata)
    ###############
    # Create Dimensions
    ###############
    latdim = ncfile.createDimension('latitude', latsdim)
    londim = ncfile.createDimension('longitude', lonsdim)
    ###############
    # Create Variables
    ###############
    # Each variable is defined over dimensions that were created above.
    latitude = ncfile.createVariable('latitude', 'f8', ('latitude',))
    longitude = ncfile.createVariable('longitude', 'f8', ('longitude',))
    oceantemp = ncfile.createVariable('SST', 'f4', ('latitude', 'longitude'), fill_value=-99999.0)
    ###############
    # Fill Variables
    ###############
    latitude[:] = latdata       # latitude data to fill in
    longitude[:] = londata      # longitude data to fill in
    oceantemp[:, :] = sst[:, :]  # some previously calculated variable
    # Close the dataset so everything is flushed to disk.
    ncfile.close()
    

I hope this is helpful.

Almidas
  • 379
  • 2
  • 6
  • 16