Commit

Merge pull request #70 from kwilcox/master
Local file(s) support and cleanup
daf committed Jan 31, 2014
2 parents 4bbbfe3 + 2787dc5 commit 096e3dc
Showing 6 changed files with 74 additions and 43 deletions.
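In practical terms, this change lets CommonDataset.open accept a single local NetCDF file, a glob of local files (aggregated through netCDF4.MFDataset, retried with aggdim='time' if the default aggregation fails), or an already-open netCDF4/paegan dataset object, in addition to remote URLs. A minimal usage sketch (the file paths are illustrative and not part of this commit):

    from paegan.cdm.dataset import CommonDataset
    import netCDF4

    # Single local file (or an OPeNDAP URL), opened with netCDF4.Dataset
    pd = CommonDataset.open("/data/pws_das_2014012600.nc")

    # Glob of local files, aggregated with netCDF4.MFDataset
    pd = CommonDataset.open("/data/pws_das_20140126*.nc")

    # An already-open netCDF4.Dataset (or paegan Dataset) is passed through as-is
    pd = CommonDataset.open(netCDF4.Dataset("/data/pws_das_2014012600.nc"))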
97 changes: 59 additions & 38 deletions paegan/cdm/dataset.py
@@ -58,7 +58,6 @@
"lat_psi", "LAT_PSI",
]


def _sub_by_nan(data, ind):
"""
Function to subset a dimension variable by replacing values
@@ -96,6 +95,36 @@ def _sub_by_nan2(data, ind):

class CommonDataset(object):

@staticmethod
def nc_object(ncfile, tname='time'):

if isinstance(ncfile, basestring):
try:
return netCDF4.Dataset(ncfile)
except (IOError, RuntimeError, IndexError):
# Are we a set of files?
try:
return netCDF4.MFDataset(ncfile)
except (IOError, RuntimeError, IndexError):
try:
return netCDF4.MFDataset(ncfile, aggdim=tname)
except (IOError, RuntimeError, IndexError):
try:
# Unicode paths sometimes fail; retry with a plain str
return netCDF4.MFDataset(str(ncfile), aggdim=tname)
except Exception:
logger.exception("Can not open %s" % ncfile)
raise
except Exception:
logger.exception("Can not open %s" % ncfile)
raise
elif isinstance(ncfile, Dataset):
# Passed in paegan Dataset object
return ncfile.nc
elif isinstance(ncfile, netCDF4.Dataset) or isinstance(ncfile, netCDF4.MFDataset):
# Passed in a netCDF4 Dataset object
return ncfile

@staticmethod
def open(ncfile, xname='lon', yname='lat', zname='z', tname='time', **kwargs):
"""
@@ -110,27 +139,8 @@ def open(ncfile, xname='lon', yname='lat', zname='z', tname='time', **kwargs):
>> dataset = CommonDataset.open(url, dataset_type="cgrid")
"""

nc = None
filename = None

if isinstance(ncfile, str):
ncfile = unicode(ncfile.strip())

if isinstance(ncfile, unicode):
try:
nc = netCDF4.Dataset(ncfile)
filename = ncfile
except StandardError:
logger.error(ncfile)
raise
elif isinstance(ncfile, Dataset):
# Passed in paegan Dataset object
nc = ncfile.nc
elif isinstance(ncfile, netCDF4.Dataset):
# Passed in a netCDF4 Dataset object
nc = ncfile

datasettype = kwargs.get('dataset_type', None)
nc = CommonDataset.nc_object(ncfile)
filepath = ncfile

# Find the coordinate variables for testing, unknown if not found
keys = set(nc.variables)
@@ -147,6 +157,7 @@ def open(ncfile, xname='lon', yname='lat', zname='z', tname='time', **kwargs):
testvarx = nc.variables[xmatches[0]]

# Test the shapes of the coordinate variables to determine the grid type
datasettype = kwargs.get('dataset_type', None)
if datasettype is None:
if testvary.ndim > 1:
datasettype = "cgrid"
@@ -170,13 +181,13 @@ def open(ncfile, xname='lon', yname='lat', zname='z', tname='time', **kwargs):
from paegan.cdm.grids.r_grid import RGridDataset

if datasettype == 'ncell':
dataobj = NCellDataset(filename, datasettype,
dataobj = NCellDataset(filepath, datasettype,
zname=zname, tname=tname, xname=xname, yname=yname)
elif datasettype == 'rgrid':
dataobj = RGridDataset(filename, datasettype,
dataobj = RGridDataset(filepath, datasettype,
zname=zname, tname=tname, xname=xname, yname=yname)
elif datasettype == 'cgrid':
dataobj = CGridDataset(filename, datasettype,
dataobj = CGridDataset(filepath, datasettype,
zname=zname, tname=tname, xname=xname, yname=yname)
else:
dataobj = None
@@ -185,11 +196,9 @@ def open(ncfile, xname='lon', yname='lat', zname='z', tname='time', **kwargs):


class Dataset(object):
def __init__(self, filename, datasettype, xname='lon', yname='lat',
def __init__(self, filepath, datasettype, xname='lon', yname='lat',
zname='z', tname='time'):
self.nc = None
self._coordcache = dict()
self._filename = filename
self._datasettype = datasettype

self._possiblet = _possiblet
@@ -206,6 +215,7 @@ def __init__(self, filename, datasettype, xname='lon', yname='lat',
if tname not in self._possiblet:
self._possiblet.append(tname)

self._filepath = filepath
self.opennc()
self._current_variables = list(self.nc.variables.keys())

@@ -222,7 +232,7 @@ def getvariableinfo(self):
for var in self._current_variables:
variables[var] = {}
for attr in self.nc.variables[var].ncattrs():
variables[var][attr] = self.nc.variables[var].getncattr(attr)
variables[var][attr] = getattr(self.nc.variables[var], attr)
return variables

def lon2ind(self, var=None, **kwargs):
@@ -243,14 +253,25 @@ def get_xyind_from_bbox(self, var, bbox):
def get_xyind_from_point(self, var, point, **kwargs):
raise NotImplementedError

def closenc(self):
self.metadata = None
self.nc.close()
self.nc = None

def opennc(self):
self.nc = netCDF4.Dataset(self._filename)
self.metadata = self.nc.__dict__
try:
# Open the handle if it is None
assert self.nc is not None
# Raises an exception when the dataset has already been closed
self.nc.__str__()
except StandardError:
self.nc = CommonDataset.nc_object(self._filepath)
self.metadata = self.nc.__dict__

def closenc(self):
try:
# close will raise an error if the Dataset is already closed
self.nc.close()
except StandardError:
pass
finally:
self.metadata = None
self.nc = None

def gettimestep(self, var=None):
assert var in self._current_variables
@@ -382,7 +403,7 @@ def __str__(self):
[[
<Paegan Dataset Object>
Dataset Type: """ + self._datasettype + """
Resource: """ + self._filename + """
Resource: """ + self._filepath + """
Variables:
""" + str(k) + """
]]"""
Expand Down Expand Up @@ -499,7 +520,7 @@ def get_varname_from_stdname(self, standard_name=None, match=None):
return var_matches

def __repr__(self):
s = "CommonDataset(" + self._filename + \
s = "CommonDataset(" + self._filepath + \
", dataset_type='" + self._datasettype + "')"
return s

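The reworked opennc/closenc pair above is intended to be safe to call repeatedly: closenc swallows the error raised by an already-closed handle, and opennc only rebuilds self.nc (through CommonDataset.nc_object and the stored _filepath) when the handle is missing or closed. A rough sketch of that lifecycle (path illustrative):

    pd = CommonDataset.open("/data/pws_das_2014012600.nc")
    pd.closenc()   # closes the underlying netCDF4 handle
    pd.closenc()   # second call is a no-op instead of raising
    pd.opennc()    # reopens from pd._filepath; does nothing if already open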
4 changes: 2 additions & 2 deletions paegan/cdm/grids/c_grid.py
@@ -15,7 +15,7 @@ def __init__(self, *args,**kwargs):
super(CGridDataset,self).__init__(*args, **kwargs)

def _copy(self):
new = CGridDataset(self._filename, self._datasettype)
new = CGridDataset(self._filepath, self._datasettype)
new._coordcache = copy.copy(self._coordcache)
new._current_variables = copy.copy(self._current_variables)
return new
@@ -86,4 +86,4 @@ def _get_data(self, var, indarray, use_local=False):
elif ndims == 6:
data = var[indarray[0], indarray[1], indarray[2],
indarray[3], indarray[4], indarray[5]]
return data
return data
2 changes: 1 addition & 1 deletion paegan/cdm/grids/n_cell.py
@@ -18,7 +18,7 @@ def __init__(self, *args,**kwargs):
self.topology_var_name = None

def _copy(self):
new = NCellDataset(self._filename, self._datasettype)
new = NCellDataset(self._filepath, self._datasettype)
new._coordcache = copy.copy(self._coordcache)
new._current_variables = copy.copy(self._current_variables)
return new
2 changes: 1 addition & 1 deletion paegan/cdm/grids/r_grid.py
@@ -15,7 +15,7 @@ def __init__(self, *args,**kwargs):
super(RGridDataset,self).__init__(*args, **kwargs)

def _copy(self):
new = RGridDataset(self._filename, self._datasettype)
new = RGridDataset(self._filepath, self._datasettype)
new._coordcache = copy.copy(self._coordcache)
new._current_variables = copy.copy(self._current_variables)
return new
2 changes: 1 addition & 1 deletion paegan/cdm/grids/u_grid.py
@@ -1,2 +1,2 @@
class UGridDataset(object):
pass
pass
10 changes: 10 additions & 0 deletions tests/test_dataset.py
@@ -296,3 +296,13 @@ def test_rgrid_get_values(self):
assert pd._datasettype == 'rgrid'
values = pd.get_values(var="u", bbox=[-149, 59, -144, 61.5], timeinds=0)
assert values.size > 0


@unittest.skipIf(not os.path.exists(os.path.join(data_path, "pws_das_2014012600.nc")),
"Resource files are missing that are required to perform the tests.")
def test_aggregated_dataset(self):
datafile = os.path.join(data_path, "pws_das_20140126*.nc")
pd = CommonDataset.open(datafile)
assert pd._datasettype == 'rgrid'
values = pd.get_values(var="u", bbox=[-149, 59, -144, 61.5], timeinds=0)
assert values.size > 0
