Commit

Merge pull request #70 from kwilcox/master
Local file(s) support and cleanup
daf committed Jan 31, 2014
2 parents 4bbbfe3 + 2787dc5 commit 096e3dc
Showing 6 changed files with 74 additions and 43 deletions.
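In practical terms, this change lets CommonDataset.open accept a single local NetCDF file, a glob of local files (aggregated through netCDF4.MFDataset, retried with aggdim='time' if the default aggregation fails), or an already-open netCDF4/paegan dataset object, in addition to remote URLs. A minimal usage sketch (the file paths are illustrative and not part of this commit):

    from paegan.cdm.dataset import CommonDataset
    import netCDF4

    # Single local file (or an OPeNDAP URL), opened with netCDF4.Dataset
    pd = CommonDataset.open("/data/pws_das_2014012600.nc")

    # Glob of local files, aggregated with netCDF4.MFDataset
    pd = CommonDataset.open("/data/pws_das_20140126*.nc")

    # An already-open netCDF4.Dataset (or paegan Dataset) is passed through as-is
    pd = CommonDataset.open(netCDF4.Dataset("/data/pws_das_2014012600.nc"))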
97 changes: 59 additions & 38 deletions paegan/cdm/dataset.py
@@ -58,7 +58,6 @@
"lat_psi", "LAT_PSI",
]


def _sub_by_nan(data, ind):
"""
Function to subset a dimension variable by replacing values
@@ -96,6 +95,36 @@ def _sub_by_nan2(data, ind):

class CommonDataset(object):

@staticmethod
def nc_object(ncfile, tname='time'):

if isinstance(ncfile, basestring):
try:
return netCDF4.Dataset(ncfile)
except (IOError, RuntimeError, IndexError):
# Are we a set of files?
try:
return netCDF4.MFDataset(ncfile)
except (IOError, RuntimeError, IndexError):
try:
return netCDF4.MFDataset(ncfile, aggdim=tname)
except (IOError, RuntimeError, IndexError):
try:
# Unicode paths sometimes fail; retry with a plain str
return netCDF4.MFDataset(str(ncfile), aggdim=tname)
except Exception:
logger.exception("Can not open %s" % ncfile)
raise
except Exception:
logger.exception("Can not open %s" % ncfile)
raise
elif isinstance(ncfile, Dataset):
# Passed in paegan Dataset object
return ncfile.nc
elif isinstance(ncfile, netCDF4.Dataset) or isinstance(ncfile, netCDF4.MFDataset):
# Passed in a netCDF4 Dataset object
return ncfile

@staticmethod
def open(ncfile, xname='lon', yname='lat', zname='z', tname='time', **kwargs):
"""
@@ -110,27 +139,8 @@ def open(ncfile, xname='lon', yname='lat', zname='z', tname='time', **kwargs):
>> dataset = CommonDataset.open(url, dataset_type="cgrid")
"""

nc = None
filename = None

if isinstance(ncfile, str):
ncfile = unicode(ncfile.strip())

if isinstance(ncfile, unicode):
try:
nc = netCDF4.Dataset(ncfile)
filename = ncfile
except StandardError:
logger.error(ncfile)
raise
elif isinstance(ncfile, Dataset):
# Passed in paegan Dataset object
nc = ncfile.nc
elif isinstance(ncfile, netCDF4.Dataset):
# Passed in a netCDF4 Dataset object
nc = ncfile

datasettype = kwargs.get('dataset_type', None)
nc = CommonDataset.nc_object(ncfile)
filepath = ncfile

# Find the coordinate variables for testing, unknown if not found
keys = set(nc.variables)
@@ -147,6 +157,7 @@ def open(ncfile, xname='lon', yname='lat', zname='z', tname='time', **kwargs):
testvarx = nc.variables[xmatches[0]]

# Test the shapes of the coordinate variables to determine the grid type
datasettype = kwargs.get('dataset_type', None)
if datasettype is None:
if testvary.ndim > 1:
datasettype = "cgrid"
@@ -170,13 +181,13 @@ def open(ncfile, xname='lon', yname='lat', zname='z', tname='time', **kwargs):
from paegan.cdm.grids.r_grid import RGridDataset

if datasettype == 'ncell':
dataobj = NCellDataset(filename, datasettype,
dataobj = NCellDataset(filepath, datasettype,
zname=zname, tname=tname, xname=xname, yname=yname)
elif datasettype == 'rgrid':
dataobj = RGridDataset(filename, datasettype,
dataobj = RGridDataset(filepath, datasettype,
zname=zname, tname=tname, xname=xname, yname=yname)
elif datasettype == 'cgrid':
dataobj = CGridDataset(filename, datasettype,
dataobj = CGridDataset(filepath, datasettype,
zname=zname, tname=tname, xname=xname, yname=yname)
else:
dataobj = None
@@ -185,11 +196,9 @@ def open(ncfile, xname='lon', yname='lat', zname='z', tname='time', **kwargs):


class Dataset(object):
def __init__(self, filename, datasettype, xname='lon', yname='lat',
def __init__(self, filepath, datasettype, xname='lon', yname='lat',
zname='z', tname='time'):
self.nc = None
self._coordcache = dict()
self._filename = filename
self._datasettype = datasettype

self._possiblet = _possiblet
@@ -206,6 +215,7 @@ def __init__(self, filename, datasettype, xname='lon', yname='lat',
if tname not in self._possiblet:
self._possiblet.append(tname)

self._filepath = filepath
self.opennc()
self._current_variables = list(self.nc.variables.keys())

@@ -222,7 +232,7 @@ def getvariableinfo(self):
for var in self._current_variables:
variables[var] = {}
for attr in self.nc.variables[var].ncattrs():
variables[var][attr] = self.nc.variables[var].getncattr(attr)
variables[var][attr] = getattr(self.nc.variables[var], attr)
return variables

def lon2ind(self, var=None, **kwargs):
@@ -243,14 +253,25 @@ def get_xyind_from_bbox(self, var, bbox):
def get_xyind_from_point(self, var, point, **kwargs):
raise NotImplementedError

def closenc(self):
self.metadata = None
self.nc.close()
self.nc = None

def opennc(self):
self.nc = netCDF4.Dataset(self._filename)
self.metadata = self.nc.__dict__
try:
# Open the handle if it is None
assert self.nc is not None
# Raises an exception when the dataset has already been closed
self.nc.__str__()
except StandardError:
self.nc = CommonDataset.nc_object(self._filepath)
self.metadata = self.nc.__dict__

def closenc(self):
try:
# close will raise an error if the Dataset is already closed
self.nc.close()
except StandardError:
pass
finally:
self.metadata = None
self.nc = None

def gettimestep(self, var=None):
assert var in self._current_variables
@@ -382,7 +403,7 @@ def __str__(self):
[[
<Paegan Dataset Object>
Dataset Type: """ + self._datasettype + """
Resource: """ + self._filename + """
Resource: """ + self._filepath + """
Variables:
""" + str(k) + """
]]"""
Expand Down Expand Up @@ -499,7 +520,7 @@ def get_varname_from_stdname(self, standard_name=None, match=None):
return var_matches

def __repr__(self):
s = "CommonDataset(" + self._filename + \
s = "CommonDataset(" + self._filepath + \
", dataset_type='" + self._datasettype + "')"
return s

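The reworked opennc/closenc pair above is intended to be safe to call repeatedly: closenc swallows the error raised by an already-closed handle, and opennc only rebuilds self.nc (through CommonDataset.nc_object and the stored _filepath) when the handle is missing or closed. A rough sketch of that lifecycle (path illustrative):

    pd = CommonDataset.open("/data/pws_das_2014012600.nc")
    pd.closenc()   # closes the underlying netCDF4 handle
    pd.closenc()   # second call is a no-op instead of raising
    pd.opennc()    # reopens from pd._filepath; does nothing if already open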
4 changes: 2 additions & 2 deletions paegan/cdm/grids/c_grid.py
@@ -15,7 +15,7 @@ def __init__(self, *args,**kwargs):
super(CGridDataset,self).__init__(*args, **kwargs)

def _copy(self):
new = CGridDataset(self._filename, self._datasettype)
new = CGridDataset(self._filepath, self._datasettype)
new._coordcache = copy.copy(self._coordcache)
new._current_variables = copy.copy(self._current_variables)
return new
@@ -86,4 +86,4 @@ def _get_data(self, var, indarray, use_local=False):
elif ndims == 6:
data = var[indarray[0], indarray[1], indarray[2],
indarray[3], indarray[4], indarray[5]]
return data
return data
2 changes: 1 addition & 1 deletion paegan/cdm/grids/n_cell.py
@@ -18,7 +18,7 @@ def __init__(self, *args,**kwargs):
self.topology_var_name = None

def _copy(self):
new = NCellDataset(self._filename, self._datasettype)
new = NCellDataset(self._filepath, self._datasettype)
new._coordcache = copy.copy(self._coordcache)
new._current_variables = copy.copy(self._current_variables)
return new
2 changes: 1 addition & 1 deletion paegan/cdm/grids/r_grid.py
@@ -15,7 +15,7 @@ def __init__(self, *args,**kwargs):
super(RGridDataset,self).__init__(*args, **kwargs)

def _copy(self):
new = RGridDataset(self._filename, self._datasettype)
new = RGridDataset(self._filepath, self._datasettype)
new._coordcache = copy.copy(self._coordcache)
new._current_variables = copy.copy(self._current_variables)
return new
2 changes: 1 addition & 1 deletion paegan/cdm/grids/u_grid.py
@@ -1,2 +1,2 @@
class UGridDataset(object):
pass
pass
10 changes: 10 additions & 0 deletions tests/test_dataset.py
@@ -296,3 +296,13 @@ def test_rgrid_get_values(self):
assert pd._datasettype == 'rgrid'
values = pd.get_values(var="u", bbox=[-149, 59, -144, 61.5], timeinds=0)
assert values.size > 0


@unittest.skipIf(not os.path.exists(os.path.join(data_path, "pws_das_2014012600.nc")),
"Resource files are missing that are required to perform the tests.")
def test_aggregated_dataset(self):
datafile = os.path.join(data_path, "pws_das_20140126*.nc")
pd = CommonDataset.open(datafile)
assert pd._datasettype == 'rgrid'
values = pd.get_values(var="u", bbox=[-149, 59, -144, 61.5], timeinds=0)
assert values.size > 0
