# Source code for PseudoNetCDF.core._files

__all__ = ['PseudoNetCDFFile', 'netcdf', 'PseudoNetCDFVariables']
import unittest
from PseudoNetCDF._getreader import registerreader
from PseudoNetCDF.netcdf import NetCDFFile, NetCDFVariable
from PseudoNetCDF.pncwarn import warn
from collections import OrderedDict
from ._dimensions import PseudoNetCDFDimension, PseudoNetCDFDimensions
from ._variables import PseudoNetCDFVariable, PseudoNetCDFMaskedVariable
import numpy as np


class OrderedDefaultDict(OrderedDict):
    """An OrderedDict with defaultdict-like missing-key handling.

    The first positional argument, if given, is the default factory
    (a callable or None); remaining arguments initialize the mapping
    as for OrderedDict.
    """

    def __init__(self, *args, **kwargs):
        if args:
            factory = args[0]
            # Same contract as collections.defaultdict: factory must be
            # callable or None.
            if factory is not None and not callable(factory):
                raise TypeError('first argument must be callable or None')
            self.default_factory = factory
            args = args[1:]
        else:
            self.default_factory = None
        super(OrderedDefaultDict, self).__init__(*args, **kwargs)

    def __missing__(self, key):
        """Create, store and return a default value for key."""
        factory = self.default_factory
        if factory is None:
            raise KeyError(key)
        value = factory()
        self[key] = value
        return value


class PseudoNetCDFType(type):
    """
    Create a PseudoNetCDFType meta-class
    """

    def __init__(cls, name, bases, clsdict):
        # str(cls) renders as "<class 'package.module.Name'>"; the text
        # between the quotes is the fully qualified dotted name.
        qualpieces = str(cls).split('\'')[1].split('.')
        # Build the long registration name from the dotted path, dropping
        # the root package, private modules (leading '_') and 'core'.
        keptmods = [p for p in qualpieces[1:-1]
                    if not p.startswith('_') and p != 'core']
        longname = '.'.join(keptmods + [qualpieces[-1]])
        # An mro longer than 2 means this is a genuine subclass rather
        # than the bare self-registering base created below.
        if len(cls.mro()) > 2 and name not in ('PseudoNetCDFFile', 'WrapPnc'):
            gotshort = registerreader(name, cls)
            gotlong = registerreader(longname, cls)
            if not (gotshort or gotlong):
                warn('Not registered either as ' + name + ' or ' + longname)
        super(PseudoNetCDFType, cls).__init__(name, bases, clsdict)


# Instantiate the metaclass once to make a base class; classes derived from
# it (other than the exclusions inside the metaclass) are auto-registered
# as readers at class-creation time.
PseudoNetCDFSelfReg = PseudoNetCDFType('pnc', (object,), dict(__doc__='Test'))


class PseudoNetCDFFile(PseudoNetCDFSelfReg, object):
    """
    PseudoNetCDFFile provides an interface and standard set of
    methods that a file should present to act like a netCDF file
    using the Scientific.IO.NetCDF.NetCDFFile interface.
    """
    def getMap(self, maptype='basemap_auto', **kwds):
        """
        Create a map object matching this file's horizontal grid.

        Parameters
        ----------
        maptype : string
            choices 'basemap', 'basemap_auto', 'cartopy' (not yet)
            basemap : attempts to open a basemap with only supplied kwds
            basemap_auto : automatically adds llcrnrlon,llcrnrlat,
                           urcrnrlon,urcrnrlat based on longitude_bounds
        **kwds : keywords for basemap or cartopy

        Returns
        -------
        map : basemap or cartopy axis
        """
        if maptype.startswith('basemap'):
            from PseudoNetCDF.coordutil import basemap_from_proj4
            if maptype.endswith('_auto'):
                # Get edges for bounding; requires longitude_bounds and
                # latitude_bounds variables of 1, 2 or 3 dimensions.
                lonb = self.variables['longitude_bounds']
                latb = self.variables['latitude_bounds']
                if lonb.ndim == 3:
                    # 3-D bounds: last axis holds cell corners; corner 0 is
                    # the lower-left and corner 2 the upper-right
                    # (presumably counterclockwise corners -- TODO confirm)
                    llcrnrlon = lonb[0, 0, 0]
                    urcrnrlon = lonb[-1, -1, 2]
                elif lonb.ndim == 2:
                    llcrnrlon = lonb[0, 0]
                    urcrnrlon = lonb[-1, -1]
                elif lonb.ndim == 1:
                    llcrnrlon = lonb[0]
                    urcrnrlon = lonb[-1]
                if latb.ndim == 3:
                    llcrnrlat = latb[0, 0, 0]
                    urcrnrlat = latb[-1, -1, 2]
                elif latb.ndim == 2:
                    llcrnrlat = latb[0, 0]
                    urcrnrlat = latb[-1, -1]
                elif latb.ndim == 1:
                    llcrnrlat = latb[0]
                    urcrnrlat = latb[-1]
                edges = dict(llcrnrlon=llcrnrlon, llcrnrlat=llcrnrlat,
                             urcrnrlon=urcrnrlon, urcrnrlat=urcrnrlat)
                kwds.update(edges)
            # Both 'basemap' and 'basemap_auto' build the map from the
            # file's proj4 string; _auto merely adds corner keywords.
            myproj = self.getproj(withgrid=True, projformat='proj4')
            return basemap_from_proj4(myproj, **kwds)
        elif maptype == 'cartopy':
            raise ValueError('cartopy is not yet implemented')
        else:
            raise ValueError(
                'maptype must be basemap, basemap_auto, or cartopy')
    def getproj(self, withgrid=False, projformat='pyproj'):
        """
        Return this file's map projection in the requested form.

        Parameters
        ----------
        withgrid : boolean
            use grid units instead of meters
        projformat : string
            'pyproj' (default), 'proj4' or 'wkt' allows function to
            return a pyproj projection object or a string in the
            format of proj4 or WKT

        Returns
        -------
        proj : pyproj.Proj object (pyproj) or string (proj4, wkt)
        """
        # Imports are deferred so that projection libraries are only
        # required when a projection is actually requested.
        if projformat == 'pyproj':
            from PseudoNetCDF.coordutil import getproj
            return getproj(self, withgrid=withgrid)
        elif projformat == 'proj4':
            from PseudoNetCDF.coordutil import getproj4
            return getproj4(self, withgrid=withgrid)
        elif projformat == 'wkt':
            from PseudoNetCDF.coordutil import getprojwkt
            return getprojwkt(self, withgrid=withgrid)
        else:
            raise ValueError('projformat must be pyproj, proj4 or wkt')
[docs] def ll2xy(self, lon, lat): """ Converts lon/lat to x distances (no false easting/northing) Parameters ---------- lon : scalar or iterable longitudes in decimal degrees lat : scalar or iterable latitudes in decimal degrees Returns ------- x, y : tuple of arrays coordinates in map projection (meters or radians) """ lon = np.asarray(lon) lat = np.asarray(lat) return self.getproj()(lon, lat)
    def _getzdim(self):
        """
        Heuristic function finds and returns the name of the vertical
        dimension based on common dimension names. Overwrite method
        for explicit control.

        Returns
        -------
        result : {'LAY', 'bottom_top', 'level', 'lev', 'layer', 'lay', 'z'}

        Raises
        ------
        KeyError
            if no known vertical dimension name is present

        Examples
        --------
        >>> f = pnc.pncopen('', format='PseudoNetCDFFile')
        >>> f.createDimension('t', 4)
        >>> f.createDimension('x', 4)
        >>> f.createDimension('y', 4)
        >>> f.createDimension('z', 4)
        >>> f._getzdim()
        'z'
        """
        # for/else: the else clause runs when the loop exhausts without
        # returning, i.e. no candidate name matched.
        for dk in 'LAY bottom_top level lev layer lay z'.split():
            if dk in self.dimensions:
                return dk
        else:
            raise KeyError('Could not find z dimensions')

    def _gettdim(self):
        """
        Heuristic function finds and returns the name of the time
        dimension based on common dimension names. Overwrite method
        for explicit control.

        Returns
        -------
        result : {'time', 'TSTEP', 't', 'Time'}

        Raises
        ------
        KeyError
            if no known time dimension name is present

        Examples
        --------
        >>> f = pnc.pncopen('', format='PseudoNetCDFFile')
        >>> f.createDimension('t', 4)
        >>> f.createDimension('x', 4)
        >>> f.createDimension('y', 4)
        >>> f.createDimension('z', 4)
        >>> f._gettdim()
        't'
        """
        for dk in 'time TSTEP t Time'.split():
            if dk in self.dimensions:
                return dk
        else:
            raise KeyError('Could not find t dimensions')

    def _getydim(self):
        """
        Heuristic function finds and returns the name of the y or width
        dimension based on common dimension names. Overwrite method
        for explicit control.

        Returns
        -------
        result : {'latitude', 'lat', 'south_north', 'ROW', 'y'}

        Raises
        ------
        KeyError
            if no known y dimension name is present

        Examples
        --------
        >>> f = pnc.pncopen('', format='PseudoNetCDFFile')
        >>> f.createDimension('t', 4)
        >>> f.createDimension('x', 4)
        >>> f.createDimension('y', 4)
        >>> f.createDimension('z', 4)
        >>> f._getydim()
        'y'
        """
        for dk in 'latitude lat south_north ROW y'.split():
            if dk in self.dimensions:
                return dk
        else:
            raise KeyError('Could not find y dimensions')

    def _getxdim(self):
        """
        Heuristic function finds and returns the name of the x or height
        dimension based on common dimension names. Overwrite method
        for explicit control.

        Returns
        -------
        result : {'longitude', 'long', 'lon', 'west_east', 'COL', 'x'}

        Raises
        ------
        KeyError
            if no known x dimension name is present

        Examples
        --------
        >>> f = pnc.pncopen('', format='PseudoNetCDFFile')
        >>> f.createDimension('t', 4)
        >>> f.createDimension('x', 4)
        >>> f.createDimension('y', 4)
        >>> f.createDimension('z', 4)
        >>> f._getxdim()
        'x'
        """
        for dk in 'longitude long lon west_east COL x'.split():
            if dk in self.dimensions:
                return dk
        else:
            raise KeyError('Could not find x dimensions')
    def ll2ij(self, lon, lat, bounds='ignore', clean='none'):
        """
        Converts lon/lat to 0-based indicies (0,M), (0,N)

        Parameters
        ----------
        lon : scalar or iterable
            longitudes in decimal degrees
        lat : scalar or iterable
            latitudes in decimal degrees
        bounds : string
            ignore, error, warn if i,j are out of domain
        clean : string
            none - return values regardless of bounds;
            mask - mask values out of bounds;
            clip - return min(max(0, v), nx - 1)

        Returns
        -------
        i, j : indices (0-based) for variables
        """
        lon = np.asarray(lon)
        lat = np.asarray(lat)
        # Project in grid units so that coordinates map directly to
        # fractional cell indices.
        p = self.getproj(withgrid=True)
        x, y = p(lon, lat)
        # astype('i') truncates toward zero; NOTE(review): for negative
        # fractional coordinates this differs from floor -- confirm intended.
        i = np.asarray(x).astype('i')
        j = np.asarray(y).astype('i')
        nx = len(self.dimensions[self._getxdim()])
        ny = len(self.dimensions[self._getydim()])
        if bounds == 'ignore':
            pass
        else:
            lowi = (i < 0)
            lowj = (j < 0)
            highi = (i >= nx)
            highj = (j >= ny)
            outb = (lowi | lowj | highi | highj)
            nout = outb.sum()
            if nout > 0:
                message = '{} Points out of bounds; {}'.format(
                    nout, np.where(outb))
                if bounds == 'error':
                    raise ValueError(message)
                else:
                    warn(message)
        # clean is applied regardless of the bounds option
        if clean == 'clip':
            i = np.minimum(np.maximum(i, 0), nx - 1)
            j = np.minimum(np.maximum(j, 0), ny - 1)
        elif clean == 'mask':
            i = np.ma.masked_greater(np.ma.masked_less(i, 0), nx - 1)
            j = np.ma.masked_greater(np.ma.masked_less(j, 0), ny - 1)
        return i, j
[docs] def xy2ll(self, x, y): """ Converts x, y to lon, lat (no false easting/northing) Parameters ---------- x : scalar or iterable projected west-east coordinates y : scalar or iterable projected south-north coordinates Returns ------- lon, lat : scalars or iterables longitudes and latitudes in decimal degrees """ x = np.asarray(x) y = np.asarray(y) p = self.getproj() lon, lat = p(x, y, inverse=True) return lon, lat
[docs] def ij2ll(self, i, j): """ Converts i, j to lon, lat (no false easting/northing) using cell centers assuming 0-based i/j Parameters ---------- i : scalar/iterable indicies (0-based) for the west-east dimension j : scalar/iterable indicies (0-based) for the south-north dimension Returns ------- lon, lat : scalars or iterables longitudes and latitudes in decimal degrees """ i = np.asarray(i) j = np.asarray(j) p = self.getproj(withgrid=True) lon, lat = p(i + 0.5, j + 0.5, inverse=True) return lon, lat
[docs] def date2num(self, time, timekey='time'): """ Parameters ---------- time : array-like array of datetime.datetime objects timekey : str time variable key which requires units and should have calendar. If calendar is missing, standard is the default. default 'time' Returns ------- num : array-like time in relative time as defined by units of time variable (i.e., timekey) which defaults to 'time' """ from netCDF4 import date2num try: from datetime import timezone utc = timezone.utc except ImportError: from datetime import tzinfo utc = tzinfo.utc time = np.asarray(time) # netCDF4 date2num is timezone naive; assumes UTC when not # specified and converts to UTC internally # so, if a tzinfo is involved, it should be removed if any([t.tzinfo is not None for t in time[:]]): time = np.array([ t.astimezone(utc).replace(tzinfo=None) for t in time[:] ]) timeunits = self.variables[timekey].units.strip() calendar = getattr(self.variables[timekey], 'calendar', 'standard') num = date2num(time, timeunits, calendar.strip()) return num
[docs] def time2idx(self, time, dim='time', timekey=None, **kwds): """ Convert datetime objects to dimension indices Parameters ---------- time : array-like array of datetime.datetime objects dim : str dimension name for val2idx timekey : str time variable key. None defaults to dim kwds : mappable see val2idx Returns ------- idx : array-like time index (0-based) """ if timekey is None: timekey = dim time = np.asarray(time) nums = self.date2num(time, timekey=timekey) return self.val2idx(dim=dim, val=nums, **kwds)
    def time2t(self, time, ttype='nearest', index=True):
        """
        Deprecated: convert datetimes to (fractional) time indices.

        Parameters
        ----------
        time : array of datetime.datetime objects
        ttype : 'nearest', 'bounds', 'bounds_close'
        index : return index

        Returns
        -------
        t : fractional time or if index, integers for indexing
        """
        warn(
            "time2t is deprecated; transition to time2idx or date2num",
            DeprecationWarning
        )
        time = np.asarray(time)
        if ttype not in ('nearest', 'bounds', 'bounds_close'):
            warn('{} is not an option; defaulting to nearest'.format(ttype))
            ttype = 'nearest'
        if ttype == 'nearest':
            mytimes = self.getTimes()
        else:
            # bounds variants interpolate between time edges
            mytimes = self.getTimes(bounds=True)
        idx = np.arange(mytimes.size)
        # Find minimum resolution: first unit (finest to coarsest) at
        # which either time axis has any non-zero component
        for res in [
            'microsecond', 'second', 'minute', 'hour', 'day', 'month', 'year'
        ]:
            minres = res
            resvals = [getattr(d, res) for d in time]
            myresvals = [getattr(d, res) for d in mytimes]
            if not np.allclose(myresvals, 0):
                break
            if not np.allclose(resvals, 0):
                break
        # Translate minimum resolution to a numpy datetime64 unit
        if minres == 'microsecond':
            tu = 'datetime64[ns]'
        elif minres == 'second':
            tu = 'datetime64[s]'
        elif minres == 'minute':
            tu = 'datetime64[m]'
        elif minres == 'hour':
            tu = 'datetime64[h]'
        elif minres in ('year', 'month', 'day'):
            tu = 'datetime64[D]'
        else:
            tu = 'datetime64[ns]'
        # Convert input time to numpy datetime at resolution
        x = time.astype(tu).astype('d')
        # Convert file's time to numpy datetime at resolution
        xp = mytimes.astype(tu).astype('d')
        # Use interpolation methods with no bounding for nearest
        # and bounds_close
        if ttype in ('nearest', 'bounds_close'):
            out = np.interp(x, xp, idx)
            if index:
                imin = 0
                # bounds_close has one fewer interval than edges
                imax = idx[-1] + (0 if ttype == 'nearest' else -1)
                out = np.minimum(np.maximum(out, imin), imax)
                if ttype == 'nearest':
                    out = np.round(out, 0).astype('i')
                else:
                    out = np.floor(out).astype('i')
        # Use interpolation methods with bounding (NaN outside) for bounds
        else:
            out = np.interp(x, xp, idx, left=np.nan, right=np.nan)
            if index:
                out = np.ma.masked_less(np.ma.floor(out).astype('i'), 0)
        return out
[docs] def val2idx( self, dim, val, method='nearest', bounds='warn', left=None, right=None, clean='mask' ): """ Convert coordinate values to indices Parameters ---------- dim : str name of dimensions, which must have a coordinate variable val : array-like value in coordinate space method : str nearest, bounds, exact - each calculates the index differently - nearest : uses interp with coord values and rounds - bounds : uses interp between bounding values and truncates - exact : returns indices for exact coord values with other indices masked (clean keyword has no effect) bounds : str ignore, error, warn if i,j are out of domain left : scalar see np.interp right : scalar see np.interp clean : {'none', 'mask'} none - return values regardless of bounds; mask - mask invalid values (use with left/right=np.nan); has no affect with method exact Returns ------- i : array-like indices (0-based) for variables """ val = np.asarray(val) if method not in ('exact', 'nearest', 'bounds'): raise NotImplementedError(method + ' method not implemented') if bounds not in ('ignore', 'warn', 'error'): raise NotImplementedError(bounds + ' bounds not implemented') if clean not in ('none', 'mask'): raise NotImplementedError(clean + ' clean not implemented') dimv = self.variables[dim] dimvals = dimv[...] 
if dimvals.ndim > 1: raise ValueError( 'val2idx is only implemented for 1-D coordinate variables' ) if method == 'bounds': bounds_keys = [dim + '_bounds', dim + '_bnds'] if hasattr(dimv, 'bounds'): bounds_keys.insert(0, dimv.bounds) for dimbk in bounds_keys: if dimbk not in self.variables: continue dimbv = self.variables[dimbk] if dimbv.ndim == 1: dimevals = dimbv[:] elif dimbv.ndim == 2 and dimbv.shape[1] == 2: dimevals = np.append(dimbv[:, 0], dimbv[-1, 1]) else: raise ValueError( 'val2idx is only implemented for 1-D or 2-D bounds' + '; {} has {}'.format(dimbk, dimbv.shape) ) break else: warn('Approximating bounds for val2idx {}'.format(dim)) dval = np.diff(dimvals) / 2 start = dimvals[:1] end = dimvals[-1:] if (dval == dval[0]).all(): start -= dval[0] end += dval[-1] dimevals = np.concatenate([ start, dimvals[1:] - dval, end ]) else: dimevals = dimvals idx = np.arange(dimevals.size) ddimevals = np.diff(dimevals) if (ddimevals < 0).all(): dimevals[::-1] idx = idx[::-1] elif (ddimevals > 0).all(): pass else: raise ValueError('coordinate is neither ascending nor descending') # import pdb; pdb.set_trace() # left = dimevals[0] - 1 # right = dimevals[-1] + 1 fidx = np.interp(val, dimevals, idx, left=left, right=right) if method == 'bounds': if right is None or right == dimevals[-1]: fidx = np.minimum(fidx, dimvals.size - 1) if method == 'exact': fidx = np.ma.masked_where(~np.in1d(val, dimvals), fidx) if clean == 'mask': outfidx = np.ma.masked_invalid(fidx) else: outfidx = fidx if bounds != 'ignore': isleft = val < dimevals[0] isright = val > dimevals[-1] isout = isleft | isright outmesg = 'Values are out of bounds:\n{}'.format(val[isout]) if bounds == 'warn': warn(outmesg) elif bounds == 'error': raise ValueError(bounds) if method == 'nearest': outidx = np.round(outfidx, 0).astype('i') else: outidx = outfidx.astype('i') return outidx
    def get_dest(self):
        """
        Returns
        -------
        path : str
            path where a new file is created on some action

        Notes
        -----
        If None, a file is created in memory.
        Else, a netcdf file is created on disk.
        """
        # _destination is set by set_dest via object.__setattr__
        return getattr(self, '_destination', None)
[docs] def set_dest(self, path, **options): """ Sets the path where a new file is created on some action Arguments --------- path : str path for new file **options : keywords for constructor options for new file creation Returns ------- None """ options['filename'] = path return object.__setattr__(self, '_destination', options)
    def get_varopt(self):
        """
        Get options

        Arguments
        ---------
        None

        Returns
        -------
        options : dictionary of options
        """
        # Return a copy so that callers cannot mutate the stored options
        return getattr(self, '_varopt', {}).copy()
[docs] def set_varopt(self, **options): """ Set options to be used when creating any Variable Arguments --------- **options : options for createVariable optional keywords to be supplied when creating new variables in destination file Returns ------- None """ return object.__setattr__(self, '_varopt', options)
    def _newlike(self):
        """
        Internal function to return a file of the same class if a
        PseudoNetCDFFile
        """
        if self.get_dest() is not None:
            # A destination was configured; create the disk-backed file
            outf = netcdf(**self.get_dest())
        elif isinstance(self, PseudoNetCDFFile):
            # Preserve the subclass, bypassing __init__ (which may do I/O)
            outt = type(self)
            outf = outt.__new__(outt)
        else:
            outf = PseudoNetCDFFile()
        # Propagate variable-creation options to the new file
        outf.set_varopt(**self.get_varopt())
        return outf
[docs] def renameVariable(self, oldkey, newkey, inplace=False, copyall=True): """ Rename variable (oldkey) Parameters ---------- oldkey : string variable to be renamed newkey : string new dame for variable inplace : boolean create the new variable in this netcdf file (default False) copyall : boolean if not inplace, should all variables be copied to new file Returns ------- outf : PseudoNetCDFFile instance with renamed variable (this file if inplace = True) """ return self.renameVariables(**{oldkey: newkey})
    def renameVariables(self, inplace=False, copyall=True, **newkeys):
        """
        Rename variables for each oldkey: newkey dictionary item

        Parameters
        ----------
        **newkeys : dictionary
            where key is the oldkey and value is the newkey
        inplace : boolean
            create the new variable in this netcdf file (default False)
        copyall : boolean
            if not inplace, should all variables be copied to new file

        Returns
        -------
        outf : PseudoNetCDFFile instance with renamed variable
            (this file if inplace = True)
        """
        if inplace:
            outf = self
        else:
            # When copyall is False, the output file only gets structure
            # (props/dimensions) plus the renamed variables below
            outf = self._copywith(
                props=True, dimensions=True, variables=copyall, data=copyall)
        for oldkey, newkey in newkeys.items():
            outf.copyVariable(self.variables[oldkey], key=newkey)
            # Remove the old name if present (it is present when inplace
            # or copyall copied it over)
            if oldkey in outf.variables:
                del outf.variables[oldkey]
        return outf
    def renameDimension(self, oldkey, newkey, inplace=False):
        """
        Rename dimension (oldkey) in dimensions and in all variables

        Parameters
        ----------
        oldkey : string
            dimension to be renamed
        newkey : string
            new name for dimension
        inplace : boolean
            create the new variable in this netcdf file (default False)

        Returns
        -------
        outf : PseudoNetCDFFile instance with renamed variable
            (this file if inplace = True)
        """
        # Thin convenience wrapper around renameDimensions
        return self.renameDimensions(inplace=inplace, **{oldkey: newkey})
    def renameDimensions(self, inplace=False, **newkeys):
        """
        Rename dimension (oldkey) in dimensions and in all variables

        Parameters
        ----------
        **newkeys : dictionary
            where key is the oldkey and value is the newkey
        inplace : boolean
            create the new variable in this netcdf file (default False)

        Returns
        -------
        outf : PseudoNetCDFFile instance with renamed variable
            (this file if inplace = True)
        """
        if inplace:
            outf = self
        else:
            outf = self.copy()
        # Add the dimensions under their new names first, so variable
        # dimension tuples remain resolvable during the update
        for oldkey, newkey in newkeys.items():
            outf.dimensions[newkey] = outf.dimensions[oldkey]
        for k, v in outf.variables.items():
            olddims = v.dimensions
            newdims = tuple([newkeys.get(dk, dk) for dk in olddims])
            if newdims != olddims:
                v.dimensions = newdims
        # Only now remove the old names
        for oldkey, newkey in newkeys.items():
            del outf.dimensions[oldkey]
        return outf
    def insertDimension(self, newonly=True, multionly=False, before=None,
                        after=None, inplace=False, **newdims):
        """
        Insert dimensions with keys and lengths from newdims

        Parameters
        ----------
        **newdims : dictionary
            where key is the new dimension and value is the length
        newonly : boolean
            Only add dimension to variables that do not already have it,
            default True
        multionly : boolean
            Only add dimension if there are already more than one (good
            for ignoring coordinate dimensions)
        before : string
            if variable has this dimension, insert the new dimension
            before it. Otherwise, the variable is copied unchanged.
            (before takes precedence)
        after : string
            if variable has this dimension, insert the new dimension
            after it. Otherwise, the variable is copied unchanged.
        inplace : boolean
            create the new variable in this netcdf file (default False)

        Returns
        -------
        outf : PseudoNetCDFFile instance will new dimension in dimensions
            and variables

        Notes
        -----
        1. Adding a non unity dimension will cause the data to be repeated
           along the new axis.
        2. If order of addition matters, use multiple calls. newdims
           is a non-ordered dictionary
        """
        if inplace:
            outf = self
        else:
            outf = self.copy(variables=False)
        for dk, dv in newdims.items():
            if dk not in outf.dimensions:
                outf.createDimension(dk, dv)
            for vk, vv in self.variables.items():
                vdims = list(vv.dimensions)
                # Skip (copy unchanged) when the variable already has the
                # dimension (newonly) or is single-dimensioned (multionly)
                if (
                    (newonly and dk in vdims) or
                    (multionly and len(vdims) == 1)
                ):
                    outf.copyVariable(vv, key=vk, withdata=True)
                    continue
                ndims = [_dk for _dk in vdims]
                if before in vdims:
                    bi = vdims.index(before)
                elif after in vdims:
                    bi = vdims.index(after) + 1
                elif not (before is None and after is None):
                    # An anchor was requested but this variable lacks it;
                    # copy unchanged
                    outf.copyVariable(vv, key=vk, withdata=True)
                    continue
                else:
                    # No anchor: insert at the front
                    bi = 0
                ndims.insert(bi, dk)
                var = outf.copyVariable(
                    vv, key=vk, dimensions=ndims, withdata=False)
                # Broadcasting repeats the data along the new axis when
                # its length exceeds one
                var[...] = np.expand_dims(self.variables[vk][...], axis=bi)
        return outf
    def reorderDimensions(self, oldorder, neworder, inplace=False):
        """
        Return a file with each variable's dimensions reordered.

        Parameters
        ----------
        oldorder : iterable of strings
            dimension names in existing order
        neworder : iterable of strings
            dimension names in new order; must cover every dimension of
            each affected variable (an assertion enforces this)
        inplace : boolean
            modify this file (default False)

        Returns
        -------
        outf : PseudoNetCDFFile instance with dimensions reordered in
            variables
        """
        if inplace:
            outf = self
        else:
            outf = self.copy(variables=True)
        oldorder = tuple(oldorder)
        neworder = tuple(neworder)
        for vk, vv in self.variables.items():
            # The target order restricted to this variable's dimensions
            varneworder = [dk for dk in neworder if dk in vv.dimensions]
            varorder = [dk for dk in vv.dimensions]
            if len(varneworder) > 0:
                # NOTE(review): .copy() of vv[:] is assumed to preserve a
                # variable-like object with a writable .dimensions
                # attribute -- confirm vv[:] does not return a bare ndarray
                newvals = vv[:].copy()
                for newdi, newdk in enumerate(varneworder):
                    axisidx = varorder.index(newdk)
                    if axisidx == newdi:
                        continue
                    # Move the axis into position and mirror the move in
                    # the bookkeeping list
                    newvals = np.rollaxis(newvals, axis=axisidx, start=newdi)
                    varorder.pop(axisidx)
                    varorder.insert(newdi, newdk)
                assert(varorder == varneworder)
                newvals.dimensions = tuple(varorder)
                outf.variables[vk] = newvals
            else:
                pass
        return outf
    def eval(self, expr, inplace=False, copyall=False):
        """
        Evaluate expr and return a PseudoNetCDFFile object with result

        Parameters
        ----------
        expr : string
            expression to evaluate; executed with exec, so it must only
            be used with trusted input (arbitrary code will run)
        inplace : boolean
            create the new variable in this netcdf file (default False)
        copyall : boolean
            if not inplace, should all variables be copied to new file

        Returns
        -------
        outf : PseudoNetCDFFile instance with the assigned variable(s)
            (this file if inplace = True)
        """
        import numpy as np
        # Build the execution namespace: variables, file properties,
        # numpy and the file itself
        comp = compile(expr, 'none', 'exec')
        vardict = {k: v for k, v in self.variables.items()}
        for pk in self.ncattrs():
            if pk not in vardict:
                vardict[pk] = getattr(self, pk)
        vardict['np'] = np
        vardict['self'] = self
        from symtable import symtable
        symtbl = symtable(expr, '<pncexpr>', 'exec')
        symbols = symtbl.get_symbols()
        # Find the first referenced symbol that is an existing variable;
        # its properties/dimensions are used as a template for outputs
        for symbol in symbols:
            key = symbol.get_name()
            if key in vardict:
                tmpvar = vardict[key]
                if isinstance(tmpvar, (PseudoNetCDFVariable, NetCDFVariable)):
                    break
        else:
            # No referenced variable found; use an empty template
            key = 'N/A'
            tmpvar = PseudoNetCDFVariable(None, 'temp', 'f', ())
        if inplace:
            outf = self
        else:
            if copyall:
                outf = self.copy()
            else:
                newkeys = [key]
                outf = self.subsetVariables(newkeys)
                try:
                    del outf.variables[key]
                except Exception:
                    pass
        propd = dict([(k, getattr(tmpvar, k)) for k in tmpvar.ncattrs()])
        propd['expression'] = expr
        dimt = tmpvar.dimensions
        vardict['outf'] = self
        # Assign expression to new variable.
        exec(comp, None, vardict)
        # Harvest every symbol the expression assigned to
        assignedkeys = [s.get_name() for s in symbols if s.is_assigned()]
        assignedkeys = [k for k in assignedkeys if k in vardict]
        for key in assignedkeys:
            try:
                del outf.variables[key]
            except Exception:
                pass
            val = vardict[key]
            # if the output variable has no dimensions,
            # there is likely a problem and the output
            # should be defined.
            if (
                isinstance(val, (PseudoNetCDFVariable,)) and
                val.dimensions != ()
            ):
                outf.variables[key] = val
            else:
                outf.createVariable(key, val.dtype.char, dimt,
                                    values=val, **propd)
        return outf
    def mask(
        self, where=None, less=None, less_equal=None, greater=None,
        greater_equal=None, values=None, equal=None, invalid=False,
        mask=None, dims=None, fill_value=-999, coords=False, verbose=0
    ):
        """
        Apply mask to all variables of same shape or just where
        dimensions match.

        Parameters
        ----------
        where : array-like
            boolean array to use as a mask see numpy.ma.masked_where
        greater : scalar
            mask when values are greater than this value
            see numpy.ma.masked_greater
        less : scalar
            mask when values are less than this value
            see numpy.ma.masked_less
        greater_equal : scalar
            mask when values are greater than or equal to this value
            see numpy.ma.masked_greater
        less_equal : scalar
            mask when values are less than or equal to this value
            see numpy.ma.masked_less
        values : scalar
            mask when values are equal to this value within standard
            floating point see numpy.ma.masked_values
        equal : scalar
            mask when values are exactly this value (i.e., integers)
            see numpy.ma.masked_equal
        invalid : boolean
            mask when values are invalid, see numpy.ma.masked_invalid
        mask : array-like
            alias for where
        dims : iterable of strings
            only apply "mask" or "where" to variables with these
            dimensions no effet on other masks
        fill_value : scalar
            value to use as the fill_value for new arrays
        coords : boolean
            if True, apply masks to coordinate variables.
            Default, False
        verbose : int
            level of verbosity for function, mostly for debugging

        Returns
        -------
        outf : PseudoNetCDFFile instance with masked variables

        See Also
        --------
        numpy.ma : all masks are passing throught to numpy.ma.masked_...

        Notes
        -----
        mask options are not mutually exclusive. the order is where,
        greater, greater_equal, less, less_equal, values, equal, invalid
        """
        from time import time
        if where is None and mask is not None:
            where = mask
        elif mask is not None and where is not None:
            raise ValueError(
                'mask is an alias to where; supply one or the other, ' +
                'but not both'
            )
        if dims is None:
            # Default the target dimensions to the mask's own, if any
            maskdims = getattr(where, 'dimensions', dims)
        else:
            maskdims = dims
        coordkeys = self.getCoords()
        outf = self.copy(variables=False)
        nitems = len(self.variables.keys())
        if verbose == 1:
            # Progress bar scaffold: one '.' per variable below
            print('|' + '=' * nitems + '|', flush=True)
            print('|', end='', flush=True)
        for vk, vv in self.variables.items():
            if verbose > 1:
                t0 = time()
                print(vk, end='', flush=True)
            elif verbose > 0:
                print('.', end='', flush=True)
            newvar = outf.copyVariable(
                vv, key=vk, fill_value=fill_value, withdata=False
            )
            if vk in coordkeys and not coords:
                # Coordinate variables pass through unmasked by default
                newvar[...] = vv[...]
                continue
            vals = vv[...]
            if where is not None:
                # Apply "where" only when this variable's dimensions match
                # the requested/mask dimensions (or shapes match when no
                # dimensions were given)
                if (
                    maskdims == vv.dimensions or (
                        maskdims is None and where.shape == vals.shape
                    )
                ):
                    vals = np.ma.masked_where(where, vals)
            # The scalar masks below apply to every (non-coordinate)
            # variable, regardless of dims
            if greater is not None:
                vals = np.ma.masked_greater(vals, greater)
            if greater_equal is not None:
                vals = np.ma.masked_greater_equal(vals, greater_equal)
            if less is not None:
                vals = np.ma.masked_less(vals, less)
            if less_equal is not None:
                vals = np.ma.masked_less_equal(vals, less_equal)
            if values is not None:
                vals = np.ma.masked_values(vals, values)
            if equal is not None:
                vals = np.ma.masked_equal(vals, equal)
            if invalid:
                vals = np.ma.masked_invalid(vals)
            if verbose > 1:
                t1 = time()
                print(t1 - t0, end='\n', flush=True)
            newvar[...] = vals[...]
        if verbose > 1:
            pass
        elif verbose > 0:
            print('')
        return outf
    def plot(self, varkey, plottype='longitude-latitude', ax_kw=None,
             plot_kw=None, cbar_kw=None, map_kw=None, dimreduction='mean'):
        """
        Plot a variable as a 2-D pseudocolor map or a vertical profile.

        Parameters
        ----------
        varkey : string
            the variable to plot
        plottype : string
            longitude-latitude, latitude-pressure, longitude-pressure,
            vertical-profile, time-longitude, time-latitude,
            time-pressure default, longitude-latitude
        ax_kw : dictionary
            keywords for the axes to be created
        plot_kw : dictionary
            keywords for the plot (plot, scatter, or pcolormesh) to be
            created
        cbar_kw : dictionary
            keywords for the colorbar
        map_kw : dictionary
            keywords for the getMap routine, which is only used with
            plottype='longitude-latitude'
        dimreduction : string or function
            dimensions not being used in the plot are removed using
            applyAlongDimensions(dimkey=dimreduction) where each
            dimenions

        Returns
        -------
        ax : matplotlib axes with the plot
        """
        import matplotlib.pyplot as plt
        from ..coordutil import getbounds
        # Default mutable keyword dicts safely
        if ax_kw is None:
            ax_kw = {}
        if plot_kw is None:
            plot_kw = {}
        if cbar_kw is None:
            cbar_kw = {}
        if map_kw is None:
            map_kw = {}
        else:
            # copy so popped keys below do not mutate the caller's dict
            map_kw = map_kw.copy()
        apply2dim = {}
        var = self.variables[varkey]
        varunit = varkey
        if hasattr(var, 'units'):
            varunit += ' ' + var.units.strip()
        dimlens = dict([(dk, len(self.dimensions[dk]))
                        for dk in var.dimensions])
        dimpos = dict([(dk, di) for di, dk in enumerate(var.dimensions)])
        # plottype encodes the x and y quantities as 'x-y'
        xkey, ykey = plottype.split('-')
        if not ykey == 'profile':
            # Reduce every non-plotted, non-singleton dimension
            for dimkey in list(dimlens):
                if dimkey not in (xkey, ykey) and dimlens.get(dimkey, 1) > 1:
                    apply2dim[dimkey] = dimreduction
        if len(apply2dim) > 0:
            myf = self.subsetVariables([varkey])\
                .applyAlongDimensions(**apply2dim)
            var = myf.variables[varkey]
            dimlens = dict([(dk, len(self.dimensions[dk]))
                            for dk in var.dimensions])
        else:
            myf = self
        if ykey in ('profile',):
            # Collapse all non-vertical dimensions into one sample axis
            vaxi = var.dimensions.index(xkey)
            vsize = var.shape[vaxi]
            vals = np.rollaxis(var[:], vaxi).reshape(vsize, -1)
        else:
            vals = var[:].squeeze()
        if xkey == 'time':
            # Convert time centers to edges by extending the last step
            xm = myf.getTimes()
            dx = np.diff(xm)[-1]
            x = np.append(xm, xm[-1] + dx)
            x = plt.matplotlib.dates.date2num(x)
        else:
            x = getbounds(myf, xkey)
        ax = plt.gca(**ax_kw)
        if ykey in ('profile',):
            # Shaded min/max envelope with the mean overplotted
            y = getbounds(myf, xkey)
            x1 = vals[:].min(1)
            xm = vals[:].mean(1)
            x2 = vals[:].max(1)
            ax.fill_betweenx(y=y, x1=x1, x2=x2, label=varkey + '(min, max)')
            ax.plot(xm, y, label=varkey, **plot_kw)
            ax.set_ylabel(xkey)
            ax.set_xlabel(varunit)
            return ax
        if ykey == 'time':
            ym = myf.getTimes()
            dy = np.diff(ym)[-1]
            y = np.append(ym, ym[-1] + dy)
            y = plt.matplotlib.dates.date2num(y)
        else:
            y = getbounds(myf, ykey)
        # pcolormesh expects y-by-x values; transpose when the variable
        # stores x before y
        if dimpos[xkey] < dimpos[ykey]:
            vals = vals.T
        p = ax.pcolormesh(x, y, vals, **plot_kw)
        cbar_kw.setdefault('label', varunit)
        ax.figure.colorbar(p, **cbar_kw)
        if xkey == 'time':
            ax.xaxis.set_major_formatter(
                plt.matplotlib.dates.AutoDateFormatter(
                    plt.matplotlib.dates.AutoDateLocator()
                )
            )
        if ykey == 'time':
            ax.yaxis.set_major_formatter(
                plt.matplotlib.dates.AutoDateFormatter(
                    plt.matplotlib.dates.AutoDateLocator()
                )
            )
        if plottype == 'longitude-latitude':
            # Best-effort map overlay; silently skipped if basemap or the
            # required bounds variables are unavailable
            try:
                map_kw = map_kw.copy()
                coastlines = map_kw.pop('coastlines', True)
                countries = map_kw.pop('countries', True)
                states = map_kw.pop('states', False)
                counties = map_kw.pop('counties', False)
                bmap = myf.getMap(**map_kw)
                if coastlines:
                    bmap.drawcoastlines(ax=ax)
                if countries:
                    bmap.drawcountries(ax=ax)
                if states:
                    bmap.drawstates(ax=ax)
                if counties:
                    bmap.drawcounties(ax=ax)
            except Exception:
                pass
        else:
            ax.set_xlabel(xkey)
            ax.set_ylabel(ykey)
        return ax
[docs] def setncatts(self, attdict): """ Set ncattrs from attdict keys and values Parameters ---------- attdict : dictionary key/value pairs of properties Returns ------- None """ for pk, pv in attdict.items(): setattr(self, pk, pv)
[docs] def getncatts(self): """ Return all ncattrs keys and values as a dictionary Returns ---------- attdict : dictionary key/value pairs of properties """ outd = OrderedDict() for pk in self.ncattrs(): outd[pk] = getattr(self, pk) return outd
    @classmethod
    def from_ncvs(cls, *invars, **invarkw):
        """
        Build a file from netCDF-like variables.

        Arguments
        ---------
        invars : list
            NetCDF-like variable must have standard_name, long_name or
            name
        invarkw : kwds
            NetCDF-like variables

        Returns
        -------
        outf : PseudoNetcdf-like file
        """
        outf = cls()
        for invar in invars:
            # Create any missing dimensions; assert consistency when a
            # dimension already exists
            for dk, ds in zip(invar.dimensions, invar.shape):
                if dk in outf.dimensions:
                    assert(ds == len(outf.dimensions[dk]))
                else:
                    outf.createDimension(dk, ds)
            outf.copyVariable(invar)
        for inkey, invar in invarkw.items():
            for dk, ds in zip(invar.dimensions, invar.shape):
                if dk in outf.dimensions:
                    assert(ds == len(outf.dimensions[dk]))
                else:
                    outf.createDimension(dk, ds)
            # Keyword name overrides the variable's own name
            outf.copyVariable(invar, key=inkey)
        return outf
[docs] @classmethod def from_ncf(cls, infile): """ Arguments --------- infile : PseudoNetCDF-like file Returns ------- outf : PseudoNetcdf-like file """ outf = cls() for pk in infile.ncattrs(): pv = getattr(infile, pk) setattr(outf, pk, pv) for dk, dv in infile.dimensions.items(): outf.copyDimension(dv, key=dk) for vk, vv in infile.variables.items(): outf.copyVariable(vv, key=vk) return outf
def _copywith(self, props=True, dimensions=True, variables=False, data=False): """ Internal function for making copies of the same type Parameters ---------- props : boolean include properties (default: True) dimensions : boolean include dimensions (default: True) variables : boolean include variable structures (default: False) data : boolean include variable data (default: False) Returns ------- outf : PseudoNetCDFFile instance with some parts Notes ----- Internal function does not return variables by default. This is useful for functions like slice, apply, eval, etc. The _ in _copywith means this is a private function and the call interface may change. """ outf = self._newlike() outf._operator_exclude_vars = tuple(self._operator_exclude_vars) if props: for pk in self.ncattrs(): setattr(outf, pk, getattr(self, pk)) if dimensions: for dk, dv in self.dimensions.items(): outf.copyDimension(dv, key=dk) if variables: for vk, vv in self.variables.items(): outf.copyVariable(vv, key=vk, withdata=data) return outf
[docs] def copy(self, props=True, dimensions=True, variables=True, data=True): """ Function for making copies of the same type Parameters ---------- props : boolean include properties (default: True) dimensions : boolean include dimensions (default: True) variables : boolean include variable structures (default: True) data : boolean include variable data (default: True) Returns ------- outf : PseudoNetCDFFile instance """ return self._copywith(props=props, dimensions=dimensions, variables=variables, data=data)
    def interpDimension(self, dimkey, newdimvals, coordkey=None,
                        **interpkwds):
        """
        Interpolate all variables along one named dimension.

        Parameters
        ----------
        dimkey : string
            the new dimension for interpolation
        newdimvals : iterable
            the new values to interpolate to
        coordkey : string
            the variable to use as the old coordinate values
        interptype : string
            'linear' or 'conserve'. linear uses a linear interpolation
            conserve uses a mass conserving interpolation
        extrapolate : boolean
            allow extrapolation beyond bounds with linear, default False
        fill_value : numeric value
            set fill value (e.g, nan) to prevent extrapolation or edge
            continuation

        Returns
        -------
        outf : PseudoNetCDFFile instance with all variables interpolated

        Notes
        -----
        When extrapolate is false, the edge values are used for points
        beyond the inputs.
        """
        from ..coordutil import getinterpweights
        # The old coordinate defaults to the coordinate variable named dimkey.
        if coordkey is None:
            olddimvals = self.variables[dimkey]
        else:
            olddimvals = self.variables[coordkey]
        if olddimvals.ndim == 1 and newdimvals.ndim == 1:
            # 1-D case: a single weight matrix applies at every position,
            # so delegate the axis-walking to applyAlongDimensions.
            weights = getinterpweights(olddimvals, newdimvals, **interpkwds)

            def interpd(data):
                # Called per-column (1-D) by applyAlongDimensions; the
                # ndim > 1 branch handles block application.
                # NOTE(review): the broadcast pattern in the else branch
                # assumes a specific rank for data -- confirm with callers.
                if data.ndim == 1:
                    newdata = (weights * data[:, None]).sum(0)
                else:
                    newdata = (weights[None, :, :, None, None] *
                               data[:, :, None]).sum(1)
                return newdata

            outf = self.applyAlongDimensions(**{dimkey: interpd})
        else:
            # N-D coordinate case: weights differ at each (leading, trailing)
            # index, so interpolate column-by-column.
            outf = self.copy(props=True, dimensions=False, variables=False)
            olddim = olddimvals.dimensions
            newdim = newdimvals.dimensions
            if olddim != newdim:
                raise ValueError('Can only interpolate if coordinate ' +
                                 'variable have the same named dimensions')
            dimaxis = olddim.index(dimkey)
            ndl = newdimvals.shape[dimaxis]
            # Dimensions keep their lengths except the interpolated one.
            for dk, dv in self.dimensions.items():
                if dk == dimkey:
                    dl = ndl
                else:
                    dl = len(dv)
                outf.copyDimension(dv, key=dk, dimlen=dl)
            for vk, vv in self.variables.items():
                # Variables on the interpolated dimensions are copied
                # structure-only; their data is filled in below.
                nvv = outf.copyVariable(
                    vv, key=vk, withdata=vv.dimensions != newdim)
            # Shapes before and after the interpolation axis.
            Ni, Nk = olddimvals.shape[:dimaxis], olddimvals.shape[dimaxis + 1:]
            s_ = np.s_
            for ii in np.ndindex(Ni):
                for kk in np.ndindex(Nk):
                    # Old and new coordinate columns at this position.
                    od = olddimvals[ii + s_[:, ] + kk]
                    nd = newdimvals[ii + s_[:, ] + kk]
                    weights = getinterpweights(od, nd, **interpkwds)
                    for nvk, nvv in outf.variables.items():
                        if nvv.dimensions != newdim:
                            continue
                        vv = self.variables[nvk]
                        interpedv = (weights *
                                     vv[ii + s_[:, ] + kk][:, None]).sum(0)
                        nvv[ii + s_[..., ] + kk] = interpedv
        return outf
[docs] def applyAlongDimensions(self, verbose=0, **dimfuncs): """ Similar to numpy.apply_along_axis, but for damed dimensions and processes dimensions as well as variables Parameters ---------- dimfuncs : dictionary key value pairs where the key is a dimensions and the value is a 1D function (func1d) or a dictionary. If the value is a dictionary it must include func1d as a function and any keyword arguments as additional options verbose : integer 0 silent, 1 show variable, 2 show dimensions and variables Returns ------- outf : PseudoNetCDFFile instance with variables and dimensions after processing """ outf = self._copywith(props=True, dimensions=False) dimlens = OrderedDict() for dk, dv in self.dimensions.items(): dimlens[dk] = len(dv) for dk, df in dimfuncs.items(): dv = self.dimensions[dk] if dk in dimfuncs: if verbose > 1: print(dk, flush=True) if dk in self.variables: dvar = self.variables[dk] if dvar.ndim != 1: dvar = np.arange(len(dv)) else: dvar = np.arange(len(dv)) if isinstance(df, str): newdl = getattr(dvar[...], df)(keepdims=True).size else: newdl = df(dvar[:]).size else: newdl = len(dv) dimlens[dk] = newdl for dk, dv in self.dimensions.items(): newdl = dimlens[dk] outf.copyDimension(dv, key=dk, dimlen=newdl) if verbose == 1: print('|' + '=' * len(self.variables.keys()) + '|', flush=True) print('|', end='', flush=True) for vark, varo in self.variables.items(): vdims = varo.dimensions newvals = varo[...] 
dik = list(enumerate(vdims)) for di, dk in dik[::-1]: if dk in dimfuncs: if verbose > 1: print(' ' * 100, '\r', vark, dk, end='\r', flush=True) elif verbose > 0: print('.', end='', flush=True) opts = dict(axis=di, arr=newvals) dfunc = dimfuncs[dk] if isinstance(dfunc, dict): opts.update(dfunc) noopts = False else: opts['func1d'] = dfunc noopts = True if noopts and isinstance(dfunc, str): newvals = getattr(newvals, dfunc)( axis=di, keepdims=True) else: newvals = np.apply_along_axis(dfunc, di, newvals) newvaro = outf.copyVariable(varo, key=vark, withdata=False) newvaro[...] = newvals if verbose > 0: print() return outf
[docs] def getTimes(self, datetype='datetime', bounds=False): """ Get an array of datetime objects Parameters ---------- datetype : string or numpy.dtype 'datetime' or datetime64 dtype bounds : boolean get time boundaries Returns ------- out : array datetime objects or array of numpy's datetype type Notes ----- self must have a time or TFLAG variable """ from PseudoNetCDF.coordutil import _parse_ref_date from datetime import date, datetime, timedelta, timezone utc = timezone.utc _calendaryearlike = {'noleap': 1970, '365_day': 1970, 'all_leap': 1972, '366_day': 1972} if 'time' in self.variables.keys(): time = self.variables['time'] timeunits = time.units.strip() calendar = getattr(time, 'calendar', 'gregorian').lower() if bounds: if 'time_bounds' in self.variables.keys(): time = self.variables['time_bounds'] time = np.append(time[:, 0], time[-1, 1]) else: dts = np.diff(time) dt = dts.mean() if (dt != dts).all(): warn('Time bounds are approximate') time = np.append(time - dt / 2, time[-1] + dt / 2) if 'since' in timeunits: unit, base = timeunits.split(' since ') # Get the reference date refdate = _parse_ref_date(base) if calendar in _calendaryearlike: refyear = refdate.year # Get a year for relative day calculations yearlike = _calendaryearlike[calendar] # In that year, how many seconds and days are there yearseconds = (date(yearlike + 1, 1, 1) - date(yearlike, 1, 1)).total_seconds() yeardays = yearseconds / 3600 / 24 # Get a new reference date in yearlike crefdate = datetime(yearlike, 1, 1, tzinfo=utc) if refdate.month != 1 or refdate.day != 1: # Get start date in yearlike refcdate = datetime( yearlike, refdate.month, refdate.day, tzinfo=utc) # Calculate delta in years addyears = ( crefdate - refcdate).total_seconds() / yearseconds else: addyears = 0 # Convert time to fractional years, including change in # reference incrdenom = {'years': 1, 'days': yeardays, 'hours': yeardays * 24, 'minutes': yeardays * 24 * 60, 'seconds': yeardays * 24 * 60}[unit] fracyearincrs = 
time[:] / incrdenom + addyears # Split into years and days yearincrs = np.array(fracyearincrs // 1).astype('i') dayincrs = (fracyearincrs % 1) * yeardays # Add days to the calendar year reference cdays = [crefdate + timedelta(days=dayinc) for dayinc in dayincrs] try: # Combine calendar specific month and day with new year out = np.array([ datetime(refyear + yearinc, cday.month, cday.day, tzinfo=utc) for yearinc, cday in zip(yearincrs, cdays)]) except Exception: warn(('Years calculated from %d day year, but ' + 'month/days calculated for actual year. ' + 'Usually means data has Feb 29th in a non ' + 'leap year') % yeardays) out = np.array([ datetime(refyear + yearinc, 1, 1, tzinfo=utc) + timedelta(days=float(dayinc)) for yearinc, dayinc in zip(yearincrs, dayincrs)]) else: out = refdate + \ np.array([timedelta(**{unit: float(i)}) for i in time[:]]) else: return time elif 'TFLAG' in self.variables.keys(): dates = self.variables['TFLAG'][:][:, 0, 0] times = self.variables['TFLAG'][:][:, 0, 1] yyyys = (dates // 1000).astype('i') jjj = dates % 1000 hours = times // 10000 minutes = times % 10000 // 100 seconds = times % 100 days = jjj + (hours + minutes / 60. + seconds / 3600.) / 24. 
out = np.array([datetime(yyyy, 1, 1, tzinfo=utc) + timedelta(days=day - 1) for yyyy, day in zip(yyyys, days)]) if bounds: if hasattr(self, 'TSTEP'): tstep = getattr(self, 'TSTEP') sh = tstep // 10000 * 3600 sm = tstep % 10000 // 100 * 60 ss = tstep % 100 dt = timedelta(seconds=sh + sm + ss) else: dts = np.diff(out) dt = dts.mean() if (dt != dts).all(): warn('Time bounds are approximate') out = np.append(out, out[-1] + dt) elif hasattr(self, 'SDATE') and hasattr(self, 'STIME') and \ hasattr(self, 'TSTEP') and 'TSTEP' in self.dimensions: refdate = datetime.strptime( '%07d %06d+0000' % (self.SDATE, self.STIME), '%Y%j %H%M%S%z') tstepstr = '%06d' % self.TSTEP timeincr = timedelta(seconds=int(tstepstr[-2:])) + \ timedelta(minutes=int(tstepstr[-4:-2])) + \ timedelta(hours=int(tstepstr[:-4])) ntimes = len(self.dimensions['TSTEP']) if bounds: ntimes += 1 timeincrs = timeincr * np.arange(ntimes) out = refdate + timeincrs elif 'tau0' in self.variables.keys(): out = datetime(1985, 1, 1, 0, tzinfo=utc) + \ np.array([timedelta(hours=i) for i in self.variables['tau0'][:]]) if bounds and 'tau1' in self.variables.keys(): oute = ( datetime(1985, 1, 1, 0, tzinfo=utc) + np.array([ timedelta(hours=i) for i in self.variables['tau1'][:] ]) ) out = np.append(out, oute[-1]) else: raise ValueError('cannot understand time for file') if datetype == 'datetime': return out else: return np.array(out, dtype=datetype)
[docs] def stack(self, other, stackdim): """ Concatenates all variables on stackdim Parameters ---------- other : instance or list of PseudoNetCDFFiles files to add to this file along stackdim stackdim : str dimension name Returns ------- outf : PseudoNetCDFFile instance with stacked variables and dimension equal to new length """ from collections import Iterable outf = self._copywith(props=True, dimensions=False) if isinstance(other, Iterable): fs = [self] + list(other) else: fs = [self, other] dimensions = [f_.dimensions for f_ in fs] shareddims = {} for dimk, dim in self.dimensions.items(): if dimk == stackdim: continue dimlens = [len(dims[dimk]) for dims in dimensions] if all([len(dim) == i for i in dimlens]): shareddims[dimk] = len(dim) differentdims = [set(dims.keys()).difference( shareddims.keys()) for dims in dimensions] assert(all([different.union([stackdim]) == set([stackdim]) for different in differentdims])) for dimkey in shareddims: ind = self.dimensions[dimkey] outf.copyDimension(ind, key=dimkey) newdl = sum([len(dims[stackdim]) for dims in dimensions]) outf.copyDimension(self.dimensions[stackdim], key=stackdim, dimlen=newdl) for tmpf in fs: for varkey, var in tmpf.variables.items(): if stackdim not in var.dimensions: if varkey in outf.variables: if ( np.array_equal(outf.variables[varkey][...], var[...]) ): pass elif varkey not in self.dimensions: warn(('Got duplicate variables for %s without ' + 'stackable dimension; first value retained') % varkey) continue else: outvals = var[...] else: if varkey not in outf.variables.keys(): axisi = list(var.dimensions).index(stackdim) outvals = np.ma.concatenate( [f_.variables[varkey][:] for f_ in fs], axis=axisi) else: continue outvar = outf.copyVariable(var, key=varkey, withdata=False) outvar[...] = outvals return outf
[docs] def subsetVariables( self, varkeys, inplace=False, exclude=False, keepcoords=True ): """ Return a PseudoNetCDFFile with only varkeys Parameters ---------- varkeys : iterable of strings keys to keep inplace : boolean if true (default false), then remove other variable from this file exclude : boolean if True (default False), then remove just these variables keepcoords : boolean if True (default True), keep coordinate variables Returns ------- outf : PseudoNetCDFFile instance with variables """ if exclude: varkeys = list(set(list(self.variables)).difference(varkeys)) varkeys = varkeys + [k for k in self.getCoords() if k not in varkeys] if inplace: outf = self for varkey in list(outf.variables): if varkey not in varkeys: del outf.variables[varkey] else: outf = self._copywith(props=True, dimensions=True) for varkey in varkeys: varo = self.variables[varkey] newvaro = outf.copyVariable(varo, key=varkey, withdata=False) newvaro[...] = varo[...] return outf
    def sliceDimensions(self, newdims=('POINTS',), verbose=0, **dimslices):
        """
        Return a netcdflike object with dimensions sliced

        Parameters
        ----------
        newdims : iterable of strings
            names for new dimensions. When more than one iterable applies
            to a variable slice, fancy indexing removes both dimensions and
            creates a new one of the iterable lengths
        **dimslices : dictionary
            key value pairs where the key is a dimension and the value is a
            valid slice object (slices, ints or iterables) if iterables are
            provided, all iterables must be the same size and shape. If the
            arrays are not 1D, newdims must have ndim names

        Returns
        -------
        outf : PseudoNetCDFFile instance with variables and dimensions
               sliced
        """
        outf = self._copywith(props=True, dimensions=False)
        newdimlens = OrderedDict()
        for dk, dv in self.dimensions.items():
            newdimlens[dk] = len(dv)
        # A slice value is "array-like" when it is neither a scalar nor a
        # slice object (e.g., a list or ndarray of indices).
        isarray = {dk: not np.isscalar(dv) and not isinstance(
            dv, slice) for dk, dv in dimslices.items()}
        # Fancy indexing engages only when two or more dimensions are
        # sliced with arrays.
        anyisarray = np.sum(list(isarray.values())) > 1
        if anyisarray:
            arraylens = np.array(
                [np.asarray(da).size
                 for dk, da in dimslices.items() if isarray[dk]])
            arrayshapes = np.array(
                [np.asarray(da).shape
                 for dk, da in dimslices.items() if isarray[dk]])
            arraylen = arraylens[0]
            arrayshape = arrayshapes[0]
            if (
                not (arraylens == arraylen).all() or
                not (arrayshapes == arrayshape).all()
            ):
                raise ValueError(
                    'If slicing with arrays, they must all be the same '
                    'size ' + 'and shape')
            for dk, ia in isarray.items():
                if ia:
                    dimslices[dk] = np.asarray(dimslices[dk])
        for dk, ds in dimslices.items():
            # if anyisarray and isarray[dk]: continue
            dv = self.dimensions[dk]
            if dk in dimslices:
                # Probe the new length by applying the slice to the
                # coordinate variable (or plain indices).
                if dk in self.variables:
                    dvar = self.variables[dk]
                else:
                    dvar = np.arange(len(dv))
                newdl = dvar[dimslices[dk]].size
            else:
                newdl = len(dv)
            newdimlens[dk] = newdl
        for dk, dl in newdimlens.items():
            dv = self.dimensions[dk]
            outf.copyDimension(dv, key=dk, dimlen=dl)
        if anyisarray:
            # Fancy indexing collapses the array-sliced dimensions into
            # newdims with the shared array shape.
            for ni, newdim in enumerate(newdims):
                outf.createDimension(newdim, arrayshape[ni])
        if verbose == 1:
            print('|' + '=' * len(self.variables.keys()) + '|', flush=True)
            print('|', end='', flush=True)
        for vark, varo in self.variables.items():
            if verbose > 1:
                print(' ' * 100, '\r', vark, end='\r', flush=True)
            elif verbose > 0:
                print('.', end='', flush=True)
            odims = vdims = varo.dimensions
            sliceo = tuple(dimslices.get(dk, slice(None)) for dk in vdims)
            isdarray = [isarray.get(dk, False) for dk in vdims]
            # This variable participates in fancy indexing only when at
            # least two of its dimensions are array-sliced.
            needsfancy = sum(isdarray) > 1
            if anyisarray and needsfancy:
                # Replace the array-sliced dimensions (at the first array
                # axis) with the new point dimensions.
                concatax = np.argmax(isdarray)
                odims = [dk for dk in vdims if not isarray.get(dk, False)]
                for newdim in newdims[::-1]:
                    odims.insert(concatax, newdim)
            newvaro = outf.copyVariable(
                varo, key=vark, dimensions=odims, withdata=False)
            for pk in varo.ncattrs():
                setattr(newvaro, pk, getattr(varo, pk))
            if anyisarray and needsfancy:
                # Gather one point at a time, then concatenate along the
                # new point axis.
                point_arrays = []
                for ii in range(arraylen):
                    sliceoi = []
                    for si in sliceo:
                        if np.isscalar(si):
                            sliceoi.append([si])
                        elif isinstance(si, slice):
                            sliceoi.append(si)
                        else:
                            sliceoi.append(si.ravel()[ii])
                    sliceoi = tuple(sliceoi)
                    point_arrays.append(np.expand_dims(
                        varo[sliceoi], axis=concatax))
                newvals = np.concatenate(point_arrays, axis=concatax)
            else:
                newvals = varo[sliceo]
            # Some backends require an explicit reshape to assign.
            try:
                newvaro[...] = newvals
            except Exception:
                newvaro[...] = newvals.reshape(newvaro.shape)
        if verbose > 0:
            print()
        return outf
[docs] def removeSingleton(self, dimkey=None): """ Return a netcdflike object with dimensions sliced Parameters ---------- dimkey : string key of dimension to be evaluated for removal; if None, evaluate all. only singleton dimensions will be removed. Returns ------- outf : PseudoNetCDFFile instance with dimensions removed """ outf = self._copywith(props=True, dimensions=False) removed_dims = [] for dk, d in self.dimensions.items(): ni = len(d) if (dimkey is None or dk == dimkey) and ni == 1: removed_dims.append(dk) else: outf.copyDimension(d, key=dk, dimlen=ni) for vk, v in self.variables.items(): olddims = v.dimensions newdims = tuple( [dk for dk in v.dimensions if dk not in removed_dims]) sdims = tuple([(di, dk) for di, dk in enumerate( olddims) if dk not in newdims])[::-1] propd = dict([(pk, getattr(v, pk)) for pk in v.ncattrs()]) ov = outf.createVariable(vk, v.dtype.char, newdims, **propd) outvals = v[...] for di, dk in sdims: outvals = outvals.take(0, axis=di) ov[...] = outvals[...] return outf
def __repr__(self): from PseudoNetCDF.pncdump import pncdump import sys if sys.version_info.major == 3: from io import StringIO else: from StringIO import StringIO out = StringIO() pncdump(self, header=True, outfile=out) out.seek(0, 0) return out.read()
    @classmethod
    def isMine(cls, *args, **kwds):
        """
        True if this file or object can be identified for use by this class.
        Useful to override for classes that can be initialized from disk.
        """
        # Base class makes no claim on any path or arguments.
        return False
def __new__(mcl, *args, **kwds): new = super(PseudoNetCDFFile, mcl).__new__(mcl) new.variables = OrderedDict() new.dimensions = PseudoNetCDFDimensions() new._ncattrs = () new._operator_exclude_vars = () return new def __init__(self, *args, **properties): mode = properties.pop('mode', 'w') self._mode = mode for k, v in properties.items(): setattr(self, k, v) @classmethod def open_mfdataset(cls, *paths, stackdim=None, **kwds): files = [cls(p, **kwds) for p in paths] file1 = files[0] if stackdim is None: for dk, dv in file1.dimensions.items(): if dv.isunlimited(): stackdim = dk else: for dk in list(file1.dimensions): if dk in ('TSTEP', 'time', 'Time', 't'): stackdim = dk else: raise ValueError( 'No dimension is unlimited or time; ' + 'must specify stackdim' ) return file1.stack(files[1:], stackdim=stackdim) def iswritable(self): return (self._mode[:1] in ('a', 'w') or self._mode[:2] in ('r+',)) def __setattr__(self, k, v): if not (k[:1] == '_' or k in ('dimensions', 'variables', 'groups')): if k not in self._ncattrs: self._ncattrs += (k, ) object.__setattr__(self, k, v) def __delattr__(self, k): if k in self._ncattrs: self._ncattrs = tuple([k_ for k_ in self._ncattrs if k_ != k]) object.__delattr__(self, k)
    def setCoords(self, keys, missing='ignore'):
        """
        Set a variable as a coordinate variable

        Parameters
        ----------
        keys : iterable of strings
            keys for coord variables
        missing : string
            action if missing 'ignore', 'skip' or 'error'
            ignore - add in case used later
            skip - do not add
            error - raise an error

        Returns
        -------
        None

        Notes
        -----
        Coordinate variables are excluded from math
        """
        if missing == 'ignore':
            pass
        elif missing == 'skip':
            # Only keep keys that already exist as variables.
            keys = [key for key in keys if key in self.variables]
        elif missing == 'error':
            invalid_keys = [key for key in keys
                            if key not in self.variables]
            if len(invalid_keys) > 0:
                raise KeyError('File does not contain the variables: ' +
                               '{}'.format(invalid_keys))
        else:
            raise ValueError(
                'Must be ignore, skip or error; received {}'.format(missing))
        # NOTE(review): new is a set here while _operator_exclude_vars is a
        # tuple elsewhere; a subsequent setCoords call would then fail on
        # tuple+set concatenation above. Consider tuple(set(...)) -- but
        # confirm against the netcdf subclass first, which relies on the
        # assignment failing and the except path returning new.
        new = set(self._operator_exclude_vars + tuple(keys))
        try:
            self._operator_exclude_vars = new
        except Exception:
            # Subclasses whose __setattr__ rejects this value (e.g. the
            # netcdf subclass) receive the computed set to store themselves.
            return new
[docs] def getCoords(self): """ Return a list of coordkeys """ return tuple([k for k in self._operator_exclude_vars])
[docs] def createDimension(self, name, length): """ Create a dimension Parameters ---------- name : string name for dimension length : integer maximum length of dimension Returns ------- dim : PseudoNetCDFDimensions new dimension """ dim = self.dimensions[name] = PseudoNetCDFDimension(self, name, length) return dim
def copyDimension(self, dim, key=None, dimlen=None, unlimited=None): if dimlen is None: dimlen = len(dim) if key is None: key = dim.name if unlimited is None: unlimited = dim.isunlimited() if isinstance(self, netcdf): if unlimited: ndv = self.createDimension(key, None) else: ndv = self.createDimension(key, dimlen) else: ndv = self.createDimension(key, dimlen) ndv.setunlimited(unlimited) return ndv
    def copyVariable(self, var, key=None, dtype=None, dimensions=None,
                     fill_value=None, withdata=True):
        """
        Copy var into self as vark

        Parameters
        ----------
        var : PseudoNetCDFVariable
            netCDF4.Variable-like object (must have ncattrs and setncatts)
        key : string
            key for variable in self (can be omitted if var has name,
            standard_name, or long_name)
        dtype : string or numpy.dtype
            change the data type to dtype
        dimensions : iterable of strings
            change the dimensions to dimensions
        fill_value : integer or float
            change the fill_value to this values
        withdata : boolean
            default True, copies data

        Returns
        -------
        myvar : PseudoNetCDFVariable
            copy of var in this file
        """
        if key is None:
            # Fall back to the first available naming attribute.
            for propk in ['name', 'standard_name', 'long_name']:
                if hasattr(var, propk):
                    key = getattr(var, propk)
                    break
            else:
                raise AttributeError(
                    'varkey must be supplied because var has no name, ' +
                    'standard_name or long_name')
        if dtype is None:
            dtype = var.dtype
        if dimensions is None:
            dimensions = var.dimensions
        if fill_value is None:
            # Inherit a fill value from the common attribute spellings.
            for pk in ('fill_value', 'missing_value', '_FillValue'):
                fill_value = getattr(var, pk, None)
                if fill_value is not None:
                    break
        myvar = self.createVariable(
            key, dtype, dimensions, fill_value=fill_value)
        attrs = OrderedDict()
        for propk in var.ncattrs():
            attrs[propk] = getattr(var, propk)
        myvar.setncatts(attrs)
        if withdata:
            # Some variables reject basic slicing; fall back to ellipsis.
            try:
                myvar[:] = var[:]
            except Exception:
                myvar[...] = var[...]
        return myvar
    def createVariable(self, name, type, dimensions, fill_value=None,
                       **properties):
        """
        Create a variable

        Parameters
        ----------
        name : string
            name for new variable
        type : string or numpy dtype
            code (e.g., 'f', 'i', 'd')
        dimensions : tuple of strigns
            dimension keys that can be found in objects' dimensions
            dictionary

        Returns
        -------
        var : new variable
        """
        import numpy as np
        # File-level variable options act as defaults for every variable.
        varopt = self.get_varopt()
        if fill_value is None:
            fill_value = varopt.get('fill_value', None)
        for pk, pv in varopt.items():
            if pk != 'fill_value':
                properties.setdefault(pk, pv)
        if fill_value is None:
            # No explicit fill value; adopt one from attribute spellings.
            for pk in 'missing_value _FillValue'.split():
                fill_value = properties.get(pk, None)
                if fill_value is not None:
                    break
        else:
            properties['fill_value'] = fill_value
        if type == 'S':
            # Strings are stored as character arrays.
            type = 'c'
        # Use the masked variable class when masked values or a fill value
        # are in play.
        if (
            isinstance(properties.get('values', 1), np.ma.MaskedArray) or
            fill_value is not None
        ):
            var = self.variables[name] = PseudoNetCDFMaskedVariable(
                self, name, type, dimensions, **properties)
        else:
            var = self.variables[name] = PseudoNetCDFVariable(
                self, name, type, dimensions, **properties)
        return var
[docs] def close(self): """ Does nothing. Implemented for continuity with Scientific.IO.NetCDF """ pass
def dump(self, *args, **kwds): from PseudoNetCDF.pncdump import pncdump pncdump(self, *args, **kwds)
[docs] def save(self, *args, **kwds): """ Provides access to pncwrite for self Parameters ---------- see Help pncwrite Returns ------- see Help pncwrite """ from PseudoNetCDF import pncwrite return pncwrite(self, *args, **kwds)
def ncattrs(self): return self._ncattrs def setncattr(self, k, v): return setattr(self, k, v) def delncattr(self, k): self.__delattr__(k) def __add__(self, lhs): from ._functions import pncbo return pncbo(op='+', ifile1=self, ifile2=lhs, verbose=0, coordkeys=self._operator_exclude_vars) def __sub__(self, lhs): from ._functions import pncbo return pncbo(op='-', ifile1=self, ifile2=lhs, verbose=0, coordkeys=self._operator_exclude_vars) def __mul__(self, lhs): from ._functions import pncbo return pncbo(op='*', ifile1=self, ifile2=lhs, verbose=0, coordkeys=self._operator_exclude_vars) def __truediv__(self, lhs): from ._functions import pncbo return pncbo(op='/', ifile1=self, ifile2=lhs, verbose=0, coordkeys=self._operator_exclude_vars) def __floordiv__(self, lhs): from ._functions import pncbo return pncbo(op='//', ifile1=self, ifile2=lhs, verbose=0, coordkeys=self._operator_exclude_vars) def __pow__(self, lhs): from ._functions import pncbo return pncbo(op='**', ifile1=self, ifile2=lhs, verbose=0, coordkeys=self._operator_exclude_vars) def __and__(self, lhs): from ._functions import pncbo return pncbo(op='&', ifile1=self, ifile2=lhs, verbose=0, coordkeys=self._operator_exclude_vars) def __or__(self, lhs): from ._functions import pncbo return pncbo(op='|', ifile1=self, ifile2=lhs, verbose=0, coordkeys=self._operator_exclude_vars) def __xor__(self, lhs): from ._functions import pncbo return pncbo(op='^', ifile1=self, ifile2=lhs, verbose=0, coordkeys=self._operator_exclude_vars) def __mod__(self, lhs): from ._functions import pncbo return pncbo(op='%', ifile1=self, ifile2=lhs, verbose=0, coordkeys=self._operator_exclude_vars) def __lt__(self, lhs): from ._functions import pncbo return pncbo(op='<', ifile1=self, ifile2=lhs, verbose=0, coordkeys=self._operator_exclude_vars) def __gt__(self, lhs): from ._functions import pncbo return pncbo(op='>', ifile1=self, ifile2=lhs, verbose=0, coordkeys=self._operator_exclude_vars) def __eq__(self, lhs): if isinstance(lhs, 
(NetCDFFile, PseudoNetCDFFile)): from ._functions import pncbo return pncbo(op=' == ', ifile1=self, ifile2=lhs, verbose=0, coordkeys=self._operator_exclude_vars) else: return lhs.__eq__(self) def __le__(self, lhs): from ._functions import pncbo return pncbo(op='<=', ifile1=self, ifile2=lhs, verbose=0, coordkeys=self._operator_exclude_vars) def __ge__(self, lhs): from ._functions import pncbo return pncbo(op='>=', ifile1=self, ifile2=lhs, verbose=0, coordkeys=self._operator_exclude_vars) def __ne__(self, lhs): from ._functions import pncbo return pncbo(op='!=', ifile1=self, ifile2=lhs, verbose=0, coordkeys=self._operator_exclude_vars) slice = sliceDimensions apply = applyAlongDimensions subset = subsetVariables sync = close flush = close
# Keyword arguments consumed by netCDF4's createVariable; any other keyword
# passed to netcdf.createVariable is treated as a variable attribute.
_ncvarkwds = [
    'varname', 'datatype', 'dimensions', 'zlib', 'complevel', 'shuffle',
    'fletcher32', 'contiguous', 'chunksizes', 'endian',
    'least_significant_digit', 'fill_value'
]


class netcdf(PseudoNetCDFFile, NetCDFFile):
    # Disk-backed netCDF file exposing the PseudoNetCDFFile interface.

    def createDimension(self, *args, **kwds):
        # Defer to the real netCDF implementation.
        return NetCDFFile.createDimension(self, *args, **kwds)

    def createVariable(self, *args, **kwds):
        # Apply file-level variable option defaults first.
        for pk, pv in self.get_varopt().items():
            kwds.setdefault(pk, pv)
        # Split keywords into creation options and variable attributes.
        propkwds = OrderedDict()
        for pk in set(kwds).difference(_ncvarkwds):
            propkwds[pk] = kwds.pop(pk)
        outval = NetCDFFile.createVariable(self, *args, **kwds)
        for pk, pv in propkwds.items():
            outval.setncattr(pk, pv)
        return outval

    def setncattr(self, k, v):
        return NetCDFFile.setncattr(self, k, v)

    def __setattr__(self, k, v):
        return NetCDFFile.__setattr__(self, k, v)

    def __delattr__(self, k):
        NetCDFFile.__delattr__(self, k)

    def __new__(cls, *args, **kwds):
        # Bypass PseudoNetCDFFile.__new__; netCDF4 manages its own
        # variables/dimensions containers.
        return NetCDFFile.__new__(cls, *args, **kwds)

    def setCoords(self, keys, missing='ignore'):
        # The parent's assignment fails through NetCDFFile.__setattr__, so
        # it returns the computed keys, stored here directly in __dict__.
        new = PseudoNetCDFFile.setCoords(self, keys, missing=missing)
        self.__dict__['_operator_exclude_vars'] = tuple(set(new))

    def __init__(self, *args, **kwds):
        NetCDFFile.__init__(self, *args, **kwds)
        # Stored via __dict__ to bypass netCDF attribute handling.
        self.__dict__['_mode'] = kwds.get('mode', 'r')
        self.__dict__['_operator_exclude_vars'] = ()
        self.__dict__['_varopt'] = {}

    @property
    def _mode(self):
        # Read-only view of the stored mode; default to read.
        return self.__dict__.get('_mode', 'r')

    def get_dest(self):
        """
        Returns the path where a new file is created on some action

        If None, a file is created in memory.
        Else, a netcdf file is created on disk.
        """
        return self.__dict__.get('_destination', None)

    def set_dest(self, path, **options):
        """
        Sets the path where a new file is created on some action

        Arguments
        ---------
        path : path for new file
        options : options for new file creation

        Returns
        -------
        None
        """
        options['filename'] = path
        self.__dict__['_destination'] = options

    def get_varopt(self):
        """
        Get options

        Arguments
        ---------
        None

        Returns
        -------
        options : dictionary of options
        """
        return self.__dict__.get('_varopt', {}).copy()

    def set_varopt(self, **options):
        """
        Set options to be used when creating any Variable

        Arguments
        ---------
        options : options for new Variable creation

        Returns
        -------
        None
        """
        self.__dict__['_varopt'] = options

    def ncattrs(self):
        return NetCDFFile.ncattrs(self)

    @classmethod
    def from_ncf(cls, infile):
        # Copy into an in-memory PseudoNetCDFFile rather than to disk.
        outf = PseudoNetCDFFile()
        for pk in infile.ncattrs():
            pv = getattr(infile, pk)
            setattr(outf, pk, pv)
        for dk, dv in infile.dimensions.items():
            outf.copyDimension(dv, key=dk)
        for vk, vv in infile.variables.items():
            outf.copyVariable(vv, key=vk)
        return outf

    def _newlike(self):
        """
        Internal function to return a file of the same class if a
        PseudoNetCDFFile
        """
        # With a destination configured, copies go to disk; otherwise a
        # plain in-memory PseudoNetCDFFile is produced.
        if self.get_dest() is not None:
            outf = netcdf(**self.get_dest())
        else:
            outf = PseudoNetCDFFile()
        outf.set_varopt(**self.get_varopt())
        return outf

    @classmethod
    def isMine(cls, path, *args, **kwds):
        """
        True if this file or object can be identified for use by this class.
        Useful to override for classes that can be initialized from disk.
        """
        # classic netCDF files begin with b'CDF'; netCDF4/HDF5 files begin
        # with b'\x89HDF', so b'HDF' appears at byte offset 1.
        tmpf = open(path, 'rb')
        tmpf.seek(0, 0)
        cdftest = tmpf.read(3)
        tmpf.seek(1, 0)
        hdftest = tmpf.read(3)
        tmpf.close()
        if cdftest == b'CDF':
            return True
        elif hdftest == b'HDF':
            # HDF5 may or may not be netCDF4; only claim it if it opens.
            try:
                cls(path, *args, **kwds)
                return True
            except Exception:
                return False
        else:
            return False

    def close(self):
        # Closing an already-closed file raises; degrade to a warning.
        try:
            return NetCDFFile.close(self)
        except Exception as e:
            warn(str(e))

    def __del__(self):
        self.close()


registerreader('nc', netcdf)
registerreader('ncf', netcdf)
class PseudoNetCDFVariables(OrderedDefaultDict):
    """
    PseudoNetCDFVariables provides a special implementation
    of the default dictionary that provides efficient access to
    variables of a PseudoNetCDFFile.  PseudoNetCDFFiles may have
    large variables that should only be loaded if accessed.
    PseudoNetCDFVariables allows a user to specify a function
    that can create variables on demand.
    """

    def __init__(self, func, keys):
        """
        func: function
            must take a key and provides a PseudoNetCDFVariable
        keys: iterable of strings
            keys that the dictionary should act as if it has
        """
        super(PseudoNetCDFVariables, self).__init__()
        self.__func = func
        self.__keys = keys

    def __missing__(self, k):
        """
        Build a variable on demand for any advertised key; raise KeyError
        for anything else, as a normal dictionary would.
        """
        if k not in self.keys():
            raise KeyError('missing "%s"' % (k, ))
        return self.__func(k)

    def addkey(self, k):
        """
        Allow the user to extend keys after the object has been created.
        """
        if k not in self.__keys:
            self.__keys.append(k)

    def keys(self):
        """Advertised keys first, then concretely stored extras."""
        stored = [k for k in dict.keys(self) if k not in self.__keys]
        return tuple(self.__keys + stored)

    def __iter__(self):
        yield from self.keys()

    def __len__(self):
        return len(self.keys())

    def items(self):
        """Materialize (key, value) pairs for every advertised key."""
        return [(k, self[k]) for k in self.keys()]

    def __contains__(self, k):
        return k in self.keys()
if __name__ == '__main__':
    # Run this module's unittest suite when executed directly.
    unittest.main()