# Source code for PseudoNetCDF.core._functions

from __future__ import print_function, unicode_literals

__all__ = ['pncrename', 'manglenames', 'removesingleton', 'getvarpnc',
           'interpvars', 'extract_from_file', 'extract_lonlat', 'mask_vals',
           'slice_dim', 'reduce_dim', 'pncfunc', 'pncbo',
           'pncbfunc', 'pncexpr', 'seqpncbo', 'mesh_dim',
           'add_attr', 'convolve_dim', 'merge', 'stack_files', 'splitdim']
import sys
from warnings import warn
import numpy as np
from collections import OrderedDict
import functools

from ._files import PseudoNetCDFFile
from ._variables import PseudoNetCDFMaskedVariable, PseudoNetCDFVariable

# Functions to be available for pncexpr
from .. import userfuncs
import datetime

# Python 2/3 compatibility shim.
# - On Python 2, define a two-argument ``bytes(x, enc)`` that ignores the
#   encoding (Py2 ``str`` already is a byte string); used below when
#   encoding lon/lat coordinate strings for np.genfromtxt.
# - On Python 3, alias ``unicode`` to ``str`` so isinstance checks such as
#   the one in add_attr work on both majors.
if sys.version_info.major == 2:
    def bytes(x, enc):
        return str(x)
else:
    unicode = str


# TODO: replace with dynamic values
_metakeys = ['time', 'layer', 'level', 'latitude', 'longitude',
             'time_bounds', 'latitude_bounds', 'longitude_bounds',
             'ROW', 'COL', 'LAY', 'TFLAG', 'ETFLAG']


def pncrename(ifile, type_old_new):
    """
    Return a copy of ifile with one dimension or variable renamed.

    type_old_new is a comma-delimited 'type,old,new' string where type is
    'd'/'dimension' or 'v'/'variable'. Dimension renames also rewrite the
    dimensions tuple of every variable that used the old name.
    """
    outf = getvarpnc(ifile, None)
    kind, oldkey, newkey = type_old_new.split(',')
    if kind in ('d', 'dimension'):
        outf.dimensions[newkey] = outf.dimensions[oldkey]
        del outf.dimensions[oldkey]
        for var in outf.variables.values():
            if oldkey in var.dimensions:
                var.dimensions = tuple(
                    newkey if dimkey == oldkey else dimkey
                    for dimkey in var.dimensions)
    if kind in ('v', 'variable'):
        outf.variables[newkey] = outf.variables[oldkey]
        del outf.variables[oldkey]

    return outf


_translator = {'-': '_', '$': 'S', ' ': '_', '+': '_add_', '(': '', ')': ''}


def manglenames(f, translator=_translator):
    """
    Return a copy of f with variable names sanitized.

    Parameters
    ----------
    f : PseudoNetCDF-like file
    translator : mapping of substring -> replacement applied to every
        variable name (default _translator: -, $, space, +, parens)

    Returns
    -------
    outf : copy of f where any variable whose name changed under the
        translator is re-keyed to the mangled name
    """
    outf = getvarpnc(f, None)
    varkeys = list(outf.variables.items())
    for k, var in varkeys:
        nk = k
        # BUG FIX: previously iterated the module-level _translator,
        # silently ignoring the caller-supplied translator argument.
        for olds, news in translator.items():
            nk = nk.replace(olds, news)
        if nk != k:
            outf.variables[nk] = outf.variables[k]
            del outf.variables[k]
    return outf


def removesingleton(f, rd, coordkeys=None):
    """
    Return a copy of f with dimension rd removed when its length is 1.

    Variables that used rd lose that axis (taking index 0); all other
    dimensions, variables, and global properties are preserved.
    """
    if coordkeys is None:
        coordkeys = f.getCoords()

    outf = PseudoNetCDFFile()
    # carry over global properties
    for propkey in f.ncattrs():
        setattr(outf, propkey, getattr(f, propkey))
    # copy every dimension except the length-1 target
    for dimkey, dimobj in f.dimensions.items():
        isunlim = dimobj.isunlimited()
        dimlen = len(dimobj)
        if not (dimlen == 1 and dimkey == rd):
            newdim = outf.createDimension(dimkey, dimlen)
            if isunlim:
                newdim.setunlimited(True)

    for varkey, invar in f.variables.items():
        keptdims = tuple(dk for dk in invar.dimensions
                         if dk in outf.dimensions)
        # (axis, name) pairs for dropped dims, reversed so taking from the
        # highest axis first keeps lower axis indices valid
        dropped = tuple(pair for pair in enumerate(invar.dimensions)
                        if pair[1] not in outf.dimensions)[::-1]
        attrs = dict([(pk, getattr(invar, pk)) for pk in invar.ncattrs()])
        newvar = outf.createVariable(varkey, invar.dtype.char, keptdims,
                                     **attrs)
        data = invar[...]
        for axidx, _dk in dropped:
            data = data.take(0, axis=axidx)

        newvar[...] = data[...]
    return outf


def getvarpnc(f, varkeys, coordkeys=None, copy=True):
    """
    Return a new PseudoNetCDF file holding a subset of f's variables.

    Parameters
    ----------
    f : PseudoNetCDF-like file
    varkeys : iterable of variable names (each may also be an expression
        evaluated against f.variables); None selects all non-coordinate
        variables
    coordkeys : coordinate variable names to carry along; defaults to
        f.getCoords()
    copy : when True, variable values are copied rather than shared

    Returns
    -------
    outf : new file with the selected variables, their dimensions, and
        the associated coordinate variables/dimensions
    """
    if coordkeys is None:
        coordkeys = f.getCoords()

    coordkeys = set(coordkeys)
    if varkeys is None:
        # keep everything that is not a coordinate
        varkeys = list(f.variables.keys())
        varkeys = [k for k in varkeys if k not in coordkeys]
    else:
        # keep requested order, but drop (and warn about) missing keys
        newvarkeys = list(set(varkeys).intersection(f.variables.keys()))
        newvarkeys = [varkey for varkey in varkeys if varkey in newvarkeys]
        oldvarkeys = list(set(varkeys))
        oldvarkeys.sort()
        skipping = set(oldvarkeys).difference(newvarkeys)
        if len(skipping):
            warn('Skipping %s' % ', '.join(skipping))
        varkeys = newvarkeys

    # start from a props-only copy; wrap objects without a copy method
    if hasattr(f, 'copy'):
        outf = f.copy(props=True, dimensions=False,
                      variables=False, data=False)
    else:
        from PseudoNetCDF.sci_var import WrapPNC
        outf = WrapPNC(f).copy(props=True, dimensions=False,
                               variables=False, data=False)

    for varkey in varkeys:
        # varkey may be an expression over f.variables (e.g. 'O3*1000');
        # fall back to a plain lookup if evaluation fails
        try:
            var = eval(varkey, None, f.variables)
        except Exception:
            var = f.variables[varkey]
        for dimk, dimv in zip(var.dimensions, var.shape):
            if dimk not in outf.dimensions:
                newdimv = outf.createDimension(dimk, dimv)
                if f.dimensions[dimk].isunlimited():
                    newdimv.setunlimited(True)
                # dimension variables (and their bounds) become coordinates
                coordkeys.add(dimk)
                try:
                    tempv = f.variables[dimk]
                    if hasattr(tempv, 'bounds'):
                        coordkeys.add(tempv.bounds.strip())
                except (ValueError, KeyError, AttributeError):
                    pass
        # make sure coordinate dimensions exist in the output too
        for coordk in coordkeys:
            if coordk in f.dimensions and coordk not in outf.dimensions:
                newdimv = outf.createDimension(
                    coordk, len(f.dimensions[coordk]))
                if f.dimensions[coordk].isunlimited():
                    newdimv.setunlimited(True)

        propd = dict([(k, getattr(var, k)) for k in var.ncattrs()])
        # preserve masked-array fill values not exposed as attributes
        if hasattr(var[...], 'fill_value') and 'fill_value' not in propd:
            propd['fill_value'] = var[...].fill_value

        # 'values' and 'name' collide with createVariable keywords; rename
        if 'values' in propd:
            propd['pvalues'] = propd['values']
            del propd['values']
        if 'name' in propd:
            if 'standard_name' not in propd:
                propd['standard_name'] = propd['name']
            del propd['name']
        vals = var[...]
        if copy:
            vals = vals.copy()
        outf.createVariable(varkey, var.dtype.char,
                            var.dimensions, values=vals, **propd)
    # finally copy coordinate variables themselves (and any dimensions
    # they need that were not already created)
    for coordkey in coordkeys:
        if coordkey in f.variables.keys():
            coordvar = f.variables[coordkey]
            propd = dict([(k, getattr(coordvar, k))
                          for k in coordvar.ncattrs()])
            outf.createVariable(coordkey, coordvar.dtype.char,
                                coordvar.dimensions,
                                values=coordvar[...], **propd)
            for dk in coordvar.dimensions:
                if dk not in outf.dimensions:
                    dv = outf.createDimension(dk, len(f.dimensions[dk]))
                    dv.setunlimited(f.dimensions[dk].isunlimited())

    return outf


def interpvars(f, weights, dimension, loginterp=None):
    """
    Interpolate file variables along one dimension using a weight matrix.

    Parameters
    ----------
    f : PseudoNetCDFFile
    weights : weights for new dimension from old dimension dim(new, old)
    dimension : name of the dimension that will be interpolated
    loginterp : iterable of variable keys to interpolate on a log scale
        (default None; previously a mutable default [], fixed here)

    Returns
    -------
    outf : PseudoNetCDFFile with `dimension` replaced by the new
        weighted coordinate; variables without that dimension are shared
    """
    if loginterp is None:
        loginterp = ()
    outf = PseudoNetCDFFile()
    outf.dimensions = f.dimensions.copy()
    if hasattr(f, 'groups'):
        outf.groups = OrderedDict()
        for grpk, grpv in f.groups.items():
            outf.groups[grpk] = interpvars(grpv, weights, dimension)
    oldd = f.dimensions[dimension]
    # locate the axis of weights whose length matches the old dimension;
    # the preceding axis holds the new dimension length
    didx, = [i for i, l in enumerate(weights.shape) if len(oldd) == l]
    newd = outf.createDimension(dimension, weights.shape[didx - 1])
    newd.setunlimited(oldd.isunlimited())
    for vark, oldvar in f.variables.items():
        if dimension in oldvar.dimensions:
            dimidx = list(oldvar.dimensions).index(dimension)
            if hasattr(oldvar, '_FillValue'):
                kwds = dict(fill_value=oldvar._FillValue,
                            missing_value=oldvar._FillValue)
            else:
                kwds = dict()
            newvar = outf.createVariable(
                vark, oldvar.dtype.char, oldvar.dimensions, **kwds)
            for ak in oldvar.ncattrs():
                setattr(newvar, ak, getattr(oldvar, ak))
            # broadcast weights across the variable's other axes
            if len(weights.shape) <= len(oldvar.dimensions):
                weightslice = (None,) * (dimidx) + (Ellipsis,) + \
                    (None,) * len(oldvar.dimensions[dimidx + 1:])
            else:
                weightslice = slice(None)
            varslice = (slice(None,),) * dimidx + (None,)
            weightsv = weights[weightslice]
            oldvarv = oldvar[varslice]
            if not (weightsv.ndim - 1) == oldvar.ndim:
                warn('Wrong number of dimensions for %s' % (vark,))
            elif vark in loginterp:
                # geometric (log-scale) weighted mean
                logv = np.ma.exp(
                    (weightsv * np.ma.log(oldvarv)).sum(dimidx + 1))
                newvar[:] = logv
            else:
                # arithmetic weighted mean
                linv = (weightsv * oldvarv).sum(dimidx + 1)
                newvar[:] = linv
        else:
            outf.variables[vark] = oldvar
    return outf
def extract_from_file(f, lonlatfs, unique=False, gridded=None, method='nn',
                      passthrough=True):
    """
    Extract points from f at the lon/lat coordinates found in the files
    lonlatfs (concatenated); see extract_lonlat for keyword meanings.
    """
    from ..coordutil import getlonlatcoordstr
    lonlatcoordstr = ""
    for lonlatf in lonlatfs:
        lonlatcoordstr += getlonlatcoordstr(lonlatf)
    return extract_lonlat(f, lonlatcoordstr, unique=unique, gridded=gridded,
                          method=method, passthrough=passthrough)


def extract_lonlat(f, lonlat, unique=False, gridded=None, method='nn',
                   passthrough=True):
    """
    Extract a 'points' dimension from f at given lon/lat locations.

    Parameters
    ----------
    f : PseudoNetCDF-like file with longitude/latitude variables
    lonlat : 'lon,lat/lon,lat/...' string, path to such a file, or a list
        of either
    unique : drop duplicate grid cells, keeping first occurrence
    gridded : whether f is 2-D gridded; inferred from dimensions when None
    method : 'nn', 'KDTree', 'linear', 'cubic', 'quintic', or 'll2ij'
    passthrough : also copy variables without lon/lat coordinates

    Returns
    -------
    outf : file with lon/lat axes replaced by a 'points' dimension
    """
    from PseudoNetCDF.sci_var import Pseudo2NetCDF
    try:
        from StringIO import StringIO as BytesIO
    except ImportError:
        from io import BytesIO
    import os
    outf = PseudoNetCDFFile()
    outf.dimensions = f.dimensions.copy()
    if hasattr(f, 'groups'):
        outf.groups = {}
        for grpk, grpv in f.groups.items():
            outf.groups[grpk] = extract(grpv, lonlat)
    p2p = Pseudo2NetCDF()
    p2p.verbose = 0
    p2p.addGlobalProperties(f, outf)
    if gridded is None:
        gridded = (
            ('longitude' in f.dimensions and 'latitude' in f.dimensions) or
            ('COL' in f.dimensions and 'ROW' in f.dimensions) or
            ('x' in f.dimensions and 'y' in f.dimensions)
        )
    if isinstance(lonlat, (str, )):
        lonlat = [lonlat]
    lonlatout = []
    for ll in lonlat:
        if isinstance(ll, (str, )):
            # a string may be a path to a file of coordinates
            try:
                if os.path.exists(ll):
                    ll = open(ll, 'r').read().strip()
            except Exception as e:
                warn('Windows machines may have uncessary warnings; ' +
                     str(e))
        lonlatout.append(ll)
    lonlat = ('/'.join(lonlatout))
    try:
        lons, lats = np.genfromtxt(
            BytesIO(bytes(lonlat.replace('/', '\n'), 'ASCII')),
            delimiter=',').T
    except Exception as e:
        print(str(e))
        raise e
    outf.lonlatcoords = lonlat
    if not method == 'll2ij':
        longitude = f.variables['longitude'][:]
        latitude = f.variables['latitude'][:]
        latlon1d = longitude.ndim == 1 and latitude.ndim == 1
    if method == 'll2ij':
        # delegate projection to the file's own lon/lat -> i/j mapping
        lonidxs, latidxs = f.ll2ij(lons, lats)

        def extractfunc(v, thiscoords):
            newslice = tuple([{'ROW': latidxs, 'COL': lonidxs,
                               'latitude': latidxs, 'longitude': lonidxs,
                               'points': latidxs,
                               'PERIM': latidxs}.get(d, slice(None))
                              for d in thiscoords])
            if newslice == ():
                return v
            else:
                return v[:][newslice]
    elif method == 'nn':
        # nearest neighbor on simple lon/lat Euclidean distance
        if latlon1d and gridded:
            latitude = latitude[(slice(None), None, None)]
            longitude = longitude[(None, slice(None), None)]
        else:
            latitude = latitude[Ellipsis, None]
            longitude = longitude[Ellipsis, None]
        lonlatdims = latitude.ndim - 1
        londists = longitude - lons[(None,) * lonlatdims]
        latdists = latitude - lats[(None,) * lonlatdims]
        totaldists = ((latdists**2 + londists**2)**.5)
        if latlon1d and not gridded:
            latidxs, = lonidxs, = np.unravel_index(
                totaldists.reshape(-1, latdists.shape[-1]).argmin(0),
                totaldists.shape[:-1])
        else:
            latidxs, lonidxs = np.unravel_index(
                totaldists.reshape(-1, latdists.shape[-1]).argmin(0),
                totaldists.shape[:-1])

        def extractfunc(v, thiscoords):
            newslice = tuple([{'latitude': latidxs, 'longitude': lonidxs,
                               'points': latidxs,
                               'PERIM': latidxs}.get(d, slice(None))
                              for d in thiscoords])
            if newslice == ():
                return v
            else:
                return v[:][newslice]
    elif method == 'KDTree':
        if latlon1d and gridded:
            longitude, latitude = np.meshgrid(longitude, latitude)
        from scipy.spatial import KDTree
        tree = KDTree(np.ma.array([latitude.ravel(),
                                   longitude.ravel()]).T)
        dists, idxs = tree.query(np.ma.array([lats, lons]).T)
        if latlon1d and not gridded:
            latidxs, = lonidxs, = np.unravel_index(idxs, latitude.shape)
        else:
            latidxs, lonidxs = np.unravel_index(idxs, latitude.shape)

        def extractfunc(v, thiscoords):
            newslice = tuple([{'latitude': latidxs, 'longitude': lonidxs,
                               'points': latidxs,
                               'PERIM': latidxs}.get(d, slice(None))
                              for d in thiscoords])
            return v[newslice]
    elif method in ('linear', 'cubic'):
        from scipy.interpolate import LinearNDInterpolator
        from scipy.interpolate import CloughTocher2DInterpolator
        if method == 'cubic':
            interpclass = CloughTocher2DInterpolator
        else:
            interpclass = LinearNDInterpolator
        if latlon1d and gridded:
            longitude, latitude = np.meshgrid(longitude, latitude)
        points = np.array([longitude.ravel(), latitude.ravel()]).T

        def extractfunc(v, thiscoords):
            if 'latitude' not in thiscoords or \
                    'longitude' not in thiscoords:
                return v
            # collapse the lat/lon axes to one axis for interpolation
            newshape = [dl if d not in ('latitude', 'longitude') else -1
                        for di, (d, dl)
                        in enumerate(zip(thiscoords, v.shape))]
            i1 = newshape.index(-1)
            if newshape.count(-1) > 1:
                i2 = newshape.index(-1, i1 + 1)
                assert(i1 == (i2 - 1))
                newshape.pop(i2)
            i2df = interpclass(points, np.rollaxis(
                v.reshape(*newshape), i1, 0))
            out = np.rollaxis(
                np.ma.array([i2df(lon, lat)
                             for lat, lon in zip(lats, lons)]),
                0, len(newshape))
            return out
        latidxs = extractfunc(latitude, ('latitude', 'longitude'))
    elif method in ('cubic', 'quintic'):
        # NOTE(review): unreachable for 'cubic' (handled above); kept
        # for 'quintic'
        from scipy.interpolate import interp2d
        if latlon1d and gridded:
            longitude, latitude = np.meshgrid(longitude, latitude)

        def extractfunc(v, thiscoords):
            i2df = interp2d(latitude, longitude, v, kind=method)
            return np.ma.array([i2df(lat, lon)
                                for lat, lon in zip(lats, lons)])
        latidxs = extractfunc(latitude, '')
    else:
        raise ValueError('method must be: nn, KDTree')
    if unique:
        tmpx = OrderedDict()
        tmplonlat = outf.lonlatcoords.split('/')
        for lon, lat, lonlatstr in zip(lonidxs, latidxs, tmplonlat):
            if (lon, lat) not in tmpx:
                tmpx[(lon, lat)] = lonlatstr
        # BUG FIX: np.array(dict.keys()) makes a 0-d object array on
        # Python 3; materialize the keys as a list first
        lonidxs, latidxs = np.array(list(tmpx.keys())).T
        outf.lonlatcoords_orig = outf.lonlatcoords
        outf.lonlatcoords = '/'.join([tmpx[k]
                                      for k in zip(lonidxs, latidxs)])
    for k, v in f.variables.items():
        try:
            coords = v.coordinates.split()
        except Exception:
            # special case for ioapi
            coords = [{'ROW': 'latitude', 'COL': 'longitude'}.get(
                tempdk, tempdk) for tempdk in v.dimensions]
        dims = v.dimensions
        outf.createDimension('points', len(latidxs))
        if passthrough or 'longitude' in coords or 'latitude' in coords:
            try:
                del outf.variables[k]
            except Exception:
                pass
            newdims = []
            if len(dims) != len(coords):
                thiscoords = dims
            else:
                thiscoords = coords
            for d, c in zip(dims, thiscoords):
                if (
                    d not in ('longitude', 'latitude') and
                    c not in ('longitude', 'latitude')
                ):
                    newdims.append(d)
                else:
                    if 'points' not in newdims:
                        newdims.append('points')
            newdims = tuple(newdims)
            newv = extractfunc(v, thiscoords)
            propd = dict([(ak, getattr(v, ak)) for ak in v.ncattrs()])
            nv = outf.createVariable(
                k, v.dtype.char, newdims, values=newv, **propd)
            setattr(nv, 'coordinates', getattr(
                v, 'coordinates', ' '.join(coords)))
            for di, dk in enumerate(newdims):
                if dk not in outf.dimensions:
                    outf.createDimension(dk, nv.shape[di])
    return outf


extract = extract_lonlat


def mask_vals(f, maskdef, metakeys=_metakeys):
    """
    Mask non-meta variables of f in place per maskdef.

    maskdef is 'type,value' where type is any np.ma.masked_<type>
    (e.g., greater, less, values) or 'where' with an expression.
    Returns f with masked replacement variables.
    """
    mtype = maskdef.split(',')[0]
    mval = ','.join(maskdef.split(',')[1:])
    if mtype == 'where':
        # NOTE(review): 'mask' is expected in scope for this expression;
        # the eval below will fail and warn unless it is defined
        maskexpr = 'np.ma.masked_where(mask, var[:].view(np.ndarray))'
        # mask = eval(mval, None, f.variables)
    else:
        maskexpr = 'np.ma.masked_%s(var[:], %s)' % (mtype, mval)
    for varkey, var in f.variables.items():
        if varkey not in metakeys:
            try:
                vout = eval(maskexpr)
                f.variables[varkey] = PseudoNetCDFMaskedVariable(
                    f, varkey, var.dtype.char, var.dimensions, values=vout,
                    **dict([(pk, getattr(var, pk))
                            for pk in var.ncattrs()]))
            except Exception as e:
                warn('Cannot mask %s: %s' % (varkey, str(e)))
    return f
def slice_dim(f, slicedef, fuzzydim=True):
    """
    variables have dimensions (e.g., time, layer, lat, lon), which can be
    subset using:

        slice_dim(f, 'dim,start,stop,stride')

    e.g., slice_dim(f, 'layer,0,47,5') would sample every fifth layer
    starting at 0
    """
    inf = f
    historydef = "slice_dim(f, %s, fuzzydim = %s); " % (slicedef, fuzzydim)
    # parse 'dim[,start[,stop[,stride]]]'; a single index becomes a
    # one-element slice
    slicedef = slicedef.split(',')
    slicedef = [slicedef[0]] + list(map(eval, slicedef[1:]))
    if len(slicedef) == 2:
        slicedef.append(slicedef[-1] + 1)
    slicedef = (slicedef + [None, ])[:4]
    dimkey, dmin, dmax, dstride = slicedef
    if dimkey not in inf.dimensions:
        warn('%s not in file' % dimkey)
        return inf
    unlimited = inf.dimensions[dimkey].isunlimited()
    if fuzzydim:
        # also slice numbered variants of the dimension (e.g., layer72)
        partial_check = [key for key in inf.dimensions
                         if dimkey == key[:len(dimkey)] and
                         key[len(dimkey):].isdigit()]
        for dimk in partial_check:
            inf = slice_dim(inf, '%s,%s,%s,%s' % (dimk, dmin, dmax,
                                                  dstride))
    from PseudoNetCDF.sci_var import Pseudo2NetCDF
    p2p = Pseudo2NetCDF(verbose=0)
    outf = PseudoNetCDFFile()
    p2p.addDimensions(inf, outf)
    p2p.addGlobalProperties(inf, outf)
    for varkey in inf.variables.keys():
        var = inf.variables[varkey]
        if dimkey not in var.dimensions:
            p2p.addVariable(inf, outf, varkey)
        else:
            axis = list(var.dimensions).index(dimkey)
            vout = var[...].swapaxes(
                0, axis)[dmin:dmax:dstride].swapaxes(0, axis)
            newlen = vout.shape[axis]
            newdim = outf.createDimension(dimkey, newlen)
            newdim.setunlimited(unlimited)
            outf.variables[varkey] = vout
    history = getattr(outf, 'history', '')
    history += historydef
    setattr(outf, 'history', history)
    return outf
def _getfunc(a, func):
    """
    Get an approriate function that takes one optional keyword (axis)
    """
    if hasattr(func, '__call__'):
        # arbitrary callable: apply along an axis of a
        outfunc = functools.partial(np.apply_along_axis, func1d=func,
                                    arr=a, axis=None, keepdims=True)
    elif hasattr(a, func):
        # prefer the array's own method (e.g., a.mean)
        outfunc = getattr(a, func)
    elif isinstance(a, np.ma.MaskedArray):
        # masked arrays use the np.ma namespace to honor masks
        outfunc = functools.partial(getattr(np.ma, func), a,
                                    axis=None, keepdims=True)
    elif hasattr(np, func):
        outfunc = functools.partial(getattr(np, func), a,
                                    axis=None, keepdims=True)
    else:
        # last resort: evaluate the name in the current scope
        outfunc = functools.partial(eval(func), a,
                                    axis=None, keepdims=True)
    return outfunc
def reduce_dim(f, reducedef, fuzzydim=True, metakeys=_metakeys):
    """
    variable dimensions can be reduced using

        reduce_dim(file 'dim,function,weight')

    e.g., reduce_dim(layer,mean,weight).

    Weighting is not fully functional.
    """
    from PseudoNetCDF.sci_var import Pseudo2NetCDF
    inf = f
    metakeys = [k for k in metakeys if k in inf.variables.keys()]
    historydef = "reduce_dim(f, %s, fuzzydim = %s, metakeys = %s); " % (
        reducedef, fuzzydim, metakeys)
    import numpy as np
    # accept either a comma-delimited string or a pre-split sequence
    if hasattr(reducedef, 'split') and hasattr(reducedef, 'count'):
        commacount = reducedef.count(',')
        reducevals = reducedef.split(',')
    else:
        commacount = len(reducedef)
        reducevals = reducedef
    if commacount == 3:
        dimkey, func, numweightkey, denweightkey = reducevals
        numweight = inf.variables[numweightkey]
        denweight = inf.variables[denweightkey]
    elif commacount == 2:
        dimkey, func, numweightkey = reducevals
        numweight = inf.variables[numweightkey]
        denweightkey = None
    elif commacount == 1:
        dimkey, func = reducevals
        numweightkey = None
        denweightkey = None
    if fuzzydim:
        # also reduce numbered variants of the dimension (e.g., layer72)
        partial_check = [key for key in inf.dimensions
                         if dimkey == key[:len(dimkey)] and
                         key[len(dimkey):].isdigit()]
        for dimk in partial_check:
            if commacount == 1:
                inf = reduce_dim(inf, '%s,%s' % (dimk, func),)
            elif commacount == 2:
                inf = reduce_dim(inf, '%s,%s,%s' % (dimk, func,
                                                    numweightkey),)
            elif commacount == 3:
                inf = reduce_dim(
                    inf, '%s,%s,%s,%s' % (dimk, func, numweightkey,
                                          denweightkey),)
    if dimkey not in inf.dimensions:
        warn('%s not in file' % dimkey)
        return inf
    p2p = Pseudo2NetCDF(verbose=0)
    outf = PseudoNetCDFFile()
    p2p.addDimensions(inf, outf)
    del outf.dimensions[dimkey]
    p2p.addGlobalProperties(inf, outf)
    for varkey in inf.variables.keys():
        var = inf.variables[varkey]
        if dimkey not in var.dimensions:
            p2p.addVariable(inf, outf, varkey)
            continue
        axis = list(var.dimensions).index(dimkey)
        vreshape = var[slice(None)]
        if varkey not in metakeys:
            if numweightkey is None:
                # unweighted reduction
                vout = _getfunc(vreshape, func)(axis=axis, keepdims=True)
            elif denweightkey is None:
                # numerator-weighted reduction; units gain the weight units
                wvar = var * \
                    np.array(numweight, ndmin=var.ndim)[
                        (slice(None),) * axis +
                        (slice(0, var.shape[axis]),)]
                tmpslicenum = (slice(None),) * (axis + 1) + (None,)
                vout = getattr(wvar[tmpslicenum], func)(axis=axis)
                vout.units = (vout.units.strip() + ' * ' +
                              numweight.units.strip())
                if hasattr(vout, 'base_units'):
                    vout.base_units = (vout.base_units.strip() + ' * ' +
                                       numweight.base_units.strip())
            else:
                # ratio of weighted reductions
                nwvar = var * \
                    np.array(numweight, ndmin=var.ndim)[
                        (slice(None),) * axis +
                        (slice(0, var.shape[axis]),)]
                tmpslicenum = (slice(None),) * (axis + 1) + (None,)
                tmpsliceden = ((slice(None),) * axis +
                               (slice(0, var.shape[axis]), None))
                vout = (
                    getattr(nwvar[tmpslicenum], func)(axis=axis) /
                    getattr(np.array(denweight,
                                     ndmin=var.ndim)[tmpsliceden],
                            func)(axis=axis)
                )
        else:
            if '_bounds' not in varkey and '_bnds' not in varkey:
                vout = _getfunc(vreshape, func)(axis=axis, keepdims=True)
            else:
                # bounds variables: preserve envelope with min/max corners
                vout = _getfunc(vreshape, func)(axis=axis, keepdims=True)
                vmin = _getfunc(vreshape, 'min')(axis=axis, keepdims=True)
                vmax = _getfunc(vreshape, 'max')(axis=axis, keepdims=True)
                if 'lon' in varkey or 'time' in varkey:
                    try:
                        vout[..., [0, 3]] = vmin[..., [0, 3]]
                        vout[..., [1, 2]] = vmax[..., [1, 2]]
                    except Exception:
                        vout[..., [0, 1]] = vmin[0, 0], vmax[0, 1]
                elif 'lat' in varkey:
                    nmin = vout.shape[-1] // 2
                    vout[..., :nmin] = vmin[..., :nmin]
                    vout[..., nmin:] = vmax[..., nmin:]
        if dimkey not in outf.dimensions:
            outdim = outf.createDimension(dimkey, vout.shape[axis])
            outdim.setunlimited(inf.dimensions[dimkey].isunlimited())
        nvar = outf.variables[varkey] = PseudoNetCDFMaskedVariable(
            outf, varkey, var.dtype.char, var.dimensions, values=vout)
        for k in var.ncattrs():
            setattr(nvar, k, getattr(var, k))
    history = getattr(outf, 'history', '')
    history += historydef
    setattr(outf, 'history', history)
    return outf
def pncfunc(func, ifile1, coordkeys=None, verbose=0):
    """
    Perform function (func) on all variables in ifile1. The returned file
    (rfile) contains the result

    rfile = ifile1 <op>

    func can be a function or string
    """
    from PseudoNetCDF.sci_var import Pseudo2NetCDF
    if coordkeys is None:
        coordkeys = ifile1.getCoords()
    # Copy infile1 to a temporary PseudoNetCDFFile
    p2p = Pseudo2NetCDF()
    p2p.verbose = verbose
    tmpfile = PseudoNetCDFFile()
    p2p.convert(ifile1, tmpfile)
    # For each variable, assign the new value
    # to the tmpfile variables.
    for k in tmpfile.variables.keys():
        if k in coordkeys:
            continue
        outvar = tmpfile.variables[k]
        in1var = ifile1.variables[k]
        if not hasattr(func, '__call__'):
            # string form: either a method name or a '.expr' suffix
            if hasattr(in1var, func):
                outval = getattr(in1var, func)()
            elif '.' == func[:1]:
                outval = eval('in1var[:]' + func)
        else:
            outval = func(in1var[:])
        # invalid results become the -999 fill value
        outval = np.ma.filled(np.ma.masked_invalid(outval), -999)
        if outvar.ndim > 0:
            outvar[:] = outval
        else:
            outvar.itemset(outval)
        outvar.fill_value = -999
    return tmpfile
def pncbo(op, ifile1, ifile2, coordkeys=None, verbose=0):
    """
    Perform binary operation (op) on all variables in ifile1
    and ifile2. The returned file (rfile) contains the result

    rfile = ifile1 <op> ifile2

    op can be any valid operator (e.g., +, -, /, *, **, &, ||)
    """
    tmpfile = ifile1.copy(props=True, dimensions=True, variables=False,
                          data=False)
    if coordkeys is None:
        coordkeys = ifile1.getCoords()
    # For each variable, assign the new value
    # to the tmpfile variables.
    for k in ifile1.variables.keys():
        in1var = ifile1.variables[k]
        if k in coordkeys:
            # coordinates pass through unchanged
            tmpfile.copyVariable(in1var, key=k)
        elif k not in ifile2.variables.keys():
            warn('%s not found in ifile2; copied' % k)
            tmpfile.copyVariable(in1var, key=k)
        else:
            in2var = ifile2.variables[k]
            propd = dict([(ak, getattr(in1var, ak))
                          for ak in in1var.ncattrs()])
            unit1 = getattr(in1var, 'units', 'unknown')
            unit2 = getattr(in2var, 'units', 'unknown')
            propd['units'] = '(%s) %s (%s)' % (unit1, op, unit2)
            outval = np.ma.masked_invalid(
                eval('in1var[...] %s in2var[...]' % op).view(np.ndarray))
            outvar = tmpfile.createVariable(
                k, in1var.dtype.char, in1var.dimensions, fill_value=-999,
                values=outval)
            outvar.setncatts(propd)
    return tmpfile
def pncbfunc(func, ifile1, ifile2, coordkeys=None, verbose=0):
    """
    Perform binary function (func) on all variables in ifile1
    and ifile2. The returned file (rfile) contains the result

    rfile = ifile1 <op> ifile2

    op can be any valid operator (e.g., +, -, /, *, **, &, ||)
    """
    from PseudoNetCDF.sci_var import Pseudo2NetCDF
    if coordkeys is None:
        coordkeys = ifile1.getCoords()
    # Copy infile1 to a temporary PseudoNetCDFFile
    p2p = Pseudo2NetCDF()
    p2p.verbose = verbose
    tmpfile = PseudoNetCDFFile()
    p2p.convert(ifile1, tmpfile)
    # For each variable, assign the new value
    # to the tmpfile variables.
    for k in tmpfile.variables.keys():
        if k in coordkeys:
            continue
        outvar = tmpfile.variables[k]
        in1var = ifile1.variables[k]
        if k not in ifile2.variables.keys():
            warn('%s not found in ifile2' % k)
            continue
        in2var = ifile2.variables[k]
        # invalid results become the -999 fill value
        outval = np.ma.filled(
            np.ma.masked_invalid(func(in1var[...], in2var[...])), -999)
        if outvar.ndim > 0:
            outvar[:] = outval
        else:
            outvar.itemset(outval)
        outvar.fill_value = -999
    return tmpfile


def _namemangler(k):
    """Replace characters illegal in Python identifiers with words."""
    k = k.replace('$', 'dollar')
    k = k.replace('-', 'hyphen')
    k = k.replace('(', 'lparen')
    k = k.replace(')', 'rparen')
    return k
def pncexpr(expr, ifile, verbose=0):
    """
    Evaluate an arbitrary expression in the context of ifile.variables
    and add the result to the file with appropriate units.

    SECURITY NOTE: expr is compiled and exec'd with file variables,
    numpy, datetime, userfuncs, and scipy.constants in scope; never pass
    untrusted input.
    """
    from PseudoNetCDF.sci_var import WrapPNC
    from symtable import symtable

    # Copy file to temporary PseudoNetCDF file
    comp = compile(expr, 'none', 'exec')
    varpnc = tmpfile = WrapPNC(ifile, None)

    # Get NetCDF variables as a dictionary with
    # names mangled to allow special characters
    # in the names
    vardict = dict([(_namemangler(k), varpnc.variables[k])
                    for k in varpnc.variables.keys()])
    # Use the symbol table to find a template variable: the first
    # referenced symbol that is an existing variable supplies the
    # dimensions/properties for newly created variables
    symtbl = symtable(expr, '<pncexpr>', 'exec')
    symbols = symtbl.get_symbols()
    for symbol in symbols:
        key = symbol.get_name()
        if key in vardict:
            tmpvar = vardict[key]
            break
    else:
        tmpvar = PseudoNetCDFVariable(None, 'temp', 'f', ())
    propd = dict([(k, getattr(tmpvar, k)) for k in tmpvar.ncattrs()])
    dimt = tmpvar.dimensions
    # Add all used constants as properties
    # of the output file
    vardict['ifile'] = ifile
    vardict['infile'] = ifile
    vardict['np'] = np
    vardict['datetime'] = datetime
    for fname in dir(userfuncs):
        vardict[fname] = getattr(userfuncs, fname)
    exec('from scipy.constants import *', None, vardict)
    for k in ifile.ncattrs():
        if k not in vardict:
            vardict[k] = getattr(ifile, k)

    # Assign expression to new variable.
    exec(comp, None, vardict)
    assignedkeys = [s.get_name() for s in symbols if s.is_assigned()]
    assignedkeys = [k for k in assignedkeys if k in vardict]
    for key in assignedkeys:
        val = vardict[key]
        # if the output variable has no dimensions, there is likely a
        # problem and the output should be defined.
        if isinstance(val, (PseudoNetCDFVariable,)) and \
                val.dimensions != ():
            tmpfile.variables[key] = val
        else:
            tmpfile.createVariable(key, val.dtype.char, dimt, values=val,
                                   **propd)

    return tmpfile
def seqpncbo(ops, ifiles, coordkeys=None):
    """Fold pncbo over ifiles: apply each op to the first two files,
    replacing them with the result (ifiles is modified in place)."""
    for op in ops:
        ifile1, ifile2 = ifiles[:2]
        newfile = pncbo(op=op, ifile1=ifile1, ifile2=ifile2,
                        coordkeys=coordkeys)
        del ifiles[:2]
        ifiles.insert(0, newfile)
    return ifiles


def mesh_dim(f, mesh_def):
    """Coarsen or refine dimension per mesh_def 'dim,factor,aggfunc';
    factor < 1 coarsens (aggregate), factor > 1 refines (repeat)."""
    dimkey, meshfactor, aggfunc = mesh_def.split(',')
    meshfactor = float(meshfactor)

    def spread(a, n, axis):
        return a.repeat(n, axis) * meshfactor
    try:
        aggfunc = eval(aggfunc)
    except Exception:
        aggfunc = getattr(np, aggfunc)
    if meshfactor < 1.:
        oldres = int(1. / meshfactor)
        assert(1. / meshfactor == oldres)
        newres = 1
    elif meshfactor > 1.:
        newres = int(meshfactor)
        assert(meshfactor == newres)
        oldres = 1
    from PseudoNetCDF.MetaNetCDF import newresolution
    nrf = newresolution(f, dimension=dimkey, oldres=oldres, newres=newres,
                        repeat_method=aggfunc, condense_method=aggfunc)
    # replace the dimension and affected variables in place
    f.dimensions[dimkey] = nrf.dimensions[dimkey]
    for k, v in f.variables.items():
        if dimkey in v.dimensions:
            f.variables[k] = nrf.variables[k]
    return f


def add_attr(f, attr_def):
    """Add/modify/delete an attribute per attr_def
    'name,var|global,mode,type,value'; mode is a (append), c (create),
    m/o (overwrite), or d (delete)."""
    pieces = attr_def.split(',')
    att_nm, var_nm, mode, att_typ = pieces[:4]
    # value may itself contain commas
    att_val = ','.join(pieces[4:])
    if var_nm == 'global':
        var = f
    else:
        var = f.variables[var_nm]
    if not (att_typ == 'c' and isinstance(att_val, (str, unicode))):
        att_val = np.array(eval(att_val), dtype=att_typ)
    if mode in ('a',):
        att_val = np.append(getattr(var, att_nm, []), att_val)
    if mode in ('a', 'c', 'm', 'o'):
        setattr(var, att_nm, att_val)
    elif mode in ('d',):
        delattr(var, att_nm)
    else:
        raise KeyError('mode must be either a c m o or d')


def convolve_dim(f, convolve_def):
    """Convolve variables along a dimension per convolve_def
    'dim,mode,weight1,weight2,...' using np.convolve."""
    convolve_parts = convolve_def.split(',')
    dimkey = convolve_parts.pop(0)
    mode = convolve_parts.pop(0)
    weights = np.array(convolve_parts, dtype='f')
    outf = PseudoNetCDFFile()
    from PseudoNetCDF.pncgen import Pseudo2NetCDF
    p2p = Pseudo2NetCDF(verbose=0)
    p2p.addGlobalProperties(f, outf)
    p2p.addDimensions(f, outf)
    # the output dimension length depends on the convolution mode
    dim = outf.dimensions[dimkey]
    dim = outf.createDimension(dimkey, len(
        np.convolve(weights, np.arange(len(dim)), mode=mode)))
    dim.setunlimited(f.dimensions[dimkey].isunlimited())
    for vark, var in f.variables.items():
        lconvolve = dimkey in var.dimensions
        p2p.addVariable(f, outf, vark, data=not lconvolve)
        if lconvolve:
            axisi = list(var.dimensions).index(dimkey)
            values = np.apply_along_axis(func1d=lambda x_: np.convolve(
                weights, x_, mode=mode), axis=axisi, arr=var[:])
            if isinstance(var[:], np.ma.MaskedArray):
                values = np.ma.masked_invalid(values)
            outf.variables[vark][:] = values
    return outf


def merge(fs):
    """Merge variables, dimensions, and properties from files fs into a
    single file; the first file wins on conflicts (with a warning when
    same-named variables differ)."""
    outf = getvarpnc(fs[0], None)
    for f in fs[1:]:
        for p in f.ncattrs():
            if p not in outf.ncattrs():
                setattr(outf, p, getattr(f, p))
        for d, v in f.dimensions.items():
            if d not in outf.dimensions:
                nv = outf.createDimension(d, len(v))
                nv.setunlimited(v.isunlimited())
        for k, v in f.variables.items():
            if k in outf.variables:
                if (
                    v.shape != outf.variables[k].shape or
                    not (v[...] == outf.variables[k][...]).all()
                ):
                    warn('%s already in output' % k)
            else:
                propd = dict([(p, getattr(v, p)) for p in v.ncattrs()])
                outf.createVariable(
                    k, v.dtype.char, v.dimensions, values=v, **propd)
    return outf
def stack_files(fs, stackdim, coordkeys=None):
    """
    Create files with dimensions extended by stacking.

    Currently, there is no sanity check...
    """
    f = PseudoNetCDFFile()
    tmpf = fs[0]
    if coordkeys is None:
        coordkeys = tmpf.getCoords()
    dimensions = [f_.dimensions for f_ in fs]
    # shared dims must agree in length across all files
    shareddims = {}
    for dimk, dim in tmpf.dimensions.items():
        if dimk == stackdim:
            continue
        dimlens = map(len, [dims[dimk] for dims in dimensions])
        if all([len(dim) == i for i in dimlens]):
            shareddims[dimk] = len(dim)
    differentdims = [set(dims.keys()).difference(shareddims.keys())
                     for dims in dimensions]
    assert(all([different == set([stackdim])
                for different in differentdims]))
    from PseudoNetCDF.sci_var import Pseudo2NetCDF
    p2p = Pseudo2NetCDF(verbose=0)
    p2p.addDimensions(tmpf, f)
    # the stacked dimension is the sum of the per-file lengths
    f.createDimension(stackdim, sum(
        [len(dims[stackdim]) for dims in dimensions]))
    p2p.addGlobalProperties(tmpf, f)
    for tmpf in fs:
        for varkey, var in tmpf.variables.items():
            if stackdim not in var.dimensions:
                if varkey in f.variables:
                    if varkey not in coordkeys:
                        warn(('Got duplicate variables for %s ' % varkey) +
                             'without stackable dimension; first value ' +
                             'retained')
                else:
                    p2p.addVariable(tmpf, f, varkey, data=True)
            else:
                if varkey not in f.variables.keys():
                    axisi = list(var.dimensions).index(stackdim)
                    values = np.ma.concatenate(
                        [f_.variables[varkey][:] for f_ in fs],
                        axis=axisi)
                    p2p.addVariable(tmpf, f, varkey, data=False)
                    f.variables[varkey][:] = values
    return f
def splitdim(inf, olddim, newdims, newshape):
    """
    Return a copy of inf with dimension olddim split into newdims of
    lengths newshape (product must equal the old dimension length);
    affected variables are reshaped accordingly.
    """
    oldsize = len(inf.dimensions[olddim])
    newsize = np.prod(newshape)
    if newsize != oldsize:
        raise ValueError('New shape, must match old dimension length: ' +
                         '%d %d %s' % (oldsize, newsize, newshape))
    if len(newdims) != len(newshape):
        raise ValueError('Shape and dimensions must match in length')
    from PseudoNetCDF.sci_var import Pseudo2NetCDF
    p2n = Pseudo2NetCDF()
    outf = PseudoNetCDFFile()
    for dk, d in inf.dimensions.items():
        if dk == olddim:
            # replace the single dimension with its factors
            for dk, dl in zip(newdims, newshape):
                outf.createDimension(dk, dl)
        else:
            p2n.addDimension(inf, outf, dk)
    for vk, invar in inf.variables.items():
        if olddim in invar.dimensions:
            outdims = []
            outshape = []
            for dk in invar.dimensions:
                if dk == olddim:
                    outdims.extend(newdims)
                    outshape.extend(newshape)
                else:
                    outdims.append(dk)
                    outshape.append(len(inf.dimensions[dk]))
            outvar = outf.createVariable(vk, invar.dtype.char,
                                         tuple(outdims))
            p2n.addVariableProperties(invar, outvar)
            outvar[:] = invar[:].reshape(*outshape)
        else:
            p2n.addVariable(inf, outf, vk)

    return outf