# Source code for PseudoNetCDF.noaafiles._cdump

import numpy as np
from PseudoNetCDF import PseudoNetCDFFile
from datetime import datetime
from .arltime import arl2time

strptime = datetime.strptime

# Header of a packed (sparse) concentration record: Fortran record byte
# count, CHAR*4 pollutant id, INT*4 output level, INT*4 count of non-zero
# elements that follow.  All fields are big-endian.
_packhdrfmt = np.dtype('>i,>S4,>i,>i')

# One packed element: INT*2 I (longitude) index, INT*2 J (latitude) index,
# REAL*4 concentration at (I, J).  Big-endian, per the cdump Record #8 spec.
_ijcdt = np.dtype(dict(names='IJC',
                       formats='>i2,>i2,>f'.split(',')))


class arlconcdump(PseudoNetCDFFile):
    """
    Reader for HYSPLIT ARL concentration dump (cdump) binary files.

    Parses the Fortran unformatted records (model header, release
    locations, grid definition, vertical levels, pollutant list, and
    time-looped concentration records) and exposes the contents as
    PseudoNetCDF dimensions and variables.
    """

    @classmethod
    def isMine(cls, *args, **kwds):
        """
        Return True if the arguments can be opened as a cdump file.

        Tries a metadata-only open and reports success/failure; any
        exception (bad magic, truncated file, missing path) means False.
        """
        kwds.pop('metaonly', None)
        try:
            cls(*args, metaonly=True, **kwds)
            return True
        except Exception:
            return False

    def __init__(self, path, metaonly=False, vector=False):
        """
        Parameters
        ----------
        path : str
            Path to a HYSPLIT concentration dump file.
        metaonly : bool
            If True, read only header records 1-5 and skip the data
            records (used by isMine for cheap format detection).
        vector : bool
            If True, store data as a sparse list of points (dimension
            'points') instead of gridded time/layer/lat/lon arrays.
            Only valid for single-pollutant, packed files.
        """
        self._infile = open(path, 'rb')
        self._vector = vector
        self._infile.seek(0, 0)
        self._rec12345()
        if not metaonly:
            self._datarec()
            # Cell centers: lower-left corner plus half a cell spacing.
            lat = self.createVariable(
                'latitude', 'f', ('latitude',), units='degrees_north',
                values=(self.LLCRNR_LAT + self.DELTA_LAT / 2
                        + np.arange(self.NLATS) * self.DELTA_LAT)
            )
            lon = self.createVariable(
                'longitude', 'f', ('longitude',), units='degrees_east',
                values=(self.LLCRNR_LON + self.DELTA_LON / 2
                        + np.arange(self.NLONS) * self.DELTA_LON)
            )
            self.createVariable(
                'latitude_bounds', 'f', ('latitude', 'nv'),
                units='degrees_north',
                values=np.array([lat - self.DELTA_LAT / 2,
                                 lat + self.DELTA_LAT / 2]).T
            )
            self.createVariable(
                'longitude_bounds', 'f', ('longitude', 'nv'),
                units='degrees_east',
                values=np.array([lon - self.DELTA_LON / 2,
                                 lon + self.DELTA_LON / 2]).T
            )
            if self._vector:
                # Re-express coordinate variables along the sparse
                # 'points' dimension using the stored T/K/J/I indices.
                tempf = self.subsetVariables(
                    ['latitude', 'latitude_bounds', 'longitude',
                     'longitude_bounds', 'time', 'time_bounds', 'layer'])\
                    .sliceDimensions(latitude=self.variables['J'],
                                     longitude=self.variables['I'],
                                     time=self.variables['T'],
                                     layer=self.variables['K'])\
                    .renameDimensions(latitude='points',
                                      longitude='points',
                                      time='points', layer='points')
                for key in list(tempf.variables):
                    self.variables[key] = tempf.variables[key]

    def _rec12345(self):
        """
        Read header records 1-5 and populate file properties,
        the 'starts' dimension/variables, and the 'layer' variable.

        Record #1
        CHAR*4 Meteorological MODEL Identification
        INT*4 Meteorological file starting time
              (YEAR, MONTH, DAY, HOUR, FORECAST-HOUR)
        INT*4 NUMBER of starting locations
        INT*4 Concentration packing flag (0=no 1=yes)
        """
        infile = self._infile
        rec1 = np.fromfile(
            infile, dtype='>i,>S4,>i,>i,>i,>i,>i,>i,>i,>i', count=1)[0]
        # Fortran unformatted records are bracketed by equal byte counts.
        assert (rec1['f0'] == rec1['f9'])
        nloc = self.NSTARTLOCS = rec1['f7']
        # NOTE: a dead no-op comparison `rec1['f8'] == 1` was removed here;
        # the packing flag is stored below as self.PACKED.
        self.METMODEL = rec1['f1'].decode()
        self.MET_YEAR = rec1['f2']
        self.MET_MONTH = rec1['f3']
        self.MET_DAY = rec1['f4']
        self.MET_HOUR = rec1['f5']
        self.MET_FORECAST_HOUR = rec1['f6']
        self.STARTING_LOCATIONS = rec1['f7']
        self.PACKED = rec1['f8']
        """
        Record #2 Loop to record: Number of starting locations
        INT*4 Release starting time (YEAR, MONTH, DAY, HOUR)
        REAL*4 Starting location and height (LATITUDE, LONGITUDE, METERS)
        INT*4 Release starting time (MINUTES)
        """
        rec2 = np.fromfile(infile, dtype='>i,>4i,>3f,>i,>i', count=nloc)
        assert ((rec2['f0'] == rec2['f4']).all())
        self.createDimension('starts', nloc)
        sy = self.createVariable('START_YEAR', 'i', ('starts',),
                                 units='year')
        sy[:] = rec2['f1'][:, 0]
        sm = self.createVariable('START_MONTH', 'i', ('starts',),
                                 units='month')
        sm[:] = rec2['f1'][:, 1]
        sd = self.createVariable('START_DAY', 'i', ('starts',),
                                 units='day')
        sd[:] = rec2['f1'][:, 2]
        sh = self.createVariable('START_HOUR', 'i', ('starts',),
                                 units='hour')
        sh[:] = rec2['f1'][:, 3]
        slat = self.createVariable('START_LAT', 'f', ('starts',),
                                   units='degrees_north')
        slat[:] = rec2['f2'][:, 0]
        slon = self.createVariable('START_LON', 'f', ('starts',),
                                   units='degrees_east')
        slon[:] = rec2['f2'][:, 1]
        salt = self.createVariable('START_ALT', 'f', ('starts',),
                                   units='meters')
        salt[:] = rec2['f2'][:, 2]
        """
        Record #3
        INT*4 Number of (LATITUDE-POINTS, LONGITUDE-POINTS)
        REAL*4 Grid spacing (DELTA-LATITUDE,DELTA-LONGITUDE)
        REAL*4 Grid lower left corner (LATITUDE, LONGITUDE)
        """
        rec3 = np.fromfile(infile, dtype='>i,>2i,>2f,>2f,>i', count=1)[0]
        assert (rec3['f0'] == rec3['f4'])
        self.NLATS = rec3['f1'][0]
        self.NLONS = rec3['f1'][1]
        self.DELTA_LAT = rec3['f2'][0]
        self.DELTA_LON = rec3['f2'][1]
        self.LLCRNR_LAT = rec3['f3'][0]
        self.LLCRNR_LON = rec3['f3'][1]
        """
        Record #4
        INT*4 NUMBER of vertical levels in concentration grid
        INT*4 HEIGHT of each level (meters above ground)
        """
        # Peek at the level count, rewind 8 bytes, then reread the full
        # variable-length record with the now-known element count.
        tmp = np.fromfile(infile, dtype='>i,>i', count=1)[0]
        nlays = self.NLAYS = tmp['f1']
        infile.seek(-8, 1)
        rec4 = np.fromfile(
            infile, dtype='>i,>i,>{}i,>i'.format(nlays), count=1)[0]
        assert (rec4['f0'] == rec4['f3'])
        self.createDimension('layer', nlays)
        var = self.createVariable('layer', 'i', ('layer',))
        var.units = 'meters agl'
        var[:] = rec4['f2']
        """
        Record #5
        INT*4 NUMBER of different pollutants in grid
        CHAR*4 Identification STRING for each pollutant
        """
        tmp = np.fromfile(infile, dtype='>i,>i', count=1)
        npols = self.NPOLS = tmp['f1'][0]
        if npols > 1 and self._vector:
            raise IOError('The vector option and multipollutant files ' +
                          'are not compatible')
        infile.seek(-8, 1)
        rec5 = np.fromfile(infile, dtype='>i,>i,>({},)S4,>i'.format(
            npols), count=1).squeeze()
        assert (rec5['f0'] == rec5['f3'])
        self.POLLUTANTS = b','.join(rec5['f2']).decode()

    def _datarec(self):
        """
        Read the time-looped data records (6-8) until end of file and
        create the time/layer/latitude/longitude (or points) dimensions
        plus one variable per pollutant and the time/time_bounds
        variables.
        """
        infile = self._infile
        nlats = self.NLATS
        nlons = self.NLONS
        nlays = self.NLAYS
        npols = self.NPOLS
        pack = self.PACKED == 1
        # Determine total file size so the while loop below can detect EOF.
        now = infile.tell()
        infile.seek(0, 2)
        total = infile.tell()
        infile.seek(now, 0)
        thdr = np.dtype('>i,>6i,>i')
        nopackdfmt = '>({},{})f'.format(nlats, nlons)
        nopackfmt = np.dtype(
            dict(
                names=['B', 'POL', 'LAY', 'data', 'E'],
                formats=['>i', '>S4', '>i', nopackdfmt, '>i']
            )
        )
        outs = []
        pols = []
        lays = []
        starts = []
        stops = []
        ti = 0
        while infile.tell() != total:
            """
            Record #6 Loop to record: Number of output times
            INT*4 Sample start (YEAR MONTH DAY HOUR MINUTE FORECAST)
            """
            start = np.fromfile(infile, dtype=thdr, count=1)
            starts.append(start['f1'][0])
            """
            Record #7 Loop to record: Number of output times
            INT*4 Sample stop (YEAR MONTH DAY HOUR MINUTE FORECAST)
            """
            stop = np.fromfile(infile, dtype=thdr, count=1)
            stops.append(stop['f1'][0])
            """
            Record #8 Loop to record: Number levels, Number of pollutant
            types
            CHAR*4 Pollutant type identification STRING
            INT*4 Output LEVEL (meters) of this record
            No Packing (all elements)
            REAL*4 Concentration output ARRAY
            Packing (only non-zero elements)
            INT*4 Loop non-zero elements
            INT*2 First (I) index value
            INT*2 Second (J) index value
            REAL*4 Concentration at (I,J)
            """
            if pack:
                for pi in range(npols):
                    for li in range(nlays):
                        tmp = np.fromfile(infile, _packhdrfmt, count=1)
                        myp = tmp['f1'][0]
                        myl = tmp['f2'][0]
                        pols.append(myp)
                        lays.append(myl)
                        myc = tmp['f3'][0]
                        tmp = np.fromfile(
                            infile,
                            np.dtype([('data', _ijcdt, myc),
                                      ('f1', '>i', 1)]),
                            count=1)
                        tdata = tmp['data'][0]
                        # File indices are 1-based Fortran; convert.
                        Jidx = tdata['J'] - 1
                        Iidx = tdata['I'] - 1
                        if self._vector:
                            if myc > 0:
                                zl = np.array(np.ones_like(Jidx), ndmin=1)
                                outs.append([ti * zl, pi * zl, li * zl,
                                             Jidx, Iidx, tdata['C']])
                        else:
                            outs.append([ti, pi, li, Jidx, Iidx,
                                         tdata['C']])
            else:
                npblock = np.fromfile(
                    infile, dtype=nopackfmt, count=npols * nlays)
                c = npblock['data']
                outs.append(c.reshape(npols, nlays, nlats, nlons))
            ti += 1
        ntimes = ti
        if isinstance(outs[0], list):
            if self._vector:
                # Rows are (time, pollutant, layer, J, I, conc).
                datablock = np.concatenate(
                    [
                        np.array(out, ndmin=2, dtype='f').reshape(6, -1)
                        for out in outs
                    ], axis=1
                ).T
                npoints = datablock.shape[0]
            else:
                # Scatter the sparse packed values into a dense array.
                datablock = np.zeros((ntimes, npols, nlays, nlats, nlons),
                                     dtype='f')
                for ti, pi, li, ji, ii, v in outs:
                    datablock[ti, pi, li, ji, ii] = v
        else:
            datablock = np.stack(outs, axis=0)
        # NOTE(review): for unpacked files pols/lays stay empty, so these
        # reshapes only succeed for packed files — confirm with an
        # unpacked sample before relying on PACKED == 0 support.
        pols = np.array(pols).reshape(ntimes, npols, nlays)
        lays = np.array(lays).reshape(ntimes, npols, nlays)
        self.createDimension('time', ntimes)
        if self._vector:
            self.createDimension('points', npoints)
        self.createDimension('layer', nlays)
        self.createDimension('latitude', nlats)
        self.createDimension('longitude', nlons)
        self.createDimension('nv', 2)
        # Layer heights must not vary by pollutant, nor pollutant by layer.
        assert ((lays[:, [0], :] == lays[:, :, :]).all())
        assert ((pols[:, :, [0]] == pols[:, :, :]).all())
        if self._vector:
            poldims = ('points',)
            # Column layout of datablock: 0=T, 1=P, 2=K, 3=J, 4=I, 5=conc.
            for key, idx in [('T', 0), ('J', -3), ('I', -2), ('K', -4)]:
                self.createVariable(
                    key, 'i', poldims, units='0-based index',
                    values=datablock[:, idx].astype('i')
                )
        else:
            poldims = ('time', 'layer', 'latitude', 'longitude')
        for pi, pol in enumerate(pols[0, :, 0]):
            if self._vector:
                poldata = datablock[:, -1][datablock[:, 1] == pi]
            else:
                poldata = datablock[:, pi]
            var = self.createVariable(pol.decode(), 'f', poldims,
                                      values=poldata)
            var.units = 'arbitrary'
            var.description = pol.decode()
        time = self.createVariable(
            'time', 'd', ('time',),
            units='seconds since 1970-01-01 00:00:00+0000'
        )
        time_bounds = self.createVariable(
            'time_bounds', 'd', ('time', 'nv'),
            units='seconds since 1970-01-01 00:00:00+0000'
        )
        rdate = strptime('1970-01-01 00:00:00+0000', '%Y-%m-%d %H:%M:%S%z')
        sdt = np.array([(arl2time(YY, MM, DD, HH, mm)
                         - rdate).total_seconds()
                        for YY, MM, DD, HH, mm, FF in starts])
        edt = np.array([(arl2time(YY, MM, DD, HH, mm)
                         - rdate).total_seconds()
                        for YY, MM, DD, HH, mm, FF in stops])
        time_bounds[:, 0] = sdt
        time_bounds[:, 1] = edt
        # Report time as the midpoint of each sampling interval.
        time[:] = time_bounds.mean(1)
if __name__ == '__main__':
    # Manual smoke test against a local HYSPLIT concentration dump file.
    print(arlconcdump.isMine('cdump24'))
    f = arlconcdump('cdump24')