Source code for qnd.ncf

"""QnD netCDF3 interface."""
from __future__ import absolute_import

import sys
import weakref
from collections import OrderedDict
from warnings import warn

from numpy import (dtype, prod, fromfile, asarray, array, zeros, concatenate,
                   ascontiguousarray, int64)
from numpy.core.defchararray import decode as npdecode, encode as npencode

from .frontend import QGroup
from .generic import opener
from .utils import leading_args

__all__ = ['opennc']

# True under Python 2; selects the compatibility shims defined below.
PY2 = sys.version_info[0] < 3
if not PY2:
    # Python 3 has no basestring; str plays that role in this module.
    basestring = str

    def itemsof(d):  # noqa
        """Return the (key, value) pairs of mapping d."""
        return d.items()
else:
    # Lazy Python 2 equivalents, so loops do not build temporary lists.
    range = xrange  # noqa

    def itemsof(d):  # noqa
        """Return an iterator over the (key, value) pairs of mapping d."""
        return d.iteritems()


def opennc(filename, mode='r', auto=1, **kwargs):
    """Open netCDF-3 file returning a QnD QGroup.

    A netCDF-3 file differs from other self-describing binary file formats
    because no data addresses can be known until every variable to be
    stored is declared.  Therefore, when writing a netCDF-3 file, you
    must declare every variable before you can begin writing anything.

    The qnd API is somewhat at odds with this semantics because it encourages
    you to declare and write each variable in a single step.  The native
    netCDF-3 API forces you to declare everything, then call an `enddef`
    method to complete all variable declarations and permit you to begin
    writing data.  The qnd.ncf backend uses the first call to the ordinary
    qnd `flush` method to emulate the netCDF-3 `enddef` mode switch -- thus
    nothing will be written to the file until the first call to `flush`.
    To minimize the difference between ncf and other qnd backends, if you
    do use the usual qnd declare-and-write idiom, the ncf backend will save
    the variable value in memory until the first `flush` call, which will
    trigger the actual writing of all such saved values.

    Note that closing the file flushes it, so that is also a viable way to
    finish a netCDF-3 file.  Furthermore, when you overwrite any record
    variable in `recording` mode, ncf will implicitly `flush` the file,
    since no new variables can be declared after that.

    Note that if you use the standard QnD API, a copy of every variable
    you write to the file until you begin the second record will be
    kept in memory, which could potentially be a problem.  If you wish
    to declare all variables before writing anything, so that your
    code is aligned with the netCDF API, do something like this::

       f = opennc("myfile??.nc", "w")  # wildcards expand to 00, 01, 02, ...
       # declare non-record variables from in-memory arrays
       f.nrvar1 = nrvar1.dtype, nrvar1.shape
       f.nrvar2 = nrvar2.dtype, nrvar2.shape
       # declare record variables from in-memory arrays
       f.recording(1)
       f.rvar1 = rvar1.dtype, rvar1.shape
       f.rvar2 = rvar2.dtype, rvar2.shape
       # flushing the file is equivalent to netCDF ENDDEF mode switch
       f.flush()
       # now write the current values of all the variables
       f.nrvar1 = nrvar1
       f.nrvar2 = nrvar2
       # writing the record variables writes their values for first record
       f.rvar1 = rvar1
       f.rvar2 = rvar2
       # change values of record variables and write the second record
       f.rvar1 = rvar1
       f.rvar2 = rvar2
       # when you've written all records, close the file
       f.close()

    Parameters
    ----------
    filename : str
       Name of file to open.  See notes below for file family.
    mode : str
       One of 'r' (default, read-only), 'r+' (read-write, must exist),
       'a' (read-write, create if does not exist), 'w' (create, clobber if
       exists), 'w-' (create, fail if exists).
    auto : int
       The initial state of auto-read mode.  If the QGroup handle returned
       by opennc is `f`, then ``f.varname`` reads an array variable, but not
       a subgroup when auto=1, the default.  With auto=0, the variable
       reference reads neither (permitting later partial reads in the case
       of array variables).  With auto=2, a variable reference recursively
       reads subgroups, bringing a whole tree into memory.
    **kwargs
       Other keywords.  The maxsize keyword sets the size of files in a
       family generated in recording==1 mode; a new file will begin when
       the first item in a new record would begin beyond `maxsize`.  The
       default maxsize is 128 MiB (134 MB).  The v64 keyword, if provided
       and true, causes new files to be created using the 64-bit netCDF
       format; the default is to create 32-bit files.  (But a file family
       always uses a single format.)
       The nextaddr_mode keyword can be used to indicate whether the next
       new record in 'a' or 'r+' mode should go into a new file.  The
       default behavior is that it should, which is the pdbf module default;
       this is nextaddr_mode true.  Use nextaddr_mode=0 to continue filling
       the final existing file until maxsize.

    Returns
    -------
    f : QGroup
       A file handle implementing the QnD interface.

    Raises
    ------
    IOError
       If the first file of the family cannot be parsed as netCDF-3.

    Notes
    -----
    The `filename` may be an iterable, one string per file in order.  The
    sequence may extend beyond the files which actually exist for 'r+', 'a',
    'w', or 'w-' modes.

    Alternatively `filename` specifies a family if it contains shell globbing
    wildcard characters.  Existing matching files are sorted first by length,
    then alphabetically (ensuring that 'file100' comes after 'file99', for
    example).  If there is only a single wildcard group, it also serves to
    define a sequence of future family names beyond those currently existing
    for 'r+', 'a', 'w', or 'w-' modes.  A '?' pattern is treated the same as
    a '[0-9]' pattern if all its matches are digits or if the pattern
    matches no existing files.  Similarly, a '*' acts like the minimum number
    of all-digit matches, or three digits if there are no matches.

    """
    maxsize = kwargs.pop('maxsize', 134217728)
    v64 = kwargs.pop('v64', False)
    mode = mode.lower()
    if mode.startswith('a') or mode.startswith('r+'):
        # A false user value (0) maps to 1, the default (absent) maps to 2.
        nextaddr_mode = kwargs.pop('nextaddr_mode', 2) or 1
    else:
        nextaddr_mode = 1
    # Forward the computed value under the documented keyword name.
    kwargs['nextaddr_mode'] = nextaddr_mode
    handle, n = opener(filename, mode, **kwargs)
    root = NCGroup(handle, maxsize, v64)
    for i in range(n):
        try:
            ncparse(handle, root, i)
        except IOError:
            # Something went terribly wrong.  If this is first file, we die.
            name = handle.filename(i)
            if not i:
                raise IOError("Fatal errors opening netCDF file {}"
                              "".format(name))
            handle.open(i-1)
            warn("file family stopped by incompatible {}".format(name))
            # The family ends at the last good file.  Parsing later files
            # would compare them against the incompatible one and leave
            # root.nrecs out of step with the file numbering.
            break
    handle.callbacks(root.flusher, root.initializer)  # may call initializer
    return QGroup(root, auto=auto)


# https://www.unidata.ucar.edu/software/netcdf/docs/
#                              file_format_specifications.html
# All numbers are in XDR (big-endian) format.
#
# header    = magic  numrecs  dim_list  gatt_list  var_list
# magic     = 'C'  'D'  'F'  version
# version   = '\x01' (32-bit offset) | '\x02' (64-bit offset)
# numrecs   = NON_NEG | STREAMING
# dim_list  = ABSENT | 0x00 00 00 0A  NON_NEG  dim*
# gatt_list = att_list
# var_list  = ABSENT | 0x00 00 00 0B  NON_NEG  var*
# att_list  = ABSENT | 0x00 00 00 0C  NON_NEG  attr*
# ABSENT    = 0x00 00 00 00  0x00 00 00 00
# STREAMING = 0xFF FF FF FF
# dim       = name  NON_NEG   (0 length means record dimension)
# name      = NON_NEG  namestring   (0 padded to 4 byte boundary, _.@+-)
# attr      = name  nc_type  NON_NEG  values  (0 padded to 4 byte boundary)
# nc_type   = 1|2|3|4|5|6   (byte|char|short|int|float|double)
# var       = name  NON_NEG  dimid*  att_list  nc_type  vsize  OFFSET
# dimid     = 0-origin index into dim_list
# vsize     = >i4 number of bytes, or 2**32-1 if more than 4GiB
#             write vsize as if padded, but if only 1 record variable of
#             nc_type byte, char, or short, do not use padding
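#             - for record variables, byte size of that one variable within
#               a single record (as if padded); the record size is the sum
#               of the vsize values of all record variables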
# OFFSET    = >i4 for version 1, >i8 for version 2
#
# Default fill values:
# char \x00, byte \x81, short \x80 01, int \x80 00 00 01
# float \x7C F0 00 00, double \x47 9E 00 00 00 00 00 00  =9.969209968386869e36
#
# The netCDF-3 header _almost_ has a simple XDR description; the only
# problem is that an attribute attr definition may have a value which is
# a counted array of short (2 byte integers), which XDR does not support.
# (The 64-bit format requires a hack to represent the offset values, and
# its own XDR specification using that hack.)

def ncparse(handle, root, ifile):
    """Parse the header of file number `ifile` of a family into `root`.

    For the first file (``ifile == 0``) the whole netCDF-3 header is
    decoded and the dimensions, attributes, variables, and layout numbers
    (headsize, recaddr, recsize, v64) are stored in `root`.  For any later
    file only the record count is extracted, after checking that the
    header matches the previous file byte for byte, apart from the record
    count itself.  In both cases the record count of the file is appended
    to ``root.nrecs``.

    Parameters
    ----------
    handle : generic.MultiFile
       The file family; ``handle.open(i)`` returns file number i.
    root : NCGroup
       The group object which receives the parsed metadata.
    ifile : int
       Index of the file in the family.

    Raises
    ------
    IOError
       If the header is malformed, or a later file does not match the
       previous file of the family.
    """
    i4be = _netcdf_stypes[3]
    if ifile:
        # A family is split by records, so it needs record variables.
        # root.nrecs is never empty once the first file has been parsed,
        # hence the record size is what detects their absence.
        if not root.nrecs or not root.recsize:
            raise IOError("first file in apparent family has no record vars")
        f = handle.open(ifile - 1)
        headsize = root.headsize
        f.seek(0)
        static0 = f.read(headsize)
        f = handle.open(ifile)
        magic = f.read(4)
        if magic == static0[:4]:
            nrecs = int(fromfile(f, i4be, 1)[0])
            static1 = f.read(headsize - 8)
        else:
            static1 = nrecs = None
        if static1 != static0[8:]:
            raise IOError("static variables do not match previous file")
        if nrecs == -1:
            # STREAMING: deduce the count from the file size.  Records
            # begin at recaddr (after the non-record variables), exactly
            # as in the first-file computation below.
            f.seek(0, 2)
            nrecs = (f.tell() - root.recaddr) // root.recsize
        root.nrecs.append(nrecs)
        return
    f = handle.open(ifile)
    magic = fromfile(f, 'S4', 1)
    if not magic.size:
        raise IOError("bad magic in netCDF-3 header")
    magic = magic[0]
    version = magic[3:]  # in python3, magic[3] is int(1) != b'\x01'
    # Compare against the two legal values explicitly: a substring test
    # against b'\x01\x02' would also accept an empty version.
    if magic[:3] != b'CDF' or version not in (b'\x01', b'\x02'):
        raise IOError("bad magic in netCDF-3 header")
    v64 = version != b'\x01'
    iobe = dtype('>i8') if v64 else i4be
    nrecs = int(fromfile(f, i4be, 1)[0])  # -1 indicates STREAMING
    tag, count = fromfile(f, i4be, 2)
    if tag != 10 and (count or tag):
        raise IOError("bad dim_list in netCDF-3 header")
    dims, recid = [], None
    while count > 0:
        count -= 1
        name = _get_name(f)
        size = int(fromfile(f, i4be, 1)[0])
        if not size:
            recid = len(dims)  # zero length marks the record dimension
        dims.append((name, size))
    attrs = [(None, _get_attrs(f))]  # global attributes are keyed by None
    tag, count = fromfile(f, i4be, 2)
    if tag != 11 and (count or tag):
        raise IOError("bad var_list in netCDF-3 header")
    variables, recsize, special_case = OrderedDict(), 0, 0
    recaddr = lastaddr = None
    nrecvar = 0
    while count > 0:
        count -= 1
        name = _get_name(f)
        ndim = int(fromfile(f, i4be, 1)[0])
        shape = tuple(fromfile(f, i4be, ndim).astype(int)) if ndim else ()
        attrs.append((name, _get_attrs(f)))
        nctype = int(fromfile(f, i4be, 1)[0])
        if nctype < 1 or nctype > 6:
            raise IOError("bad nc_type (not in 1-6) in netCDF-3 header")
        stype = _netcdf_stypes[nctype - 1]
        fromfile(f, i4be, 1)  # ignore vsize
        offset = int(fromfile(f, iobe, 1)[0])
        # Note: offset is the byte address of the variable in the file
        #       - byte address of first block of a record variable
        if offset < 0:
            raise IOError("bad variable offset in netCDF-3 header")
        # At this point shape holds dimension ids, not lengths.
        unlim = bool(shape) and shape[0] == recid
        if unlim:
            shape = shape[1:]
        try:
            named = [dims[i] for i in shape]
        except IndexError:
            raise IOError("bad dimension index in netCDF-3 header")
        sshape = tuple(nm for nm, _ in named)
        shape = tuple(n for _, n in named)
        item = NCLeaf(root, len(variables), offset, stype, shape, sshape)
        variables[name] = NCList(root, item) if unlim else item
        if unlim:
            nrecvar += 1
            if nrecvar == 1:
                # Remember the unpadded size of the first record variable
                # in case it turns out to be the only one (see below).
                nbytes = stype.itemsize * (prod(shape) if shape else 1)
                if nbytes & 3:
                    special_case = int(nbytes)
            recsize += _measure_item(item)
            if recaddr is None or offset < recaddr:
                recaddr = offset
        elif lastaddr is None or offset >= lastaddr:
            lastaddr = offset + _measure_item(item)
    if nrecvar == 1 and special_case:
        # Implement special rule for byte, char, or short single record
        # variable; such records are not forced to 4 byte boundaries.
        recsize = special_case
    headsize = f.tell()
    if nrecs == -1:
        if recsize:
            # Handle special streaming record count by using file size.
            f.seek(0, 2)
            size = f.tell()
            f.seek(headsize)
            nrecs = (size - recaddr) // recsize
        else:
            nrecs = 0  # no record variables, so there are no records
    # Set the counts only now that nrecs is final, so a STREAMING header
    # does not leave the record variables with a count of -1.
    for item in variables.values():
        if isinstance(item, NCList):
            item.count = nrecs
    root.variables = variables
    root.dims = OrderedDict(dims)
    root.attrs = OrderedDict(attrs)
    root.headsize = headsize
    root.recaddr = recaddr or lastaddr or headsize
    root.recsize = recsize
    root.nrecs.append(nrecs)
    root.v64 = v64


def _get_name(f):
    """Read one counted, zero-padded netCDF-3 name from file f.

    The name is stored as a big-endian 4-byte length followed by that many
    bytes, padded with zeros to a 4-byte boundary.  This is also used to
    read the value of a char attribute, which has the same layout.

    Returns the text as produced by _bytes_as_str, or '' for a zero-length
    name.  Raises IOError for a negative length or a truncated name.
    """
    nchar = int(fromfile(f, '>i4', 1)[0])
    if nchar < 0:
        raise IOError("bad name length in netCDF-3 header")
    if not nchar:
        # Nothing follows a zero count (no padding either).  An empty
        # array cannot be viewed as a string, so return directly; this
        # is what an empty char attribute looks like.
        return ''
    rem = nchar & 3
    ntot = nchar + 4 - rem if rem else nchar
    raw = fromfile(f, 'S1', ntot)
    if raw.size < nchar:
        raise IOError("truncated name in netCDF-3 header")
    name = raw[:nchar].view('S' + str(nchar))
    return _bytes_as_str(name)


def _bytes_as_str(text):
    """Convert bytes (or an array holding bytes) to the native str type.

    An object with a ravel method is reduced to its first element.  A
    non-bytes result is returned unchanged.  Under Python 2, bytes which
    are pure ASCII are returned as they are; otherwise the bytes are
    decoded as UTF-8, falling back to Latin-1 if that fails.
    """
    if hasattr(text, 'ravel'):
        text = text.ravel()[0]
    if not isinstance(text, bytes):
        return text
    if PY2:
        try:
            text.decode('ascii')
        except UnicodeDecodeError:
            pass  # non-ASCII content must become unicode below
        else:
            return text  # plain ASCII is already a native Python 2 str
    try:
        return text.decode('utf8')
    except UnicodeDecodeError:  # ignore, but violates netCDF-3 spec
        return text.decode('latin1')


def _text_as_bytes(text):
    """Return text as bytes, encoding as UTF-8 when it is not bytes already.

    An object with a ravel method is first reduced to its first element.
    """
    if hasattr(text, 'ravel'):
        text = text.ravel()[0]
    if isinstance(text, bytes):
        return text
    return text.encode('utf8')


def _get_attrs(f):
    """Read one netCDF-3 att_list from file f as an OrderedDict.

    Char attributes become text (read with _get_name).  Other attributes
    become arrays in native byte order, or a scalar when there is exactly
    one value.  Raises IOError for a bad list tag or a bad nc_type.
    """
    i4be = _netcdf_stypes[3]
    tag, count = fromfile(f, i4be, 2)
    if tag != 12 and (count or tag):
        raise IOError("bad attr_list in netCDF-3 header")
    attrs = OrderedDict()
    for _ in range(int(count)):
        name = _get_name(f)
        nctype = int(fromfile(f, i4be, 1)[0])
        if not 1 <= nctype <= 6:
            raise IOError("bad nc_type (not in 1-6) in netCDF-3 header")
        if nctype == 2:
            # A char attribute has the same layout as a name.
            attrs[name] = _get_name(f)
            continue
        nvalues = int(fromfile(f, i4be, 1)[0])
        stype = _netcdf_stypes[nctype - 1]
        values = fromfile(f, stype, nvalues)
        npad = -values.nbytes & 3
        if npad:
            fromfile(f, 'u1', npad)  # skip padding to 4 byte boundary
        if values.size == 1:
            values = values[0]
        if not stype.isnative:
            values = values.astype(stype.newbyteorder('='))
        attrs[name] = values
    return attrs


class NCGroup(object):
    """Backend root group of a netCDF-3 file or family of files.

    netCDF-3 has no subgroups, so this one object holds every variable,
    dimension, and attribute, together with the layout numbers needed to
    find data: `headsize` (header length, 0 until the file is defined),
    `recaddr` (address of the first record), `recsize` (bytes per record),
    and `nrecs` (record count of each file in the family).
    """

    def __init__(self, handle, maxsize=134217728, v64=False):
        self.handle = handle  # a generic.MultiFile
        # Ordered containers keep the header layout deterministic on all
        # Python versions, like the ones ncparse stores for existing files.
        self.variables = OrderedDict()
        self.dims = OrderedDict()
        self.attrs = OrderedDict()
        self.headsize = self.recaddr = self.recsize = 0
        self.nrecs = []  # list of record counts in files of family
        self.maxsize = maxsize
        self.v64 = v64
        self.pending = None  # holds pre-flush variable values

    @staticmethod
    def isgroup():
        return 1

    @staticmethod
    def islist():
        return 0

    isleaf = islist

    def root(self):
        return self  # no such thing as directories in netCDF3

    def close(self):
        """Close the underlying file family."""
        self.handle.close()

    def flush(self):
        """Flush the underlying file family."""
        self.handle.flush()

    def __len__(self):
        return len(self.variables)

    def __iter__(self):
        return iter(self.variables)

    def lookup(self, name):
        """Return the item for variable `name`, or None if undeclared."""
        return self.variables.get(name)

    def declare(self, name, dtype, shape, unlim=None):
        """Declare variable `name`, returning its NCLeaf or NCList.

        Dimensions are created on demand with names '_N' for length N;
        a record variable (true `unlim`) also gets the record dimension
        '_0'.  Raises RuntimeError once the header has been written, and
        TypeError for a zero-length dimension or an unsupported dtype.
        """
        if self.headsize:
            raise RuntimeError("netCDF file defined, no more declarations")
        if shape and not all(shape):
            raise TypeError("netCDF does not support 0-length dimensions")
        stype = _get_stype(dtype)
        sshape = tuple('_' + str(s) for s in shape) if shape else ()
        dims, variables = self.dims, self.variables
        if unlim:
            dims.setdefault('_0', 0)
        for s, n in zip(sshape, shape):
            dims.setdefault(s, n)
        # Set offset to unlim for now, will be set in initializer.
        item = NCLeaf(self, len(variables), unlim, stype, shape, sshape)
        if unlim:
            item = NCList(self, item)
        variables[name] = item
        return item

    # qnd.QAttribute uses only __iter__, get, items, __len__, __contains__
    # In PY2, the dict returned here has an inefficient items() method,
    # but it is not worth fixing that here.

    def attget(self, vname):
        """Return the attribute dict of variable `vname` (global if empty)."""
        return self.attrs.get(vname if vname else None)

    def attset(self, vname, aname, dtype, shape, value):
        """Set attribute `aname` of variable `vname` (global if empty).

        The value is converted to a netCDF type in native byte order, or
        to text for a string.  Raises RuntimeError once the header has been
        written, and TypeError for an array of strings, for an array of
        more than one dimension, or for an unsupported dtype.
        """
        if self.headsize:
            raise RuntimeError("netCDF file defined, no setting attributes")
        stype = _get_stype(dtype)
        strtype = _netcdf_stypes[1]
        if stype == strtype:
            if shape:
                raise TypeError("netCDF does not support array of strings "
                                "as an attribute value")
            value = _bytes_as_str(value)
        else:
            value = asarray(value, stype)
            if shape:
                if len(shape) > 1:
                    raise TypeError("netCDF does not support "
                                    "multi-dimensional attribute values")
                if value.shape != shape:
                    value = value.reshape(shape)
            if not stype.isnative:
                value = value.astype(stype.newbyteorder('='))
        if not vname:
            vname = None
        attrs = self.attrs.get(vname)
        if not attrs:
            self.attrs[vname] = attrs = OrderedDict()
        attrs[aname] = value

    def record_delta(self, irec):
        """Compute delta to add to record variable offset to reach irec."""
        handle, nrecs, maxsize = self.handle, self.nrecs, self.maxsize
        if not self.headsize:
            if not nrecs:  # This is first record variable.
                nrecs.append(1)
            if not irec:
                return 0  # First record is being declared, delta unknown.
            # Beginning to write second record may force first flush,
            # freezing the netCDF file structure, an implicit ENDDEF.
            # Effectively, this flush is writing the first record, even
            # though this call has irec==1 and declaring the first variable
            # of the second record.
            self.flusher(self.handle.open(0))
        recsize = self.recsize
        rec0 = array(nrecs).cumsum()
        # searchsorted needs strictly monotonic array
        # However, because of the 0.5 offset and the fact that irec is
        # an integer, this apparently can never cause a problem here
        # (the monotonicity problem only arises if irec matches two
        # consecutive equal values of rec0-0.5, which could happen if
        # some file has no records).
        ifile = (rec0 - 0.5).searchsorted(irec)
        if ifile >= rec0.size:
            if handle.nextaddr:
                # Handle special case of the first record written after a
                # family is opened in 'a' or 'r+' mode.
                maxsize = 0
            # This is a new record.  We check if maxsize has been exceeded,
            # and force a new file in the family to be created if so.
            n = nrecs[-1]
            if n and (self.recaddr + recsize*n >= maxsize):
                f = handle.open(ifile)  # implicit flush during open
                nrecs.append(0)
                self.initializer(f)
                irec -= rec0[-1]
            else:
                ifile -= 1  # add record to last existing file
                if ifile:
                    irec -= rec0[ifile - 1]
            handle.nextaddr = int64(0)  # special case only triggers once
            nrecs[-1] += 1
        elif ifile:
            irec -= rec0[ifile - 1]
        return handle.zero_address(ifile) + recsize * irec

    def flusher(self, f):
        """Write the header on first flush, then update the record count."""
        # The flush method has to serve as ENDDEF for newly created netCDF
        # families, see comments for initializer method below.
        if not self.headsize:
            # Only get here for first file of newly created family.
            self.headsize = 1  # impossible since magic number is 4 bytes
            self.initializer(f)
        # The only metadata that may need to be written is nrecs.
        # The file handle f is the last file in the family.
        if self.nrecs:
            f.seek(4)
            array(self.nrecs[-1], '>i4').tofile(f)

    def initializer(self, f):
        """Write the netCDF-3 header of a new file f of the family.

        Does nothing while the family is still being defined (headsize 0).
        On the first flush (headsize 1, set by flusher) it computes the
        file layout, writes the full header, and writes any pending
        variable values.  For a later file of an already defined family
        it copies the header and non-record variables from the first file.
        """
        # Called indirectly by handle.callbacks during ncopen in "w" mode.
        # This is the only case in which this point is reachable with zero
        # self.headsize, because ncparse would have filled it in in "r" or
        # "r+" mode, and it would have been written for the first file in
        # the family if this is not the first file.
        # For the first file of a newly created netCDF family, we want to
        # wait for an explicit call to flush() to write the first file header,
        # which is the QnD implementation of the ENDDEF call in the netCDF API.
        first_flush = self.headsize == 1  # impossible value set in flusher
        if first_flush:
            self.headsize = 0
        elif not self.headsize:
            # Still defining: wait for the first flush.  A non-zero
            # headsize must fall through so a new family file gets its
            # header copied below.
            return
        # The header starts at address 0; seek so that a repeated call
        # rewrites it instead of appending.
        f.seek(0)
        i4be = _netcdf_stypes[3]
        v64 = bool(self.v64)
        array(b'CDF' + (b'\x02' if v64 else b'\x01')).tofile(f)
        array(0, i4be).tofile(f)
        handle = self.handle
        ifile = handle.current_file()
        if ifile:
            # Just copy header and non-record variables to new file.
            # These occupy bytes 8 up to recaddr of the first file.
            f = handle.open(0)
            f.seek(8)
            value = f.read(int(self.recaddr) - 8)
            f = handle.open(ifile)
            f.seek(8)
            f.write(value)
            return

        # This is first file of family.
        dims, variables, attrs = self.dims, self.variables, self.attrs
        if not dims:
            zeros(2, i4be).tofile(f)
        else:
            array((10, len(dims)), i4be).tofile(f)
            for name, size in itemsof(dims):
                _put_name(f, name)
                array(size, i4be).tofile(f)
        _put_attrs(f, attrs.get(None))
        if not variables:
            zeros(2, i4be).tofile(f)
        else:
            array((11, len(variables)), i4be).tofile(f)
        headsize = f.tell()  # including vars tag and count
        if first_flush:
            # The offsets in the variables array are unknown until the
            # symbol table is written, which makes it hard to write for
            # the first file in a family.  We make a clumsy two passes
            # to compute the length of the var_list if the offsets have
            # not yet been set.
            # add space for name length, ndim, nctype, vsize, and offset
            headsize += (20 + 4*v64) * len(variables)
            nrecs = self.nrecs
            for name, item in itemsof(variables):
                unlim = isinstance(item, NCList)
                if unlim:
                    item = item.leaf
                    if not nrecs:
                        nrecs.append(0)
                ndim = len(item.shape or ()) + unlim  # so shape None okay
                vattrs = attrs.get(name)
                namelen = _put_name(None, name)
                headsize += namelen + 4*ndim + _measure_attrs(vattrs)
            offset = self.headsize = headsize
            # Now we can fill in all the offsets and find recaddr.
            recitems = []
            for name, item in itemsof(variables):
                if isinstance(item, NCList):
                    item = item.leaf
                if item.offset:  # This is unlim, see NCGroup.declare.
                    recitems.append(item)
                    continue
                item.offset = offset
                offset += _measure_item(item)
            self.recaddr = offset
            for item in recitems:
                item.offset = offset
                offset += _measure_item(item)
            self.recsize = offset - self.recaddr
            if len(recitems) == 1:
                # Special rule for a single byte, char, or short record
                # variable: records are not padded to 4 byte boundaries.
                # ncparse applies the same rule when reading.
                item = recitems[0]
                nbytes = item.stype.itemsize
                nbytes *= prod(item.shape) if item.shape else 1
                if nbytes & 3:
                    self.recsize = int(nbytes)
        recaddr = self.recaddr
        dimids = {name: i for i, name in enumerate(dims)}
        recid = None
        for i, (_, n) in enumerate(itemsof(dims)):
            if not n:
                recid = i
                break
        recid = [] if recid is None else [recid]
        iobe = dtype('>i8') if v64 else i4be
        u4be = dtype('>u4')  # holds the 2**32-1 vsize overflow value
        for name, item in itemsof(variables):
            if isinstance(item, NCList):
                item = item.leaf
            stype, offset = item.stype, item.offset
            nctype = _netcdf_stypes.index(stype) + 1
            sshape = item.sshape or ()
            unlim = offset >= recaddr
            _put_name(f, name)
            array(len(sshape) + unlim, i4be).tofile(f)
            sshape = (recid if unlim else []) + [dimids[s] for s in sshape]
            array(sshape, i4be).tofile(f)
            _put_attrs(f, attrs.get(name))
            # vsize is the padded size of this variable alone (for a record
            # variable, its size within one record), capped at 2**32-1.
            vsize = min(_measure_item(item), 0xffffffff)
            array([nctype, vsize], u4be).tofile(f)
            array(offset, iobe).tofile(f)
        headsize = f.tell()
        if headsize != self.headsize:
            raise IOError("netCDF header size mismatch (BUG?)")
        # Header finished, write any pending variables now.
        pending = self.pending
        self.pending = None
        if pending:
            byindex = {}
            for _, item in itemsof(variables):
                if isinstance(item, NCList):
                    item = item.leaf
                byindex[item.index] = item
            for index, value in itemsof(pending):
                byindex[index].write(value)


def _put_name(f, name):
    """Write name to file f as a counted, zero-padded netCDF-3 name.

    With f None nothing is written; instead the padded byte length of the
    name is returned, not including the 4 byte character count.
    Otherwise the return value is None.
    """
    raw = _text_as_bytes(name)
    nchar = len(raw)
    npad = -nchar & 3  # zero bytes needed to reach a 4 byte boundary
    if f is None:
        return nchar + npad
    array(nchar, _netcdf_stypes[3]).tofile(f)
    f.write(raw + b'\0' * npad)
    return None


def _put_attrs(f, attrs):
    """Write the mapping attrs to file f as a netCDF-3 att_list.

    An empty or None attrs is written as ABSENT (two zero words).  Text
    values are written as char attributes; anything else is converted
    with asarray and written as byte, short, int, float, or double
    according to its dtype kind and item size.  Every value is padded
    with zero bytes to a 4 byte boundary, which is what _measure_attrs
    assumes when the header size is computed.
    """
    i4be = _netcdf_stypes[3]
    if not attrs:
        zeros(2, i4be).tofile(f)
        return
    array((12, len(attrs)), i4be).tofile(f)
    for name, value in itemsof(attrs):
        if isinstance(value, basestring):
            nctype = 2
            text = _text_as_bytes(value)
            n = nbytes = len(text)
            value = None
        else:
            # asarray is the numpy function; values have no such method.
            value = asarray(value)
            vtype = value.dtype
            size = vtype.itemsize
            if vtype.kind == 'f':
                nctype = 5 + (size == 8)
            elif size == 1:
                nctype = 1
            else:
                nctype = 3 + (size == 4)
            stype = _netcdf_stypes[nctype - 1]
            if vtype != stype:
                value = value.astype(stype)
            n = value.size
            nbytes = value.nbytes
        _put_name(f, name)
        array((nctype, n), i4be).tofile(f)
        if value is None:
            # Write the bytes directly: array(b'') has a 1 byte item size
            # and would emit a stray byte for an empty string.
            f.write(text)
        else:
            value.tofile(f)
        npad = -nbytes & 3
        if npad:
            # Pad every type, not only short, to the 4 byte boundary.
            f.write(b'\0' * npad)


def _measure_attrs(attrs):
    """Return the number of bytes _put_attrs writes for the mapping attrs.

    The list itself takes 8 bytes (tag and count).  Each attribute adds
    12 bytes (name length, nctype, value count) plus its name and its
    value, each padded to a 4 byte boundary.
    """
    size = 8
    if attrs:
        for name, value in itemsof(attrs):
            size += 12  # name length, nctype, value count (4 bytes each)
            size += ((len(_text_as_bytes(name)) + 3) >> 2) << 2
            if isinstance(value, basestring):
                size += len(_text_as_bytes(value))
            else:
                # asarray is the numpy function; values have no such method.
                size += asarray(value).nbytes
            size = ((size + 3) >> 2) << 2
    return size


def _measure_item(item):
    """Return the byte size of item rounded up to a multiple of 4.

    The size is the item size of item.stype times the product of
    item.shape; an empty or None shape counts as a single element.
    """
    dims = item.shape
    count = prod(dims) if dims else 1
    return (item.stype.itemsize * count + 3) & ~3


def _get_stype(dtype):
    """Return the netCDF-3 storage dtype used for the numpy dtype given.

    Booleans and integers of 1, 2, 4, or 8 bytes map to byte, short, int,
    and int respectively.  Floats of 2 or 4 bytes map to float, and of 8,
    12, or 16 bytes to double.  Byte and unicode strings map to char.
    Anything else, including None, dict, list, and object, raises
    TypeError.
    """
    # Beware the numpy (1.16.4) misfeature that dtype('f8') == None is
    # true, hence the explicit identity test for None.
    if dtype is None or dtype in (dict, list, object):
        kind = 'X'
    else:
        kind = dtype.kind
    index = None  # position in _netcdf_stypes, None when unsupported
    if kind in 'bui':
        index = {1: 0, 2: 2, 4: 3, 8: 3}.get(dtype.itemsize)
    elif kind == 'f':
        index = {2: 4, 4: 4, 8: 5, 12: 5, 16: 5}.get(dtype.itemsize)
    elif kind in 'SU':
        index = 1
    if index is None:
        raise TypeError("netCDF-3 does not support this dtype")
    return _netcdf_stypes[index]


# Storage dtypes indexed by nc_type - 1, that is
# byte, char, short, int, float, double.
_netcdf_stypes = list(map(dtype, ('i1', 'S1', '>i2', '>i4', '>f4', '>f8')))


class NCLeaf(object):
    __slots__ = 'parent', 'index', 'offset', 'stype', 'shape', 'sshape'

    def __init__(self, parent, index, offset, stype, shape, sshape, _wrp=None):
        self.parent = parent if _wrp else weakref.ref(parent)
        self.index = index
        self.offset = offset
        self.stype = stype
        self.shape = shape
        self.sshape = sshape

    @staticmethod
    def isleaf():
        return 1

    @staticmethod
    def isgroup():
        return 0

    islist = isgroup

    def shift_by(self, delta):
        state = [getattr(self, nm) for nm in self.__slots__]
        state[2] += delta
        return NCLeaf(*state, _wrp=1)

    def root(self):
        return self.parent()

    def _dtype(self):
        dtype = self.stype
        return dtype if dtype.isnative else dtype.newbyteorder('=')

    def query(self):
        # return dtype, shape, sshape
        shape, sshape = self.shape or (), self.sshape
        return self._dtype(), shape, sshape if sshape else shape

    def read(self, args=()):
        parent = self.parent()
        if not parent.headsize:
            raise RuntimeError("cannot read from netCDF file in 'w' mode"
                               " before first flush")
        stype, shape = self.stype, self.shape
        args, shape, offset = leading_args(args, shape)
        f = parent.handle.seek(self.offset + stype.itemsize * offset)
        size = prod(shape) if shape else 1
        value = fromfile(f, stype, size).reshape(shape)[args]
        if not stype.isnative:
            value = value.astype(stype.newbyteorder('='))
        if stype == _netcdf_stypes[1]:
            # Present this as a str or array of str.
            # Note that final netCDF dimension is really length of string.
            shape = value.shape
            if shape:
                shape, strlen = shape[:-1], shape[-1]
                value = value.view('S' + str(strlen)).reshape(shape)
            if PY2:
                try:
                    npdecode(value, 'ascii')
                    need_unicode = False
                except UnicodeDecodeError:
                    need_unicode = True
            else:
                need_unicode = True
            if need_unicode:
                try:
                    value = npdecode(value, 'utf8')
                except UnicodeDecodeError:
                    value = npdecode(value, 'latin1')
            if not shape:
                value = value[()]
        return value

    def write(self, value, args=()):
        parent = self.parent()
        if not parent.headsize:
            if args:
                raise IndexError("no partial writes during declaration")
            pending = parent.pending
            if pending is None:
                pending = parent.pending = {}
            pending[self.index] = value
            return
        offset, stype, shape = self.offset, self.stype, self.shape
        args, shape, off = leading_args(args, shape)
        if off:
            offset += stype.itemsize * off
        value = asarray(value)
        kind = value.dtype.kind
        if kind in 'SU':
            if kind == 'U':
                value = npencode(value, 'utf8')
            shape = value.shape
            value = value.reshape(shape + (1,)).view('S1')
        f = parent.handle.seek(offset)
        if args:
            # Must do read-modify-write for potentially non-contiguous write.
            addr = f.tell()
            v = fromfile(f, stype, prod(shape) if shape else 1).reshape(shape)
            v[args] = value
            value = v
            f.seek(addr)
        else:
            value = ascontiguousarray(value, stype)
            if value.shape != shape:
                # Avoid the recent (numpy 1.10) broadcast_to function.
                v = zeros(shape, stype)
                v[()] = value
                value = v
        value.tofile(f)


class NCList(object):
    """Record variable: a list of records sharing one NCLeaf template.

    The `leaf` describes the first record; any other record is that leaf
    shifted by the delta which the parent NCGroup computes.
    """
    __slots__ = 'parent', 'leaf', 'count'

    def __init__(self, parent, leaf):
        self.parent = weakref.ref(parent)
        self.leaf = leaf
        # Number of records declared so far, which tells declare when a
        # new record is being created.
        self.count = 0

    @staticmethod
    def islist():
        return 1

    @staticmethod
    def isgroup():
        return 0

    isleaf = isgroup

    def root(self):
        return self.parent()

    # len, iter, index, declare are list methods called by QList

    def __len__(self):
        """Return the total number of records in all files of the family."""
        group = self.parent()
        return sum(group.nrecs)

    def __iter__(self):
        """Yield the leaf of each record in order."""
        for irec in range(len(self)):
            yield self.index(irec)

    def index(self, ndx):
        """Return the leaf for record ndx, or None when out of range."""
        total = len(self)
        if ndx < 0:
            ndx += total  # negative index counts back from the end
        if not 0 <= ndx < total:
            return None  # out of range, let caller raise any exception
        shift = self.parent().record_delta(ndx)
        return self.leaf.shift_by(shift)

    def declare(self, dtype, shape):
        """Return the leaf for the next new record of this variable."""
        # dtype and shape are ignored here; conformability with the
        # NCLeaf dtype and shape will be enforced during NCLeaf.write.
        group = self.parent()
        shift = group.record_delta(self.count)
        self.count += 1  # nrecs in NCGroup incremented in record_delta
        return self.leaf.shift_by(shift)
Source code for qnd.ncf

qnd

Navigation

Related Topics