"""QnD netCDF3 interface."""
from __future__ import absolute_import
import sys
import weakref
from collections import OrderedDict
from warnings import warn
from numpy import (dtype, prod, fromfile, asarray, array, zeros, concatenate,
ascontiguousarray, int64)
from numpy.core.defchararray import decode as npdecode, encode as npencode
from .frontend import QGroup
from .generic import opener
from .utils import leading_args
__all__ = ['opennc']
# Python 2/3 compatibility shims used throughout this module.
PY2 = sys.version_info[0] < 3
if not PY2:
    # Python 3 has no basestring; alias it so isinstance checks work.
    basestring = str

    def itemsof(d):
        """Return an iterable over the (key, value) pairs of mapping d."""
        return d.items()  # noqa
else:
    range = xrange  # noqa

    def itemsof(d):
        """Return an iterator over the (key, value) pairs of mapping d."""
        return d.iteritems()  # noqa
def opennc(filename, mode='r', auto=1, **kwargs):
    """Open netCDF-3 file returning a QnD QGroup.

    A netCDF-3 file differs from other self-describing binary file formats
    because no data addresses can be known until every variable to be
    stored is declared.  Therefore, when writing a netCDF-3 file, you
    must declare every variable before you can begin writing anything.

    The qnd API is somewhat at odds with this semantics because it encourages
    you to declare and write each variable in a single step.  The native
    netCDF-3 API forces you to declare everything, then call an `enddef`
    method to complete all variable declarations and permit you to begin
    writing data.  The qnd.ncf backend uses the first call to the ordinary
    qnd `flush` method to emulate the netCDF-3 `enddef` mode switch -- thus
    nothing will be written to the file until the first call to `flush`.
    To minimize the difference between ncf and other qnd backends, if you
    do use the usual qnd declare-and-write idiom, the ncf backend will save
    the variable value in memory until the first `flush` call, which will
    trigger the actual writing of all such saved values.

    Note that closing the file flushes it, so that is also a viable way to
    finish a netCDF-3 file.  Furthermore, when you overwrite any record
    variable in `recording` mode, ncf will implicitly `flush` the file,
    since no new variables can be declared after that.

    Note that if you use the standard QnD API, a copy of every variable
    you write to the file until you begin the second record will be
    kept in memory, which could potentially be a problem.  If you wish
    to declare all variables before writing anything, so that your
    code is aligned with the netCDF API, do something like this::

        f = opennc("myfile??.nc", "w")  # wildcards expand to 00, 01, 02, ...
        # declare non-record variables from in-memory arrays
        f.nrvar1 = nrvar1.dtype, nrvar1.shape
        f.nrvar2 = nrvar2.dtype, nrvar2.shape
        # declare record variables from in-memory arrays
        f.recording(1)
        f.rvar1 = rvar1.dtype, rvar1.shape
        f.rvar2 = rvar2.dtype, rvar2.shape
        # flushing the file is equivalent to netCDF ENDDEF mode switch
        f.flush()
        # now write the current values of all the variables
        f.nrvar1 = nrvar1
        f.nrvar2 = nrvar2
        # writing the record variables writes their values for first record
        f.rvar1 = rvar1
        f.rvar2 = rvar2
        # change values of record variables and write the second record
        f.rvar1 = rvar1
        f.rvar2 = rvar2
        # when you've written all records, close the file
        f.close()

    Parameters
    ----------
    filename : str
        Name of file to open.  See notes below for file family.
    mode : str
        One of 'r' (default, read-only), 'r+' (read-write, must exist),
        'a' (read-write, create if does not exist), 'w' (create, clobber if
        exists), 'w-' (create, fail if exists).
    auto : int
        The initial state of auto-read mode.  If the QGroup handle returned
        by opennc is `f`, then ``f.varname`` reads an array variable, but not
        a subgroup when auto=1, the default.  With auto=0, the variable
        reference reads neither (permitting later partial reads in the case
        of array variables).  With auto=2, a variable reference recursively
        reads subgroups, bringing a whole tree into memory.
    **kwargs
        Other keywords.  The maxsize keyword sets the size of files in a
        family generated in recording==1 mode; a new file will begin when
        the first item in a new record would begin beyond `maxsize`.  The
        default maxsize is 128 MiB (134 MB).  The v64 keyword, if provided
        and true, causes new files to be created using the 64-bit netCDF
        format; the default is to create 32-bit files.  (But a file family
        always uses a single format.)
        The nextaddr_mode keyword can be used to indicate whether the next
        new record in 'a' or 'r+' mode should go into a new file.  The
        default behavior is that it should, which is the pdbf module default;
        this is nextaddr_mode true.  Use nextaddr_mode=0 to continue filling
        the final existing file until maxsize.

    Returns
    -------
    f : QGroup
        A file handle implementing the QnD interface.

    Raises
    ------
    IOError
        If the first file of the family cannot be parsed as netCDF-3.

    Notes
    -----
    The `filename` may be an iterable, one string per file in order.  The
    sequence may extend beyond the files which actually exist for 'r+', 'a',
    'w', or 'w-' modes.

    Alternatively `filename` specifies a family if it contains shell globbing
    wildcard characters.  Existing matching files are sorted first by length,
    then alphabetically (ensuring that 'file100' comes after 'file99', for
    example).  If there is only a single wildcard group, it also serves to
    define a sequence of future family names beyond those currently existing
    for 'r+', 'a', 'w', or 'w-' modes.  A '?' pattern is treated the same as
    a '[0-9]' pattern if all its matches are digits or if the pattern
    matches no existing files.  Similarly, a '*' acts like the minimum number
    of all-digit matches, or three digits if there are no matches.

    """
    maxsize = kwargs.pop('maxsize', 134217728)
    v64 = kwargs.pop('v64', False)
    mode = mode.lower()
    if mode.startswith('a') or mode.startswith('r+'):
        # A false nextaddr_mode (e.g. 0) maps to 1, the default is 2.
        nextaddr_mode = kwargs.pop('nextaddr_mode', 2) or 1
    else:
        nextaddr_mode = 1
    # Forward the resolved value under the same keyword name it was
    # popped from (previously misspelled, so the setting was lost).
    kwargs['nextaddr_mode'] = nextaddr_mode
    handle, n = opener(filename, mode, **kwargs)
    root = NCGroup(handle, maxsize, v64)
    for i in range(n):
        try:
            ncparse(handle, root, i)
        except IOError:
            # Something went terribly wrong.  If this is first file, we die.
            name = handle.filename(i)
            if not i:
                raise IOError("Fatal errors opening netCDF file {}"
                              "".format(name))
            handle.open(i-1)
            warn("file family stopped by incompatible {}".format(name))
            # The family ends at the previous file: parsing later files
            # would compare them against the incompatible one and record
            # their counts under the wrong file index.
            break
    handle.callbacks(root.flusher, root.initializer)  # may call initializer
    return QGroup(root, auto=auto)
# https://www.unidata.ucar.edu/software/netcdf/docs/
# file_format_specifications.html
# All numbers are in XDR (big-endian) format.
#
# header = magic numrecs dim_list gatt_list var_list
# magic = 'C' 'D' 'F' version
# version = '\x01' (32-bit offset) | '\x02' (64-bit offset)
# numrecs = NON_NEG | STREAMING
# dim_list = ABSENT | 0x00 00 00 0A NON_NEG dim*
# gatt_list = att_list
# var_list = ABSENT | 0x00 00 00 0B NON_NEG var*
# att_list = ABSENT | 0x00 00 00 0C NON_NEG attr*
# ABSENT = 0x00 00 00 00 0x00 00 00 00
# STREAMING = 0xFF FF FF FF
# dim = name NON_NEG (0 length means record dimension)
# name = NON_NEG namestring (0 padded to 4 byte boundary, _.@+-)
# attr = name nc_type NON_NEG values (0 padded to 4 byte boundary)
# nc_type = 1|2|3|4|5|6 (byte|char|short|int|float|double)
# var = name NON_NEG dimid* att_list nc_type vsize OFFSET
# dimid = 0-origin index into dim_list
# vsize = >i4 number of bytes, or 2**32-1 if more than 4GiB
# write vsize as if padded, but if only 1 record variable of
# nc_type byte, char, or short, do not use padding
# - for record variables, byte size of entire record (as if padded)
# OFFSET = >i4 for version 1, >i8 for version 2
#
# Default fill values:
# char \x00, byte \x81, short \x80 01, int \x80 00 00 01
# float \x7C F0 00 00, double \x47 9E 00 00 00 00 00 00 =9.969209968386869e36
#
# The netCDF-3 header _almost_ has a simple XDR description; the only
# problem is that an attribute attr definition may have a value which is
# a counted array of short (2 byte integers), which XDR does not support.
# (The 64-bit format requires a hack to represent the offset values, and
# its own XDR specification using that hack.)
def ncparse(handle, root, ifile):
    """Parse the header of file number `ifile` of a netCDF-3 family.

    For the first file (``ifile == 0``) the complete header is decoded and
    the dimension, attribute, and variable tables of `root` are filled in,
    together with `headsize`, `recaddr`, `recsize`, and `v64`.  For any
    later file, the header is only compared byte-for-byte (apart from the
    record count) with the header of the previous file.  In both cases
    the record count of the file is appended to ``root.nrecs``.

    Parameters
    ----------
    handle : object
        Multi-file handle providing ``open(i)``, which returns the file
        object for family member `i`.
    root : NCGroup
        The group to be filled in.
    ifile : int
        Index of the file in the family.

    Raises
    ------
    IOError
        If the header is malformed, or if a later file in the family does
        not match the file preceding it.

    """
    i4be = _netcdf_stypes[3]
    if ifile:
        if not root.nrecs:
            raise IOError("first file in apparent family has no record vars")
        f = handle.open(ifile - 1)
        headsize = root.headsize
        f.seek(0)
        static0 = f.read(headsize)
        f = handle.open(ifile)
        magic = f.read(4)
        if magic == static0[:4]:
            nrecs = int(fromfile(f, i4be, 1)[0])
            static1 = f.read(headsize - 8)
        else:
            static1 = nrecs = None
        if static1 != static0[8:]:
            raise IOError("static variables do not match previous file")
        if nrecs == -1:
            # STREAMING record count: deduce it from the file size.  The
            # records begin at recaddr (after any non-record variables),
            # exactly as for the first file of the family below.
            f.seek(0, 2)
            nrecs = (f.tell() - root.recaddr) // root.recsize
        root.nrecs.append(nrecs)
        # Every record variable extends through this file as well, so its
        # count (the index of the next new record) must include them.
        for item in root.variables.values():
            if isinstance(item, NCList):
                item.count += nrecs
        return
    f = handle.open(ifile)
    magic = fromfile(f, 'S4', 1)[0]
    version = magic[3:]  # in python3, magic[3] is int(1) != b'\x01'
    # Compare against a tuple: a bytes substring test would also accept
    # an empty version (numpy strips trailing NULs from 'S4' values).
    if magic[:3] != b'CDF' or version not in (b'\x01', b'\x02'):
        raise IOError("bad magic in netCDF-3 header")
    v64 = version != b'\x01'
    iobe = dtype('>i8') if v64 else i4be
    nrecs = int(fromfile(f, i4be, 1)[0])  # -1 indicates STREAMING
    tag, count = fromfile(f, i4be, 2)
    if tag != 10 and (count or tag):
        raise IOError("bad dim_list in netCDF-3 header")
    dims, recid = [], None
    while count > 0:
        count -= 1
        name = _get_name(f)
        size = int(fromfile(f, i4be, 1)[0])
        if not size:
            recid = len(dims)  # zero length marks the record dimension
        dims.append((name, size))
    attrs = [(None, _get_attrs(f))]  # global attributes keyed by None
    tag, count = fromfile(f, i4be, 2)
    if tag != 11 and (count or tag):
        raise IOError("bad var_list in netCDF-3 header")
    variables, recsize, special_case = OrderedDict(), 0, 0
    recaddr = lastaddr = None
    nrecvar = 0
    reclists = []  # record variables, counts set once nrecs is known
    while count > 0:
        count -= 1
        name = _get_name(f)
        ndim = int(fromfile(f, i4be, 1)[0])
        shape = tuple(fromfile(f, i4be, ndim).astype(int)) if ndim else ()
        attrs.append((name, _get_attrs(f)))
        nctype = int(fromfile(f, i4be, 1)[0])
        if nctype < 1 or nctype > 6:
            raise IOError("bad nc_type (not in 1-6) in netCDF-3 header")
        stype = _netcdf_stypes[nctype - 1]
        fromfile(f, i4be, 1)  # ignore vsize
        offset = int(fromfile(f, iobe, 1)[0])
        # Note: offset is the byte address of the variable in the file
        #   - byte address of first block of a record variable
        if offset < 0:
            raise IOError("bad variable offset in netCDF-3 header")
        # At this point shape holds dimension ids, not lengths.
        unlim = shape and shape[0] == recid
        if unlim:
            shape = shape[1:]
        try:
            sshape = tuple(dims[i][0] for i in shape)
        except IndexError:
            raise IOError("bad dimension index in netCDF-3 header")
        shape = tuple(dims[i][1] for i in shape)
        item = NCLeaf(root, len(variables), offset, stype, shape, sshape)
        variables[name] = itemx = NCList(root, item) if unlim else item
        if unlim:
            reclists.append(itemx)
            nrecvar += 1
            if nrecvar == 1:
                nbytes = stype.itemsize
                if nbytes & 3:
                    if shape:
                        nbytes *= prod(shape)
                    if nbytes & 3:
                        special_case = nbytes
            recsize += _measure_item(item)
            if recaddr is None or offset < recaddr:
                recaddr = offset
        elif lastaddr is None or offset >= lastaddr:
            lastaddr = offset + _measure_item(item)
    if nrecvar == 1 and special_case:
        # Implement special rule for byte, char, or short single record
        # variable; such records are not forced to 4 byte boundaries.
        recsize = special_case
    headsize = f.tell()
    if nrecs == -1:
        if recsize:
            # Handle special streaming record count by using file size.
            f.seek(0, 2)
            size = f.tell()
            f.seek(headsize)
            nrecs = (size - recaddr) // recsize
        else:
            nrecs = 0  # no record variables, hence no records
    # Only now is the true record count known (STREAMING is resolved).
    for itemx in reclists:
        itemx.count += nrecs
    root.variables = variables
    root.dims = OrderedDict(dims)
    root.attrs = OrderedDict(attrs)
    root.headsize = headsize
    root.recaddr = recaddr or lastaddr or headsize
    root.recsize = recsize
    root.nrecs.append(nrecs)
    root.v64 = v64
def _get_name(f):
    """Read a counted, 4-byte-padded string from file f at its position.

    The string is stored as a big-endian 4 byte character count followed
    by the characters, NUL padded to a multiple of 4 bytes.  Returns the
    text as converted by `_bytes_as_str`.

    Raises
    ------
    IOError
        If the count is negative or the file ends before the string does.

    """
    nchar = int(fromfile(f, '>i4', 1)[0])
    if nchar < 0:
        raise IOError("bad string length in netCDF-3 header")
    if not nchar:
        # Nothing follows a zero count; a zero width string view is not
        # possible, so handle the empty string (e.g. an empty char
        # attribute value) explicitly.
        return _bytes_as_str(b'')
    ntot = (nchar + 3) & ~3  # round up to 4 byte boundary
    chars = fromfile(f, 'S1', ntot)
    if chars.size < ntot:
        raise IOError("truncated string in netCDF-3 header")
    return _bytes_as_str(chars[:nchar].view('S' + str(nchar)))
def _bytes_as_str(text):
    """Convert bytes to the native text type, returning non-bytes as is.

    An object with a `ravel` method (a numpy array or scalar) is first
    reduced to its first element.  In python3 bytes are always decoded;
    in python2 they are decoded only when they are not pure ASCII.  The
    decoding is utf8, falling back to latin1.
    """
    if hasattr(text, 'ravel'):
        text = text.ravel()[0]
    if not isinstance(text, bytes):
        return text
    if PY2:
        try:
            text.decode('ascii')
        except UnicodeDecodeError:
            pass  # non-ASCII, must become unicode below
        else:
            return text  # pure ASCII str is already native text
    try:
        return text.decode('utf8')
    except UnicodeDecodeError:  # ignore, but violates netCDF-3 spec
        return text.decode('latin1')
def _text_as_bytes(text):
if hasattr(text, 'ravel'):
text = text.ravel()[0]
return text if isinstance(text, bytes) else text.encode('utf8')
def _get_attrs(f):
    """Read an att_list from file f, returning OrderedDict name->value.

    A char attribute (nc_type 2) is returned as text read by `_get_name`.
    Any other attribute is returned as a numpy array in native byte order,
    or as a numpy scalar when it holds exactly one value.

    Raises
    ------
    IOError
        If the list tag or an nc_type is invalid.

    """
    i4be = _netcdf_stypes[3]
    tag, count = fromfile(f, i4be, 2)
    if tag != 12 and (count or tag):
        raise IOError("bad attr_list in netCDF-3 header")
    result = OrderedDict()
    for _ in range(int(count)):
        name = _get_name(f)
        nctype = int(fromfile(f, i4be, 1)[0])
        if not 1 <= nctype <= 6:
            raise IOError("bad nc_type (not in 1-6) in netCDF-3 header")
        if nctype == 2:
            # Text has the same counted, padded layout as a name.
            result[name] = _get_name(f)
            continue
        nvalues = int(fromfile(f, i4be, 1)[0])
        stype = _netcdf_stypes[nctype - 1]
        values = fromfile(f, stype, nvalues)
        npad = -values.nbytes & 3
        if npad:
            fromfile(f, 'u1', npad)  # skip padding to 4 byte boundary
        if values.size == 1:
            values = values[0]
        if not stype.isnative:
            values = values.astype(stype.newbyteorder('='))
        result[name] = values
    return result
class NCGroup(object):
    """Root (and only) group of a netCDF-3 file or file family.

    Attributes
    ----------
    handle
        The multi-file handle (a generic.MultiFile).
    variables, dims, attrs : dict
        Variables by name, dimension lengths by name, and attribute
        dicts by variable name (None for the global attributes).
    headsize : int
        Header size in bytes; 0 until the file structure is frozen.
    recaddr, recsize : int
        Byte address of the first record and byte size of one record.
    nrecs : list
        Record counts, one per file in the family.
    maxsize : int
        File size beyond which a new record begins a new family member.
    v64 : bool
        True to write the 64-bit offset format.
    pending : dict or None
        Values written before the first flush, by variable index.
    """

    def __init__(self, handle, maxsize=134217728, v64=False):
        self.handle = handle  # a generic.MultiFile
        self.variables, self.dims, self.attrs = {}, {}, {}
        self.headsize = self.recaddr = self.recsize = 0
        self.nrecs = []  # list of record counts in files of family
        self.maxsize = maxsize
        self.v64 = v64
        self.pending = None  # holds pre-flush variable values

    @staticmethod
    def isgroup():
        return 1

    @staticmethod
    def islist():
        return 0

    isleaf = islist

    def root(self):
        return self  # no such thing as directories in netCDF3

    def close(self):
        """Close the underlying file handle."""
        self.handle.close()

    def flush(self):
        """Flush the underlying file handle."""
        self.handle.flush()

    def __len__(self):
        return len(self.variables)

    def __iter__(self):
        return iter(self.variables)

    def lookup(self, name):
        """Return the variable called name, or None if there is none."""
        return self.variables.get(name)

    def declare(self, name, dtype, shape, unlim=None):
        """Declare variable name, returning its NCLeaf or NCList.

        A true `unlim` declares a record variable (returned as NCList).
        Raises RuntimeError once the header has been written, and
        TypeError for a zero length dimension or an unsupported dtype.
        """
        if self.headsize:
            raise RuntimeError("netCDF file defined, no more declarations")
        if shape and not all(shape):
            raise TypeError("netCDF does not support 0-length dimensions")
        stype = _get_stype(dtype)
        # Dimensions are anonymous in QnD, so name them by their length.
        sshape = tuple('_' + str(s) for s in shape) if shape else ()
        dims, variables = self.dims, self.variables
        if unlim:
            dims.setdefault('_0', 0)
        for s, n in zip(sshape, shape):
            dims.setdefault(s, n)
        # Set offset to unlim for now, will be set in initializer.
        item = NCLeaf(self, len(variables), unlim, stype, shape, sshape)
        if unlim:
            item = NCList(self, item)
        variables[name] = item
        return item

    # qnd.QAttribute uses only __iter__, get, items, __len__, __contains__
    # In PY2, the dict returned here has an inefficient items() method,
    # but it is not worth fixing that here.
    def attget(self, vname):
        """Return attribute dict of variable vname (global if false)."""
        return self.attrs.get(vname if vname else None)

    def attset(self, vname, aname, dtype, shape, value):
        """Set attribute aname of variable vname (global if false).

        Raises RuntimeError once the header has been written, and
        TypeError for an array of strings or a multi-dimensional value.
        """
        if self.headsize:
            raise RuntimeError("netCDF file defined, no setting attributes")
        stype = _get_stype(dtype)
        strtype = _netcdf_stypes[1]
        if stype == strtype:
            if shape:
                raise TypeError("netCDF does not support array of strings "
                                "as an attribute value")
            value = _bytes_as_str(value)
        else:
            value = asarray(value, stype)
            if shape:
                if len(shape) > 1:
                    raise TypeError("netCDF does not support "
                                    "multi-dimensional attribute values")
                if value.shape != shape:
                    value = value.reshape(shape)
            if not stype.isnative:
                value = value.astype(stype.newbyteorder('='))
        if not vname:
            vname = None
        attrs = self.attrs.get(vname)
        if not attrs:
            self.attrs[vname] = attrs = OrderedDict()
        attrs[aname] = value

    def record_delta(self, irec):
        """Compute delta to add to record variable offset to reach irec."""
        handle, nrecs, maxsize = self.handle, self.nrecs, self.maxsize
        if not self.headsize:
            if not nrecs:  # This is first record variable.
                nrecs.append(1)
            if not irec:
                return 0  # First record is being declared, delta unknown.
            # Beginning to write second record may force first flush,
            # freezing the netCDF file structure, an implicit ENDDEF.
            # Effectively, this flush is writing the first record, even
            # though this call has irec==1 and declaring the first variable
            # of the second record.
            self.flusher(self.handle.open(0))
        recsize = self.recsize
        rec0 = array(nrecs).cumsum()
        # searchsorted needs strictly monotonic array
        # However, because of the 0.5 offset and the fact that irec is
        # an integer, this apparently can never cause a problem here
        # (the monotonicity problem only arises if irec matches two
        # consecutive equal values of rec0-0.5, which could happen if
        # some file has no records).
        ifile = (rec0 - 0.5).searchsorted(irec)
        if ifile >= rec0.size:
            if handle.nextaddr:
                # Handle special case of the first record written after a
                # family is opened in 'a' or 'r+' mode.
                maxsize = 0
            # This is a new record.  We check if maxsize has been exceeded,
            # and force a new file in the family to be created if so.
            n = nrecs[-1]
            if n and (self.recaddr + recsize*n >= maxsize):
                f = handle.open(ifile)  # implicit flush during open
                nrecs.append(0)
                self.initializer(f)
                irec -= rec0[-1]
            else:
                ifile -= 1  # add record to last existing file
                if ifile:
                    irec -= rec0[ifile - 1]
            handle.nextaddr = int64(0)  # special case only triggers once
            nrecs[-1] += 1
        elif ifile:
            irec -= rec0[ifile - 1]
        return handle.zero_address(ifile) + recsize * irec

    def flusher(self, f):
        """Write pending metadata to f, the last file in the family."""
        # The flush method has to serve as ENDDEF for newly created netCDF
        # families, see comments for initializer method below.
        if not self.headsize:
            # Only get here for first file of newly created family.
            self.headsize = 1  # impossible since magic number is 4 bytes
            self.initializer(f)
        # The only metadata that may need to be written is nrecs.
        # The file handle f is the last file in the family.
        if self.nrecs:
            f.seek(4)
            array(self.nrecs[-1], '>i4').tofile(f)

    def initializer(self, f):
        """Write the header of newly created family member f.

        Raises IOError if the header written for the first file does not
        have the size computed for it in advance.
        """
        # Called indirectly by handle.callbacks during ncopen in "w" mode.
        # This is the only case in which this point is reachable with zero
        # self.headsize, because ncparse would have filled it in in "r" or
        # "r+" mode, and it would have been written for the first file in
        # the family if this is not the first file.
        # For the first file of a newly created netCDF family, we want to
        # wait for an explicit call to flush() to write the first file header,
        # which is the QnD implementation of the ENDDEF call in the netCDF API.
        first_flush = self.headsize == 1  # impossible value set in flusher
        if first_flush:
            self.headsize = 0
        elif not self.headsize:
            # Nothing declared yet, wait for the first flush.  A nonzero
            # headsize must fall through: the structure is already frozen
            # and f is a new family member which needs its header.
            return
        # The file f positioned at address 0.
        i4be = _netcdf_stypes[3]
        v64 = self.v64
        array(b'CDF' + (b'\x02' if v64 else b'\x01')).tofile(f)
        array(0, i4be).tofile(f)
        handle = self.handle
        ifile = handle.current_file()
        if ifile:
            # Just copy header and non-record variables to new file.
            # These occupy bytes 8 through recaddr of the first file.
            f = handle.open(0)
            f.seek(8)
            value = f.read(self.recaddr - 8)
            f = handle.open(ifile)
            f.seek(8)
            f.write(value)
            return
        # This is first file of family.
        dims, variables, attrs = self.dims, self.variables, self.attrs
        if not dims:
            zeros(2, i4be).tofile(f)
        else:
            array((10, len(dims)), i4be).tofile(f)
            for name, size in itemsof(dims):
                _put_name(f, name)
                array(size, i4be).tofile(f)
        _put_attrs(f, attrs.get(None))
        if not variables:
            zeros(2, i4be).tofile(f)
        else:
            array((11, len(variables)), i4be).tofile(f)
        headsize = f.tell()  # including vars tag and count
        if first_flush:
            # The offsets in the variables array are unknown until the
            # symbol table is written, which makes it hard to write for
            # the first file in a family.  We make a clumsy two passes
            # to compute the length of the var_list if the offsets have
            # not yet been set.
            # add space for name length, ndim, nctype, vsize, and offset
            headsize += (24 if v64 else 20) * len(variables)
            nrecs = self.nrecs
            for name, item in itemsof(variables):
                unlim = isinstance(item, NCList)
                if unlim:
                    item = item.leaf
                    if not nrecs:
                        nrecs.append(0)
                ndim = len(item.shape or ()) + unlim  # so shape None okay
                vattrs = attrs.get(name)
                namelen = _put_name(None, name)
                headsize += namelen + 4*ndim + _measure_attrs(vattrs)
            offset = self.headsize = headsize
            # Now we can fill in all the offsets and find recaddr.
            recitems = []
            for name, item in itemsof(variables):
                if isinstance(item, NCList):
                    item = item.leaf
                if item.offset:  # This is unlim, see NCGroup.declare.
                    recitems.append(item)
                    continue
                item.offset = offset
                offset += _measure_item(item)
            self.recaddr = offset
            for item in recitems:
                item.offset = offset
                offset += _measure_item(item)
            self.recsize = offset - self.recaddr
        recaddr, recsize = self.recaddr, self.recsize
        dimids = {name: i for i, name in enumerate(dims)}
        recid = None
        for i, (_, n) in enumerate(itemsof(dims)):
            if not n:
                recid = i
                break
        recid = [] if recid is None else [recid]
        iobe = dtype('>i8') if v64 else i4be
        u4be = dtype('>u4')
        rem = recsize & 3
        if rem:
            recsize += 4 - rem  # used only for vsize
        if recsize > 0xffffffff:
            recsize = 0xffffffff  # vsize overflow convention
        for name, item in itemsof(variables):
            if isinstance(item, NCList):
                item = item.leaf
            stype, offset = item.stype, item.offset
            nctype = _netcdf_stypes.index(stype) + 1
            sshape = item.sshape or ()
            unlim = offset >= recaddr
            _put_name(f, name)
            array(len(sshape) + unlim, i4be).tofile(f)
            sshape = (recid if unlim else []) + [dimids[s] for s in sshape]
            array(sshape, i4be).tofile(f)
            _put_attrs(f, attrs.get(name))
            vsize = recsize if unlim else _measure_item(item)
            # vsize may be as large as 2**32-1, which does not fit in a
            # signed 4 byte integer, so write it unsigned.
            vsize = min(int(vsize), 0xffffffff)
            array(nctype, i4be).tofile(f)
            array(vsize, u4be).tofile(f)
            array(offset, iobe).tofile(f)
        headsize = f.tell()
        if headsize != self.headsize:
            raise IOError("netCDF header size mismatch (BUG?)")
        # Header finished, write any pending variables now.
        pending = self.pending
        self.pending = None
        if pending:
            byindex = {}
            for _, item in itemsof(variables):
                if isinstance(item, NCList):
                    item = item.leaf
                byindex[item.index] = item
            for index, value in itemsof(pending):
                byindex[index].write(value)
def _put_name(f, name):
    """Write name to f as a counted string NUL padded to 4 bytes.

    If f is None, nothing is written; instead the padded byte length of
    the name (not including the 4 byte count) is returned.  Otherwise
    the return value is None.
    """
    raw = _text_as_bytes(name)
    nchar = len(raw)
    npad = -nchar & 3  # NULs needed to reach a 4 byte boundary
    if f is None:
        return nchar + npad  # not including 4 byte nchar count
    array(nchar, _netcdf_stypes[3]).tofile(f)
    f.write(raw + b'\0'*npad if npad else raw)
    return None
def _put_attrs(f, attrs):
    """Write the attribute dict attrs to file f as a netCDF-3 att_list.

    An empty or None attrs is written as ABSENT (two zero words).  Text
    values are written as char attributes; any other value is converted
    to the netCDF type chosen from its numpy dtype (float or double for
    floating point, byte for one byte items, short for two byte items,
    otherwise int).  Every value is NUL padded to a 4 byte boundary, so
    that the bytes written agree with `_measure_attrs`.
    """
    i4be = _netcdf_stypes[3]
    if not attrs:
        zeros(2, i4be).tofile(f)
        return
    array((12, len(attrs)), i4be).tofile(f)
    for name, value in itemsof(attrs):
        if isinstance(value, (basestring, bytes)):
            nctype = 2
            data = _text_as_bytes(value)
            n = len(data)
        else:
            value = asarray(value)
            vtype = value.dtype
            size = vtype.itemsize
            if vtype.kind == 'f':
                nctype = 5 if size <= 4 else 6
            elif size == 1:
                nctype = 1
            else:
                nctype = 3 if size == 2 else 4
            stype = _netcdf_stypes[nctype - 1]
            if vtype != stype:
                value = value.astype(stype)
            n = value.size
            data = value.tobytes()
        _put_name(f, name)
        array((nctype, n), i4be).tofile(f)
        # Write raw bytes rather than a numpy string array, which cannot
        # represent a zero length string, then pad to a 4 byte boundary.
        f.write(data + b'\0' * (-len(data) & 3))
def _measure_attrs(attrs):
    """Return the number of bytes `_put_attrs` writes for attrs.

    The total is 8 bytes for the list tag and count, plus for each
    attribute 12 bytes (name length, nc_type, value count), the name
    padded to 4 bytes, and the value padded to 4 bytes.
    """
    size = 8
    if attrs:
        for name, value in itemsof(attrs):
            size += 12  # name length, nctype, value count
            size += (len(_text_as_bytes(name)) + 3) & ~3
            if isinstance(value, (basestring, bytes)):
                nbytes = len(_text_as_bytes(value))
            else:
                nbytes = asarray(value).nbytes
            size += (nbytes + 3) & ~3
    return size
def _measure_item(item):
size = item.shape
size = prod(size) if size else 1
nbytes = item.stype.itemsize * size
return ((nbytes + 3) >> 2) << 2
def _get_stype(dtype):
    """Return the netCDF-3 storage dtype used to store numpy dtype.

    Boolean and integer types map to byte (1 byte items), short (2 byte
    items), or int (4 and 8 byte items); floating point types map to
    float (2 and 4 byte items) or double (8, 12, and 16 byte items);
    string types map to char.

    Raises
    ------
    TypeError
        If dtype is None, dict, list, object, or any other type that
        netCDF-3 cannot represent.

    """
    # beware misfeature numpy (1.16.4) dtype('f8') tests == None
    if dtype is None or dtype in (dict, list, object):
        kind = 'X'
    else:
        kind = dtype.kind
    # Index into _netcdf_stypes, or None if there is no match.
    index = None
    if kind in 'bui':
        index = {1: 0, 2: 2, 4: 3, 8: 3}.get(dtype.itemsize)
    elif kind == 'f':
        index = {2: 4, 4: 4, 8: 5, 12: 5, 16: 5}.get(dtype.itemsize)
    elif kind in 'SU':
        index = 1
    if index is None:
        raise TypeError("netCDF-3 does not support this dtype")
    return _netcdf_stypes[index]
_netcdf_stypes = [dtype('i1'), dtype('S1'), dtype('>i2'), dtype('>i4'),
dtype('>f4'), dtype('>f8')]
class NCLeaf(object):
    """An array variable (or one record of a record variable).

    Attributes
    ----------
    parent
        Weak reference to the NCGroup owning this variable.
    index : int
        Position of the variable in the variable list of its file.
    offset
        Byte address of the data; before the header is written, this
        holds the unlim flag passed to NCGroup.declare instead.
    stype : dtype
        Storage data type, one of the _netcdf_stypes.
    shape, sshape : tuple
        Dimension lengths and the corresponding dimension names.
    """
    __slots__ = 'parent', 'index', 'offset', 'stype', 'shape', 'sshape'

    def __init__(self, parent, index, offset, stype, shape, sshape, _wrp=None):
        # With _wrp true, parent is already a weak reference.
        self.parent = parent if _wrp else weakref.ref(parent)
        self.index = index
        self.offset = offset
        self.stype = stype
        self.shape = shape
        self.sshape = sshape

    @staticmethod
    def isleaf():
        return 1

    @staticmethod
    def isgroup():
        return 0

    islist = isgroup

    def shift_by(self, delta):
        """Return a copy of this leaf with its offset increased by delta."""
        state = [getattr(self, nm) for nm in self.__slots__]
        state[2] += delta
        return NCLeaf(*state, _wrp=1)

    def root(self):
        return self.parent()

    def _dtype(self):
        # The storage type converted to native byte order.
        dtype = self.stype
        return dtype if dtype.isnative else dtype.newbyteorder('=')

    def query(self):
        """Return (dtype, shape, sshape), sshape is shape if not named."""
        shape, sshape = self.shape or (), self.sshape
        return self._dtype(), shape, sshape if sshape else shape

    def read(self, args=()):
        """Read the variable, or the part of it selected by index args.

        The value is returned in native byte order.  A char variable is
        returned as text (or an array of text) whose characters run
        along the final netCDF dimension.

        Raises
        ------
        RuntimeError
            If the header of a newly created file is not yet written.

        """
        parent = self.parent()
        if not parent.headsize:
            raise RuntimeError("cannot read from netCDF file in 'w' mode"
                               " before first flush")
        stype, shape = self.stype, self.shape
        args, shape, offset = leading_args(args, shape)
        f = parent.handle.seek(self.offset + stype.itemsize * offset)
        size = prod(shape) if shape else 1
        value = fromfile(f, stype, size).reshape(shape)[args]
        if not stype.isnative:
            value = value.astype(stype.newbyteorder('='))
        if stype == _netcdf_stypes[1]:
            # Present this as a str or array of str.
            # Note that final netCDF dimension is really length of string.
            shape = value.shape
            if shape:
                shape, strlen = shape[:-1], shape[-1]
                value = value.view('S' + str(strlen)).reshape(shape)
            if PY2:
                try:
                    npdecode(value, 'ascii')
                    need_unicode = False
                except UnicodeDecodeError:
                    need_unicode = True
            else:
                need_unicode = True
            if need_unicode:
                try:
                    value = npdecode(value, 'utf8')
                except UnicodeDecodeError:
                    value = npdecode(value, 'latin1')
            if not shape:
                value = value[()]
        return value

    def write(self, value, args=()):
        """Write value to the variable, or to the part selected by args.

        Before the header has been written, the value is instead saved
        in ``parent.pending`` until the first flush.  Text values are
        split into characters along a new final dimension; text written
        to a char variable is NUL padded to the declared string length.

        Raises
        ------
        IndexError
            If args are given before the header has been written.

        """
        parent = self.parent()
        if not parent.headsize:
            if args:
                raise IndexError("no partial writes during declaration")
            pending = parent.pending
            if pending is None:
                pending = parent.pending = {}
            pending[self.index] = value
            return
        offset, stype, shape = self.offset, self.stype, self.shape
        args, shape, off = leading_args(args, shape)
        if off:
            offset += stype.itemsize * off
        value = asarray(value)
        kind = value.dtype.kind
        if kind in 'SU':
            if kind == 'U':
                value = npencode(value, 'utf8')
            if (shape and stype.kind == 'S' and
                    value.dtype.itemsize < shape[-1]):
                # Pad short strings with NULs to the declared length.
                value = value.astype('S' + str(shape[-1]))
            # Use a separate name for the shape of the text array: shape
            # must remain the shape of the data in the file, because it
            # sizes the read-modify-write and the conformability check.
            vshape = value.shape
            value = value.reshape(vshape + (1,)).view('S1')
        f = parent.handle.seek(offset)
        if args:
            # Must do read-modify-write for potentially non-contiguous write.
            addr = f.tell()
            v = fromfile(f, stype, prod(shape) if shape else 1).reshape(shape)
            v[args] = value
            value = v
            f.seek(addr)
        else:
            value = ascontiguousarray(value, stype)
            if value.shape != shape:
                # Avoid the recent (numpy 1.10) broadcast_to function.
                v = zeros(shape, stype)
                v[()] = value
                value = v
        value.tofile(f)
class NCList(object):
    """NCLeaf wrapper for record variables.

    Attributes
    ----------
    parent
        Weak reference to the NCGroup owning this variable.
    leaf : NCLeaf
        Describes one record of the variable.
    count : int
        Index of the next record to be declared for this variable.
    """
    __slots__ = 'parent', 'leaf', 'count'

    def __init__(self, parent, leaf):
        self.parent = weakref.ref(parent)
        self.leaf = leaf
        self.count = 0  # record count needed to know when new record created

    @staticmethod
    def islist():
        return 1

    @staticmethod
    def isgroup():
        return 0

    isleaf = isgroup

    def root(self):
        return self.parent()

    # len, iter, index, declare are list methods called by QList
    def __len__(self):
        # Total number of records in all the files of the family.
        return sum(self.parent().nrecs)

    def __iter__(self):
        for irec in range(len(self)):
            yield self.index(irec)

    def index(self, ndx):
        """Return the NCLeaf for record ndx, or None if out of range."""
        total = len(self)
        if ndx < 0:
            ndx = ndx + total
        if not 0 <= ndx < total:
            return None  # out of range, let caller raise any exception
        group = self.parent()
        return self.leaf.shift_by(group.record_delta(ndx))

    def declare(self, dtype, shape):
        """Return the NCLeaf for the next record of this variable."""
        # Ignore dtype and shape here; conformability with the NCLeaf
        # dtype and shape will be enforced during NCLeaf.write.
        group = self.parent()
        delta = group.record_delta(self.count)
        self.count += 1  # nrecs in NCGroup incremented in record_delta
        return self.leaf.shift_by(delta)