Skip to content

Commit c4dd1fa

Browse files
author
Alex Kleeman
committed
Initial commit.
- Contains a skeleton which wraps netCDF4 and scipy.io.netcdf_file.
1 parent f6202cb commit c4dd1fa

File tree

2 files changed

+183
-0
lines changed

2 files changed

+183
-0
lines changed

src/__init__.py

Whitespace-only changes.

src/data.py

Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
import os
2+
import netCDF4 as nc4
3+
4+
from scipy.io import netcdf
5+
from cStringIO import StringIO
6+
from collections import OrderedDict
7+
8+
class Attributes(dict):
    """
    Dictionary of netcdf-style attributes (name -> value).

    Adds nothing to ``dict``; the subclass exists so that attribute
    collections have their own type.
    """
10+
11+
class Variable(object):
    """
    A netcdf-like variable: a tuple of dimension names, the data itself
    and a mapping of attributes.

    A Variable on its own is not fully described; the lengths of its
    dimensions are only known to the Dataset that owns it.
    """

    def __init__(self, dims, data, attributes):
        # Names of the dimensions the data is defined along.
        self.dimensions = dims
        # The underlying data object (expected to expose dtype/shape/size).
        self.data = data
        # Attribute name -> value mapping describing this variable.
        self.attributes = attributes

    def __getattribute__(self, key):
        """
        Look up ``key``, answering ``dtype``, ``shape`` and ``size`` from
        the underlying data object so the Variable mirrors those
        properties of its data.  Every other name is resolved normally.
        """
        if key not in ('dtype', 'shape', 'size'):
            return object.__getattribute__(self, key)
        # Delegated names are always read from the wrapped data.
        return getattr(self.data, key)
31+
32+
class Dataset(object):
    """
    A netcdf-like data object consisting of dimensions, variables and
    attributes which together form a self describing data set.

    After construction an instance exposes three mappings:

    * ``attributes`` -- an ``Attributes`` dict of global attributes.
    * ``dimensions`` -- an ``OrderedDict`` of dimension name -> length.
    * ``variables``  -- an ``OrderedDict`` of variable name -> ``Variable``.

    Ordinary attribute assignment is disabled (see ``__setattr__``); the
    loaders install these mappings through ``object.__setattr__``.
    """

    def _populate(self, attributes, dimensions, variables):
        """
        Install the three top-level mappings on the instance, bypassing
        the disabled ``__setattr__``.
        """
        object.__setattr__(self, 'attributes', Attributes(attributes))
        object.__setattr__(self, 'dimensions', OrderedDict(dimensions))
        object.__setattr__(self, 'variables', OrderedDict(variables))

    def _load_scipy(self, scipy_nc, *args, **kwdargs):
        """
        Interprets a netcdf file-like object using scipy.io.netcdf.

        ``scipy_nc`` may be a file-like object, a path, or a string
        holding the raw contents of a netcdf file.  Extra positional and
        keyword arguments are forwarded to ``netcdf.netcdf_file``.
        """
        if isinstance(scipy_nc, basestring):
            try:
                # First give scipy the chance to treat the string as a path.
                nc = netcdf.netcdf_file(scipy_nc, mode='r', *args, **kwdargs)
            except (IOError, OSError, TypeError, ValueError):
                # Not an openable path: treat the string as the serialized
                # file contents and read it from an in-memory buffer.
                nc = netcdf.netcdf_file(StringIO(scipy_nc), mode='r',
                                        *args, **kwdargs)
        else:
            # File-like objects are handed over untouched so that any
            # error raised by scipy reaches the caller unmasked.
            nc = netcdf.netcdf_file(scipy_nc, mode='r', *args, **kwdargs)

        def dimension_length(dim):
            # NOTE(review): scipy.io.netcdf appears to store each dimension
            # as its plain size (None for the record dimension) rather than
            # as a sized object -- confirm against the installed scipy.
            # Falling back on TypeError supports both representations.
            try:
                return len(dim)
            except TypeError:
                return dim

        def from_scipy_variable(sci_var):
            return Variable(dims=sci_var.dimensions,
                            data=sci_var.data,
                            attributes=sci_var._attributes)

        dimensions = OrderedDict((name, dimension_length(dim))
                                 for name, dim in nc.dimensions.iteritems())
        variables = OrderedDict((name, from_scipy_variable(var))
                                for name, var in nc.variables.iteritems())
        # The scipy handle is deliberately left open: the variables' data
        # objects come straight from it.
        self._populate(nc._attributes, dimensions, variables)

    def _load_netcdf4(self, netcdf_path, *args, **kwdargs):
        """
        Interprets the contents of netcdf_path using the netCDF4
        package.  Extra positional and keyword arguments are forwarded
        to ``netCDF4.Dataset``.

        Everything is copied out of the file before it is closed again.
        """
        nc = nc4.Dataset(netcdf_path, *args, **kwdargs)

        def from_netcdf4_variable(nc4_var):
            attributes = dict((k, nc4_var.getncattr(k))
                              for k in nc4_var.ncattrs())
            return Variable(dims=tuple(nc4_var.dimensions),
                            data=nc4_var[:],
                            attributes=attributes)

        try:
            attributes = dict((k.encode(), nc.getncattr(k))
                              for k in nc.ncattrs())
            dimensions = OrderedDict((k.encode(), len(d))
                                     for k, d in nc.dimensions.iteritems())
            # An OrderedDict (not a plain dict) keeps the variables in the
            # order the file reports them.
            variables = OrderedDict((vn.encode(), from_netcdf4_variable(v))
                                    for vn, v in nc.variables.iteritems())
        finally:
            # Release the file handle once everything has been read.
            nc.close()

        self._populate(attributes, dimensions, variables)

    def __init__(self, nc, *args, **kwdargs):
        """
        Build a Dataset from ``nc``.

        A string that does not start with ``'CDF'`` is taken to be a
        path and is read with the netCDF4 package.  Anything else (a
        file-like object, or a string that looks like the contents of a
        netcdf file) is read with scipy.io.netcdf.  Extra arguments are
        forwarded to whichever loader is used.
        """
        if isinstance(nc, basestring) and not nc.startswith('CDF'):
            # A string that doesn't appear to be the contents of a netcdf
            # file: load it using the netCDF4 package.
            self._load_netcdf4(nc, *args, **kwdargs)
        else:
            # A file-like object or serialized contents: read it using
            # the scipy.io.netcdf package.
            self._load_scipy(nc, *args, **kwdargs)

    def __setattr__(self, attr, value):
        """
        __setattr__ is overloaded to prevent operations that could
        cause loss of data consistency. If you really intend to update
        dir(self), use the self.__dict__.update method or
        object.__setattr__(self, attr, value) to bypass.
        """
        raise AttributeError("__setattr__ is disabled")

    def dump(self, filepath, *args, **kwdargs):
        """
        Dump the contents to a location on disk using
        the netCDF4 package.

        Extra arguments are forwarded to ``netCDF4.Dataset``.  Returns
        the netCDF4 dataset, still open; closing it is left to the caller.
        """
        nc = nc4.Dataset(filepath, mode='w', *args, **kwdargs)
        for name, length in self.dimensions.iteritems():
            nc.createDimension(name, size=length)
        for name, var in self.variables.iteritems():
            nc.createVariable(name, var.dtype, var.dimensions)
            nc_var = nc.variables[name]
            nc_var[:] = var.data[:]
            # Failures while setting an attribute propagate to the caller
            # instead of dropping into an interactive debugger.
            for key, value in var.attributes.iteritems():
                nc_var.setncattr(key, value)

        nc.setncatts(self.attributes)
        return nc

    def dumps(self):
        """
        Serialize the contents to a string. The serialization
        creates an in memory netcdf version 3 string using
        the scipy.io.netcdf package.
        """
        fobj = StringIO()
        nc = netcdf.netcdf_file(fobj, mode='w')
        for name, length in self.dimensions.iteritems():
            nc.createDimension(name, length)

        for name, var in self.variables.iteritems():
            nc.createVariable(name, var.dtype, var.dimensions)
            nc_var = nc.variables[name]
            nc_var[:] = var.data[:]
            for key, value in var.attributes.iteritems():
                setattr(nc_var, key, value)
        for key, value in self.attributes.iteritems():
            setattr(nc, key, value)
        # flush() writes the file into fobj without closing the buffer,
        # so its contents can still be read back.
        nc.flush()
        return fobj.getvalue()
155+
156+
if __name__ == "__main__":
    # Ad-hoc timing run: read a sample file, write it back out, then
    # round-trip it through the in-memory serialization.
    import time

    base_dir = os.path.dirname(__file__)
    test_dir = os.path.join(base_dir, '..', 'test')
    write_test_path = os.path.join(test_dir, 'test_output.nc')
    ecmwf_netcdf = os.path.join(test_dir, 'ECMWF_ERA-40_subset.nc')

    st = time.time()
    nc = Dataset(ecmwf_netcdf)
    print("Seconds to read from filepath :  %s" % (time.time() - st))

    st = time.time()
    nc.dump(write_test_path)
    print("Seconds to write :  %s" % (time.time() - st))

    st = time.time()
    nc_string = nc.dumps()
    print("Seconds to serialize :  %s" % (time.time() - st))

    st = time.time()
    nc = Dataset(nc_string)
    print("Seconds to deserialize :  %s" % (time.time() - st))

    st = time.time()
    # netcdf is a binary format, so the file is opened in binary mode.
    with open(ecmwf_netcdf, 'rb') as f:
        nc = Dataset(f)
    print("Seconds to read from fobj :  %s" % (time.time() - st))
183+

0 commit comments

Comments
 (0)