1
+ import numpy as np
2
+ import unicodedata
3
+
4
# Binary tags from the netCDF classic (netCDF-3) file format. Each type tag
# is exactly four bytes long (a 32-bit big-endian integer), so the literals
# must not contain any padding characters.
NULL = '\x00'
NC_BYTE = '\x00\x00\x00\x01'
NC_CHAR = '\x00\x00\x00\x02'
NC_SHORT = '\x00\x00\x00\x03'
# netCDF-3 only supports 32-bit integers
NC_INT = '\x00\x00\x00\x04'
NC_FLOAT = '\x00\x00\x00\x05'
NC_DOUBLE = '\x00\x00\x00\x06'
12
+
13
# Map between netCDF type and numpy dtype and vice versa. Due to a bug
# in the __hash__() method of numpy dtype objects (fixed in development
# release of numpy), we need to explicitly match byteorder for dict
# lookups to succeed. Here we normalize to native byte order.
#
# NC_CHAR is a special case because netCDF represents strings as
# character arrays. When NC_CHAR is encountered as the type of an
# attribute value, this TYPEMAP is not consulted and the data is read
# as a string. However, when NC_CHAR is encountered as the type of a
# variable, then the data is read as a numpy array of 1-char elements
# (equivalently, length-1 raw "strings"). There is no support for numpy
# arrays of multi-character strings.
TYPEMAP = {
    # We could use np.dtype objects as keys/values, except that __hash__
    # comparison of numpy.dtype is broken in older versions of numpy. If
    # you must compare and cannot upgrade, use __eq__. This bug is known
    # to be fixed in numpy version 1.3.
    NC_BYTE: 'int8',
    NC_CHAR: '|S1',
    NC_SHORT: 'int16',
    NC_INT: 'int32',
    NC_FLOAT: 'float32',
    NC_DOUBLE: 'float64',
}
# Add the reverse (dtype name -> netCDF type tag) entries. The loop inserts
# into TYPEMAP, so iterate over a snapshot of the keys: iterating a live
# key view while the dict grows raises RuntimeError.
for k in list(TYPEMAP):
    TYPEMAP[TYPEMAP[k]] = k
39
+
40
+ # Special characters that are permitted in netCDF names except in the
41
+ # 0th position of the string
42
+ _specialchars = '_.@+- !"#$%&\()*,:;<=>?[]^`{|}~'
43
+
44
+ # The following are reserved names in CDL and may not be used as names of
45
+ # variables, dimension, attributes
46
+ _reserved_names = set ([
47
+ 'byte' ,
48
+ 'char' ,
49
+ 'short' ,
50
+ 'ushort' ,
51
+ 'int' ,
52
+ 'uint' ,
53
+ 'int64' ,
54
+ 'uint64' ,
55
+ 'float'
56
+ 'real' ,
57
+ 'double' ,
58
+ 'bool' ,
59
+ 'string' ,
60
+ ])
61
+
62
def coerce_type(arr):
    """Coerce a numeric array to a dtype that is compatible with netCDF-3.

    netCDF-3 can not handle 64-bit integers, but on most platforms
    Python integers are int64. To work around this discrepancy, 64-bit
    signed integer arrays are cast to int32. An exception is raised if
    this coercion is not safe.

    netCDF-3 can not handle booleans, but booleans can be trivially
    (albeit wastefully) represented as bytes, so bool arrays are cast to
    int8.

    Parameters
    ----------
    arr : numpy.ndarray
        Array whose dtype should be checked.

    Returns
    -------
    numpy.ndarray
        A new int32 array for 64-bit signed integer input, a new int8
        array for boolean input, and ``arr`` itself (not a copy) for every
        other dtype. The byte order of the input dtype is carried over to
        the result dtype.

    Raises
    ------
    ValueError
        If a 64-bit integer array holds values that do not survive the
        round trip through int32.
    """
    dtype = arr.dtype
    # Identify 64-bit signed integers by kind and width instead of by
    # ``dtype.char``: the char code of int64 is 'l' on some platforms and
    # 'q' on others (and 'l' can denote a 32-bit type), whereas kind and
    # itemsize are platform independent and invariant to endianness.
    if dtype.kind == 'i' and dtype.itemsize == 8:
        cast_arr = arr.astype(
            np.dtype('int32').newbyteorder(dtype.byteorder))
        # Out-of-range values wrap around during the cast, so comparing
        # against the original detects any loss of information.
        if not (cast_arr == arr).all():
            raise ValueError("array contains integer values that " +
                             "are not representable as 32-bit signed integers")
        return cast_arr
    if dtype.kind == 'b':  # np.dtype('bool')
        return arr.astype(
            np.dtype('int8').newbyteorder(dtype.byteorder))
    return arr
91
+
92
+ def _isalnumMUTF8 (c ):
93
+ """Return True if the given UTF-8 encoded character is alphanumeric
94
+ or multibyte.
95
+
96
+ Input is not checked!
97
+ """
98
+ return (c .isalnum () or (len (c .encode ('utf-8' )) > 1 ))
99
+
100
def is_valid_name(s):
    """Test whether an object can be validly converted to a netCDF
    dimension, variable or attribute name

    Earlier versions of the netCDF C-library reference implementation
    enforced a more restricted set of characters in creating new names,
    but permitted reading names containing arbitrary bytes. This
    specification extends the permitted characters in names to include
    multi-byte UTF-8 encoded Unicode and additional printing characters
    from the US-ASCII alphabet. The first character of a name must be
    alphanumeric, a multi-byte UTF-8 character, or '_' (reserved for
    special names with meaning to implementations, such as the
    "_FillValue" attribute). Subsequent characters may also include
    printing special characters, except for '/' which is not allowed in
    names. Names that have trailing space characters are also not
    permitted.

    Returns True if ``s`` is a valid name and False otherwise. Objects
    that are not strings, byte strings that are not valid UTF-8, and the
    empty string are all reported as invalid rather than raising.
    """
    if not isinstance(s, basestring):
        return False
    if not isinstance(s, unicode):
        try:
            s = unicode(s, 'utf-8')
        except UnicodeDecodeError:
            # Bytes that do not decode as UTF-8 cannot form a valid name.
            return False
    # An empty name is never valid. Checking it up front also protects the
    # s[0] / s[-1] lookups below, which would raise IndexError otherwise.
    if not s:
        return False
    return ((unicodedata.normalize('NFC', s) == s) and
            (s not in _reserved_names) and
            ('/' not in s) and
            (s[-1] != ' ') and
            (_isalnumMUTF8(s[0]) or (s[0] == '_')) and
            all((_isalnumMUTF8(c) or c in _specialchars for c in s))
            )
0 commit comments