13
13
import re
14
14
import ROOT
15
15
16
+
16
17
__all__ = ['read_root' ]
17
18
19
+
18
20
def expand_braces (orig ):
19
21
r = r'.*(\{.+?[^\\]\})'
20
22
p = re .compile (r )
@@ -39,21 +41,22 @@ def expand_braces(orig):
39
41
40
42
return list (set (res ))
41
43
42
- def get_matching_variables (branches , patterns , fail = True ):
43
44
45
def get_matching_variables(branches, patterns, fail=True):
    """
    Select the branch names that match at least one of the given patterns.

    Parameters
    ----------
    branches: iterable of strings
        Candidate branch names. It is traversed once per pattern.
    patterns: iterable of strings
        Patterns handed to ``fnmatch`` together with each branch name.
    fail: bool
        If True (default), a pattern that matches no branch raises an error.
        If False, such a pattern is silently ignored.

    Returns
    -------
    list
        The matching branch names without duplicates. Names appear in the
        order they were first matched: patterns are processed in order and,
        within one pattern, branches keep their order in ``branches``.

    Raises
    ------
    ValueError
        If ``fail`` is true and a pattern does not match any branch.

    Notes
    -----
    ``fnmatch`` is imported outside the part of the file visible here;
    presumably it is the standard library's ``fnmatch.fnmatch``.
    """
    selected = []
    # Mirror of `selected` used only for constant-time duplicate checks,
    # so that the result list keeps its insertion order.
    seen = set()

    for p in patterns:
        found = False
        for b in branches:
            if not fnmatch(b, p):
                continue
            # A branch that was already selected by an earlier pattern still
            # counts as a match for this pattern.
            found = True
            if b not in seen:
                seen.add(b)
                selected.append(b)
        if not found and fail:
            raise ValueError("Pattern '{}' didn't match any branch".format(p))
    return selected
56
58
59
+
57
60
def read_root (path , key = None , columns = None , ignore = None , chunksize = None , where = None , * args , ** kwargs ):
58
61
"""
59
62
Read a ROOT file into a pandas DataFrame.
@@ -124,6 +127,7 @@ def read_root(path, key=None, columns=None, ignore=None, chunksize=None, where=N
124
127
f = ROOT .TFile (path )
125
128
n_entries = f .Get (key ).GetEntries ()
126
129
f .Close ()
130
+
127
131
def genchunks ():
128
132
for chunk in range (int (ceil (float (n_entries ) / chunksize ))):
129
133
arr = root2array (path , key , all_vars , start = chunk * chunksize , stop = (chunk + 1 ) * chunksize , selection = where , * args , ** kwargs )
@@ -133,6 +137,7 @@ def genchunks():
133
137
arr = root2array (path , key , all_vars , selection = where , * args , ** kwargs )
134
138
return convert_to_dataframe (arr )
135
139
140
+
136
141
def convert_to_dataframe (array ):
137
142
indices = list (filter (lambda x : x .startswith ('__index__' ), array .dtype .names ))
138
143
if len (indices ) == 0 :
@@ -150,6 +155,7 @@ def convert_to_dataframe(array):
150
155
raise ValueError ("More than one index found in file" )
151
156
return df
152
157
158
+
153
159
def to_root (df , path , key = 'default' , mode = 'w' , * args , ** kwargs ):
154
160
"""
155
161
Write DataFrame to a ROOT file.
@@ -162,15 +168,15 @@ def to_root(df, path, key='default', mode='w', *args, **kwargs):
162
168
Name of tree that the DataFrame will be saved as
163
169
mode: string, {'w', 'a'}
164
170
Mode that the file should be opened in (default: 'w')
165
-
171
+
166
172
Notes
167
173
-----
168
174
169
175
Further *args and **kwargs are passed to root_numpy's array2root.
170
176
171
177
>>> df = DataFrame({'x': [1,2,3], 'y': [4,5,6]})
172
178
>>> df.to_root('test.root')
173
-
179
+
174
180
The DataFrame index will be saved as a branch called '__index__*',
175
181
where * is the name of the index in the original DataFrame
176
182
"""
@@ -193,6 +199,6 @@ def to_root(df, path, key='default', mode='w', *args, **kwargs):
193
199
arr = df_ .to_records (index = False )
194
200
array2root (arr , path , key , mode = mode , * args , ** kwargs )
195
201
202
+
196
203
# Patch pandas DataFrame to support to_root method
197
204
DataFrame .to_root = to_root
198
-
0 commit comments