@@ -61,16 +61,16 @@ def get_matching_variables(branches, patterns, fail=True):
61
61
return selected
62
62
63
63
64
- def read_root (path , key = None , columns = None , ignore = None , chunksize = None , where = None , flatten = False , * args , ** kwargs ):
64
+ def read_root (paths , key = None , columns = None , ignore = None , chunksize = None , where = None , flatten = False , * args , ** kwargs ):
65
65
"""
66
- Read a ROOT file into a pandas DataFrame.
66
+ Read a ROOT file, or list of ROOT files, into a pandas DataFrame.
67
67
Further *args and **kwargs are passed to root_numpy's root2array.
68
68
If the root file contains a branch matching __index__*, it will become the DataFrame's index.
69
69
70
70
Parameters
71
71
----------
72
- path : string
73
- The path to the root file.
72
+ paths : string or list of strings
73
+ The path(s) to the root file(s).
74
74
key: string
75
75
The key of the tree to load.
76
76
columns: str or sequence of str
@@ -98,16 +98,22 @@ def read_root(path, key=None, columns=None, ignore=None, chunksize=None, where=N
98
98
>>> df = read_root('test.root', 'MyTree', columns=['A{B,C}*', 'D'], where='ABB > 100')
99
99
100
100
"""
101
+
102
+ if not isinstance (paths , list ):
103
+ paths = [paths ]
104
+ # Only the first file is inspected to discover trees and branches
105
+ seed_path = paths [0 ]
106
+
101
107
if not key :
102
- trees = list_trees (path )
108
+ trees = list_trees (seed_path )
103
109
if len (trees ) == 1 :
104
110
key = trees [0 ]
105
111
elif len (trees ) == 0 :
106
- raise ValueError ('No trees found in {}' .format (path ))
112
+ raise ValueError ('No trees found in {}' .format (seed_path ))
107
113
else :
108
- raise ValueError ('More than one tree found in {}' .format (path ))
114
+ raise ValueError ('More than one tree found in {}' .format (seed_path ))
109
115
110
- branches = list_branches (path , key )
116
+ branches = list_branches (seed_path , key )
111
117
112
118
if not columns :
113
119
all_vars = branches
@@ -139,20 +145,22 @@ def do_flatten(arr):
139
145
return arr
140
146
141
147
if chunksize :
142
- f = ROOT .TFile .Open (path )
143
- n_entries = f .Get (key ).GetEntries ()
144
- f .Close ()
148
+ tchain = ROOT .TChain (key )
149
+ for path in paths :
150
+ tchain .Add (path )
151
+ n_entries = tchain .GetEntries ()
152
+ # XXX could explicitly clean up the opened TFiles with TChain::Reset
145
153
146
154
def genchunks ():
147
155
for chunk in range (int (ceil (float (n_entries ) / chunksize ))):
148
- arr = root2array (path , key , all_vars , start = chunk * chunksize , stop = (chunk + 1 ) * chunksize , selection = where , * args , ** kwargs )
156
+ arr = root2array (paths , key , all_vars , start = chunk * chunksize , stop = (chunk + 1 ) * chunksize , selection = where , * args , ** kwargs )
149
157
if flatten :
150
158
arr = do_flatten (arr )
151
159
yield convert_to_dataframe (arr )
152
160
153
161
return genchunks ()
154
162
155
- arr = root2array (path , key , all_vars , selection = where , * args , ** kwargs )
163
+ arr = root2array (paths , key , all_vars , selection = where , * args , ** kwargs )
156
164
if flatten :
157
165
arr = do_flatten (arr )
158
166
return convert_to_dataframe (arr )
0 commit comments