Skip to content
This repository was archived by the owner on Jan 9, 2023. It is now read-only.

Commit 378b64a

Browse files
committed
Add a __len__ method to the chunk iterator returned by read_root when chunksize is given
This allows the chunked result to be used with progress bars such as tqdm, for example: `for chunk in tqdm(read_root("filename.root", chunksize=100)): do_something(chunk)`
1 parent bf6b624 commit 378b64a

File tree

1 file changed

+17
-11
lines changed

1 file changed

+17
-11
lines changed

root_pandas/readwrite.py

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -252,19 +252,25 @@ def read_root(paths, key=None, columns=None, ignore=None, chunksize=None, where=
252252
for path in paths:
253253
tchain.Add(path)
254254
n_entries = tchain.GetEntries()
255+
n_chunks = int(ceil(float(n_entries) / chunksize))
255256
# XXX could explicitly clean up the opened TFiles with TChain::Reset
256257

257-
def genchunks():
258-
current_index = 0
259-
for chunk in range(int(ceil(float(n_entries) / chunksize))):
260-
arr = root2array(paths, key, all_vars, start=chunk * chunksize, stop=(chunk+1) * chunksize, selection=where, *args, **kwargs)
261-
if len(arr) == 0:
262-
continue
263-
if flatten:
264-
arr = do_flatten(arr, flatten)
265-
yield convert_to_dataframe(arr, start_index=current_index)
266-
current_index += len(arr)
267-
return genchunks()
258+
class genchunk(object):
259+
def __len__(self):
260+
return n_chunks
261+
262+
def __iter__(self):
263+
current_index = 0
264+
for chunk in range(n_chunks):
265+
arr = root2array(paths, key, all_vars, start=chunk * chunksize, stop=(chunk+1) * chunksize, selection=where, *args, **kwargs)
266+
if len(arr) == 0:
267+
continue
268+
if flatten:
269+
arr = do_flatten(arr, flatten)
270+
yield convert_to_dataframe(arr, start_index=current_index)
271+
current_index += len(arr)
272+
273+
return genchunk()
268274

269275
arr = root2array(paths, key, all_vars, selection=where, *args, **kwargs)
270276
if flatten:

0 commit comments

Comments
 (0)