-
Notifications
You must be signed in to change notification settings - Fork 35
root pandas is now automatically dropping columns... #17
Changes from 2 commits
8adc1da
c031cdc
34f5d0d
3bf916a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,6 +14,7 @@ | |
from math import ceil | ||
import re | ||
import ROOT | ||
import warnings | ||
|
||
from .utils import stretch | ||
|
||
|
@@ -147,6 +148,18 @@ def do_flatten(arr): | |
arr = append_fields(arr_, '__array_index', idx, usemask=False, asrecarray=True) | ||
return arr | ||
|
||
def remove_high_dimensions(arr): | ||
allowed_dimensions = [0] | ||
first_row = arr[0] | ||
good_cols = [True if x.ndim in allowed_dimensions else False for x in first_row] | ||
col_names = np.array(list(arr.dtype.names)) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why do you need the […]? (inline code lost in extraction; presumably the `list(...)` wrapper around `arr.dtype.names`) — There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I didn't know, thanks! |
||
good_names = col_names[np.array(good_cols)] | ||
bad_names = col_names[np.array([not x for x in good_cols])] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Might make sense to define […] (inline code and the remainder of this comment were lost in extraction) |
||
for bad_name in bad_names: | ||
warnings.warn("Dropped {bad_name} branch because dimension is unfit for DataFrame" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Would be better to only emit a single warning, in my opinion. How about […] (the suggested code was lost in extraction) |
||
.format(bad_name=bad_name), UserWarning) | ||
return arr[good_names] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This might be a bit of a problem. — There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Does the FutureWarning make any suggestions regarding alternatives? |
||
|
||
if chunksize: | ||
tchain = ROOT.TChain(key) | ||
for path in paths: | ||
|
@@ -159,13 +172,15 @@ def genchunks(): | |
arr = root2array(paths, key, all_vars, start=chunk * chunksize, stop=(chunk+1) * chunksize, selection=where, *args, **kwargs) | ||
if flatten: | ||
arr = do_flatten(arr) | ||
arr = remove_high_dimensions(arr) | ||
yield convert_to_dataframe(arr) | ||
|
||
return genchunks() | ||
|
||
arr = root2array(paths, key, all_vars, selection=where, *args, **kwargs) | ||
if flatten: | ||
arr = do_flatten(arr) | ||
arr = remove_high_dimensions(arr) | ||
return convert_to_dataframe(arr) | ||
|
||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How about `def remove_nonscalar(arr)` to make the purpose a bit clearer?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I wanted to leave the name flexible. But I am okay with calling it this, in which case I will also skip the explicit `allowed_dimensions = [0]` constant.