Skip to content
This repository was archived by the owner on Jan 9, 2023. It is now read-only.

Commit d161d5a

Browse files
alexpearce authored and ibab committed
Use noexpand: prefix for formula branches
root_numpy supports branch names to be formulas, such as 2*sqrt(x), which are parsed by ROOT’s TFormula class. The use of * by root_pandas for branch name matching conflicted with this behaviour. This commit adds the NOEXPAND_PREFIX string, which causes column names to skip the branch name matching mechanisms. Closes #14.
1 parent d7e2956 commit d161d5a

File tree

2 files changed

+47
-2
lines changed

2 files changed

+47
-2
lines changed

root_pandas/readwrite.py

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@
2424
'to_root',
2525
]
2626

27+
NOEXPAND_PREFIX = 'noexpand:'
28+
2729

2830
def expand_braces(orig):
2931
r = r'.*(\{.+?[^\\]\})'
@@ -72,6 +74,25 @@ def get_matching_variables(branches, patterns, fail=True):
7274
return selected
7375

7476

77+
def filter_noexpand_columns(columns):
    """Split column names by whether they start with the noexpand prefix.

    Parameters
    ----------
    columns: iterable of str
        The column names to split. The input is consumed in a single pass,
        so one-shot iterables such as generators are handled correctly.

    Returns
    -------
    Two lists. The first contains the names that do not start with
    NOEXPAND_PREFIX, unchanged. The second contains the names that do start
    with it, with the prefix stripped. The relative order of the input is
    preserved within each list.
    """
    prefix_len = len(NOEXPAND_PREFIX)
    other = []
    noexpand = []
    # Partition in one pass rather than with two comprehensions: iterating
    # `columns` twice would leave the second result empty for a generator.
    for column in columns:
        if column.startswith(NOEXPAND_PREFIX):
            noexpand.append(column[prefix_len:])
        else:
            other.append(column)
    return other, noexpand
94+
95+
7596
def read_root(paths, key=None, columns=None, ignore=None, chunksize=None, where=None, flatten=False, *args, **kwargs):
7697
"""
7798
Read a ROOT file, or list of ROOT files, into a pandas DataFrame.
@@ -86,6 +107,9 @@ def read_root(paths, key=None, columns=None, ignore=None, chunksize=None, where=
86107
The key of the tree to load.
87108
columns: str or sequence of str
88109
A sequence of shell-patterns (can contain *, ?, [] or {}). Matching columns are read.
110+
The columns beginning with `noexpand:` are not interpreted as shell-patterns,
111+
allowing formula columns such as `noexpand:2*x`. The column in the returned DataFrame
112+
will not have the `noexpand:` prefix.
89113
ignore: str or sequence of str
90114
A sequence of shell-patterns (can contain *, ?, [] or {}). All matching columns are ignored (overriding the columns argument).
91115
chunksize: int
@@ -137,8 +161,9 @@ def read_root(paths, key=None, columns=None, ignore=None, chunksize=None, where=
137161
if index_branches:
138162
columns = columns[:]
139163
columns.append(index_branches[0])
164+
columns, noexpand = filter_noexpand_columns(columns)
140165
columns = list(itertools.chain.from_iterable(list(map(expand_braces, columns))))
141-
all_vars = get_matching_variables(branches, columns)
166+
all_vars = get_matching_variables(branches, columns) + noexpand
142167

143168
if ignore:
144169
if isinstance(ignore, string_types):

tests/test.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -204,4 +204,24 @@ def test_drop_nonscalar_columns():
204204
assert(np.all(df.a.values == np.array([3, 2])))
205205
assert(np.all(df.d.values == np.array([True, False])))
206206

207-
os.remove(path)
207+
os.remove(path)
208+
209+
def test_noexpand_prefix():
    """Check that `noexpand:` columns bypass branch-name pattern matching.

    Writes a single-column tree to a temporary ROOT file, verifies that a
    formula column without the prefix is rejected, and that the same kind of
    formula with the prefix is read back under a column name that no longer
    carries the prefix.
    """
    path = 'tmp.root'
    xs = np.array([1, 2, 3])
    df = pd.DataFrame({'x': xs})
    df.to_root(path)

    # Remove the temporary file even when a check below fails, so a failing
    # run does not leave a stale file behind for other tests.
    try:
        # Not using the prefix should throw, as there's no matching branch name
        raised = False
        try:
            read_root(path, columns=['2*x'])
        except ValueError:
            raised = True
        assert raised, "reading '2*x' without the noexpand prefix should raise ValueError"

        # Could also use TMath::Sqrt here
        df = read_root(path, columns=['noexpand:2*sqrt(x)'])
        # Note that the column name shouldn't have the noexpand prefix
        assert np.all(df['2*sqrt(x)'].values == 2*np.sqrt(xs))
    finally:
        os.remove(path)

0 commit comments

Comments
 (0)