Skip to content
This repository was archived by the owner on Jan 9, 2023. It is now read-only.

Commit 4ab672f

Browse files
authored
Merge pull request #54 from chrisburr/develop
Add support for columns of arrays and general tidying
2 parents 1dc249c + 354bc15 commit 4ab672f

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

56 files changed

+215
-84
lines changed

.travis.yml

Lines changed: 26 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,36 +1,27 @@
1-
#sudo: false
2-
# travis-ci.org build & test configuration
31
language: python
42

53
matrix:
6-
include:
7-
- python: 2.7
8-
env: PYTHON=2.7 ROOT=5.34.32
9-
- python: 2.7
10-
env: PYTHON=2.7 ROOT=6.04
11-
- python: 3.4
12-
env: PYTHON=3.4 ROOT=5.34.32
13-
- python: 3.4
14-
env: PYTHON=3.4 ROOT=6.04
15-
- python: 3.5
16-
env: PYTHON=3.4 ROOT=5.34.32
17-
- python: 3.5
18-
env: PYTHON=3.4 ROOT=6.04
19-
- python: 3.6
20-
env: PYTHON=3.4 ROOT=5.34.32
21-
- python: 3.6
22-
env: PYTHON=3.4 ROOT=6.04
23-
#install: source ci/install.sh
24-
install:
25-
- if [ "${TRAVIS_OS_NAME}" == "osx" ]; then curl --silent http://repo.continuum.io/miniconda/Miniconda-latest-MacOSX-x86_64.sh -o miniconda.sh; fi
26-
- if [ "${TRAVIS_OS_NAME}" == "linux" ]; then wget -nv http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; fi
4+
include:
5+
- python: 2.7
6+
env: PYTHON=2.7 ROOT=5.34.32
7+
- python: 2.7
8+
env: PYTHON=2.7 ROOT=6.04
9+
- python: 3.4
10+
env: PYTHON=3.4 ROOT=5.34.32
11+
- python: 3.4
12+
env: PYTHON=3.4 ROOT=6.04
2713

14+
install:
15+
- if [ "${TRAVIS_OS_NAME}" == "osx" ]; then curl --silent http://repo.continuum.io/miniconda/Miniconda-latest-MacOSX-x86_64.sh
16+
-o miniconda.sh; fi
17+
- if [ "${TRAVIS_OS_NAME}" == "linux" ]; then wget -nv http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh
18+
-O miniconda.sh; fi
2819
- bash miniconda.sh -b -p $HOME/miniconda
2920
- export PATH="$HOME/miniconda/bin:$PATH"
3021
- hash -r
3122
- conda config --set always_yes yes --set changeps1 no
3223
- conda update -q conda
33-
- conda info -a # Useful for debugging any issues with conda
24+
- conda info -a
3425
- conda config --add channels http://conda.anaconda.org/NLeSC
3526
- conda config --set show_channel_urls yes
3627
- conda create -q -n testenv python=${PYTHON} root=${ROOT} rootpy pandas nose
@@ -41,7 +32,16 @@ install:
4132
script: nosetests --with-coverage --cover-package=root_pandas
4233

4334
after_success:
44-
- time coveralls
35+
- time coveralls
4536

4637
notifications:
47-
email: false
38+
email: false
39+
40+
deploy:
41+
provider: pypi
42+
user: chrisburr
43+
password:
44+
secure: MyD2Q4zASzpXWaOBnbkGGm7luYB2SrrBVdX4faN0JmSmDcssn/exu2XDAIwhbZhg3uZC4bq7mBUpPiw/3Mx1f5kFgWlnjpnSRDaGhGLLc6rBp9Kqt6IOWcQ64yQ+S6LIuJ+tjbTMJAlNZgy3HDEwBWXKBvectWKJPZdVCenfMPA=
45+
on:
46+
tags: true
47+
branch: master

root_pandas/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,7 @@
11
from .readwrite import read_root
22
from .readwrite import to_root
3+
4+
__all__ = [
5+
'read_root',
6+
'to_root',
7+
]

root_pandas/readwrite.py

Lines changed: 52 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828

2929

3030
def expand_braces(orig):
31-
r = r'.*(\{.+?[^\\]\})'
31+
r = r'.*?(\{.+[^\\]\})'
3232
p = re.compile(r)
3333

3434
s = orig[:]
@@ -40,12 +40,10 @@ def expand_braces(orig):
4040
open_brace = s.find(sub)
4141
close_brace = open_brace + len(sub) - 1
4242
if sub.find(',') != -1:
43-
for pat in sub.strip('{}').split(','):
43+
for pat in sub[1:-1].split(','):
4444
res.extend(expand_braces(s[:open_brace] + pat + s[close_brace+1:]))
45-
4645
else:
4746
res.extend(expand_braces(s[:open_brace] + sub.replace('}', '\\}') + s[close_brace+1:]))
48-
4947
else:
5048
res.append(s.replace('\\}', '}'))
5149

@@ -59,6 +57,7 @@ def get_nonscalar_columns(array):
5957
bad_names = col_names[bad_cols]
6058
return list(bad_names)
6159

60+
6261
def get_matching_variables(branches, patterns, fail=True):
6362
selected = []
6463

@@ -93,6 +92,30 @@ def filter_noexpand_columns(columns):
9392
return other, noexpand
9493

9594

95+
def do_flatten(arr, flatten):
96+
if flatten is True:
97+
warnings.warn(" The option flatten=True is deprecated. Please specify the branches you would like "
98+
"to flatten in a list: flatten=['foo', 'bar']", FutureWarning)
99+
arr_, idx = stretch(arr, return_indices=True)
100+
else:
101+
nonscalar = get_nonscalar_columns(arr)
102+
fields = [x for x in arr.dtype.names if (x not in nonscalar or x in flatten)]
103+
104+
for col in flatten:
105+
if col in nonscalar:
106+
pass
107+
elif col in fields:
108+
raise ValueError("Requested to flatten {col} but it has a scalar type"
109+
.format(col=col))
110+
else:
111+
raise ValueError("Requested to flatten {col} but it wasn't loaded from the input file"
112+
.format(col=col))
113+
114+
arr_, idx = stretch(arr, fields=fields, return_indices=True)
115+
arr = append_fields(arr_, '__array_index', idx, usemask=False, asrecarray=True)
116+
return arr
117+
118+
96119
def read_root(paths, key=None, columns=None, ignore=None, chunksize=None, where=None, flatten=False, *args, **kwargs):
97120
"""
98121
Read a ROOT file, or list of ROOT files, into a pandas DataFrame.
@@ -175,22 +198,6 @@ def read_root(paths, key=None, columns=None, ignore=None, chunksize=None, where=
175198
for var in ignored:
176199
all_vars.remove(var)
177200

178-
def do_flatten(arr, flatten):
179-
if flatten is True:
180-
warnings.warn(" The option flatten=True is deprecated. Please specify the branches you would like "
181-
"to flatten in a list: flatten=['foo', 'bar']", FutureWarning)
182-
arr_, idx = stretch(arr, return_indices=True)
183-
else:
184-
nonscalar = get_nonscalar_columns(arr)
185-
fields = [x for x in arr.dtype.names if (x not in nonscalar or x in flatten)]
186-
will_drop = [x for x in arr.dtype.names if x not in fields]
187-
if will_drop:
188-
warnings.warn("Ignored the following non-scalar branches: {bad_names}"
189-
.format(bad_names=", ".join(will_drop)), UserWarning)
190-
arr_, idx = stretch(arr, fields=fields, return_indices=True)
191-
arr = append_fields(arr_, '__array_index', idx, usemask=False, asrecarray=True)
192-
return arr
193-
194201
if chunksize:
195202
tchain = ROOT.TChain(key)
196203
for path in paths:
@@ -216,26 +223,45 @@ def genchunks():
216223

217224
def convert_to_dataframe(array, start_index=None):
218225
nonscalar_columns = get_nonscalar_columns(array)
219-
if nonscalar_columns:
220-
warnings.warn("Ignored the following non-scalar branches: {bad_names}"
221-
.format(bad_names=", ".join(nonscalar_columns)), UserWarning)
222-
indices = list(filter(lambda x: x.startswith('__index__') and x not in nonscalar_columns, array.dtype.names))
226+
227+
# Columns containing 2D arrays can't be loaded, so convert them to 1D arrays of arrays
228+
reshaped_columns = {}
229+
for col in nonscalar_columns:
230+
if array[col].ndim >= 2:
231+
reshaped = np.zeros(len(array[col]), dtype='O')
232+
for i, row in enumerate(array[col]):
233+
reshaped[i] = row
234+
reshaped_columns[col] = reshaped
235+
236+
indices = list(filter(lambda x: x.startswith('__index__'), array.dtype.names))
223237
if len(indices) == 0:
224238
index = None
225239
if start_index is not None:
226240
index = RangeIndex(start=start_index, stop=start_index + len(array))
227-
df = DataFrame.from_records(array, exclude=nonscalar_columns, index=index)
241+
df = DataFrame.from_records(array, exclude=reshaped_columns, index=index)
228242
elif len(indices) == 1:
229243
# We store the index under the __index__* branch, where
230244
# * is the name of the index
231-
df = DataFrame.from_records(array, index=indices[0], exclude=nonscalar_columns)
245+
df = DataFrame.from_records(array, exclude=reshaped_columns, index=indices[0])
232246
index_name = indices[0][len('__index__'):]
233247
if not index_name:
234248
# None means the index has no name
235249
index_name = None
236250
df.index.name = index_name
237251
else:
238252
raise ValueError("More than one index found in file")
253+
254+
# Manually add the columns which were reshaped
255+
for key, reshaped in reshaped_columns.items():
256+
df[key] = reshaped
257+
258+
# Reshaping can cause the order of columns to change so we have to change it back
259+
if reshaped_columns:
260+
# Filter to remove __index__ columns
261+
columns = [c for c in array.dtype.names if c in df.columns]
262+
assert len(columns) == len(df.columns), (columns, df.columns)
263+
df = df.reindex_axis(columns, axis=1, copy=False)
264+
239265
return df
240266

241267

root_pandas/utils.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
11
# Copyright (c) 2012 rootpy developers and contributors
2-
#
2+
#
33
# Permission is hereby granted, free of charge, to any person obtaining a copy of
44
# this software and associated documentation files (the "Software"), to deal in
55
# the Software without restriction, including without limitation the rights to
66
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
77
# the Software, and to permit persons to whom the Software is furnished to do so,
88
# subject to the following conditions:
9-
#
9+
#
1010
# The above copyright notice and this permission notice shall be included in all
1111
# copies or substantial portions of the Software.
12-
#
12+
#
1313
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1414
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
1515
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
@@ -24,6 +24,7 @@
2424
import numpy as np
2525
VLEN = np.vectorize(len)
2626

27+
2728
def stretch(arr, fields=None, return_indices=False):
2829
"""Stretch an array.
2930
Stretch an array by ``hstack()``-ing multiple array fields while
@@ -104,5 +105,5 @@ def stretch(arr, fields=None, return_indices=False):
104105
if return_indices:
105106
idx = np.concatenate(list(map(np.arange, len_array)))
106107
return ret, idx
107-
108+
108109
return ret

tests/samples/HZZ-lz4.root

280 KB
Binary file not shown.

tests/samples/HZZ-lzma.root

180 KB
Binary file not shown.

tests/samples/HZZ-uncompressed.root

666 KB
Binary file not shown.

tests/samples/HZZ-zlib.root

217 KB
Binary file not shown.

tests/samples/HZZ.root

213 KB
Binary file not shown.

tests/samples/README.md

Lines changed: 3 additions & 0 deletions

tests/samples/Zmumu-lz4.root

208 KB
Binary file not shown.

tests/samples/Zmumu-lzma.root

165 KB
Binary file not shown.

tests/samples/Zmumu-uncompressed.root

338 KB
Binary file not shown.

tests/samples/Zmumu-zlib.root

175 KB
Binary file not shown.

tests/samples/Zmumu.root

175 KB
Binary file not shown.

tests/samples/foriter.root

5.82 KB
Binary file not shown.

tests/samples/foriter2.root

6.06 KB
Binary file not shown.

tests/samples/histograms.root

5.24 KB
Binary file not shown.

tests/samples/issue21.root

8.13 KB
Binary file not shown.

tests/samples/issue30.root

5.92 KB
Binary file not shown.

tests/samples/issue31.root

7.23 KB
Binary file not shown.

tests/samples/issue33.root

3.86 KB
Binary file not shown.

tests/samples/issue38a.root

7.41 KB
Binary file not shown.

tests/samples/issue38b.root

9.98 KB
Binary file not shown.

tests/samples/issue49.root

77.3 KB
Binary file not shown.

tests/samples/mc10events.root

177 KB
Binary file not shown.

tests/samples/nesteddirs.root

44.5 KB
Binary file not shown.
65.7 KB
Binary file not shown.
48 KB
Binary file not shown.
65.7 KB
Binary file not shown.
48 KB
Binary file not shown.
65.7 KB
Binary file not shown.
48 KB
Binary file not shown.
65.8 KB
Binary file not shown.
48 KB
Binary file not shown.
65.9 KB
Binary file not shown.
48.2 KB
Binary file not shown.
65.9 KB
Binary file not shown.
48.2 KB
Binary file not shown.
65.9 KB
Binary file not shown.
48.1 KB
Binary file not shown.
47.2 KB
Binary file not shown.
66.1 KB
Binary file not shown.
48.3 KB
Binary file not shown.
47.4 KB
Binary file not shown.
Binary file not shown.
48.3 KB
Binary file not shown.

tests/samples/sample-6.10.05-lz4.root

49.1 KB
Binary file not shown.
47.4 KB
Binary file not shown.
Binary file not shown.
48.3 KB
Binary file not shown.

tests/samples/simple.root

5.48 KB
Binary file not shown.
32.6 KB
Binary file not shown.

tests/samples/small-flat-tree.root

15.1 KB
Binary file not shown.

0 commit comments

Comments
 (0)