Skip to content
This repository was archived by the owner on Jan 9, 2023. It is now read-only.

Add support for columns of arrays and general tidying #54

Merged
merged 7 commits into from
Feb 3, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 26 additions & 26 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,36 +1,27 @@
#sudo: false
# travis-ci.org build & test configuration
language: python

matrix:
include:
- python: 2.7
env: PYTHON=2.7 ROOT=5.34.32
- python: 2.7
env: PYTHON=2.7 ROOT=6.04
- python: 3.4
env: PYTHON=3.4 ROOT=5.34.32
- python: 3.4
env: PYTHON=3.4 ROOT=6.04
- python: 3.5
env: PYTHON=3.4 ROOT=5.34.32
- python: 3.5
env: PYTHON=3.4 ROOT=6.04
- python: 3.6
env: PYTHON=3.4 ROOT=5.34.32
- python: 3.6
env: PYTHON=3.4 ROOT=6.04
#install: source ci/install.sh
install:
- if [ "${TRAVIS_OS_NAME}" == "osx" ]; then curl --silent http://repo.continuum.io/miniconda/Miniconda-latest-MacOSX-x86_64.sh -o miniconda.sh; fi
- if [ "${TRAVIS_OS_NAME}" == "linux" ]; then wget -nv http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; fi
include:
- python: 2.7
env: PYTHON=2.7 ROOT=5.34.32
- python: 2.7
env: PYTHON=2.7 ROOT=6.04
- python: 3.4
env: PYTHON=3.4 ROOT=5.34.32
- python: 3.4
env: PYTHON=3.4 ROOT=6.04

install:
- if [ "${TRAVIS_OS_NAME}" == "osx" ]; then curl --silent http://repo.continuum.io/miniconda/Miniconda-latest-MacOSX-x86_64.sh
-o miniconda.sh; fi
- if [ "${TRAVIS_OS_NAME}" == "linux" ]; then wget -nv http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh
-O miniconda.sh; fi
- bash miniconda.sh -b -p $HOME/miniconda
- export PATH="$HOME/miniconda/bin:$PATH"
- hash -r
- conda config --set always_yes yes --set changeps1 no
- conda update -q conda
- conda info -a # Useful for debugging any issues with conda
- conda info -a
- conda config --add channels http://conda.anaconda.org/NLeSC
- conda config --set show_channel_urls yes
- conda create -q -n testenv python=${PYTHON} root=${ROOT} rootpy pandas nose
Expand All @@ -41,7 +32,16 @@ install:
script: nosetests --with-coverage --cover-package=root_pandas

after_success:
- time coveralls
- time coveralls

notifications:
email: false
email: false

deploy:
provider: pypi
user: chrisburr
password:
secure: MyD2Q4zASzpXWaOBnbkGGm7luYB2SrrBVdX4faN0JmSmDcssn/exu2XDAIwhbZhg3uZC4bq7mBUpPiw/3Mx1f5kFgWlnjpnSRDaGhGLLc6rBp9Kqt6IOWcQ64yQ+S6LIuJ+tjbTMJAlNZgy3HDEwBWXKBvectWKJPZdVCenfMPA=
on:
tags: true
branch: master
5 changes: 5 additions & 0 deletions root_pandas/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,7 @@
from .readwrite import read_root
from .readwrite import to_root

__all__ = [
'read_root',
'to_root',
]
78 changes: 52 additions & 26 deletions root_pandas/readwrite.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@


def expand_braces(orig):
r = r'.*(\{.+?[^\\]\})'
r = r'.*?(\{.+[^\\]\})'
p = re.compile(r)

s = orig[:]
Expand All @@ -40,12 +40,10 @@ def expand_braces(orig):
open_brace = s.find(sub)
close_brace = open_brace + len(sub) - 1
if sub.find(',') != -1:
for pat in sub.strip('{}').split(','):
for pat in sub[1:-1].split(','):
res.extend(expand_braces(s[:open_brace] + pat + s[close_brace+1:]))

else:
res.extend(expand_braces(s[:open_brace] + sub.replace('}', '\\}') + s[close_brace+1:]))

else:
res.append(s.replace('\\}', '}'))

Expand All @@ -59,6 +57,7 @@ def get_nonscalar_columns(array):
bad_names = col_names[bad_cols]
return list(bad_names)


def get_matching_variables(branches, patterns, fail=True):
selected = []

Expand Down Expand Up @@ -93,6 +92,30 @@ def filter_noexpand_columns(columns):
return other, noexpand


def do_flatten(arr, flatten):
    """Stretch the requested array-valued columns of ``arr``.

    Parameters
    ----------
    arr : structured array
        Record array whose field names are available as ``arr.dtype.names``.
    flatten : True or iterable of str
        Names of the non-scalar columns to stretch. Passing ``True`` is
        deprecated: it emits a ``FutureWarning`` and calls ``stretch``
        without restricting the fields.

    Returns
    -------
    The array produced by ``stretch`` with an additional ``__array_index``
    field holding the indices that ``stretch`` returned.

    Raises
    ------
    ValueError
        If a requested column has a scalar type, or is not a field of
        ``arr`` at all.
    """
    if flatten is not True:
        array_columns = get_nonscalar_columns(arr)

        # Keep every scalar column plus the non-scalar ones that were
        # explicitly requested; other non-scalar columns are left out.
        kept = []
        for name in arr.dtype.names:
            if name not in array_columns or name in flatten:
                kept.append(name)

        # Every requested column has to exist and be non-scalar.
        for requested in flatten:
            if requested in array_columns:
                continue
            if requested in kept:
                raise ValueError("Requested to flatten {col} but it has a scalar type"
                                 .format(col=requested))
            raise ValueError("Requested to flatten {col} but it wasn't loaded from the input file"
                             .format(col=requested))

        stretched, positions = stretch(arr, fields=kept, return_indices=True)
    else:
        warnings.warn(" The option flatten=True is deprecated. Please specify the branches you would like "
                      "to flatten in a list: flatten=['foo', 'bar']", FutureWarning)
        stretched, positions = stretch(arr, return_indices=True)

    return append_fields(stretched, '__array_index', positions, usemask=False, asrecarray=True)


def read_root(paths, key=None, columns=None, ignore=None, chunksize=None, where=None, flatten=False, *args, **kwargs):
"""
Read a ROOT file, or list of ROOT files, into a pandas DataFrame.
Expand Down Expand Up @@ -175,22 +198,6 @@ def read_root(paths, key=None, columns=None, ignore=None, chunksize=None, where=
for var in ignored:
all_vars.remove(var)

def do_flatten(arr, flatten):
if flatten is True:
warnings.warn(" The option flatten=True is deprecated. Please specify the branches you would like "
"to flatten in a list: flatten=['foo', 'bar']", FutureWarning)
arr_, idx = stretch(arr, return_indices=True)
else:
nonscalar = get_nonscalar_columns(arr)
fields = [x for x in arr.dtype.names if (x not in nonscalar or x in flatten)]
will_drop = [x for x in arr.dtype.names if x not in fields]
if will_drop:
warnings.warn("Ignored the following non-scalar branches: {bad_names}"
.format(bad_names=", ".join(will_drop)), UserWarning)
arr_, idx = stretch(arr, fields=fields, return_indices=True)
arr = append_fields(arr_, '__array_index', idx, usemask=False, asrecarray=True)
return arr

if chunksize:
tchain = ROOT.TChain(key)
for path in paths:
Expand All @@ -216,26 +223,45 @@ def genchunks():

def convert_to_dataframe(array, start_index=None):
    """Build a pandas DataFrame from a structured array.

    Non-scalar columns (as reported by ``get_nonscalar_columns``) are kept
    in the result. Those stored as arrays with two or more dimensions are
    converted to 1D object columns holding one array per row.

    Parameters
    ----------
    array : structured array
        Input records; field names are read from ``array.dtype.names``.
    start_index : int, optional
        Used only when ``array`` has no ``__index__*`` field. If given, the
        frame gets a ``RangeIndex`` starting at this value.

    Returns
    -------
    DataFrame
        Columns appear in the same order as the fields of ``array``. If a
        single ``__index__<name>`` field exists it becomes the index and is
        named ``<name>`` (or left unnamed when ``<name>`` is empty).

    Raises
    ------
    ValueError
        If more than one ``__index__*`` field is present.
    """
    nonscalar_columns = get_nonscalar_columns(array)

    # Columns containing 2D arrays can't be loaded, so convert them to
    # 1D arrays of arrays
    reshaped_columns = {}
    for col in nonscalar_columns:
        if array[col].ndim >= 2:
            reshaped = np.zeros(len(array[col]), dtype='O')
            for i, row in enumerate(array[col]):
                reshaped[i] = row
            reshaped_columns[col] = reshaped

    # The reshaped columns are excluded here and added back afterwards
    excluded = list(reshaped_columns)

    indices = [name for name in array.dtype.names if name.startswith('__index__')]
    if not indices:
        index = None
        if start_index is not None:
            index = RangeIndex(start=start_index, stop=start_index + len(array))
        df = DataFrame.from_records(array, exclude=excluded, index=index)
    elif len(indices) == 1:
        # We store the index under the __index__* branch, where
        # * is the name of the index
        df = DataFrame.from_records(array, exclude=excluded, index=indices[0])
        index_name = indices[0][len('__index__'):]
        if not index_name:
            # None means the index has no name
            index_name = None
        df.index.name = index_name
    else:
        raise ValueError("More than one index found in file")

    # Manually add the columns which were reshaped
    for key, reshaped in reshaped_columns.items():
        df[key] = reshaped

    # Reshaping can cause the order of columns to change so we have to change it back
    if reshaped_columns:
        # Filter to remove __index__ columns
        columns = [c for c in array.dtype.names if c in df.columns]
        assert len(columns) == len(df.columns), (columns, df.columns)
        # reindex_axis was removed from pandas; reindex(columns=...) restores
        # the original field order instead
        df = df.reindex(columns=columns)

    return df


Expand Down
9 changes: 5 additions & 4 deletions root_pandas/utils.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
# Copyright (c) 2012 rootpy developers and contributors
#
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of
# this software and associated documentation files (the "Software"), to deal in
# the Software without restriction, including without limitation the rights to
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
# the Software, and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
Expand All @@ -24,6 +24,7 @@
import numpy as np
VLEN = np.vectorize(len)


def stretch(arr, fields=None, return_indices=False):
"""Stretch an array.
Stretch an array by ``hstack()``-ing multiple array fields while
Expand Down Expand Up @@ -104,5 +105,5 @@ def stretch(arr, fields=None, return_indices=False):
if return_indices:
idx = np.concatenate(list(map(np.arange, len_array)))
return ret, idx

return ret
Binary file added tests/samples/HZZ-lz4.root
Binary file not shown.
Binary file added tests/samples/HZZ-lzma.root
Binary file not shown.
Binary file added tests/samples/HZZ-uncompressed.root
Binary file not shown.
Binary file added tests/samples/HZZ-zlib.root
Binary file not shown.
Binary file added tests/samples/HZZ.root
Binary file not shown.
3 changes: 3 additions & 0 deletions tests/samples/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Files for testing `root_pandas`

These files are taken from [uproot](https://github.com/scikit-hep/uproot/).
Binary file added tests/samples/Zmumu-lz4.root
Binary file not shown.
Binary file added tests/samples/Zmumu-lzma.root
Binary file not shown.
Binary file added tests/samples/Zmumu-uncompressed.root
Binary file not shown.
Binary file added tests/samples/Zmumu-zlib.root
Binary file not shown.
Binary file added tests/samples/Zmumu.root
Binary file not shown.
Binary file added tests/samples/foriter.root
Binary file not shown.
Binary file added tests/samples/foriter2.root
Binary file not shown.
Binary file added tests/samples/histograms.root
Binary file not shown.
Binary file added tests/samples/issue21.root
Binary file not shown.
Binary file added tests/samples/issue30.root
Binary file not shown.
Binary file added tests/samples/issue31.root
Binary file not shown.
Binary file added tests/samples/issue33.root
Binary file not shown.
Binary file added tests/samples/issue38a.root
Binary file not shown.
Binary file added tests/samples/issue38b.root
Binary file not shown.
Binary file added tests/samples/issue49.root
Binary file not shown.
Binary file added tests/samples/mc10events.root
Binary file not shown.
Binary file added tests/samples/nesteddirs.root
Binary file not shown.
Binary file added tests/samples/sample-5.23.02-uncompressed.root
Binary file not shown.
Binary file added tests/samples/sample-5.23.02-zlib.root
Binary file not shown.
Binary file added tests/samples/sample-5.24.00-uncompressed.root
Binary file not shown.
Binary file added tests/samples/sample-5.24.00-zlib.root
Binary file not shown.
Binary file added tests/samples/sample-5.25.02-uncompressed.root
Binary file not shown.
Binary file added tests/samples/sample-5.25.02-zlib.root
Binary file not shown.
Binary file added tests/samples/sample-5.26.00-uncompressed.root
Binary file not shown.
Binary file added tests/samples/sample-5.26.00-zlib.root
Binary file not shown.
Binary file added tests/samples/sample-5.27.02-uncompressed.root
Binary file not shown.
Binary file added tests/samples/sample-5.27.02-zlib.root
Binary file not shown.
Binary file added tests/samples/sample-5.28.00-uncompressed.root
Binary file not shown.
Binary file added tests/samples/sample-5.28.00-zlib.root
Binary file not shown.
Binary file added tests/samples/sample-5.29.02-uncompressed.root
Binary file not shown.
Binary file added tests/samples/sample-5.29.02-zlib.root
Binary file not shown.
Binary file added tests/samples/sample-5.30.00-lzma.root
Binary file not shown.
Binary file added tests/samples/sample-5.30.00-uncompressed.root
Binary file not shown.
Binary file added tests/samples/sample-5.30.00-zlib.root
Binary file not shown.
Binary file added tests/samples/sample-6.08.04-lzma.root
Binary file not shown.
Binary file added tests/samples/sample-6.08.04-uncompressed.root
Binary file not shown.
Binary file added tests/samples/sample-6.08.04-zlib.root
Binary file not shown.
Binary file added tests/samples/sample-6.10.05-lz4.root
Binary file not shown.
Binary file added tests/samples/sample-6.10.05-lzma.root
Binary file not shown.
Binary file added tests/samples/sample-6.10.05-uncompressed.root
Binary file not shown.
Binary file added tests/samples/sample-6.10.05-zlib.root
Binary file not shown.
Binary file added tests/samples/simple.root
Binary file not shown.
Binary file added tests/samples/small-evnt-tree-fullsplit.root
Binary file not shown.
Binary file added tests/samples/small-flat-tree.root
Binary file not shown.
Loading