Skip to content

Commit 8748339

Browse files
committed
ENH: Allow SparseDataFrame/SparseSeries values assignment
Also fix .where for sparse blocks. The discrepancy comes from the shape of the block values: `dense_frame._data.blocks[0].values` is 2D even for a 1D block, whereas `sparse_frame._data.blocks[0].values` is always 1D. I'm sure this had worked before and was unneeded in Oct 2017.
1 parent 22b0346 commit 8748339

File tree

11 files changed

+202
-95
lines changed

11 files changed

+202
-95
lines changed

doc/source/whatsnew/v0.24.0.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,7 @@ Other Enhancements
175175
(:issue:`21627`)
176176
- New method :meth:`HDFStore.walk` will recursively walk the group hierarchy of an HDF5 file (:issue:`10932`)
177177
- :func:`read_html` copies cell data across ``colspan`` and ``rowspan``, and it treats all-``th`` table rows as headers if ``header`` kwarg is not given and there is no ``thead`` (:issue:`17054`)
178+
- :class:`SparseDataFrame` and :class:`SparseSeries` support value assignment (:issue:`21818`)
178179
- :meth:`Series.nlargest`, :meth:`Series.nsmallest`, :meth:`DataFrame.nlargest`, and :meth:`DataFrame.nsmallest` now accept the value ``"all"`` for the ``keep`` argument. This keeps all ties for the nth largest/smallest value (:issue:`16818`)
179180
- :class:`IntervalIndex` has gained the :meth:`~IntervalIndex.set_closed` method to change the existing ``closed`` value (:issue:`21670`)
180181
- :func:`~DataFrame.to_csv`, :func:`~Series.to_csv`, :func:`~DataFrame.to_json`, and :func:`~Series.to_json` now support ``compression='infer'`` to infer compression based on filename extension (:issue:`15008`).

pandas/core/frame.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2548,9 +2548,7 @@ def set_value(self, index, col, value, takeable=False):
25482548
25492549
Returns
25502550
-------
2551-
frame : DataFrame
2552-
If label pair is contained, will be reference to calling DataFrame,
2553-
otherwise a new object
2551+
self : DataFrame
25542552
"""
25552553
warnings.warn("set_value is deprecated and will be removed "
25562554
"in a future release. Please use "

pandas/core/internals/blocks.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -921,6 +921,9 @@ def _is_empty_indexer(indexer):
921921
if _is_empty_indexer(indexer):
922922
pass
923923

924+
elif is_sparse(values):
925+
values = values.set_values(indexer, value)
926+
924927
# setting a single element for each dim and with a rhs that could
925928
# be say a list
926929
# GH 6043
@@ -1494,6 +1497,11 @@ def where(self, other, cond, align=True, errors='raise',
14941497
raise ValueError("where must have a condition that is ndarray "
14951498
"like")
14961499

1500+
# For SparseBlock, self.values is always 1D. If cond was a frame,
1501+
# its 2D values would incorrectly broadcast later on.
1502+
if values.ndim == 1 and any(ax == 1 for ax in cond.shape):
1503+
cond = cond.ravel()
1504+
14971505
# our where function
14981506
def func(cond, values, other):
14991507
if cond.ravel().all():
@@ -1844,6 +1852,11 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0,
18441852
new_values = self.values if inplace else self.copy().values
18451853
new_values, _, new, _ = self._try_coerce_args(new_values, new)
18461854

1855+
if is_sparse(new_values):
1856+
indexer = mask.to_dense().values.ravel().nonzero()[0]
1857+
block = self.setitem(indexer, new)
1858+
return [block]
1859+
18471860
if isinstance(new, np.ndarray) and len(new) == len(mask):
18481861
new = new[mask]
18491862

@@ -3154,6 +3167,17 @@ def _astype(self, dtype, copy=False, errors='raise', values=None,
31543167
return self.make_block_same_class(values=values,
31553168
placement=self.mgr_locs)
31563169

3170+
def _can_hold_element(self, element):
3171+
return np.can_cast(np.asarray(element).dtype, self.sp_values.dtype)
3172+
3173+
def _try_coerce_result(self, result):
3174+
if (isinstance(result, np.ndarray) and
3175+
np.ndim(result) == 1 and
3176+
not is_sparse(result)):
3177+
result = SparseArray(result, kind=self.kind,
3178+
fill_value=self.fill_value)
3179+
return result
3180+
31573181
def __len__(self):
31583182
try:
31593183
return self.sp_index.length

pandas/core/series.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1071,9 +1071,7 @@ def set_value(self, label, value, takeable=False):
10711071
10721072
Returns
10731073
-------
1074-
series : Series
1075-
If label is contained, will be reference to calling Series,
1076-
otherwise a new object
1074+
self : Series
10771075
"""
10781076
warnings.warn("set_value is deprecated and will be removed "
10791077
"in a future release. Please use "

pandas/core/sparse/array.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
import pandas.core.algorithms as algos
3838
import pandas.core.ops as ops
3939
import pandas.io.formats.printing as printing
40+
from pandas.errors import PerformanceWarning
4041
from pandas.util._decorators import Appender
4142
from pandas.core.indexes.base import _index_shared_docs
4243

@@ -369,6 +370,53 @@ def get_values(self, fill=None):
369370
""" return a dense representation """
370371
return self.to_dense(fill=fill)
371372

373+
def set_values(self, indexer, value):
374+
"""
375+
Return new SparseArray with indexed values set to `value`.
376+
377+
Returns
378+
-------
379+
SparseArray
380+
A new sparse array with indexer positions filled with value.
381+
"""
382+
# If indexer is not a single int position, easiest to handle via dense
383+
if not is_scalar(indexer):
384+
warnings.warn(
385+
'Setting SparseSeries/Array values is particularly '
386+
'inefficient when indexing with multiple keys because the '
387+
'whole series is made dense interim.',
388+
PerformanceWarning, stacklevel=2)
389+
390+
values = self.to_dense()
391+
values[indexer] = value
392+
return SparseArray(values, kind=self.kind,
393+
fill_value=self.fill_value)
394+
395+
warnings.warn(
396+
'Setting SparseSeries/Array values is inefficient '
397+
'(a copy of data is made).', PerformanceWarning, stacklevel=2)
398+
399+
# If label already in sparse index, just switch the value on a copy
400+
idx = self.sp_index.lookup(indexer)
401+
if idx != -1:
402+
obj = self.copy()
403+
obj.sp_values[idx] = value
404+
return obj
405+
406+
# Otherwise, construct a new array, and insert the new value in the
407+
# correct position
408+
indices = self.sp_index.to_int_index().indices
409+
pos = np.searchsorted(indices, indexer)
410+
411+
indices = np.insert(indices, pos, indexer)
412+
sp_values = np.insert(self.sp_values, pos, value)
413+
# Length can be increased when adding a new value into index
414+
length = max(self.sp_index.length, indexer + 1)
415+
sp_index = _make_index(length, indices, self.kind)
416+
417+
return SparseArray(sp_values, sparse_index=sp_index,
418+
fill_value=self.fill_value)
419+
372420
def to_dense(self, fill=None):
373421
"""
374422
Convert SparseArray to a NumPy array.
@@ -544,6 +592,10 @@ def astype(self, dtype=None, copy=True):
544592
return self._simple_new(sp_values, self.sp_index,
545593
fill_value=fill_value)
546594

595+
def tolist(self):
596+
"""Return *dense* self as list"""
597+
return self.values.tolist()
598+
547599
def copy(self, deep=True):
548600
"""
549601
Make a copy of the SparseArray. Only the actual sparse values need to

pandas/core/sparse/frame.py

Lines changed: 2 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -333,8 +333,8 @@ def _apply_columns(self, func):
333333
default_fill_value=self.default_fill_value,
334334
default_kind=self.default_kind).__finalize__(self)
335335

336-
def astype(self, dtype):
337-
return self._apply_columns(lambda x: x.astype(dtype))
336+
def astype(self, dtype, **kwargs):
337+
return self._apply_columns(lambda x: x.astype(dtype, **kwargs))
338338

339339
def copy(self, deep=True):
340340
"""
@@ -465,44 +465,6 @@ def _get_value(self, index, col, takeable=False):
465465
return series._get_value(index, takeable=takeable)
466466
_get_value.__doc__ = get_value.__doc__
467467

468-
def set_value(self, index, col, value, takeable=False):
469-
"""
470-
Put single value at passed column and index
471-
472-
.. deprecated:: 0.21.0
473-
474-
Please use .at[] or .iat[] accessors.
475-
476-
Parameters
477-
----------
478-
index : row label
479-
col : column label
480-
value : scalar value
481-
takeable : interpret the index/col as indexers, default False
482-
483-
Notes
484-
-----
485-
This method *always* returns a new object. It is currently not
486-
particularly efficient (and potentially very expensive) but is provided
487-
for API compatibility with DataFrame
488-
489-
Returns
490-
-------
491-
frame : DataFrame
492-
"""
493-
warnings.warn("set_value is deprecated and will be removed "
494-
"in a future release. Please use "
495-
".at[] or .iat[] accessors instead", FutureWarning,
496-
stacklevel=2)
497-
return self._set_value(index, col, value, takeable=takeable)
498-
499-
def _set_value(self, index, col, value, takeable=False):
500-
dense = self.to_dense()._set_value(
501-
index, col, value, takeable=takeable)
502-
return dense.to_sparse(kind=self._default_kind,
503-
fill_value=self._default_fill_value)
504-
_set_value.__doc__ = set_value.__doc__
505-
506468
def _slice(self, slobj, axis=0, kind=None):
507469
if axis == 0:
508470
new_index = self.index[slobj]

pandas/core/sparse/series.py

Lines changed: 15 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
import pandas.core.common as com
1919
import pandas.core.indexes.base as ibase
2020
import pandas.core.ops as ops
21-
import pandas._libs.index as libindex
2221
from pandas.util._decorators import Appender
2322

2423
from pandas.core.sparse.array import (
@@ -278,8 +277,13 @@ def __array_wrap__(self, result, context=None):
278277
else:
279278
fill_value = self.fill_value
280279

280+
# Assumption (unverified): if the result size equals the number of stored sparse points, the old sparse index is still valid.
281+
if np.size(result) == self.sp_index.npoints:
282+
sp_index = self.sp_index
283+
else:
284+
sp_index = None
281285
return self._constructor(result, index=self.index,
282-
sparse_index=self.sp_index,
286+
sparse_index=sp_index,
283287
fill_value=fill_value,
284288
copy=False).__finalize__(self)
285289

@@ -480,7 +484,7 @@ def set_value(self, label, value, takeable=False):
480484
481485
Returns
482486
-------
483-
series : SparseSeries
487+
self : SparseSeries
484488
"""
485489
warnings.warn("set_value is deprecated and will be removed "
486490
"in a future release. Please use "
@@ -489,35 +493,16 @@ def set_value(self, label, value, takeable=False):
489493
return self._set_value(label, value, takeable=takeable)
490494

491495
def _set_value(self, label, value, takeable=False):
492-
values = self.to_dense()
493-
494-
# if the label doesn't exist, we will create a new object here
495-
# and possibly change the index
496-
new_values = values._set_value(label, value, takeable=takeable)
497-
if new_values is not None:
498-
values = new_values
499-
new_index = values.index
500-
values = SparseArray(values, fill_value=self.fill_value,
501-
kind=self.kind)
502-
self._data = SingleBlockManager(values, new_index)
503-
self._index = new_index
496+
self._data = self._data.copy()
497+
try:
498+
idx = self.index.get_loc(label)
499+
except KeyError:
500+
idx = len(self)
501+
self._data.axes[0] = self._data.index.append(Index([label]))
502+
self._data = self._data.setitem(indexer=idx, value=value)
503+
return self
504504
_set_value.__doc__ = set_value.__doc__
505505

506-
def _set_values(self, key, value):
507-
508-
# this might be inefficient as we have to recreate the sparse array
509-
# rather than setting individual elements, but have to convert
510-
# the passed slice/boolean that's in dense space into a sparse indexer
511-
# not sure how to do that!
512-
if isinstance(key, Series):
513-
key = key.values
514-
515-
values = self.values.to_dense()
516-
values[key] = libindex.convert_scalar(values, value)
517-
values = SparseArray(values, fill_value=self.fill_value,
518-
kind=self.kind)
519-
self._data = SingleBlockManager(values, self.index)
520-
521506
def to_dense(self, sparse_only=False):
522507
"""
523508
Convert SparseSeries to a Series.

0 commit comments

Comments
 (0)