Skip to content

BUG: 2D ndarray of dtype 'object' is always copied upon construction #39272

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 20 commits into from
Jul 15, 2021
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.4.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ Categorical
Datetimelike
^^^^^^^^^^^^
- Bug in :func:`to_datetime` returning ``pd.NaT`` for inputs that produce duplicated values, when ``cache=True`` (:issue:`42259`)
- Bug in :class:`DataFrame` constructor unnecessarily copying non-datetimelike 2D object arrays (:issue:`39272`)
-

Timedelta
Expand Down
44 changes: 16 additions & 28 deletions pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,15 +78,11 @@
ArrayManager,
SingleArrayManager,
)
from pandas.core.internals.blocks import (
ensure_block_shape,
new_block,
)
from pandas.core.internals.managers import (
BlockManager,
SingleBlockManager,
create_block_manager_from_array,
create_block_manager_from_arrays,
create_block_manager_from_blocks,
)

if TYPE_CHECKING:
Expand Down Expand Up @@ -342,37 +338,29 @@ def ndarray_to_mgr(

return ArrayManager(arrays, [index, columns], verify_integrity=False)

values = values.T
array = values.T

# if we don't have a dtype specified, then try to convert objects
# on the entire block; this is to convert if we have datetimelike's
# embedded in an object type
if dtype is None and is_object_dtype(values.dtype):

if values.ndim == 2 and values.shape[0] != 1:
if dtype is None and is_object_dtype(array.dtype):
if array.ndim == 2 and array.shape[0] != 1:
# transpose and separate blocks

dtlike_vals = [maybe_infer_to_datetimelike(row) for row in values]
dvals_list = [ensure_block_shape(dval, 2) for dval in dtlike_vals]

# TODO: What about re-joining object columns?
block_values = [
new_block(dvals_list[n], placement=n, ndim=2)
for n in range(len(dvals_list))
maybe_datetime = [
maybe_infer_to_datetimelike(instance) for instance in array
]

# don't convert (and copy) the objects if no type inference occurs
if any(
not is_dtype_equal(instance.dtype, array.dtype)
for instance in maybe_datetime
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Double-check me on this, but I think a more performant alternative would be:

obj_columns = [x for x in array]
maybe_datetime = [maybe_infer_to_datetimelike(x) for x in obj_columns]
if any(x is not y for x, y in zip(obj_columns, maybe_datetime)):
    [...]

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm running asv to see if there's a performance difference

Copy link
Contributor Author

@irgolic irgolic Jul 14, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here's the full asv run comparing the last commit (your suggestion) and the second to last commit. Any of these in particular you'd like me to rerun?

       before           after         ratio                                                                                                                  
     [dc2ae20d]       [96dd1b9e]                                                                                                                     
     <2d-object-dont-copy>       <2d-object-dont-copy~1>                                                                                                     
+     4.43±0.01ms       94.2±0.3ms    21.27  indexing.NumericSeriesIndexing.time_getitem_list_like(<class 'pandas.core.indexes.numeric.Float64Index'>, 'uniqu
e_monotonic_inc')                                                                                                                                            
+        59.0±3ms       75.8±0.4ms     1.28  inference.ToTimedeltaErrors.time_convert('coerce')                                                              
+        45.8±5ms         58.5±5ms     1.28  gil.ParallelGroupbyMethods.time_loop(2, 'last')                                                                 
+      4.37±0.2ms       5.34±0.3ms     1.22  rolling.Engine.time_rolling_apply('Series', 'float', <function sum>, 'cython', 'max')                           
+      5.31±0.1ms       6.45±0.5ms     1.21  rolling.Engine.time_rolling_apply('DataFrame', 'float', <function Engine.<lambda>>, 'cython', 'sum')            
+      4.46±0.1ms       5.40±0.3ms     1.21  rolling.Engine.time_rolling_apply('DataFrame', 'float', <function sum>, 'cython', 'mean')                       
+      5.38±0.2ms       6.51±0.6ms     1.21  rolling.Engine.time_rolling_apply('DataFrame', 'int', <function Engine.<lambda>>, 'cython', 'max')              
+         147±9ms          178±1ms     1.21  inference.ToDatetimeISO8601.time_iso8601_tz_spaceformat                                                         
+        72.1±5ms         86.7±1ms     1.20  inference.ToDatetimeCache.time_dup_string_tzoffset_dates(False)                                                 
+     4.57±0.09ms       5.47±0.3ms     1.20  rolling.Engine.time_rolling_apply('DataFrame', 'float', <function sum>, 'cython', 'max')                        
+      5.23±0.2ms       6.23±0.4ms     1.19  rolling.Engine.time_rolling_apply('Series', 'float', <function Engine.<lambda>>, 'cython', 'mean')              
+      5.40±0.1ms       6.42±0.5ms     1.19  rolling.Engine.time_rolling_apply('DataFrame', 'float', <function Engine.<lambda>>, 'cython', 'median')         
+      4.29±0.1ms       5.10±0.3ms     1.19  rolling.Engine.time_rolling_apply('Series', 'int', <function sum>, 'cython', 'mean')                            
+      4.52±0.2ms       5.37±0.2ms     1.19  rolling.Engine.time_rolling_apply('DataFrame', 'int', <function sum>, 'cython', 'median')                       
+      4.45±0.2ms       5.28±0.4ms     1.19  rolling.Engine.time_rolling_apply('Series', 'float', <function sum>, 'cython', 'mean')                          
+      15.4±0.6μs       18.2±0.3μs     1.19  series_methods.NanOps.time_func('sum', 1000, 'boolean')                                                         
+      4.39±0.1ms       5.20±0.2ms     1.19  rolling.Engine.time_rolling_apply('Series', 'int', <function sum>, 'cython', 'max')                             
+      5.25±0.2ms       6.23±0.4ms     1.18  rolling.Engine.time_rolling_apply('Series', 'int', <function Engine.<lambda>>, 'cython', 'median')              
+      5.20±0.2ms       6.16±0.3ms     1.18  rolling.Engine.time_rolling_apply('Series', 'float', <function Engine.<lambda>>, 'cython', 'sum')               
+      4.37±0.2ms       5.17±0.3ms     1.18  rolling.Engine.time_rolling_apply('Series', 'int', <function sum>, 'cython', 'median')                          
+     4.53±0.04ms       5.35±0.3ms     1.18  rolling.Engine.time_rolling_apply('DataFrame', 'int', <function sum>, 'cython', 'min')                          
+      4.35±0.1ms       5.13±0.3ms     1.18  rolling.Engine.time_rolling_apply('Series', 'int', <function sum>, 'cython', 'sum')                             
+      4.40±0.1ms       5.18±0.3ms     1.18  rolling.Engine.time_rolling_apply('Series', 'float', <function sum>, 'cython', 'median')                        
+      5.25±0.2ms       6.17±0.2ms     1.17  rolling.Engine.time_rolling_apply('Series', 'float', <function Engine.<lambda>>, 'cython', 'min')               
+      5.39±0.2ms       6.32±0.4ms     1.17  rolling.Engine.time_rolling_apply('DataFrame', 'int', <function Engine.<lambda>>, 'cython', 'min')              
+        58.8±2ms         68.9±7ms     1.17  inference.ToTimedeltaErrors.time_convert('ignore')                                                              
+      4.42±0.2ms       5.17±0.2ms     1.17  rolling.Engine.time_rolling_apply('Series', 'float', <function sum>, 'cython', 'min')                           
+      5.33±0.1ms       6.23±0.2ms     1.17  rolling.Engine.time_rolling_apply('DataFrame', 'float', <function Engine.<lambda>>, 'cython', 'min')            
+      5.38±0.3ms       6.27±0.4ms     1.17  rolling.Engine.time_rolling_apply('Series', 'int', <function Engine.<lambda>>, 'cython', 'max')                 
+      4.73±0.3ms       5.51±0.4ms     1.17  rolling.Engine.time_expanding_apply('Series', 'int', <function sum>, 'cython', 'max')                           
+      5.20±0.1ms       6.06±0.3ms     1.16  rolling.Engine.time_rolling_apply('Series', 'int', <function Engine.<lambda>>, 'cython', 'min')                 
+      4.60±0.2ms       5.36±0.3ms     1.16  rolling.Engine.time_rolling_apply('DataFrame', 'int', <function sum>, 'cython', 'max')                          
+      13.2±0.4μs       15.3±0.3μs     1.16  series_methods.NanOps.time_func('max', 1000, 'boolean')                                                         
+     4.40±0.06ms       5.11±0.4ms     1.16  rolling.Engine.time_rolling_apply('Series', 'float', <function sum>, 'cython', 'sum')                           
+      4.57±0.1ms       5.29±0.3ms     1.16  rolling.Engine.time_rolling_apply('DataFrame', 'int', <function sum>, 'cython', 'sum')                          
+     4.67±0.09ms       5.41±0.2ms     1.16  rolling.Engine.time_rolling_apply('DataFrame', 'int', <function sum>, 'cython', 'mean')                         
+      5.39±0.1ms       6.23±0.5ms     1.16  rolling.Engine.time_rolling_apply('DataFrame', 'float', <function Engine.<lambda>>, 'cython', 'mean')           
+      5.21±0.1ms       6.02±0.4ms     1.16  rolling.Engine.time_rolling_apply('Series', 'float', <function Engine.<lambda>>, 'cython', 'max')               
+     4.59±0.03ms       5.29±0.2ms     1.15  rolling.Engine.time_rolling_apply('DataFrame', 'float', <function sum>, 'cython', 'median')                     
+      5.31±0.1ms       6.12±0.3ms     1.15  rolling.Engine.time_rolling_apply('DataFrame', 'int', <function Engine.<lambda>>, 'cython', 'mean')             
+     4.55±0.03ms       5.24±0.3ms     1.15  rolling.Engine.time_rolling_apply('DataFrame', 'float', <function sum>, 'cython', 'sum')                        
+      5.37±0.1ms       6.19±0.4ms     1.15  rolling.Engine.time_rolling_apply('DataFrame', 'int', <function Engine.<lambda>>, 'cython', 'sum')              
+      16.4±0.7μs         18.7±1μs     1.14  series_methods.SearchSorted.time_searchsorted('uint8')                                                          
+      5.26±0.1ms       5.99±0.3ms     1.14  rolling.Engine.time_rolling_apply('Series', 'float', <function Engine.<lambda>>, 'cython', 'median')            
+      4.64±0.1ms       5.29±0.3ms     1.14  rolling.Engine.time_rolling_apply('DataFrame', 'float', <function sum>, 'cython', 'min')                        
+     1.17±0.01ms      1.33±0.07ms     1.14  hash_functions.NumericSeriesIndexingShuffled.time_loc_slice(<class 'pandas.core.indexes.numeric.UInt64Index'>, 5
00000)                                                                                                                                   
+     2.58±0.03μs      2.93±0.08μs     1.13  indexing_engines.NumericEngineIndexing.time_get_loc((<class 'pandas._libs.index.Float64Engine'>, <class 'numpy.f
loat64'>), 'monotonic_incr')                                                                                                                                 
+      5.21±0.2ms       5.91±0.3ms     1.13  rolling.Engine.time_rolling_apply('Series', 'int', <function Engine.<lambda>>, 'cython', 'mean')                
+        4.28±0ms       4.84±0.5ms     1.13  tslibs.tslib.TimeIntsToPydatetime.time_ints_to_pydatetime('time', 10000, datetime.timezone(datetime.timedelta(se
conds=3600)))                                                                                                                                                
+      4.51±0.2ms       5.10±0.2ms     1.13  rolling.Engine.time_rolling_apply('Series', 'int', <function sum>, 'cython', 'min')                             
+      5.50±0.2ms       6.21±0.2ms     1.13  rolling.Engine.time_rolling_apply('DataFrame', 'float', <function Engine.<lambda>>, 'cython', 'max')            
+      16.5±0.4μs         18.6±1μs     1.13  series_methods.SearchSorted.time_searchsorted('int16')                                                          
+     2.57±0.01μs       2.89±0.2μs     1.13  indexing_engines.NumericEngineIndexing.time_get_loc((<class 'pandas._libs.index.Int64Engine'>, <class 'numpy.int
64'>), 'monotonic_incr')                                                                                                                                     
+       452±0.8ms         508±50ms     1.12  tslibs.tslib.TimeIntsToPydatetime.time_ints_to_pydatetime('time', 1000000, datetime.timezone(datetime.timedelta(
seconds=3600)))                                                                                                                                              
+        806±20μs         904±40μs     1.12  arithmetic.IntFrameWithScalar.time_frame_op_with_scalar(<class 'numpy.float64'>, 5.0, <built-in function ne>)   
+     3.20±0.04μs       3.59±0.2μs     1.12  tslibs.fields.TimeGetStartEndField.time_get_start_end_field(0, 'start', 'month', 'QS', 3)                       
+      13.2±0.5μs       14.8±0.6μs     1.12  series_methods.NanOps.time_func('min', 1000, 'boolean')                                                         
+         121±5μs          136±4μs     1.12  multiindex_object.GetLoc.time_large_get_loc                                                                     
+      47.6±0.2ms         53.0±1ms     1.11  rolling.TableMethod.time_apply('single')                                                                        
+     4.17±0.02μs       4.64±0.2μs     1.11  categoricals.CategoricalSlicing.time_getitem_scalar('non_monotonic')                                            
+      15.9±0.3ms       17.7±0.2ms     1.11  inference.ToTimedelta.time_convert_string_days                                                                  
+        741±20μs         824±20μs     1.11  arithmetic.IntFrameWithScalar.time_frame_op_with_scalar(<class 'numpy.int64'>, 2, <built-in function eq>)       
+        732±20μs         813±20μs     1.11  arithmetic.IntFrameWithScalar.time_frame_op_with_scalar(<class 'numpy.float64'>, 5.0, <built-in function le>)   
+     3.32±0.03μs       3.67±0.2μs     1.11  tslibs.fields.TimeGetStartEndField.time_get_start_end_field(0, 'start', 'year', 'B', 5)                         
+     3.44±0.02μs       3.81±0.3μs     1.11  tslibs.fields.TimeGetStartEndField.time_get_start_end_field(1, 'start', 'year', 'B', 5)                         
+       113±0.7μs          125±1μs     1.11  indexing.NumericSeriesIndexing.time_loc_slice(<class 'pandas.core.indexes.numeric.UInt64Index'>, 'unique_monoton
ic_inc')                                                                                                                                                     
+     4.16±0.02μs       4.60±0.4μs     1.10  categoricals.CategoricalSlicing.time_getitem_scalar('monotonic_incr')                                           
+      13.0±0.2μs       14.3±0.2μs     1.10  series_methods.NanOps.time_func('sum', 1000, 'Int64')                                                           
+        60.3±1ms         66.6±1ms     1.10  rolling.Groupby.time_rolling_int('mean')                                                                        
+      14.4±0.1μs       15.9±0.5μs     1.10  series_methods.NanOps.time_func('min', 1000, 'Int64')                                                           
+        60.7±1ms         67.0±1ms     1.10  rolling.Groupby.time_rolling_int('max')                                                                         
+        61.5±1ms         67.9±1ms     1.10  rolling.Groupby.time_rolling_int('median')                                                                      
+        544±20ns         600±30ns     1.10  index_cached_properties.IndexCache.time_inferred_type('Int64Index')                                             
+     15.8±0.04μs       17.4±0.8μs     1.10  inference.MaybeConvertObjects.time_maybe_convert_objects                                                        
+       437±0.7ms         482±40ms     1.10  tslibs.tslib.TimeIntsToPydatetime.time_ints_to_pydatetime('time', 1000000, None)                                
+        60.5±2ms         66.6±1ms     1.10  rolling.Groupby.time_rolling_int('sum' (0))                                                                     
+     3.36±0.01μs       3.70±0.2μs     1.10  tslibs.fields.TimeGetStartEndField.time_get_start_end_field(0, 'end', 'year', 'B', 12)                          
+     3.24±0.04μs       3.57±0.2μs     1.10  tslibs.fields.TimeGetStartEndField.time_get_start_end_field(0, 'start', 'quarter', 'QS', 5)                     
+         142±2μs          157±1μs     1.10  indexing.NumericSeriesIndexing.time_loc_scalar(<class 'pandas.core.indexes.numeric.Float64Index'>, 'nonunique_mo
notonic_inc')                                                                                                                                                
+      41.1±0.3ms         45.2±1ms     1.10  frame_methods.Isnull.time_isnull_obj                                                                            
+     3.47±0.02μs       3.82±0.2μs     1.10  tslibs.fields.TimeGetStartEndField.time_get_start_end_field(1, 'end', 'quarter', 'QS', 12)                      
-      8.68±0.5μs      7.89±0.02μs     0.91  tslibs.resolution.TimeResolution.time_get_resolution('h', 0, datetime.timezone(datetime.timedelta(seconds=3600))
)                                                                                                                                                            
-      8.79±0.6μs      7.98±0.04μs     0.91  tslibs.resolution.TimeResolution.time_get_resolution('D', 1, datetime.timezone(datetime.timedelta(seconds=3600))
)                                                                                                                                                            
-        18.0±1μs      16.3±0.05μs     0.91  tslibs.resolution.TimeResolution.time_get_resolution('ns', 1, tzlocal())                                        
-      9.93±0.6μs      9.01±0.02μs     0.91  tslibs.period.TimeDT64ArrToPeriodArr.time_dt64arr_to_periodarr(0, 4000, datetime.timezone(datetime.timedelta(sec
onds=3600)))                                                                                                                                                 
-      1.10±0.09s          998±5ms     0.91  arithmetic.OffsetArrayArithmetic.time_add_series_offset(<CustomBusinessMonthEnd> (0))
-      2.71±0.2μs         2.46±0μs     0.91  tslibs.tslib.TimeIntsToPydatetime.time_ints_to_pydatetime('date', 0, None)
-      9.98±0.7μs      9.05±0.03μs     0.91  tslibs.period.TimeDT64ArrToPeriodArr.time_dt64arr_to_periodarr(0, 2000, datetime.timezone(datetime.timedelta(sec
onds=3600)))
-      2.75±0.2μs      2.50±0.02μs     0.91  tslibs.tslib.TimeIntsToPydatetime.time_ints_to_pydatetime('time', 0, tzlocal())
-      2.72±0.2μs         2.46±0μs     0.91  tslibs.tslib.TimeIntsToPydatetime.time_ints_to_pydatetime('timestamp', 0, None)
-      9.98±0.5μs      9.03±0.09μs     0.91  tslibs.period.TimeDT64ArrToPeriodArr.time_dt64arr_to_periodarr(1, 5000, datetime.timezone(datetime.timedelta(sec
onds=3600)))

-      9.11±0.3μs       8.24±0.3μs     0.90  tslibs.timestamp.TimestampProperties.time_weekday_name(tzlocal(), None)
-      9.91±0.6μs       8.96±0.1μs     0.90  tslibs.period.TimeDT64ArrToPeriodArr.time_dt64arr_to_periodarr(0, 12000, datetime.timezone(datetime.timedelta(se
conds=3600)))
-      10.0±0.6μs      9.06±0.04μs     0.90  tslibs.period.TimeDT64ArrToPeriodArr.time_dt64arr_to_periodarr(1, 3000, datetime.timezone(datetime.timedelta(sec
onds=3600)))
-      10.1±0.8μs      9.09±0.03μs     0.90  tslibs.period.TimeDT64ArrToPeriodArr.time_dt64arr_to_periodarr(0, 8000, datetime.timezone(datetime.timedelta(sec
onds=3600)))
-      9.94±0.7μs       8.99±0.1μs     0.90  tslibs.period.TimeDT64ArrToPeriodArr.time_dt64arr_to_periodarr(1, 10000, datetime.timezone(datetime.timedelta(se
conds=3600)))
-        19.8±1μs      17.9±0.05μs     0.90  tslibs.period.TimeDT64ArrToPeriodArr.time_dt64arr_to_periodarr(100, 2000, <DstTzInfo 'US/Pacific' LMT-1 day, 16:
07:00 STD>)
-      1.11±0.09s          998±3ms     0.90  arithmetic.OffsetArrayArithmetic.time_add_dti_offset(<CustomBusinessMonthEnd> (0))
-      9.91±0.6μs      8.95±0.08μs     0.90  tslibs.period.TimeDT64ArrToPeriodArr.time_dt64arr_to_periodarr(0, 7000, datetime.timezone(datetime.timedelta(sec
onds=3600)))
-      9.99±0.5μs      9.01±0.09μs     0.90  tslibs.period.TimeDT64ArrToPeriodArr.time_dt64arr_to_periodarr(1, 1000, datetime.timezone(datetime.timedelta(sec
onds=3600)))
-      2.58±0.2μs      2.32±0.01μs     0.90  tslibs.tslib.TimeIntsToPydatetime.time_ints_to_pydatetime('time', 0, datetime.timezone.utc)
-      9.97±0.7μs      8.96±0.06μs     0.90  tslibs.period.TimeDT64ArrToPeriodArr.time_dt64arr_to_periodarr(1, 11000, datetime.timezone(datetime.timedelta(se
conds=3600)))
-      29.3±0.3ms       26.2±0.5ms     0.90  rolling.Apply.time_rolling('Series', 3, 'int', <built-in function sum>, False)
-      10.1±0.7μs      9.03±0.06μs     0.89  tslibs.period.TimeDT64ArrToPeriodArr.time_dt64arr_to_periodarr(0, 9000, datetime.timezone(datetime.timedelta(sec
onds=3600)))
-      2.60±0.2μs      2.32±0.01μs     0.89  tslibs.tslib.TimeIntsToPydatetime.time_ints_to_pydatetime('timestamp', 0, datetime.timezone.utc)
-      10.0±0.7μs      8.98±0.04μs     0.89  tslibs.period.TimeDT64ArrToPeriodArr.time_dt64arr_to_periodarr(1, 6000, datetime.timezone(datetime.timedelta(sec
onds=3600)))
-      8.65±0.2μs       7.73±0.1μs     0.89  tslibs.period.TimePeriodArrToDT64Arr.time_periodarray_to_dt64arr(1, 7000)
-      9.99±0.7μs      8.93±0.05μs     0.89  tslibs.period.TimeDT64ArrToPeriodArr.time_dt64arr_to_periodarr(0, 2011, datetime.timezone(datetime.timedelta(sec
onds=3600)))
-        26.7±2μs       23.8±0.2μs     0.89  tslibs.timestamp.TimestampOps.time_normalize(tzlocal())
-      2.77±0.2μs      2.47±0.02μs     0.89  tslibs.tslib.TimeIntsToPydatetime.time_ints_to_pydatetime('time', 0, None)
-      6.29±0.6μs      5.60±0.08μs     0.89  tslibs.timedelta.TimedeltaConstructor.time_from_unit
-      11.6±0.8μs       10.3±0.1μs     0.89  tslibs.timestamp.TimestampOps.time_replace_None(datetime.timezone(datetime.timedelta(seconds=3600)))
-      10.1±0.6μs      8.95±0.06μs     0.89  tslibs.period.TimeDT64ArrToPeriodArr.time_dt64arr_to_periodarr(0, 10000, datetime.timezone(datetime.timedelta(se
conds=3600)))
-      5.64±0.3μs      5.00±0.04μs     0.89  tslibs.timestamp.TimestampOps.time_normalize(None)
-        58.8±3μs       51.9±0.6μs     0.88  tslibs.timestamp.TimestampOps.time_normalize(tzfile('/usr/share/zoneinfo/Asia/Tokyo'))
-      3.19±0.3μs      2.80±0.02μs     0.88  tslibs.tslib.TimeIntsToPydatetime.time_ints_to_pydatetime('date', 1, None)
-        9.72±1μs      8.54±0.05μs     0.88  tslibs.tslib.TimeIntsToPydatetime.time_ints_to_pydatetime('timestamp', 0, datetime.timezone(datetime.timedelta(s
econds=3600)))
-      2.85±0.2μs      2.50±0.01μs     0.88  tslibs.tslib.TimeIntsToPydatetime.time_ints_to_pydatetime('timestamp', 0, tzlocal())
-     1.27±0.03ms      1.11±0.06ms     0.87  arithmetic.IntFrameWithScalar.time_frame_op_with_scalar(<class 'numpy.float64'>, 4, <built-in function mul>)
-        15.5±2μs       13.6±0.2μs     0.87  tslibs.timedelta.TimedeltaConstructor.time_from_components
-        8.74±1μs      7.57±0.05μs     0.87  tslibs.period.TimeDT64ArrToPeriodArr.time_dt64arr_to_periodarr(1, 6000, <DstTzInfo 'US/Pacific' LMT-1 day, 16:07
:00 STD>)
-      8.05±0.8μs      6.92±0.03μs     0.86  tslibs.timestamp.TimestampOps.time_replace_None(<DstTzInfo 'US/Pacific' LMT-1 day, 16:07:00 STD>)
-     1.59±0.01ms      1.36±0.01ms     0.85  tslibs.period.TimeDT64ArrToPeriodArr.time_dt64arr_to_periodarr(100, 4000, tzlocal())
-      6.61±0.6μs      5.65±0.08μs     0.85  tslibs.timedelta.TimedeltaConstructor.time_from_string
-        24.1±2μs       20.5±0.1μs     0.85  tslibs.timestamp.TimestampOps.time_normalize(datetime.timezone(datetime.timedelta(seconds=3600)))
-     11.7±0.05ms      9.84±0.06ms     0.84  indexing.NumericSeriesIndexing.time_loc_scalar(<class 'pandas.core.indexes.numeric.UInt64Index'>, 'nonunique_mon
otonic_inc')
-      1.66±0.1ms      1.39±0.07ms     0.84  series_methods.NanOps.time_func('prod', 1000000, 'int8')
-      10.8±0.2μs      9.05±0.04μs     0.84  tslibs.period.TimeDT64ArrToPeriodArr.time_dt64arr_to_periodarr(0, 4006, datetime.timezone(datetime.timedelta(sec
onds=3600)))
-      10.7±0.3μs       8.95±0.2μs     0.83  tslibs.period.TimeDT64ArrToPeriodArr.time_dt64arr_to_periodarr(1, 1011, datetime.timezone(datetime.timedelta(sec
onds=3600)))
-      1.76±0.2ms       1.46±0.3ms     0.83  index_cached_properties.IndexCache.time_engine('MultiIndex')
-      57.4±0.4ms      34.8±0.07ms     0.61  algos.isin.IsinWithArange.time_isin(<class 'numpy.object_'>, 8000, 0)
-        93.1±1ms          400±3μs     0.00  hash_functions.NumericSeriesIndexing.time_loc_slice(<class 'pandas.core.indexes.numeric.Float64Index'>, 1000000)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reran time_getitem_list_like; now it swung in the other direction. It seems like the benchmark is just noisy.

       before           after         ratio
     [dc2ae20d]       [96dd1b9e]
     <2d-object-dont-copy>       <2d-object-dont-copy~1>
-     59.7±0.08ms      3.17±0.02ms     0.05  indexing.NumericSeriesIndexing.time_getitem_list_like(<class 'pandas.core.indexes.numeric.UInt64Index'>, 'unique_monotonic_inc')

):
return create_block_manager_from_arrays(
maybe_datetime, columns, [columns, index]
)
else:
datelike_vals = maybe_infer_to_datetimelike(values)
nb = new_block(datelike_vals, placement=slice(len(columns)), ndim=2)
block_values = [nb]
else:
nb = new_block(values, placement=slice(len(columns)), ndim=2)
block_values = [nb]

if len(columns) == 0:
block_values = []
array = maybe_infer_to_datetimelike(array)

return create_block_manager_from_blocks(block_values, [columns, index])
return create_block_manager_from_array(array, [columns, index])


def _check_values_indices_shape_match(
Expand Down
20 changes: 20 additions & 0 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1791,6 +1791,26 @@ def create_block_manager_from_arrays(
return mgr


def create_block_manager_from_array(
    array,
    axes: list[Index],
    consolidate: bool = True,
) -> BlockManager:
    """
    Build a BlockManager that holds one 2D block created from ``array``.

    Parameters
    ----------
    array
        Values for the block; passed through ``_extract_array`` before use.
    axes : list[Index]
        Axes of the manager. The length of ``axes[0]`` determines the
        block placement (``slice(0, len(axes[0]))``).
    consolidate : bool, default True
        If True, consolidate the manager in place before returning it.

    Returns
    -------
    BlockManager

    Raises
    ------
    Exception built by ``construction_error``
        Raised when creating the block or the manager fails with a
        ValueError.
    """
    assert isinstance(axes, list)
    assert all(isinstance(axis, Index) for axis in axes)

    values = _extract_array(array)

    try:
        placement = slice(0, len(axes[0]))
        single_block = new_block(values=values, placement=placement, ndim=2)
        manager = BlockManager([single_block], axes)
    except ValueError as err:
        # Re-raise as a construction error describing the shape mismatch.
        raise construction_error(values.shape[0], values.shape[1:], axes, err)

    if not consolidate:
        return manager
    manager._consolidate_inplace()
    return manager


def construction_error(
tot_items: int,
block_shape: Shape,
Expand Down
14 changes: 14 additions & 0 deletions pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,20 @@ def test_constructor_dtype_nocast_view_2d_array(self):
should_be_view[0][0] = 97
assert df.values[0, 0] == 97

@td.skip_array_manager_invalid_test
def test_1d_object_array_does_not_copy(self):
    # GH#39272 (https://github.com/pandas-dev/pandas/issues/39272):
    # a 1D object ndarray passed to the constructor must not be copied
    source = np.array(["a", "b"], dtype="object")
    frame = DataFrame(source)
    assert np.shares_memory(frame.values, source)

@td.skip_array_manager_invalid_test
def test_2d_object_array_does_not_copy(self):
    # GH#39272 (https://github.com/pandas-dev/pandas/issues/39272):
    # a 2D object ndarray passed to the constructor must not be copied
    source = np.array([["a", "b"], ["c", "d"]], dtype="object")
    frame = DataFrame(source)
    assert np.shares_memory(frame.values, source)

def test_constructor_dtype_list_data(self):
df = DataFrame([[1, "2"], [None, "a"]], dtype=object)
assert df.loc[1, 0] is None
Expand Down