Skip to content

Commit e93ee07

Browse files
topper-123Terji Petersenmroeschke
authored
BUG/API: Indexes on empty frames/series should be RangeIndex (#49637)
* BUG/API: Indexes on empty frames/series should be RangeIndex, are Index[object] * fix black * fix window stuff * Add docs * double ticks * unneeded line * update whatsnew text * update whatsnew text * fix rst * Update doc/source/whatsnew/v2.0.0.rst Co-authored-by: Matthew Roeschke <[email protected]> Co-authored-by: Terji Petersen <[email protected]> Co-authored-by: Matthew Roeschke <[email protected]>
1 parent afca9f8 commit e93ee07

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

47 files changed

+158
-109
lines changed

doc/source/whatsnew/v2.0.0.rst

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,35 @@ The new behavior, as for datetime64, either gives exactly the requested dtype or
312312
ser.astype("timedelta64[s]")
313313
ser.astype("timedelta64[D]")
314314
315+
.. _whatsnew_200.api_breaking.zero_len_indexes:
316+
317+
Empty DataFrames/Series will now default to have a ``RangeIndex``
318+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
319+
320+
Before, constructing an empty (where ``data`` is ``None`` or an empty list-like argument) :class:`Series` or :class:`DataFrame` without
321+
specifying the axes (``index=None``, ``columns=None``) would return the axes as empty :class:`Index` with object dtype.
322+
323+
Now, the axes return an empty :class:`RangeIndex`.
324+
325+
*Previous behavior*:
326+
327+
.. code-block:: ipython
328+
329+
In [8]: pd.Series().index
330+
Out[8]:
331+
Index([], dtype='object')
332+
333+
In [9]: pd.DataFrame().axes
334+
Out[9]:
335+
[Index([], dtype='object'), Index([], dtype='object')]
336+
337+
*New behavior*:
338+
339+
.. ipython:: python
340+
341+
pd.Series().index
342+
pd.DataFrame().axes
343+
315344
.. _whatsnew_200.api_breaking.deps:
316345

317346
Increased minimum versions for dependencies
@@ -373,6 +402,7 @@ Other API changes
373402
- Changed behavior of :class:`Index` constructor with an object-dtype ``numpy.ndarray`` containing all-``bool`` values or all-complex values, this will now retain object dtype, consistent with the :class:`Series` behavior (:issue:`49594`)
374403
- Changed behavior of :meth:`DataFrame.shift` with ``axis=1``, an integer ``fill_value``, and homogeneous datetime-like dtype, this now fills new columns with integer dtypes instead of casting to datetimelike (:issue:`49842`)
375404
- Files are now closed when encountering an exception in :func:`read_json` (:issue:`49921`)
405+
- Changed behavior of :func:`read_csv`, :func:`read_json` & :func:`read_fwf`, where the index will now always be a :class:`RangeIndex` when no index is specified. Previously the index would be an :class:`Index` with dtype ``object`` if the new DataFrame/Series had length 0 (:issue:`49572`)
376406
- :meth:`DataFrame.values`, :meth:`DataFrame.to_numpy`, :meth:`DataFrame.xs`, :meth:`DataFrame.reindex`, :meth:`DataFrame.fillna`, and :meth:`DataFrame.replace` no longer silently consolidate the underlying arrays; do ``df = df.copy()`` to ensure consolidation (:issue:`49356`)
377407
-
378408

pandas/core/frame.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -632,8 +632,6 @@ def __init__(
632632
copy: bool | None = None,
633633
) -> None:
634634

635-
if data is None:
636-
data = {}
637635
if dtype is not None:
638636
dtype = self._validate_dtype(dtype)
639637

@@ -671,6 +669,12 @@ def __init__(
671669
else:
672670
copy = False
673671

672+
if data is None:
673+
index = index if index is not None else default_index(0)
674+
columns = columns if columns is not None else default_index(0)
675+
dtype = dtype if dtype is not None else pandas_dtype(object)
676+
data = []
677+
674678
if isinstance(data, (BlockManager, ArrayManager)):
675679
mgr = self._init_mgr(
676680
data, axes={"index": index, "columns": columns}, dtype=dtype, copy=copy
@@ -777,7 +781,7 @@ def __init__(
777781
mgr = dict_to_mgr(
778782
{},
779783
index,
780-
columns,
784+
columns if columns is not None else default_index(0),
781785
dtype=dtype,
782786
typ=manager,
783787
)
@@ -2309,8 +2313,7 @@ def maybe_reorder(
23092313

23102314
result_index = None
23112315
if len(arrays) == 0 and index is None and length == 0:
2312-
# for backward compat use an object Index instead of RangeIndex
2313-
result_index = Index([])
2316+
result_index = default_index(0)
23142317

23152318
arrays, arr_columns = reorder_arrays(arrays, arr_columns, columns, length)
23162319
return arrays, arr_columns, result_index

pandas/core/internals/construction.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -582,7 +582,7 @@ def _extract_index(data) -> Index:
582582
"""
583583
index: Index
584584
if len(data) == 0:
585-
return Index([])
585+
return default_index(0)
586586

587587
raw_lengths = []
588588
indexes: list[list[Hashable] | Index] = []

pandas/core/reshape/merge.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1084,8 +1084,8 @@ def _get_join_info(
10841084
else:
10851085
join_index = default_index(len(left_indexer))
10861086

1087-
if len(join_index) == 0:
1088-
join_index = join_index.astype(object)
1087+
if len(join_index) == 0 and not isinstance(join_index, MultiIndex):
1088+
join_index = default_index(0).set_names(join_index.name)
10891089
return join_index, left_indexer, right_indexer
10901090

10911091
def _create_join_index(

pandas/core/series.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -385,11 +385,16 @@ def __init__(
385385
if index is not None:
386386
index = ensure_index(index)
387387

388-
if data is None:
389-
data = {}
390388
if dtype is not None:
391389
dtype = self._validate_dtype(dtype)
392390

391+
if data is None:
392+
index = index if index is not None else default_index(0)
393+
if len(index) or dtype is not None:
394+
data = na_value_for_dtype(pandas_dtype(dtype), compat=False)
395+
else:
396+
data = []
397+
393398
if isinstance(data, MultiIndex):
394399
raise NotImplementedError(
395400
"initializing a Series from a MultiIndex is not supported"

pandas/core/window/common.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ def dataframe_from_int_dict(data, frame_template):
3030
result = DataFrame(data, index=frame_template.index)
3131
if len(result.columns) > 0:
3232
result.columns = frame_template.columns[result.columns]
33+
else:
34+
result.columns = frame_template.columns.copy()
3335
return result
3436

3537
results = {}

pandas/io/parsers/base_parser.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@
8484
from pandas.core.indexes.api import (
8585
Index,
8686
MultiIndex,
87+
default_index,
8788
ensure_index_from_sequences,
8889
)
8990
from pandas.core.series import Series
@@ -1093,8 +1094,9 @@ def _get_empty_meta(
10931094
#
10941095
# Both must be non-null to ensure a successful construction. Otherwise,
10951096
# we have to create a generic empty Index.
1097+
index: Index
10961098
if (index_col is None or index_col is False) or index_names is None:
1097-
index = Index([])
1099+
index = default_index(0)
10981100
else:
10991101
data = [Series([], dtype=dtype_dict[name]) for name in index_names]
11001102
index = ensure_index_from_sequences(data, names=index_names)

pandas/tests/apply/test_frame_apply.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -114,14 +114,14 @@ def test_apply_with_reduce_empty():
114114
result = empty_frame.apply(x.append, axis=1, result_type="expand")
115115
tm.assert_frame_equal(result, empty_frame)
116116
result = empty_frame.apply(x.append, axis=1, result_type="reduce")
117-
expected = Series([], index=pd.Index([], dtype=object), dtype=np.float64)
117+
expected = Series([], dtype=np.float64)
118118
tm.assert_series_equal(result, expected)
119119

120120
empty_with_cols = DataFrame(columns=["a", "b", "c"])
121121
result = empty_with_cols.apply(x.append, axis=1, result_type="expand")
122122
tm.assert_frame_equal(result, empty_with_cols)
123123
result = empty_with_cols.apply(x.append, axis=1, result_type="reduce")
124-
expected = Series([], index=pd.Index([], dtype=object), dtype=np.float64)
124+
expected = Series([], dtype=np.float64)
125125
tm.assert_series_equal(result, expected)
126126

127127
# Ensure that x.append hasn't been called
@@ -147,7 +147,7 @@ def test_nunique_empty():
147147
tm.assert_series_equal(result, expected)
148148

149149
result = df.T.nunique()
150-
expected = Series([], index=pd.Index([]), dtype=np.float64)
150+
expected = Series([], dtype=np.float64)
151151
tm.assert_series_equal(result, expected)
152152

153153

pandas/tests/apply/test_str.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88

99
from pandas import (
1010
DataFrame,
11-
Index,
1211
Series,
1312
)
1413
import pandas._testing as tm
@@ -149,8 +148,8 @@ def test_agg_cython_table_series(series, func, expected):
149148
tm.get_cython_table_params(
150149
Series(dtype=np.float64),
151150
[
152-
("cumprod", Series([], Index([]), dtype=np.float64)),
153-
("cumsum", Series([], Index([]), dtype=np.float64)),
151+
("cumprod", Series([], dtype=np.float64)),
152+
("cumsum", Series([], dtype=np.float64)),
154153
],
155154
),
156155
tm.get_cython_table_params(

pandas/tests/extension/base/constructors.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ def test_construct_empty_dataframe(self, dtype):
119119
# GH 33623
120120
result = pd.DataFrame(columns=["a"], dtype=dtype)
121121
expected = pd.DataFrame(
122-
{"a": pd.array([], dtype=dtype)}, index=pd.Index([], dtype="object")
122+
{"a": pd.array([], dtype=dtype)}, index=pd.RangeIndex(0)
123123
)
124124
self.assert_frame_equal(result, expected)
125125

0 commit comments

Comments
 (0)