From 9fe657ac17ec201d965b66d3cf29c927a40cda29 Mon Sep 17 00:00:00 2001 From: unknown Date: Sun, 6 Apr 2025 12:57:17 +0200 Subject: [PATCH 1/2] ENH: Add dropna parameter to Series.unique() (fixes #61209) --- pandas/core/base.py | 21 ++++++++++-- pandas/core/series.py | 75 ++++++++++--------------------------------- 2 files changed, 36 insertions(+), 60 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 6cc28d4e46634..44befeb3d2e84 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -16,6 +16,8 @@ ) import numpy as np +from typing import Any +from pandas._typing import ArrayLike from pandas._libs import lib from pandas._typing import ( @@ -1096,13 +1098,28 @@ def value_counts( dropna=dropna, ) - def unique(self): + def unique(self, dropna: bool = True) -> ArrayLike: + """ + Return unique values in the object. + + Parameters + ---------- + dropna : bool, default True + If True, exclude NA/null values. + + Returns + ------- + ndarray or ExtensionArray + """ values = self._values if not isinstance(values, np.ndarray): - # i.e. ExtensionArray + # For ExtensionArray result = values.unique() else: result = algorithms.unique1d(values) + + if dropna: + result = result[~isna(result)] return result @final diff --git a/pandas/core/series.py b/pandas/core/series.py index 03a2ce85a08c9..891747569d3d3 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2084,72 +2084,31 @@ def mode(self, dropna: bool = True) -> Series: dtype=self.dtype, ).__finalize__(self, method="mode") - def unique(self) -> ArrayLike: + def unique(self, dropna: bool = True) -> ArrayLike: """ Return unique values of Series object. - - Uniques are returned in order of appearance. Hash table-based unique, - therefore does NOT sort. - + + Parameters + ---------- + dropna : bool, default True + If True, exclude NA/null values. + Returns ------- ndarray or ExtensionArray - The unique values returned as a NumPy array. See Notes. 
- - See Also - -------- - Series.drop_duplicates : Return Series with duplicate values removed. - unique : Top-level unique method for any 1-d array-like object. - Index.unique : Return Index with unique values from an Index object. - - Notes - ----- - Returns the unique values as a NumPy array. In case of an - extension-array backed Series, a new - :class:`~api.extensions.ExtensionArray` of that type with just - the unique values is returned. This includes - - * Categorical - * Period - * Datetime with Timezone - * Datetime without Timezone - * Timedelta - * Interval - * Sparse - * IntegerNA - - See Examples section. - + The unique values returned as a NumPy array or ExtensionArray. + Examples -------- - >>> pd.Series([2, 1, 3, 3], name="A").unique() - array([2, 1, 3]) - - >>> pd.Series([pd.Timestamp("2016-01-01") for _ in range(3)]).unique() - <DatetimeArray> - ['2016-01-01 00:00:00'] - Length: 1, dtype: datetime64[s] - - >>> pd.Series( - ... [pd.Timestamp("2016-01-01", tz="US/Eastern") for _ in range(3)] - ... ).unique() - <DatetimeArray> - ['2016-01-01 00:00:00-05:00'] - Length: 1, dtype: datetime64[s, US/Eastern] - - An Categorical will return categories in the order of - appearance and with the same dtype. - - >>> pd.Series(pd.Categorical(list("baabc"))).unique() - ['b', 'a', 'c'] - Categories (3, object): ['a', 'b', 'c'] - >>> pd.Series( - ... pd.Categorical(list("baabc"), categories=list("abc"), ordered=True) - ... 
).unique() - ['b', 'a', 'c'] - Categories (3, object): ['a' < 'b' < 'c'] + >>> s = pd.Series([1, 2, 2, pd.NA]) + >>> s.unique() + array([1, 2]) + + >>> s.unique(dropna=False) + array([1, 2, <NA>], dtype=object) """ - return super().unique() + return super().unique(dropna=dropna) + @overload def drop_duplicates( From 6a5df71fabd226faae1e456ed3b178dedf8359b6 Mon Sep 17 00:00:00 2001 From: unknown Date: Sun, 6 Apr 2025 12:57:29 +0200 Subject: [PATCH 2/2] TST: Add tests for Series.unique(dropna) (#61209) --- pandas/tests/series/test_arithmetic.py | 38 ++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index e7d284bd47e21..571db4b099d62 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -958,3 +958,41 @@ def test_rmod_consistent_large_series(): expected = Series([1] * 10001) tm.assert_series_equal(result, expected) + + +from pandas._testing import assert_numpy_array_equal, assert_extension_array_equal + +# Test Case 1: Basic numeric unique with NA (dropna=False) +def test_unique_numeric_dropna_false(): + s = pd.Series([1, 2, 2, pd.NA, 3, pd.NA]) + result = s.unique(dropna=False) + expected = np.array([1, 2, pd.NA, 3], dtype=object) + assert_numpy_array_equal(result, expected) + +# Test Case 2: Empty Series +def test_unique_empty_series(): + s = pd.Series([], dtype='float64') + result = s.unique() + expected = np.array([], dtype='float64') + assert_numpy_array_equal(result, expected) + +# Test Case 3: Categorical data +def test_unique_categorical(): + s = pd.Series(pd.Categorical(['a', 'b', 'a', pd.NA])) + result = s.unique(dropna=False) + expected = pd.Categorical(['a', 'b', pd.NA]) + assert_extension_array_equal(result, expected) + +# Test Case 4: NA values +def test_unique_with_nas_simple(): + s = pd.Series([1, 2, 2, pd.NA, 3, pd.NA], dtype='Int64') + + # Current behavior (returns ExtensionArray) + result = s.unique() + 
expected = pd.array([1, 2, 3], dtype='Int64') + tm.assert_extension_array_equal(result, expected) + + # With dropna=False + result_with_na = s.unique(dropna=False) + expected_with_na = pd.array([1, 2, pd.NA, 3], dtype='Int64') + tm.assert_extension_array_equal(result_with_na, expected_with_na) \ No newline at end of file