Skip to content

Commit d09fff8

Browse files
committed
Merge pull request pandas-dev#3219 from jreback/GH3216
BUG: GH3216 Upcast when needed to DataFrame when setitem with indexer
2 parents c15b902 + 749d911 commit d09fff8

File tree

7 files changed

+152
-24
lines changed

7 files changed

+152
-24
lines changed

RELEASE.rst

+6-1
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,8 @@ pandas 0.11.0
122122
- Handle "ragged" CSV files missing trailing delimiters in rows with missing
123123
fields when also providing explicit list of column names (so the parser
124124
knows how many columns to expect in the result) (GH2981_)
125+
- On a mixed DataFrame, allow setting with indexers with ndarray/DataFrame
126+
on rhs (GH3216_)
125127

126128
**API Changes**
127129

@@ -249,9 +251,11 @@ pandas 0.11.0
249251
- Add comparison operators to Period object (GH2781_)
250252
- Fix bug when concatenating two Series into a DataFrame when they have the
251253
same name (GH2797_)
252-
- fix automatic color cycling when plotting consecutive timeseries
254+
- Fix automatic color cycling when plotting consecutive timeseries
253255
without color arguments (GH2816_)
254256
- fixed bug in the pickling of PeriodIndex (GH2891_)
257+
- Upcast/split blocks when needed in a mixed DataFrame when setitem
258+
with an indexer (GH3216_)
255259

256260
.. _GH622: https://github.com/pydata/pandas/issues/622
257261
.. _GH797: https://github.com/pydata/pandas/issues/797
@@ -340,6 +344,7 @@ pandas 0.11.0
340344
.. _GH2751: https://github.com/pydata/pandas/issues/2751
341345
.. _GH2747: https://github.com/pydata/pandas/issues/2747
342346
.. _GH2816: https://github.com/pydata/pandas/issues/2816
347+
.. _GH3216: https://github.com/pydata/pandas/issues/3216
343348

344349
pandas 0.10.1
345350
=============

pandas/core/common.py

+34-1
Original file line numberDiff line numberDiff line change
@@ -694,6 +694,11 @@ def _maybe_promote(dtype, fill_value=np.nan):
694694
if issubclass(fill_value.dtype.type, (np.datetime64,np.timedelta64)):
695695
fill_value = tslib.iNaT
696696
else:
697+
698+
# we need to change to object type as our
699+
# fill_value is of object type
700+
if fill_value.dtype == np.object_:
701+
dtype = np.dtype(np.object_)
697702
fill_value = np.nan
698703

699704
# returns tuple of (dtype, fill_value)
@@ -763,7 +768,7 @@ def changeit():
763768
if change is not None:
764769
change.dtype = r.dtype
765770
change[:] = r
766-
771+
767772
return r, True
768773

769774
# we want to decide whether putmask will work
@@ -792,6 +797,34 @@ def changeit():
792797

793798
return result, False
794799

800+
def _maybe_upcast_indexer(result, indexer, other, dtype=None):
801+
""" a safe version of setitem that (potentially) upcasts the result
802+
return the result and a changed flag
803+
"""
804+
805+
def changeit():
806+
# our type is wrong here, need to upcast
807+
r, fill_value = _maybe_upcast(result, fill_value=other, dtype=dtype, copy=True)
808+
try:
809+
r[indexer] = other
810+
except:
811+
812+
# if we hit this then we still have an incompatible type
813+
r[indexer] = fill_value
814+
815+
return r, True
816+
817+
new_dtype, fill_value = _maybe_promote(result.dtype,other)
818+
if new_dtype != result.dtype:
819+
return changeit()
820+
821+
try:
822+
result[indexer] = other
823+
except:
824+
return changeit()
825+
826+
return result, False
827+
795828
def _maybe_upcast(values, fill_value=np.nan, dtype=None, copy=False):
796829
""" provide explicit type promotion and coercion
797830

pandas/core/indexing.py

+47-17
Original file line numberDiff line numberDiff line change
@@ -119,24 +119,54 @@ def _setitem_with_indexer(self, indexer, value):
119119
plane_indexer = indexer[:het_axis] + indexer[het_axis + 1:]
120120
item_labels = self.obj._get_axis(het_axis)
121121

122-
if isinstance(value, (np.ndarray, DataFrame)) and value.ndim > 1:
123-
raise ValueError('Setting mixed-type DataFrames with '
124-
'array/DataFrame pieces not yet supported')
122+
def setter(item, v):
123+
data = self.obj[item]
124+
values = data.values
125+
if np.prod(values.shape):
126+
result, changed = com._maybe_upcast_indexer(values,plane_indexer,v,dtype=getattr(data,'dtype',None))
127+
if changed:
128+
self.obj[item] = result
125129

126-
try:
127-
for item in item_labels[het_idx]:
128-
data = self.obj[item]
129-
values = data.values
130-
if np.prod(values.shape):
131-
value = com._possibly_cast_to_datetime(
132-
value, getattr(data, 'dtype', None))
133-
values[plane_indexer] = value
134-
except ValueError:
135-
for item, v in zip(item_labels[het_idx], value):
136-
data = self.obj[item]
137-
values = data.values
138-
if np.prod(values.shape):
139-
values[plane_indexer] = v
130+
labels = item_labels[het_idx]
131+
132+
if _is_list_like(value):
133+
134+
# we have an equal len Frame
135+
if isinstance(value, DataFrame) and value.ndim > 1:
136+
137+
for item in labels:
138+
139+
# align to
140+
if item in value:
141+
v = value[item]
142+
v = v.reindex(self.obj[item].reindex(v.index).dropna().index)
143+
setter(item, v.values)
144+
else:
145+
setter(item, np.nan)
146+
147+
# we have an equal len ndarray
148+
elif isinstance(value, np.ndarray) and value.ndim > 1:
149+
if len(labels) != len(value):
150+
raise ValueError('Must have equal len keys and value when'
151+
' setting with an ndarray')
152+
153+
for i, item in enumerate(labels):
154+
setter(item, value[:,i])
155+
156+
# we have an equal len list/ndarray
157+
elif len(labels) == 1 and len(self.obj[labels[0]]) == len(value):
158+
setter(labels[0], value)
159+
160+
# per label values
161+
else:
162+
163+
for item, v in zip(labels, value):
164+
setter(item, v)
165+
else:
166+
167+
# scalar
168+
for item in labels:
169+
setter(item, value)
140170

141171
else:
142172
if isinstance(indexer, tuple):

pandas/core/series.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2065,7 +2065,7 @@ def update(self, other):
20652065
"""
20662066
other = other.reindex_like(self)
20672067
mask = notnull(other)
2068-
np.putmask(self.values, mask, other.values)
2068+
com._maybe_upcast_putmask(self.values,mask,other,change=self.values)
20692069

20702070
#----------------------------------------------------------------------
20712071
# Reindexing, sorting

pandas/tests/test_frame.py

+45-4
Original file line numberDiff line numberDiff line change
@@ -1275,20 +1275,61 @@ def test_setitem_single_column_mixed_datetime(self):
12751275
df.ix['d', :] = nan
12761276
self.assert_(com.isnull(df.ix['c', :]).all() == False)
12771277

1278+
# as of GH 3216 this will now work!
12781279
# try to set with a list like item
1279-
self.assertRaises(
1280-
Exception, df.ix.__setitem__, ('d', 'timestamp'), [nan])
1280+
#self.assertRaises(
1281+
# Exception, df.ix.__setitem__, ('d', 'timestamp'), [nan])
12811282

12821283
def test_setitem_frame(self):
12831284
piece = self.frame.ix[:2, ['A', 'B']]
12841285
self.frame.ix[-2:, ['A', 'B']] = piece.values
12851286
assert_almost_equal(self.frame.ix[-2:, ['A', 'B']].values,
12861287
piece.values)
12871288

1289+
# GH 3216
1290+
1291+
# already aligned
1292+
f = self.mixed_frame.copy()
1293+
piece = DataFrame([[ 1, 2], [3, 4]], index=f.index[0:2],columns=['A', 'B'])
1294+
key = (slice(None,2), ['A', 'B'])
1295+
f.ix[key] = piece
1296+
assert_almost_equal(f.ix[0:2, ['A', 'B']].values,
1297+
piece.values)
1298+
1299+
# rows unaligned
1300+
f = self.mixed_frame.copy()
1301+
piece = DataFrame([[ 1, 2 ], [3, 4], [5, 6], [7, 8]], index=list(f.index[0:2]) + ['foo','bar'],columns=['A', 'B'])
1302+
key = (slice(None,2), ['A', 'B'])
1303+
f.ix[key] = piece
1304+
assert_almost_equal(f.ix[0:2:, ['A', 'B']].values,
1305+
piece.values[0:2])
1306+
1307+
# key is unaligned with values
1308+
f = self.mixed_frame.copy()
1309+
piece = f.ix[:2, ['A']]
1310+
key = (slice(-2, None), ['A', 'B'])
1311+
f.ix[key] = piece
1312+
piece['B'] = np.nan
1313+
assert_almost_equal(f.ix[-2:, ['A', 'B']].values,
1314+
piece.values)
1315+
1316+
# ndarray
1317+
f = self.mixed_frame.copy()
12881318
piece = self.mixed_frame.ix[:2, ['A', 'B']]
1289-
f = self.mixed_frame.ix.__setitem__
12901319
key = (slice(-2, None), ['A', 'B'])
1291-
self.assertRaises(ValueError, f, key, piece)
1320+
f.ix[key] = piece.values
1321+
assert_almost_equal(f.ix[-2:, ['A', 'B']].values,
1322+
piece.values)
1323+
1324+
1325+
# needs upcasting
1326+
df = DataFrame([[1,2,'foo'],[3,4,'bar']],columns=['A','B','C'])
1327+
df2 = df.copy()
1328+
df2.ix[:,['A','B']] = df.ix[:,['A','B']]+0.5
1329+
expected = df.reindex(columns=['A','B'])
1330+
expected += 0.5
1331+
expected['C'] = df['C']
1332+
assert_frame_equal(df2, expected)
12921333

12931334
def test_setitem_frame_align(self):
12941335
piece = self.frame.ix[:2, ['A', 'B']]

pandas/tests/test_indexing.py

+12
Original file line numberDiff line numberDiff line change
@@ -724,6 +724,18 @@ def test_xs_multiindex(self):
724724
expected = df.iloc[:,0:2].loc[:,'a']
725725
assert_frame_equal(result,expected)
726726

727+
def test_setitem_dtype_upcast(self):
728+
729+
# GH3216
730+
df = DataFrame([{"a": 1}, {"a": 3, "b": 2}])
731+
df['c'] = np.nan
732+
self.assert_(df['c'].dtype == np.float64)
733+
734+
df.ix[0,'c'] = 'foo'
735+
expected = DataFrame([{"a": 1, "c" : 'foo'}, {"a": 3, "b": 2, "c" : np.nan}])
736+
assert_frame_equal(df,expected)
737+
738+
727739
if __name__ == '__main__':
728740
import nose
729741
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],

pandas/tests/test_series.py

+7
Original file line numberDiff line numberDiff line change
@@ -2314,6 +2314,13 @@ def test_update(self):
23142314
expected = Series([1.5, 3.5, 3., 5., np.nan])
23152315
assert_series_equal(s, expected)
23162316

2317+
# GH 3217
2318+
df = DataFrame([{"a": 1}, {"a": 3, "b": 2}])
2319+
df['c'] = np.nan
2320+
2321+
# this will fail as long as series is a sub-class of ndarray
2322+
##### df['c'].update(Series(['foo'],index=[0])) #####
2323+
23172324
def test_corr(self):
23182325
_skip_if_no_scipy()
23192326

0 commit comments

Comments
 (0)