Merge pull request pandas-dev#3219 from jreback/GH3216

jreback · jreback · commit d09fff82ca2a · 2013-03-30T17:12:47.000-07:00
BUG: GH3216 Upcast when needed to DataFrame when setitem with indexer
diff --git a/RELEASE.rst b/RELEASE.rst
@@ -122,6 +122,8 @@ pandas 0.11.0
   - Handle "ragged" CSV files missing trailing delimiters in rows with missing
     fields when also providing explicit list of column names (so the parser
     knows how many columns to expect in the result) (GH2981_)
+  - On a mixed DataFrame, allow setting with indexers with ndarray/DataFrame
+    on rhs (GH3216_)
 
 **API Changes**
 
@@ -249,9 +251,11 @@ pandas 0.11.0
   - Add comparison operators to Period object (GH2781_)
   - Fix bug when concatenating two Series into a DataFrame when they have the
     same name (GH2797_)
-  - fix automatic color cycling when plotting consecutive timeseries
+  - Fix automatic color cycling when plotting consecutive timeseries
     without color arguments (GH2816_)
   - fixed bug in the pickling of PeriodIndex (GH2891_)
+  - Upcast/split blocks when needed in a mixed DataFrame when setitem 
+    with an indexer (GH3216_)
 
 .. _GH622: https://github.com/pydata/pandas/issues/622
 .. _GH797: https://github.com/pydata/pandas/issues/797
@@ -340,6 +344,7 @@ pandas 0.11.0
 .. _GH2751: https://github.com/pydata/pandas/issues/2751
 .. _GH2747: https://github.com/pydata/pandas/issues/2747
 .. _GH2816: https://github.com/pydata/pandas/issues/2816
+.. _GH3216: https://github.com/pydata/pandas/issues/3216
 
 pandas 0.10.1
 =============
diff --git a/pandas/core/common.py b/pandas/core/common.py
@@ -694,6 +694,11 @@ def _maybe_promote(dtype, fill_value=np.nan):
         if issubclass(fill_value.dtype.type, (np.datetime64,np.timedelta64)):
             fill_value = tslib.iNaT
         else:
+
+            # we need to change to object type as our
+            # fill_value is of object type
+            if fill_value.dtype == np.object_:
+                dtype = np.dtype(np.object_)
             fill_value = np.nan
 
     # returns tuple of (dtype, fill_value)
@@ -763,7 +768,7 @@ def changeit():
             if change is not None:
                 change.dtype = r.dtype
                 change[:] = r
-                
+
             return r, True
 
         # we want to decide whether putmask will work
@@ -792,6 +797,34 @@ def changeit():
 
     return result, False
 
+def _maybe_upcast_indexer(result, indexer, other, dtype=None):
+    """ a safe version of setitem that (potentially) upcasts the result;
+        returns the result and a changed flag
+        """
+
+    def changeit():
+        # our type is wrong here, need to upcast
+        r, fill_value = _maybe_upcast(result, fill_value=other, dtype=dtype, copy=True)
+        try:
+            r[indexer] = other
+        except:
+
+            # if we hit this then we still have an incompatible type
+            r[indexer] = fill_value
+
+        return r, True
+
+    new_dtype, fill_value = _maybe_promote(result.dtype,other)
+    if new_dtype != result.dtype:
+        return changeit()
+
+    try:
+        result[indexer] = other
+    except:
+        return changeit()
+
+    return result, False
+
 def _maybe_upcast(values, fill_value=np.nan, dtype=None, copy=False):
     """ provide explicty type promotion and coercion
 
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
@@ -119,24 +119,54 @@ def _setitem_with_indexer(self, indexer, value):
             plane_indexer = indexer[:het_axis] + indexer[het_axis + 1:]
             item_labels = self.obj._get_axis(het_axis)
 
-            if isinstance(value, (np.ndarray, DataFrame)) and value.ndim > 1:
-                raise ValueError('Setting mixed-type DataFrames with '
-                                 'array/DataFrame pieces not yet supported')
+            def setter(item, v):
+                data = self.obj[item]
+                values = data.values
+                if np.prod(values.shape):
+                    result, changed = com._maybe_upcast_indexer(values,plane_indexer,v,dtype=getattr(data,'dtype',None))
+                    if changed:
+                        self.obj[item] = result
 
-            try:
-                for item in item_labels[het_idx]:
-                    data = self.obj[item]
-                    values = data.values
-                    if np.prod(values.shape):
-                        value = com._possibly_cast_to_datetime(
-                            value, getattr(data, 'dtype', None))
-                        values[plane_indexer] = value
-            except ValueError:
-                for item, v in zip(item_labels[het_idx], value):
-                    data = self.obj[item]
-                    values = data.values
-                    if np.prod(values.shape):
-                        values[plane_indexer] = v
+            labels = item_labels[het_idx]
+
+            if _is_list_like(value):
+
+                # we have an equal len Frame
+                if isinstance(value, DataFrame) and value.ndim > 1:
+
+                    for item in labels:
+
+                        # align to the target column: keep only the labels of v that are non-null there
+                        if item in value:
+                            v = value[item]
+                            v = v.reindex(self.obj[item].reindex(v.index).dropna().index)
+                            setter(item, v.values)
+                        else:
+                            setter(item, np.nan)
+
+                # we have an equal len ndarray
+                elif isinstance(value, np.ndarray) and value.ndim > 1:
+                    if len(labels) != len(value):
+                        raise ValueError('Must have equal len keys and value when'
+                                         ' setting with an ndarray')
+
+                    for i, item in enumerate(labels):
+                        setter(item, value[:,i])
+
+                # we have an equal len list/ndarray
+                elif len(labels) == 1 and len(self.obj[labels[0]]) == len(value):
+                    setter(labels[0], value)
+
+                # per label values
+                else:
+
+                    for item, v in zip(labels, value):
+                        setter(item, v)
+            else:
+
+                # scalar
+                for item in labels:
+                    setter(item, value)
 
         else:
             if isinstance(indexer, tuple):
diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -2065,7 +2065,7 @@ def update(self, other):
         """
         other = other.reindex_like(self)
         mask = notnull(other)
-        np.putmask(self.values, mask, other.values)
+        com._maybe_upcast_putmask(self.values,mask,other,change=self.values)
 
     #----------------------------------------------------------------------
     # Reindexing, sorting
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
@@ -1275,20 +1275,61 @@ def test_setitem_single_column_mixed_datetime(self):
         df.ix['d', :] = nan
         self.assert_(com.isnull(df.ix['c', :]).all() == False)
 
+        # as of GH 3216 this will now work!
         # try to set with a list like item
-        self.assertRaises(
-            Exception, df.ix.__setitem__, ('d', 'timestamp'), [nan])
+        #self.assertRaises(
+        #    Exception, df.ix.__setitem__, ('d', 'timestamp'), [nan])
 
     def test_setitem_frame(self):
         piece = self.frame.ix[:2, ['A', 'B']]
         self.frame.ix[-2:, ['A', 'B']] = piece.values
         assert_almost_equal(self.frame.ix[-2:, ['A', 'B']].values,
                             piece.values)
 
+        # GH 3216
+
+        # already aligned
+        f = self.mixed_frame.copy()
+        piece = DataFrame([[ 1, 2], [3, 4]], index=f.index[0:2],columns=['A', 'B'])
+        key = (slice(None,2), ['A', 'B'])
+        f.ix[key] = piece
+        assert_almost_equal(f.ix[0:2, ['A', 'B']].values,
+                            piece.values)
+
+        # rows unaligned
+        f = self.mixed_frame.copy()
+        piece = DataFrame([[ 1, 2 ], [3, 4], [5, 6], [7, 8]], index=list(f.index[0:2]) + ['foo','bar'],columns=['A', 'B'])
+        key = (slice(None,2), ['A', 'B'])
+        f.ix[key] = piece
+        assert_almost_equal(f.ix[0:2:, ['A', 'B']].values,
+                            piece.values[0:2])
+
+        # key is unaligned with values
+        f = self.mixed_frame.copy()
+        piece = f.ix[:2, ['A']]
+        key = (slice(-2, None), ['A', 'B'])
+        f.ix[key] = piece
+        piece['B'] = np.nan
+        assert_almost_equal(f.ix[-2:, ['A', 'B']].values,
+                            piece.values)
+
+        # ndarray
+        f = self.mixed_frame.copy()
         piece = self.mixed_frame.ix[:2, ['A', 'B']]
-        f = self.mixed_frame.ix.__setitem__
         key = (slice(-2, None), ['A', 'B'])
-        self.assertRaises(ValueError, f, key, piece)
+        f.ix[key] = piece.values
+        assert_almost_equal(f.ix[-2:, ['A', 'B']].values,
+                            piece.values)
+
+
+        # needs upcasting
+        df = DataFrame([[1,2,'foo'],[3,4,'bar']],columns=['A','B','C'])
+        df2 = df.copy()
+        df2.ix[:,['A','B']] = df.ix[:,['A','B']]+0.5
+        expected = df.reindex(columns=['A','B'])
+        expected += 0.5
+        expected['C'] = df['C']
+        assert_frame_equal(df2, expected)
 
     def test_setitem_frame_align(self):
         piece = self.frame.ix[:2, ['A', 'B']]
diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py
@@ -724,6 +724,18 @@ def test_xs_multiindex(self):
         expected = df.iloc[:,0:2].loc[:,'a']
         assert_frame_equal(result,expected)
 
+    def test_setitem_dtype_upcast(self):
+ 
+        # GH3216
+        df = DataFrame([{"a": 1}, {"a": 3, "b": 2}])
+        df['c'] = np.nan
+        self.assert_(df['c'].dtype == np.float64)
+
+        df.ix[0,'c'] = 'foo'
+        expected = DataFrame([{"a": 1, "c" : 'foo'}, {"a": 3, "b": 2, "c" : np.nan}])
+        assert_frame_equal(df,expected)
+
+
 if __name__ == '__main__':
     import nose
     nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py
@@ -2314,6 +2314,13 @@ def test_update(self):
         expected = Series([1.5, 3.5, 3., 5., np.nan])
         assert_series_equal(s, expected)
 
+        # GH 3217
+        df = DataFrame([{"a": 1}, {"a": 3, "b": 2}])
+        df['c'] = np.nan
+
+        # this will fail as long as series is a sub-class of ndarray
+        ##### df['c'].update(Series(['foo'],index=[0])) #####
+
     def test_corr(self):
         _skip_if_no_scipy()