BUG: GH2808 Apply with invalid returned indices now raises the correct Exception

jreback · jreback · commit b2039e066b14 · 2013-04-02T08:24:26.000-04:00
diff --git a/RELEASE.rst b/RELEASE.rst
@@ -258,6 +258,7 @@ pandas 0.11.0
   - Upcast/split blocks when needed in a mixed DataFrame when setitem 
     with an indexer (GH3216_)
   - Invoking df.applymap on a dataframe with dupe cols now raises a ValueError (GH2786_)
+  - Apply with invalid returned indices now raises the correct Exception (GH2808_)
 
 .. _GH622: https://github.com/pydata/pandas/issues/622
 .. _GH797: https://github.com/pydata/pandas/issues/797
@@ -304,6 +305,7 @@ pandas 0.11.0
 .. _GH2867: https://github.com/pydata/pandas/issues/2867
 .. _GH2803: https://github.com/pydata/pandas/issues/2803
 .. _GH2807: https://github.com/pydata/pandas/issues/2807
+.. _GH2808: https://github.com/pydata/pandas/issues/2808
 .. _GH2849: https://github.com/pydata/pandas/issues/2849
 .. _GH2850: https://github.com/pydata/pandas/issues/2850
 .. _GH2898: https://github.com/pydata/pandas/issues/2898
diff --git a/pandas/core/index.py b/pandas/core/index.py
@@ -1404,8 +1404,8 @@ def __new__(cls, levels=None, labels=None, sortorder=None, names=None):
             subarr.names = [None] * subarr.nlevels
         else:
             if len(names) != subarr.nlevels:
-                raise AssertionError(('Length of names must be same as level '
-                                      '(%d), got %d') % (subarr.nlevels))
+                raise AssertionError(('Length of names (%d) must be same as level '
+                                      '(%d)') % (len(names),subarr.nlevels))
 
             subarr.names = list(names)
 
@@ -2765,13 +2765,13 @@ def _handle_legacy_indexes(indexes):
 
 
 def _get_consensus_names(indexes):
-    consensus_name = indexes[0].names
-    for index in indexes[1:]:
-        if index.names != consensus_name:
-            consensus_name = [None] * index.nlevels
-            break
-    return consensus_name
 
+    # collect the name lists that contain no None entries; each is converted
+    # to a tuple so the set can hash it, then back to a list on return
+    consensus_names = set([ tuple(i.names) for i in indexes if all(n is not None for n in i.names) ])
+    if len(consensus_names) == 1:
+        return list(list(consensus_names)[0])
+    return [None] * indexes[0].nlevels
 
 def _maybe_box(idx):
     from pandas.tseries.api import DatetimeIndex, PeriodIndex
diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py
@@ -568,6 +568,71 @@ def f(x):
             assert_series_equal(agged, expected, check_dtype=False)
             self.assert_(issubclass(agged.dtype.type, np.dtype(dtype).type))
 
+    def test_indices_concatenation_order(self):
+
+        # GH 2808
+
+        def f1(x):
+            y = x[(x.b % 2) == 1]**2
+            if y.empty:
+                multiindex = MultiIndex(
+                    levels = [[]]*2,
+                    labels = [[]]*2,
+                    names = ['b', 'c']
+                    )
+                res = DataFrame(None,
+                                   columns=['a'],
+                                   index=multiindex)
+                return res
+            else:
+                y = y.set_index(['b','c'])
+                return y
+
+        def f2(x):
+            y = x[(x.b % 2) == 1]**2
+            if y.empty:
+                return DataFrame()
+            else:
+                y = y.set_index(['b','c'])
+                return y
+
+        def f3(x):
+            y = x[(x.b % 2) == 1]**2
+            if y.empty:
+                multiindex = MultiIndex(
+                    levels = [[]]*2,
+                    labels = [[]]*2,
+                    names = ['foo', 'bar']
+                    )
+                res = DataFrame(None,
+                                columns=['a','b'],
+                                index=multiindex)
+                return res
+            else:
+                return y
+
+        df = DataFrame({'a':[1,2,2,2],
+                        'b':range(4),
+                        'c':range(5,9)})
+        
+        df2 = DataFrame({'a':[3,2,2,2],
+                         'b':range(4),
+                         'c':range(5,9)})
+
+
+        # correct result
+        result1 = df.groupby('a').apply(f1)
+        result2 = df2.groupby('a').apply(f1)
+        assert_frame_equal(result1, result2)
+        
+        # should fail (not the same number of levels)
+        self.assertRaises(AssertionError, df.groupby('a').apply, f2)
+        self.assertRaises(AssertionError, df2.groupby('a').apply, f2)
+
+        # should fail (incorrect shape)
+        self.assertRaises(AssertionError, df.groupby('a').apply, f3)
+        self.assertRaises(AssertionError, df2.groupby('a').apply, f3)
+
     def test_attr_wrapper(self):
         grouped = self.ts.groupby(lambda x: x.weekday())
 
diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py
@@ -1221,6 +1221,11 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None):
         if len(names) == len(levels):
             names = list(names)
         else:
+            # make sure that all of the passed indices have the same nlevels
+            if not len(set([ i.nlevels for i in indexes ])) == 1:
+                raise AssertionError("Cannot concat indices that do"
+                                     " not have the same number of levels")
+
             # also copies
             names = names + _get_consensus_names(indexes)