Skip to content

Commit b2039e0

Browse files
committedApr 2, 2013
BUG: GH2808 Apply with invalid returned indices raise correct Exception
1 parent 84c1b41 commit b2039e0

File tree

4 files changed

+80
-8
lines changed

4 files changed

+80
-8
lines changed
 

Diff for: ‎RELEASE.rst

+2
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,7 @@ pandas 0.11.0
258258
- Upcast/split blocks when needed in a mixed DataFrame when setitem
259259
with an indexer (GH3216_)
260260
- Invoking df.applymap on a dataframe with dupe cols now raises a ValueError (GH2786_)
261+
- Apply with invalid returned indices now raises the correct Exception (GH2808_)
261262

262263
.. _GH622: https://github.com/pydata/pandas/issues/622
263264
.. _GH797: https://github.com/pydata/pandas/issues/797
@@ -304,6 +305,7 @@ pandas 0.11.0
304305
.. _GH2867: https://github.com/pydata/pandas/issues/2867
305306
.. _GH2803: https://github.com/pydata/pandas/issues/2803
306307
.. _GH2807: https://github.com/pydata/pandas/issues/2807
308+
.. _GH2808: https://github.com/pydata/pandas/issues/2808
307309
.. _GH2849: https://github.com/pydata/pandas/issues/2849
308310
.. _GH2850: https://github.com/pydata/pandas/issues/2850
309311
.. _GH2898: https://github.com/pydata/pandas/issues/2898

Diff for: ‎pandas/core/index.py

+8-8
Original file line numberDiff line numberDiff line change
@@ -1404,8 +1404,8 @@ def __new__(cls, levels=None, labels=None, sortorder=None, names=None):
14041404
subarr.names = [None] * subarr.nlevels
14051405
else:
14061406
if len(names) != subarr.nlevels:
1407-
raise AssertionError(('Length of names must be same as level '
1408-
'(%d), got %d') % (subarr.nlevels))
1407+
raise AssertionError(('Length of names (%d) must be same as level '
1408+
'(%d)') % (len(names),subarr.nlevels))
14091409

14101410
subarr.names = list(names)
14111411

@@ -2765,13 +2765,13 @@ def _handle_legacy_indexes(indexes):
27652765

27662766

27672767
def _get_consensus_names(indexes):
2768-
consensus_name = indexes[0].names
2769-
for index in indexes[1:]:
2770-
if index.names != consensus_name:
2771-
consensus_name = [None] * index.nlevels
2772-
break
2773-
return consensus_name
27742768

2769+
# find the non-none names, need to tupleify to make
2770+
# the set hashable, then reverse on return
2771+
consensus_names = set([ tuple(i.names) for i in indexes if all(n is not None for n in i.names) ])
2772+
if len(consensus_names) == 1:
2773+
return list(list(consensus_names)[0])
2774+
return [None] * indexes[0].nlevels
27752775

27762776
def _maybe_box(idx):
27772777
from pandas.tseries.api import DatetimeIndex, PeriodIndex

Diff for: ‎pandas/tests/test_groupby.py

+65
Original file line numberDiff line numberDiff line change
@@ -568,6 +568,71 @@ def f(x):
568568
assert_series_equal(agged, expected, check_dtype=False)
569569
self.assert_(issubclass(agged.dtype.type, np.dtype(dtype).type))
570570

571+
def test_indices_concatenation_order(self):
    """
    GH 2808: groupby-apply where some groups yield an empty result.

    ``f1`` returns an empty frame with a two-level index named like the
    non-empty results, and the two applies are expected to be equal.
    ``f2`` and ``f3`` return pieces that do not line up with the other
    groups' results and are expected to raise AssertionError.
    """

    # GH 2808

    def f1(x):
        # keep rows with odd ``b`` and square them; for an empty result
        # return an empty frame indexed by an empty ('b', 'c') MultiIndex
        y = x[(x.b % 2) == 1]**2
        if y.empty:
            multiindex = MultiIndex(
                levels=[[]]*2,
                labels=[[]]*2,
                names=['b', 'c']
            )
            res = DataFrame(None,
                            columns=['a'],
                            index=multiindex)
            return res
        else:
            y = y.set_index(['b', 'c'])
            return y

    def f2(x):
        # same as f1, but the empty case returns a bare DataFrame()
        # instead of one carrying a two-level index
        y = x[(x.b % 2) == 1]**2
        if y.empty:
            return DataFrame()
        else:
            y = y.set_index(['b', 'c'])
            return y

    def f3(x):
        # empty case: two columns and an index named ('foo', 'bar');
        # non-empty case: the frame is returned without set_index
        y = x[(x.b % 2) == 1]**2
        if y.empty:
            multiindex = MultiIndex(
                levels=[[]]*2,
                labels=[[]]*2,
                names=['foo', 'bar']
            )
            res = DataFrame(None,
                            columns=['a', 'b'],
                            index=multiindex)
            return res
        else:
            return y

    df = DataFrame({'a': [1, 2, 2, 2],
                    'b': range(4),
                    'c': range(5, 9)})

    df2 = DataFrame({'a': [3, 2, 2, 2],
                     'b': range(4),
                     'c': range(5, 9)})

    # correct result
    result1 = df.groupby('a').apply(f1)
    result2 = df2.groupby('a').apply(f1)
    assert_frame_equal(result1, result2)

    # should fail (not the same number of levels)
    self.assertRaises(AssertionError, df.groupby('a').apply, f2)
    self.assertRaises(AssertionError, df2.groupby('a').apply, f2)

    # should fail (incorrect shape)
    self.assertRaises(AssertionError, df.groupby('a').apply, f3)
    self.assertRaises(AssertionError, df2.groupby('a').apply, f3)
635+
571636
def test_attr_wrapper(self):
572637
grouped = self.ts.groupby(lambda x: x.weekday())
573638

Diff for: ‎pandas/tools/merge.py

+5
Original file line numberDiff line numberDiff line change
@@ -1221,6 +1221,11 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None):
12211221
if len(names) == len(levels):
12221222
names = list(names)
12231223
else:
1224+
# make sure that all of the passed indices have the same nlevels
1225+
if not len(set([ i.nlevels for i in indexes ])) == 1:
1226+
raise AssertionError("Cannot concat indices that do"
1227+
" not have the same number of levels")
1228+
12241229
# also copies
12251230
names = names + _get_consensus_names(indexes)
12261231

0 commit comments

Comments
 (0)
Please sign in to comment.