from pandas import * import numpy as np import string g1 = np.array(list(string.letters))[:-1] g2 = np.arange(510) df_small = DataFrame({'group1': ["a", "b", "a", "a", "b", "c", "c", "c", "c", "c", "a", "a", "a", "b", "b", "b", "b"], 'group2': [1, 2, 3, 4, 1, 3, 5, 6, 5, 4, 1, 2, 3, 4, 3, 2, 1], 'value': ["apple", "pear", "orange", "apple", "banana", "durian", "lemon", "lime", "raspberry", "durian", "peach", "nectarine", "banana", "lemon", "guava", "blackberry", "grape"]}) value = df_small['value'].values.repeat(3) df = DataFrame({'group1': g1.repeat(4000 * 5), 'group2': np.tile(g2, 400 * 5), 'value': value.repeat(4000 * 5)}) def random_sample(): grouped = df.groupby(['group1', 'group2'])['value'] from random import choice choose = lambda group: choice(group.index) indices = grouped.apply(choose) return df.reindex(indices) def random_sample_v2(): grouped = df.groupby(['group1', 'group2'])['value'] from random import choice choose = lambda group: choice(group.index) indices = [choice(v) for k, v in grouped.groups.iteritems()] return df.reindex(indices) def do_shuffle(arr): from random import shuffle result = arr.copy().values shuffle(result) return result def shuffle_uri(df, grouped): perm = np.r_[tuple([np.random.permutation( idxs) for idxs in grouped.groups.itervalues()])] df['state_permuted'] = np.asarray(df.ix[perm]['value']) df2 = df.copy() grouped = df2.groupby('group1') shuffle_uri(df2, grouped) df2['state_perm'] = grouped['value'].transform(do_shuffle)