@@ -838,7 +838,7 @@ def aggregate_all_and_stack(
         ]
         result_expr = self.expr.aggregate(aggregations, dropna=dropna).unpivot(
             row_labels=self.column_labels.to_list(),
-            index_col_id="index",
+            index_col_ids=["index"],
             unpivot_columns=[(value_col_id, self.value_columns)],
             dtype=dtype,
         )
@@ -849,7 +849,7 @@ def aggregate_all_and_stack(
         expr_with_offsets, offset_col = self.expr.promote_offsets()
         stacked_expr = expr_with_offsets.unpivot(
             row_labels=self.column_labels.to_list(),
-            index_col_id=guid.generate_guid(),
+            index_col_ids=[guid.generate_guid()],
             unpivot_columns=[(value_col_id, self.value_columns)],
             passthrough_columns=[*self.index_columns, offset_col],
             dtype=dtype,
@@ -1041,7 +1041,7 @@ def summarize(
         expr = self.expr.aggregate(aggregations).unpivot(
             labels,
             unpivot_columns=columns,
-            index_col_id=label_col_id,
+            index_col_ids=[label_col_id],
         )
         labels = self._get_labels_for_columns(column_ids)
         return Block(expr, column_labels=labels, index_columns=[label_col_id])
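The first three hunks are the mechanical side of this change: `unpivot` now takes `index_col_ids`, a sequence of generated index column ids, instead of the scalar `index_col_id`, so each existing call site wraps its single id in a one-element list. A minimal sketch of the new calling convention; the `unpivot` stub below is hypothetical and only stands in for the real method, whose other parameters are unchanged:

```python
from typing import Sequence

def unpivot(*, index_col_ids: Sequence[str], **other_args) -> None:
    # Hypothetical stand-in for the real unpivot: it only illustrates that the
    # parameter is now a sequence, one id per index column to be created.
    print(f"creating {len(index_col_ids)} index column(s): {list(index_col_ids)}")

unpivot(index_col_ids=["index"])                     # old single-id call sites, now wrapped in a list
unpivot(index_col_ids=["level_1_id", "level_2_id"])  # multi-level stacking can pass several ids
```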
@@ -1225,116 +1225,83 @@ def pivot(
 
         return result_block.with_column_labels(column_index)
 
-    def stack(self):
+    def stack(self, how="left", dropna=True, sort=True, levels: int = 1):
         """Unpivot last column axis level into row axis"""
-        if isinstance(self.column_labels, pd.MultiIndex):
-            return self._stack_multi()
-        else:
-            return self._stack_mono()
-
-    def _stack_mono(self):
-        if isinstance(self.column_labels, pd.MultiIndex):
-            raise ValueError("Expected single level index")
-
         # These are the values that will be turned into rows
-        stack_values = self.column_labels.drop_duplicates().sort_values()
 
-        # Get matching columns
-        unpivot_columns: List[Tuple[str, List[str]]] = []
-        dtypes: List[bigframes.dtypes.Dtype] = []
-        col_id = guid.generate_guid("unpivot_")
-        dtype = None
-        input_columns: Sequence[Optional[str]] = []
-        for uvalue in stack_values:
-            matching_ids = self.label_to_col_id.get(uvalue, [])
-            input_id = matching_ids[0] if len(matching_ids) > 0 else None
-            if input_id:
-                if dtype and dtype != self._column_type(input_id):
-                    raise NotImplementedError(
-                        "Cannot stack columns with non-matching dtypes."
-                    )
-                else:
-                    dtype = self._column_type(input_id)
-            input_columns.append(input_id)
-        unpivot_columns.append((col_id, input_columns))
-        if dtype:
-            dtypes.append(dtype or pd.Float64Dtype())
+        col_labels, row_labels = utils.split_index(self.column_labels, levels=levels)
+        if dropna:
+            row_labels = row_labels.drop_duplicates()
+        if sort:
+            row_labels = row_labels.sort_values()
 
-        added_index_column = col_id = guid.generate_guid()
-        unpivot_expr = self._expr.unpivot(
-            row_labels=stack_values,
-            passthrough_columns=self.index_columns,
-            unpivot_columns=unpivot_columns,
-            index_col_id=added_index_column,
-            dtype=dtypes,
-        )
-        block = Block(
-            unpivot_expr,
-            index_columns=[*self.index_columns, added_index_column],
-            column_labels=[None],
-            index_labels=[*self._index_labels, self.column_labels.names[-1]],
-        )
-        return block
-
-    def _stack_multi(self):
-        if not isinstance(self.column_labels, pd.MultiIndex):
-            raise ValueError("Expected multi-index")
-
-        # These are the values that will be turned into rows
-        stack_values = (
-            self.column_labels.get_level_values(-1).drop_duplicates().sort_values()
-        )
+        row_label_tuples = utils.index_as_tuples(row_labels)
 
-        result_col_labels = (
-            self.column_labels.droplevel(-1)
-            .drop_duplicates()
-            .sort_values()
-            .dropna(how="all")
-        )
+        if col_labels is not None:
+            result_index = col_labels.drop_duplicates().sort_values().dropna(how="all")
+            result_col_labels = utils.index_as_tuples(result_index)
+        else:
+            result_index = pd.Index([None])
+            result_col_labels = list([()])
 
         # Get matching columns
         unpivot_columns: List[Tuple[str, List[str]]] = []
         dtypes = []
         for val in result_col_labels:
             col_id = guid.generate_guid("unpivot_")
-            dtype = None
-            input_columns: Sequence[Optional[str]] = []
-            for uvalue in stack_values:
-                # Need to unpack if still a multi-index after dropping 1 level
-                label_to_match = (
-                    (val, uvalue) if result_col_labels.nlevels == 1 else (*val, uvalue)
-                )
-                matching_ids = self.label_to_col_id.get(label_to_match, [])
-                input_id = matching_ids[0] if len(matching_ids) > 0 else None
-                if input_id:
-                    if dtype and dtype != self._column_type(input_id):
-                        raise NotImplementedError(
-                            "Cannot stack columns with non-matching dtypes."
-                        )
-                    else:
-                        dtype = self._column_type(input_id)
-                input_columns.append(input_id)
-                # Input column i is the first one that
+            input_columns, dtype = self._create_stack_column(val, row_label_tuples)
             unpivot_columns.append((col_id, input_columns))
             if dtype:
                 dtypes.append(dtype or pd.Float64Dtype())
 
-        added_index_column = col_id = guid.generate_guid()
+        added_index_columns = [guid.generate_guid() for _ in range(row_labels.nlevels)]
         unpivot_expr = self._expr.unpivot(
-            row_labels=stack_values,
+            row_labels=row_label_tuples,
             passthrough_columns=self.index_columns,
             unpivot_columns=unpivot_columns,
-            index_col_id=added_index_column,
+            index_col_ids=added_index_columns,
             dtype=dtypes,
+            how=how,
         )
+        new_index_level_names = self.column_labels.names[-levels:]
+        if how == "left":
+            index_columns = [*self.index_columns, *added_index_columns]
+            index_labels = [*self._index_labels, *new_index_level_names]
+        else:
+            index_columns = [*added_index_columns, *self.index_columns]
+            index_labels = [*new_index_level_names, *self._index_labels]
+
         block = Block(
             unpivot_expr,
-            index_columns=[*self.index_columns, added_index_column],
-            column_labels=result_col_labels,
-            index_labels=[*self._index_labels, self.column_labels.names[-1]],
+            index_columns=index_columns,
+            column_labels=result_index,
+            index_labels=index_labels,
         )
         return block
 
+    def _create_stack_column(
+        self, col_label: typing.Tuple, stack_labels: typing.Sequence[typing.Tuple]
+    ):
+        dtype = None
+        input_columns: list[Optional[str]] = []
+        for uvalue in stack_labels:
+            label_to_match = (*col_label, *uvalue)
+            label_to_match = (
+                label_to_match[0] if len(label_to_match) == 1 else label_to_match
+            )
+            matching_ids = self.label_to_col_id.get(label_to_match, [])
+            input_id = matching_ids[0] if len(matching_ids) > 0 else None
+            if input_id:
+                if dtype and dtype != self._column_type(input_id):
+                    raise NotImplementedError(
+                        "Cannot stack columns with non-matching dtypes."
+                    )
+                else:
+                    dtype = self._column_type(input_id)
+            input_columns.append(input_id)
+            # Input column i is the first one that
+        return input_columns, dtype or pd.Float64Dtype()
+
     def _column_type(self, col_id: str) -> bigframes.dtypes.Dtype:
         col_offset = self.value_columns.index(col_id)
         dtype = self.dtypes[col_offset]
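The rewritten `stack` replaces the separate `_stack_mono`/`_stack_multi` paths with one implementation: `utils.split_index` splits the column index into the levels that remain columns (`col_labels`) and the innermost `levels` that become rows (`row_labels`), and `_create_stack_column` finds the matching source column for each (column label, row label) pair. The new `how`, `dropna`, and `sort` keywords control how the produced rows are combined and whether row labels are deduplicated and sorted. As a rough analogy for the `levels` parameter only, this is what plain-pandas `stack` does with one versus two column levels (pandas shown here, not the bigframes Block API):

```python
import pandas as pd

cols = pd.MultiIndex.from_product([["a", "b"], ["x", "y"]], names=["outer", "inner"])
df = pd.DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], columns=cols, index=["r0", "r1"])

# levels=1: only the innermost column level ("inner") moves into the row index.
print(df.stack())

# levels=2: both column levels move into the row index, collapsing the frame
# to a single series of values.
print(df.stack([0, 1]))
```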