Advertencia para el usuario de Python Pandas: clasificación porque el eje de no concatenación no está alineado

Estoy haciendo algo de práctica de código y aplicando la fusión de marcos de datos mientras hago esto obteniendo la advertencia del usuario

/usr/lib64/python2.7/site-packages/pandas/core/frame.py:6201: FutureWarning: ordenando porque el eje de no concatenación no está alineado. Una versión futura de pandas cambiará a no ordenada por defecto. Para aceptar el comportamiento futuro, pase ‘sort = True’. Para conservar el comportamiento actual y silenciar la advertencia, pase sort = False

En estas líneas de código: ¿Puede usted ayudar a obtener la solución de esta advertencia?

placement_video = [self.read_sql_vdx_summary, self.read_sql_video_km] placement_video_summary = reduce(lambda left, right: pd.merge(left, right, on='PLACEMENT', sort=False), placement_video) placement_by_video = placement_video_summary.loc[:, ["PLACEMENT", "PLACEMENT_NAME", "COST_TYPE", "PRODUCT", "VIDEONAME", "VIEW0", "VIEW25", "VIEW50", "VIEW75", "VIEW100", "ENG0", "ENG25", "ENG50", "ENG75", "ENG100", "DPE0", "DPE25", "DPE50", "DPE75", "DPE100"]] # print (placement_by_video) placement_by_video["Placement# Name"] = placement_by_video[["PLACEMENT", "PLACEMENT_NAME"]].apply(lambda x: ".".join(x), axis=1) placement_by_video_new = placement_by_video.loc[:, ["PLACEMENT", "Placement# Name", "COST_TYPE", "PRODUCT", "VIDEONAME", "VIEW0", "VIEW25", "VIEW50", "VIEW75", "VIEW100", "ENG0", "ENG25", "ENG50", "ENG75", "ENG100", "DPE0", "DPE25", "DPE50", "DPE75", "DPE100"]] placement_by_km_video = [placement_by_video_new, self.read_sql_km_for_video] placement_by_km_video_summary = reduce(lambda left, right: pd.merge(left, right, on=['PLACEMENT', 'PRODUCT'], sort=False), placement_by_km_video) #print (list(placement_by_km_video_summary)) #print(placement_by_km_video_summary) #exit() # print(placement_by_video_new) """Conditions for 25%view""" mask17 = placement_by_km_video_summary["PRODUCT"].isin(['Display', 'Mobile']) mask18 = placement_by_km_video_summary["COST_TYPE"].isin(["CPE", "CPM", "CPCV"]) mask19 = placement_by_km_video_summary["PRODUCT"].isin(["InStream"]) mask20 = placement_by_km_video_summary["COST_TYPE"].isin(["CPE", "CPM", "CPE+", "CPCV"]) mask_video_video_completions = placement_by_km_video_summary["COST_TYPE"].isin(["CPCV"]) mask21 = placement_by_km_video_summary["COST_TYPE"].isin(["CPE+"]) mask22 = placement_by_km_video_summary["COST_TYPE"].isin(["CPE", "CPM"]) mask23 = placement_by_km_video_summary["PRODUCT"].isin(['Display', 'Mobile', 'InStream']) mask24 = placement_by_km_video_summary["COST_TYPE"].isin(["CPE", "CPM", "CPE+"]) choice25video_eng = placement_by_km_video_summary["ENG25"] choice25video_vwr = placement_by_km_video_summary["VIEW25"] choice25video_deep = placement_by_km_video_summary["DPE25"] placement_by_km_video_summary["25_pc_video"] = np.select([mask17 & mask18, mask19 & mask20, mask17 & mask21], [choice25video_eng, choice25video_vwr, choice25video_deep]) """Conditions for 50%view""" choice50video_eng = placement_by_km_video_summary["ENG50"] choice50video_vwr = placement_by_km_video_summary["VIEW50"] choice50video_deep = placement_by_km_video_summary["DPE50"] placement_by_km_video_summary["50_pc_video"] = np.select([mask17 & mask18, mask19 & mask20, mask17 & mask21], [choice50video_eng, choice50video_vwr, choice50video_deep]) """Conditions for 75%view""" choice75video_eng = placement_by_km_video_summary["ENG75"] choice75video_vwr = placement_by_km_video_summary["VIEW75"] choice75video_deep = placement_by_km_video_summary["DPE75"] placement_by_km_video_summary["75_pc_video"] = np.select([mask17 & mask18, mask19 & mask20, mask17 & mask21], [choice75video_eng, choice75video_vwr, choice75video_deep]) """Conditions for 100%view""" choice100video_eng = placement_by_km_video_summary["ENG100"] choice100video_vwr = placement_by_km_video_summary["VIEW100"] choice100video_deep = placement_by_km_video_summary["DPE100"] choicecompletions = placement_by_km_video_summary['COMPLETIONS'] placement_by_km_video_summary["100_pc_video"] = np.select([mask17 & mask22, mask19 & mask24, mask17 & mask21, mask23 & mask_video_video_completions], [choice100video_eng, choice100video_vwr, choice100video_deep, choicecompletions]) """conditions for 0%view""" choice0video_eng = placement_by_km_video_summary["ENG0"] choice0video_vwr = placement_by_km_video_summary["VIEW0"] choice0video_deep = placement_by_km_video_summary["DPE0"] placement_by_km_video_summary["Views"] = np.select([mask17 & mask18, mask19 & mask20, mask17 & mask21], [choice0video_eng, choice0video_vwr, choice0video_deep]) #print (placement_by_km_video_summary) #exit() #final Table placement_by_video_summary = placement_by_km_video_summary.loc[:, ["PLACEMENT", "Placement# Name", "PRODUCT", "VIDEONAME", "COST_TYPE", "Views", "25_pc_video", "50_pc_video", "75_pc_video","100_pc_video", "ENGAGEMENTS","IMPRESSIONS", "DPEENGAMENTS"]] #placement_by_km_video = [placement_by_video_summary, self.read_sql_km_for_video] #placement_by_km_video_summary = reduce(lambda left, right: pd.merge(left, right, on=['PLACEMENT', 'PRODUCT']), #placement_by_km_video) #print(placement_by_video_summary) #exit() # dup_col =["IMPRESSIONS","ENGAGEMENTS","DPEENGAMENTS"] # placement_by_video_summary.loc[placement_by_video_summary.duplicated(dup_col),dup_col] = np.nan # print ("Dhar",placement_by_video_summary) '''adding views based on conditions''' #filter maximum value from videos placement_by_video_summary_new = placement_by_km_video_summary.loc[ placement_by_km_video_summary.reset_index().groupby(['PLACEMENT', 'PRODUCT'])['Views'].idxmax()] #print (placement_by_video_summary_new) #exit() # print (placement_by_video_summary_new) # mask22 = (placement_by_video_summary_new.PRODUCT.str.upper ()=='DISPLAY') & (placement_by_video_summary_new.COST_TYPE=='CPE') placement_by_video_summary_new.loc[mask17 & mask18, 'Views'] = placement_by_video_summary_new['ENGAGEMENTS'] placement_by_video_summary_new.loc[mask19 & mask20, 'Views'] = placement_by_video_summary_new['IMPRESSIONS'] placement_by_video_summary_new.loc[mask17 & mask21, 'Views'] = placement_by_video_summary_new['DPEENGAMENTS'] #print (placement_by_video_summary_new) #exit() placement_by_video_summary = placement_by_video_summary.drop(placement_by_video_summary_new.index).append( placement_by_video_summary_new).sort_index() placement_by_video_summary["Video Completion Rate"] = placement_by_video_summary["100_pc_video"] / \ placement_by_video_summary["Views"] placement_by_video_final = placement_by_video_summary.loc[:, ["Placement# Name", "PRODUCT", "VIDEONAME", "Views", "25_pc_video", "50_pc_video", "75_pc_video", "100_pc_video", "Video Completion Rate"]] 

Este comportamiento es nuevo en pandas 0.23.0 .

En una versión futura de pandas, pandas.concat() y DataFrame.append() ya no ordenarán el eje de no concatenación cuando aún no esté alineado. El comportamiento actual es el mismo que el anterior (clasificación), pero ahora se emite una advertencia cuando no se especifica la clasificación y el eje de no concatenación no está alineado, enlace :

La solución es agregar sort=True parámetro sort=True :

 df1 = pd.DataFrame({"a": [1, 2], "b": [1, 2]}, columns=['b', 'a']) df2 = pd.DataFrame({"a": [4, 5]}) print (pd.concat([df1, df2], sort=True)) ab 0 1 1.0 1 2 2.0 0 4 NaN 1 5 NaN print (df1.append(df2, sort=True)) ab 0 1 1.0 1 2 2.0 0 4 NaN 1 5 NaN 

En su código:

 placement_by_video_summary = placement_by_video_summary.drop(placement_by_video_summary_new.index) .append(placement_by_video_summary_new, sort=True) .sort_index() 

La respuesta de Jezrael es buena, pero no respondió una pregunta que tuve: ¿el hecho de que aparezca el indicador “orden” mal arruinará mis datos de alguna manera? La respuesta es aparentemente “no”, estás bien de cualquier manera.

 from pandas import DataFrame, concat a = DataFrame([{'a':1, 'c':2,'d':3 }]) b = DataFrame([{'a':4,'b':5, 'd':6,'e':7}]) >>> concat([a,b],sort=False) acdbe 0 1 2.0 3 NaN NaN 0 4 NaN 6 5.0 7.0 >>> concat([a,b],sort=True) abcde 0 1 NaN 2.0 3 NaN 0 4 5.0 NaN 6 7.0