Guardar un dataframe de pandas como tabla en una imagen o en un documento PDF con una buena visualización de índices múltiples

Estoy tratando de incluir un dataframe con índices múltiples en un informe en PDF. Me gustaría obtener una tabla con buen aspecto.

He encontrado estas 2 soluciones:

pandas.df -> HTML -> pdf

import pandas as pd
from IPython.display import HTML
import pdfkit

# --- df generation ---
# NOTE: `path_to_csv` is not defined in this snippet; it must be set by the
# caller and point to a CSV file that has a 'Cluster' column.
df = pd.read_csv(path_to_csv, sep=',')
res = df.groupby('Cluster').describe(percentiles=[0.05, 0.5, 0.95])
if isinstance(res.columns, pd.MultiIndex):
    # pandas >= 0.20 returns one row per cluster with (column, statistic)
    # columns; move the statistic level into the row index so that both the
    # old and the new layout end up indexed by (Cluster, stats).
    res = res.stack(level=1)
res.index.names = ['Cluster', 'stats']

clusters = res.index.get_level_values('Cluster')
stats = res.index.get_level_values('stats')

# The population of a cluster is the 'count' of the first described column.
# Map it by cluster label instead of by positional index codes, so the code
# does not depend on MultiIndex.labels (removed in pandas 1.0).
counts = res[stats == 'count']
population_by_cluster = pd.Series(
    counts.iloc[:, 0].values,
    index=counts.index.get_level_values('Cluster'),
)
population = population_by_cluster.reindex(clusters).values
frequency = (population / population_by_cluster.sum()).round(3)

res.index = pd.MultiIndex.from_arrays(
    [clusters, population, frequency, stats],
    names=['Cluster', 'population', 'frequency', 'stats'],
)

# Keep only the statistics shown in the report.
res1 = res[stats.isin(['5%', 'mean', '50%', '95%'])].round(2)

# --- saving the df ---
h = HTML(res1.to_html())
with open('test.html', 'w') as my_file:
    my_file.write(h.data)

options = {'orientation': 'Landscape'}
with open('test.html') as f:
    pdfkit.from_file(f, 'out.pdf', options=options)

Pero esto depende de pdfkit, lo que nos complica las cosas. Por eso estoy intentando usar pandas.df -> tex -> pdf (como se menciona en «Exportar un dataframe de Pandas como imagen de tabla»).

  import pandas as pd
  import os
  import subprocess

  # --- df generation ---
  # NOTE: `path_to_csv` is not defined in this snippet; it must be set by the
  # caller and point to a CSV file that has a 'Cluster' column.
  df = pd.read_csv(path_to_csv, sep=',')
  res = df.groupby('Cluster').describe(percentiles=[0.05, 0.5, 0.95])
  if isinstance(res.columns, pd.MultiIndex):
      # pandas >= 0.20 returns one row per cluster with (column, statistic)
      # columns; move the statistic level into the row index so that both the
      # old and the new layout end up indexed by (Cluster, stats).
      res = res.stack(level=1)
  res.index.names = ['Cluster', 'stats']

  clusters = res.index.get_level_values('Cluster')
  stats = res.index.get_level_values('stats')

  # The population of a cluster is the 'count' of the first described column.
  # Map it by cluster label instead of by positional index codes, so the code
  # does not depend on MultiIndex.labels (removed in pandas 1.0).
  counts = res[stats == 'count']
  population_by_cluster = pd.Series(
      counts.iloc[:, 0].values,
      index=counts.index.get_level_values('Cluster'),
  )
  population = population_by_cluster.reindex(clusters).values
  frequency = (population / population_by_cluster.sum()).round(3)

  res.index = pd.MultiIndex.from_arrays(
      [clusters, population, frequency, stats],
      names=['Cluster', 'population', 'frequency', 'stats'],
  )

  # Keep only the statistics shown in the report.
  res1 = res[stats.isin(['5%', 'mean', '50%', '95%'])].round(2)
  # Underscores are replaced by spaces in the column names before the table
  # is rendered to LaTeX.
  res1 = res1.rename(columns=lambda x: x.replace('_', ' '))

  # --- latex ---
  # The braces are doubled because the template goes through str.format().
  # The `standalone` document class must be installed in the TeX distribution,
  # otherwise pdflatex stops with "File `standalone.cls' not found".
  template = '\n'.join([
      r'\documentclass[preview]{{standalone}}',
      r'\usepackage{{booktabs}}',
      r'\begin{{document}}',
      '{}',
      r'\end{{document}}',
      '',
  ])

  # Text mode: to_latex() returns text, which cannot be written to a file
  # opened with "wb" on Python 3.
  with open("outputfile.tex", "w") as afile:
      afile.write(template.format(res1.to_latex()))

  # check_call raises if pdflatex fails; nonstopmode keeps pdflatex from
  # waiting for keyboard input when it hits an error.
  subprocess.check_call(
      ["pdflatex", "-interaction=nonstopmode", "outputfile.tex"]
  )

Sin embargo, no estoy familiarizado con LaTeX y recibo este error:

      ! LaTeX Error: File `standalone.cls' not found. Type X to quit or <RETURN> to proceed, or enter a new name. (Default extension: cls) 

    ¿Alguna idea sobre el error o la forma estándar de hacer pandas.df -> pdf?

    La solución que me funciona: con pandas >= 0.17, instalé pdflatex y copié algunos paquetes de LaTeX como booktabs.sty, geography.sty y pdflscape.sty.

     import argparse
     import math
     import os
     import shutil
     import subprocess
     import sys

     import pandas as pd

     # Number of table rows written per LaTeX tabular; longer tables are split.
     ROWS_PER_TABLE = 40

     # Statistics kept in the final summary table.
     KEPT_STATS = ['5%', 'mean', '50%', '95%']

     # Base name (without extension) of the generated .tex/.aux/.log/.pdf files.
     TEX_BASENAME = 'clustering_summary_table'

     # Style files copied from ./userpackagelatex next to the .tex file.
     # NOTE(review): the LaTeX template loads the `geometry` package, so
     # 'geography.sty' may be a typo for 'geometry.sty' -- confirm against the
     # actual contents of userpackagelatex/ before renaming.
     USER_PACKAGES = ['booktabs.sty', 'geography.sty', 'pdflscape.sty']

     # The trailing '%' on the geometry option lines is a LaTeX comment that
     # swallows the line break, so these lines must stay separate lines.
     TEMPLATE_TOP = '\n'.join([
         r'\documentclass[a3paper, 5pt]{article}',
         r'\usepackage{booktabs}',
         r'\usepackage{pdflscape}',
         r'\usepackage[a4paper,bindingoffset=0.2in,%',
         r'            left=0.25in,right=0.25in,top=1in,bottom=1in,%',
         r'            footskip=.25in]{geometry}',
         r'\begin{document}',
         r'\begin{landscape}',
         r'\pagenumbering{gobble}',
         r'\oddsidemargin = 0pt',
         r'\hoffset = -0.25in',
         r'\topmargin = 1pt',
         r'\headheight = 0pt',
         r'\headsep = 0pt',
         '',
     ])

     TEMPLATE_BOTTOM = '\n'.join([
         '',
         r'\end{landscape}',
         r'\end{document}',
         '',
     ])


     def build_summary_table(df):
         """Build the per-cluster summary table that is rendered to LaTeX.

         Args:
             df: DataFrame with a 'Cluster' column and at least one numeric
                 column.

         Returns:
             A DataFrame indexed by (cluster, population, frequency, stats)
             holding the 5%, mean, 50% and 95% statistics of every described
             column, rounded to 2 decimals, with '_' replaced by ' ' in the
             column names.
         """
         described = df.groupby('Cluster').describe(percentiles=[0.05, 0.5, 0.95])
         if isinstance(described.columns, pd.MultiIndex):
             # pandas >= 0.20 returns one row per cluster with
             # (column, statistic) columns; move the statistic level into the
             # row index so both layouts end up indexed by (Cluster, Stats).
             described = described.stack(level=1)
         described.index.names = ['Cluster', 'Stats']

         clusters = described.index.get_level_values('Cluster')
         stats = described.index.get_level_values('Stats')

         # The population of a cluster is the 'count' of the first described
         # column. Map it by cluster label rather than by positional index
         # codes, so the code does not rely on MultiIndex.labels (removed in
         # pandas 1.0).
         counts = described[stats == 'count']
         population_by_cluster = pd.Series(
             counts.iloc[:, 0].values,
             index=counts.index.get_level_values('Cluster'),
         )
         population = population_by_cluster.reindex(clusters).values
         frequency = (population / population_by_cluster.sum()).round(3)

         described.index = pd.MultiIndex.from_arrays(
             [clusters, population, frequency, stats],
             names=['cluster', 'population', 'frequency', 'stats'],
         )

         table = described[stats.isin(KEPT_STATS)].round(2)
         return table.rename(columns=lambda name: name.replace('_', ' '))


     def save_summary_table_as_pdf(path_to_csv, path_to_output_folder):
         """Render the clustering summary of a CSV file as a PDF table.

         Writes clustering_summary_table.tex into ``path_to_output_folder``,
         runs pdflatex on it from that folder and removes the intermediate
         .tex/.aux/.log files.

         Args:
             path_to_csv: path of the comma-separated input file; it must have
                 a 'Cluster' column.
             path_to_output_folder: existing folder that receives the PDF.

         Raises:
             subprocess.CalledProcessError: if pdflatex exits with an error.
                 The .log file is kept in that case so the failure can be
                 diagnosed.
             OSError: if pdflatex cannot be started.
         """
         table = build_summary_table(pd.read_csv(path_to_csv, sep=','))
         nb_pages = int(math.ceil(table.shape[0] / float(ROWS_PER_TABLE)))

         tex_name = TEX_BASENAME + '.tex'
         tex_path = os.path.join(path_to_output_folder, tex_name)
         leftovers = ['.tex', '.aux']
         try:
             # Text mode: to_latex() returns text, which cannot be written to
             # a file opened with "wb" on Python 3.
             with open(tex_path, 'w') as tex_file:
                 tex_file.write(TEMPLATE_TOP + '\n')
                 for page in range(nb_pages):
                     first = page * ROWS_PER_TABLE
                     chunk = table.iloc[first:first + ROWS_PER_TABLE, :]
                     tex_file.write(
                         chunk.to_latex() + '\n' + r'\pagenumbering{gobble}'
                     )
                 tex_file.write(TEMPLATE_BOTTOM + '\n')

             # cwd= runs pdflatex inside the output folder without changing
             # the working directory of this process; nonstopmode keeps
             # pdflatex from waiting for keyboard input on errors.
             subprocess.check_call(
                 ['pdflatex', '-interaction=nonstopmode', tex_name],
                 cwd=path_to_output_folder,
             )
             leftovers.append('.log')
         finally:
             for extension in leftovers:
                 leftover = os.path.join(
                     path_to_output_folder, TEX_BASENAME + extension
                 )
                 if os.path.exists(leftover):
                     os.remove(leftover)


     def main():
         """Command-line entry point: ``script.py path_to_csv outputfolder``."""
         print('begin generate pdf table about clustering')
         parser = argparse.ArgumentParser()
         parser.add_argument("path_to_csv")
         parser.add_argument("outputfolder")
         args = vars(parser.parse_args())

         script_dir = os.path.abspath(os.path.dirname(__file__))
         output_folder = os.path.abspath(args['outputfolder'])
         csv_path = os.path.abspath(args['path_to_csv'])

         # Copy the bundled LaTeX style files next to the .tex file and remove
         # the copies afterwards, even if the PDF generation fails.
         copied = []
         try:
             for package in USER_PACKAGES:
                 source = os.path.join(script_dir, 'userpackagelatex', package)
                 if not os.path.isfile(source):
                     sys.stderr.write(
                         'warning: LaTeX package not found: ' + source + '\n'
                     )
                     continue
                 shutil.copy(source, output_folder)
                 copied.append(os.path.join(output_folder, package))
             save_summary_table_as_pdf(csv_path, output_folder)
         finally:
             for path in copied:
                 if os.path.exists(path):
                     os.remove(path)


     if __name__ == "__main__":
         main()

    Bueno, una forma es usar Markdown. Puedes usar df.to_html(), que convierte el dataframe en una tabla HTML. A partir de ahí, puedes colocar el HTML generado en un archivo Markdown (.md) y usar un paquete para convertir el Markdown en PDF: https://www.npmjs.com/package/markdown-pdf

    ¿Sería esta una buena alternativa?