Module tablewriter.tablewriter

Created on some day of 2018 or 2019.

@author: cottephi@gmail.com

Expand source code
# -*- coding: utf-8 -*-
"""Created on some day of 2018 or 2019.

@author: cottephi@gmail.com
"""

import os
import time
import tempfile
import dask.dataframe as dd
from transparentpath import TransparentPath as Path
from typing import Any, Dict, TypeVar, Union, Optional

import pandas as pd

T = TypeVar("T")
DONE = "  ...done"
LATEX_TEXT_COLOR = r"\textcolor{"


class TableWriter(object):
    r"""Produce a ready-to-compile .tex file containing a table from a pandas or dask DataFrame object.

    Can also compile the .tex to produce a .pdf.

    Additional LaTeX packages can be requested through the *packages* argument. The given DataFrame is copied, so
    any modification of the said DataFrame after instantiation of the TableWriter object has no effect on the
    TableWriter object, and vice-versa.
    You should not however modify the DataFrame contained in the TableWriter object, you should just create the
    TableWriter once you are sure that your DataFrame is ready.

    TableWriter uses pandas.DataFrame.to_latex and adds some more options to produce the .tex and the .pdf. Any
    option that must be given to the to_latex method can be given to TableWriter through the *to_latex_args*
    argument.

    Note that the content of the DataFrame will be converted to string. Outside of math mode, a '\' is put before
    each of the characters "_", "^", "%" and "&" unless it is already escaped. Text enclosed between two '$' is
    treated as math mode and left untouched.

    Examples
    --------

    >>> from tablewriter import TableWriter  # doctest: +SKIP
    >>> import pandas as pd  # doctest: +SKIP
    >>> df = pd.DataFrame(  # doctest: +SKIP
    ...     columns=["$x$", "$x^2$"], index=["$A_{00}$", "$A_{01}$"], data=[[2, 4], [3, 9]]
    ... )
    >>> table = TableWriter(data=df, path_output="output")  # doctest: +SKIP
    >>> table.compile()  # doctest: +SKIP

    TableWriter runs pdflatex to create the pdf, so you need a working installation of it.
    In order not to flood the stdout with pdflatex output, which is quite verbose, it is silenced by default. If the
    compilation fails TableWriter will raise 'ValueError: Failed to compile pdf'. In that case, you can try to
    recompile using

    >>> table.compile(silenced=False)  # doctest: +SKIP

    to have the full output and try to understand what went wrong.

    By default, auxiliary files produced by LaTeX are deleted and the .tex and the .pdf are kept. You can change
    this default behavior :

    >>> # To keep all files :
    >>> table.compile(clean=False)  # doctest: +SKIP
    >>> # Or on the contrary, to remove also .tex :
    >>> table.compile(clean_tex=True)  # doctest: +SKIP

    You can also do a compilation that will reuse the .tex file if it already exists:

    >>> table.compile(recreate=False)  # doctest: +SKIP

    Here is a more complete example of table generation :

    >>> from tablewriter import TableWriter  # doctest: +SKIP
    >>> import pandas as pd  # doctest: +SKIP
    >>> df = pd.DataFrame(  # doctest: +SKIP
    ...     columns=["$x$", "$x^2$"], index=["$A_{00}$", "$A_{01}$"], data=[["2", "$2^2$"], ["3", "$3^2$"]]
    ... )
    >>> table = TableWriter(  # doctest: +SKIP
    ...     path_output="path_output",
    ...     data=df,
    ...     to_latex_args={"column_format": "lr"},
    ...     label="tab::example",
    ...     caption="TableWriter example",
    ...     packages={"fontenc": {"T1": None}},
    ...     hide_numbering=True,
    ... )
    >>> table.compile()  # doctest: +SKIP

    """

    # //////////////////
    # // Initialisers //
    # //////////////////

    def __init__(
        self,
        path_output: Optional[Union[str, Path]] = None,
        data: Optional[Union[pd.DataFrame, dd.DataFrame]] = None,
        path_input: Optional[Union[str, Path]] = None,
        to_latex_args: Optional[Dict[str, Any]] = None,
        label: Optional[str] = None,
        caption: Optional[str] = None,
        packages: Optional[Dict[str, Union[None, Dict[str, Union[None, str]]]]] = None,
        read_from_file_args: Optional[Dict] = None,
        paperwidth: Union[int, float] = 0,
        paperheight: Union[int, float] = 0,
        number: int = 1,
        hide_numbering: bool = False,
    ):
        """Exactly one of *data* and *path_input* must be given; every other parameter is optional.

        The *to_latex_args* and *packages* dictionaries are copied, so the caller's objects are never modified.

        Parameters
        ----------
        path_output: Union[str, TransparentPath]
            Path to the .tex file to create. If the path's suffix is not .tex, it will be changed to .tex.
            You can set this path later using mytable.path = ... or mytable.path_output = ...
            (Default value = None)
        data: Union[pd.DataFrame, dd.DataFrame]
            Data to transform to table. Can not be specified alongside path_input. (Default value = None)
        path_input: Union[str, TransparentPath]
            Path to the file to use to read the DataFrame from. Can not be specified alongside data.
            (Default value = None)
        to_latex_args: Dict[str, Any]
            Dict of arguments to give to the DataFrame.to_latex method. See valid arguments at
            https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_latex.html
            (Default value = None)
        label: str
            Label to use for the table (callable by LaTeX's \\ref)
            (Default value = None)
        caption: str
            Caption to use for the table
            (Default value = None)
        packages: Dict[str, Dict[str, str]]
            Packages to use. Keys of first dict are the package names. Values are dict of option: value options to
            use with the package. An option whose value is None is written without '=value'. The options dict
            can be empty or None if no options are to be specified.
            (Default value = None)
        read_from_file_args: Dict
            Dict of argument to pass to the read method.
        paperwidth: Union[int, float]
            Width of the output table in the pdf. If 0, TableWriter will try to guess a default value from table
            content, but that is not very accurate. (Default value = 0)
        paperheight: Union[int, float]
            Height of the page of the output pdf. If table is too long to fit on the page, it will be split in
            several pages using longtable package. (Default value = 0)
        number: int
            Number LaTeX should show after 'Table'.  (Default value = 1)
        hide_numbering: bool
            Do not show 'Table N' in the caption. (Default value = False)

        Raises
        ------
        ValueError
            If neither or both of data and path_input are given, or if the data is not a DataFrame.
        """

        if data is None and path_input is None:
            raise ValueError("You must give data or path_input argument.")
        if data is not None and path_input is not None:
            raise ValueError("You must give data or path_input argument, but not both.")

        # Only a DataFrame handed over by the caller needs copying: data read from a file or computed from dask
        # is a fresh object already.
        needs_copy = True
        if path_input is not None:
            if not isinstance(path_input, Path):
                path_input = Path(path_input)
            data = path_input.read(**(read_from_file_args or {}))
            needs_copy = False

        if isinstance(data, dd.DataFrame):
            # head() only looks at the first partition by default, so materialise the whole collection instead.
            data = data.compute()
            needs_copy = False

        if not isinstance(data, pd.DataFrame):
            raise ValueError("Data must be a DataFrame")

        self.header = ""
        self.body = "\\begin{document}\\end{document}"
        self.footer = ""

        self.data = data.copy() if needs_copy else data
        # Copy the option containers: defaults are injected below and must not leak into the caller's dicts.
        self.to_latex_args = dict(to_latex_args or {})
        self.packages = {name: dict(options or {}) for name, options in (packages or {}).items()}
        self.path = path_output  # the setter normalises the type and the suffix
        self.label = label
        self.caption = caption

        self.paperwidth, self.paperheight = None, None
        self._get_dimensions(paperwidth, paperheight)
        self.hide_numbering = hide_numbering

        self.special_char = ["_", "^", "%", "&"]

        if self.caption is not None:
            self.to_latex_args["caption"] = self.caption
        if self.label is not None:
            self.to_latex_args["label"] = self.label
        self.to_latex_args.setdefault("column_format", "|l|" + len(self.data.columns) * "c" + "|")
        self.to_latex_args.setdefault("escape", True)
        self.to_latex_args.setdefault("longtable", True)

        geometry = self.packages.setdefault("geometry", {})
        geometry.setdefault("margin", "0.5cm")
        geometry.setdefault("paperwidth", f"{self.paperwidth}cm")
        geometry.setdefault("paperheight", f"{self.paperheight}cm")
        self.packages.setdefault("caption", {})
        self.packages.setdefault("xcolor", {"dvipsnames": None})
        self.packages.setdefault("booktabs", {})
        self.packages.setdefault("inputenc", {"utf8": None})
        if self.to_latex_args["longtable"] is True:
            self.packages.setdefault("longtable", {})

        # LaTeX increments the counter before printing it, so store the requested number minus one.
        number = int(number)
        if number > 0:
            number -= 1
        self.number = str(number)

    @property
    def path(self) -> Path:
        """Path of the .tex file to produce, or None if it has not been set."""
        return self.__path

    @property
    def path_output(self) -> Path:
        """Alias of :attr:`path`."""
        return self.__path

    @path.setter
    def path(self, apath: Union[str, Path, None]):
        """Set the output path, converting it to a TransparentPath and forcing the .tex suffix."""
        if apath is not None:
            if not isinstance(apath, Path):
                apath = Path(apath)
            if apath.suffix != ".tex":
                apath = apath.with_suffix(".tex")
        self.__path = apath

    @path_output.setter
    def path_output(self, apath: Union[str, Path, None]):
        """Alias of the :attr:`path` setter."""
        self.path = apath

    # ////////////
    # // Makers //
    # ////////////

    def _get_dimensions(self, paperwidth, paperheight):
        """Store the page dimensions (in cm), guessing each one that was left to 0.

        A dimension explicitly given by the user is never overwritten. When the height is guessed, longtable is
        enabled only if the table does not fit on a 24 cm high page, unless the user already chose a value for
        'longtable' in to_latex_args.

        Parameters
        ----------
        paperwidth: Union[int, float]
            Requested page width, 0 to guess it from the column names and index labels.
        paperheight: Union[int, float]
            Requested page height, 0 to guess it from the number of rows.
        """

        self.paperwidth = paperwidth
        self.paperheight = paperheight

        if not self.paperwidth:
            width = 0
            if not self.data.empty:
                # Rough estimate: total header length plus the longest index label, 0.178 cm per character
                header_chars = sum(len(name) for name in self.data.columns.dropna().astype(str))
                index_chars = max((len(ind) for ind in self.data.index.dropna().astype(str)), default=0)
                width = (header_chars + index_chars) * 0.178 + 0.8 * len(self.data.columns) + 1
            self.paperwidth = max(width, 9)

        if not self.paperheight:
            height = 0
            if not self.data.empty:
                height = 3.5 + len(self.data.index) * 0.45
                self.to_latex_args.setdefault("longtable", height > 24)
            # Keep the page between 4 cm and 24 cm high
            self.paperheight = min(max(height, 4), 24)

    def _make_header(self) -> None:
        """Makes the header of the tex file: document class, packages and table counter."""

        lines = ["\\documentclass{article}\n"]

        for package, options in self.packages.items():
            # An option without value (None or empty string) is a plain flag, e.g. [dvipsnames]
            rendered = [
                name if value is None or value == "" else f"{name}={value}"
                for name, value in (options or {}).items()
            ]
            if rendered:
                lines.append("\\usepackage[" + ",".join(rendered) + "]{" + package + "}\n")
            else:
                lines.append("\\usepackage{" + package + "}\n")

        lines.append("\\begin{document}\n\\nonstopmode\n\\setcounter{table}{" + str(self.number) + "}\n")
        self.header = "".join(lines)

    def _make_body(self) -> None:
        """Makes the main body of tex file.

        For an empty DataFrame the body is a single line of text instead of a table.
        """

        if self.data.empty:
            prefix = f"{self.caption}: " if self.caption is not None else ""
            self.body = prefix + "Empty Dataframe\n"
            return

        # Needed if you do not want long names to be truncated with "..." by pandas, which would corrupt the
        # .tex file. pandas older than 1.0.0 spells "no limit" -1, newer versions None. The context manager
        # restores the previous value whatever happens.
        unlimited = -1 if pd.__version__.split(".")[0] == "0" else None
        with pd.option_context("display.max_colwidth", unlimited):
            body = self.data.to_latex(**self.to_latex_args)

        if self.caption is not None and self.hide_numbering:
            body = body.replace("\\caption{", "\\caption*{")

        if self.caption is not None or self.label is not None:
            body = body.replace("\n\\toprule", "\\\\\n\\toprule")
        # Collapse a doubled line break possibly introduced by the replacement above
        self.body = body.replace("\\\\\\\\", "\\\\")

    def _make_footer(self) -> None:
        """Makes the footer of tex file."""

        self.footer = "\\end{document}\n"

    def _escape_special_chars(self, s: T) -> T:
        """Will add '\\' before special characters outside of mathmode to given
        string.

        A character preceded by an unescaped backslash is left as it is; in particular an escaped dollar
        does not start or end math mode.

        Parameters
        ----------
        s: T
            If s is not a string, will return it without changing anything

        Returns
        -------
        T
            String with special char escaped, or unmodified non-string object
        """

        if not isinstance(s, str):
            return s
        pieces = []
        in_math = False
        escaped = False  # True when the previous character is a backslash that escapes the current one
        for c in s:
            if escaped:
                pieces.append(c)
                escaped = False
            elif c == "\\":
                pieces.append(c)
                escaped = True
            elif c == "$":
                in_math = not in_math
                pieces.append(c)
            elif not in_math and c in self.special_char:
                pieces.append("\\" + c)
            else:
                pieces.append(c)
        return "".join(pieces)

    # //////////////////
    # // Output files //
    # //////////////////

    def build(self):
        """Build header, body and footer.

        pandas' own 'escape' option is replaced by :meth:`_escape_special_chars`, applied to the index, the
        columns and every cell. 'escape' is then set to False so that nothing is escaped twice.
        """
        if self.to_latex_args.get("escape"):
            escape = self._escape_special_chars
            self.data.index = [escape(s) for s in self.data.index]
            self.data.columns = [escape(s) for s in self.data.columns]
            # DataFrame.applymap was renamed DataFrame.map in pandas 2.1
            elementwise = getattr(pd.DataFrame, "map", None) or pd.DataFrame.applymap
            self.data = elementwise(self.data, escape)
        self.to_latex_args["escape"] = False
        self._make_header()
        self._make_body()
        self._make_footer()

    def create_tex_file(self) -> None:
        """Creates the tex file.

        Raises
        ------
        ValueError
            If no output path has been set.
        """

        if self.__path is None:
            raise ValueError("Must specify a file path.")
        # Build before opening the file so that a failure does not leave an empty .tex behind
        self.build()
        with open(self.__path, "w") as outfile:
            outfile.write(self.header + self.body + self.footer)

    def compile(
        self, silenced: bool = True, recreate: bool = True, clean: bool = True, clean_tex: bool = False,
    ) -> None:
        """Compile the pdf by running pdflatex three times on the .tex file.

        If the output path is on GCS, the .tex file is compiled in a local temporary location and the produced
        files are then uploaded next to it.

        Parameters
        ----------
        silenced: bool
            Will or will not print on terminal the pdflatex output. (Default value = True)
        recreate: bool
            If False and .tex file exists, compile from it. If True, recreate the .tex file first.
        clean: bool
            Removes the auxiliary files created by the compilation, keeping the .tex and the .pdf file.
        clean_tex: bool
            Also removes the .tex file, leaving only the .pdf.

        Returns
        -------
        None

        Raises
        ------
        ValueError
            If no path is set, if the .tex file can not be found or if the compilation fails.
        """
        import subprocess

        if self.__path is None:
            raise ValueError("Must specify a file path.")
        if recreate or not self.__path.is_file():
            self.create_tex_file()

        if not self.__path.is_file():
            raise ValueError(f"Tex file {self.__path} not found.")

        is_remote = self.__path.fs_kind == "gcs"
        path_to_compile = self.__path
        if is_remote:
            local_copy = tempfile.NamedTemporaryFile(delete=False, suffix=".tex")
            local_copy.close()
            self.__path.get(local_copy.name)
            path_to_compile = Path(local_copy.name, fs="local")

        # Arguments are passed as a list, without a shell, so that any character is allowed in the path
        command = [
            "pdflatex",
            "-synctex=1",
            "-interaction=nonstopmode",
            f"-output-directory={path_to_compile.parent}",
            str(path_to_compile),
        ]
        sink = subprocess.DEVNULL if silenced else None

        failed = False
        for attempt in range(3):
            if attempt:
                time.sleep(0.5)
            try:
                status = subprocess.run(command, stdout=sink, check=False).returncode
            except OSError:
                # pdflatex is missing or can not be executed
                status = -1
            failed = failed or status != 0

        if is_remote:
            # Upload whatever was produced (including logs) even if the compilation failed
            for path in path_to_compile.with_suffix("").glob("*"):
                path_gcs = self.__path.with_suffix(path.suffix)
                path.put(path_gcs)
                path.rm()

        if failed:
            raise ValueError("Failed to compile pdf")

        if clean:
            self.clean(clean_tex)

    def clean(self, clean_tex: bool = False) -> None:
        """Clean auxiliary files produced by latex. Also remove .tex if clean_tex is
        True.

        Only files named after the output file followed by a known LaTeX auxiliary extension are removed, so
        unrelated files that merely share the same name prefix are left alone.

        Parameters
        ---------
        clean_tex: bool
            To also remove the .tex file

        Returns
        -------
        None

        Raises
        ------
        ValueError
            If no output path has been set.
        """
        if self.__path is None:
            raise ValueError("Must specify a file path.")
        removable = {".aux", ".log", ".out", ".toc", ".lof", ".lot", ".fls", ".fdb_latexmk", ".synctex.gz"}
        if clean_tex:
            removable.add(".tex")
        stem = self.__path.stem
        for f in self.__path.with_suffix("").glob("*"):
            name = f.name
            if name.startswith(stem) and name[len(stem):] in removable:
                f.rm()


def remove_color(obj: str) -> str:
    r"""Remove coloration of given object.

    Strips the first ``\textcolor{<color>}{<content>}`` wrapper found in the string and keeps its content.
    The closing brace is found by matching braces, so content that itself contains braces
    (e.g. ``$A_{00}$``) is preserved.

    Parameters
    ----------
    obj: str
        The object from which to remove the color. Anything that is not a string (e.g. NaN) is returned
        unchanged.

    Return
    ------
    str
        Object without color
    """

    if not isinstance(obj, str):
        return obj
    start = obj.find(LATEX_TEXT_COLOR)
    if start == -1:
        return obj

    # End of the colour name, then opening brace of the coloured content
    color_end = obj.find("}", start + len(LATEX_TEXT_COLOR))
    content_open = obj.find("{", color_end + 1) if color_end != -1 else -1
    if content_open == -1 or obj[color_end + 1:content_open].strip():
        # Not a well-formed \textcolor{...}{...}: leave the text alone
        return obj

    depth = 0
    i = content_open
    while i < len(obj):
        c = obj[i]
        if c == "\\":
            # Skip the escaped character, so that \{ and \} are not counted
            i += 2
            continue
        if c == "{":
            depth += 1
        elif c == "}":
            depth -= 1
            if depth == 0:
                return obj[:start] + obj[content_open + 1:i] + obj[i + 1:]
        i += 1

    # Unbalanced braces: only the opening of the wrapper can be removed
    return obj[:start] + obj[content_open + 1:]


def set_color(obj: Any, color: str) -> str:
    """Add color to a given object.

    Parameters
    ----------
    obj : Any
        The object to which color must be added. Missing scalar values (None, NaN, NaT...) are returned
        unchanged; list-like objects are colored as a whole through their string representation.
    color: str
        Must be a valid LaTeX color string

    Return
    ------
    str
        "\\textcolor{color}{str(obj)}"
    """
    # pd.isna returns an array for list-likes, which can not be used as a condition: only test scalars
    if pd.api.types.is_scalar(obj) and pd.isna(obj):
        return obj
    return LATEX_TEXT_COLOR + color + "}{" + str(obj) + "}"


# noinspection PyTypeChecker
def set_color_dataframe(
    df: Union[pd.DataFrame, pd.Series], color: str, color_index: bool = False, color_columns: bool = False,
) -> Union[pd.DataFrame, pd.Series]:
    r"""Sets color for the entire DataFrame's or Series's entries.

    The input is not modified: a colored copy is returned. It can be combined with ``mask`` to change the
    color of some elements only, under some condition (see Examples).

    Parameters
    ----------
    df: Union[pd.DataFrame, pd.Series]
        The DataFrame or Series to change the colors of
    color: str
        LaTeX-recognized color string
    color_index: bool
        To color the index too
        Default False.
    color_columns: bool
        To color the columns (or Series name if df is a Series) too
        Default False.

    Returns
    -------
    Union[pd.DataFrame, pd.Series]
        Colored DataFrame or Series (dtype will be str)

    Examples
    --------

    dff = dff.mask(dff < 0, set_color_dataframe(dff, "red"))
    dff = pd.DataFrame(columns=dff.columns, index=dff.index, data=dff.values.astype(str))
    dff = dff.mask(dff == "nan", "")
    writer = TableWriter(data=dff)

    """

    def colorize(x):
        return set_color(x, color)

    is_frame = isinstance(df, pd.DataFrame)
    if is_frame:
        # DataFrame.applymap was renamed DataFrame.map in pandas 2.1
        elementwise = getattr(pd.DataFrame, "map", None) or pd.DataFrame.applymap
        df_c = elementwise(df, colorize)
    else:
        df_c = df.apply(colorize)
    if color_index:
        df_c.index = [colorize(x) for x in df_c.index]
    if color_columns:
        if is_frame:
            df_c.columns = [colorize(x) for x in df_c.columns]
        else:
            df_c.name = colorize(df_c.name)
    return df_c

Functions

def remove_color(obj: str) ‑> str

Remove coloration of given object.

Parameters

obj : str
The object from which to remove the color

Return

str Object without color

Expand source code
def remove_color(obj: str) -> str:
    r"""Remove coloration of given object.

    Strips the first ``\textcolor{<color>}{<content>}`` wrapper found in the string and keeps its content.
    The closing brace is found by matching braces, so content that itself contains braces
    (e.g. ``$A_{00}$``) is preserved.

    Parameters
    ----------
    obj: str
        The object from which to remove the color. Anything that is not a string (e.g. NaN) is returned
        unchanged.

    Return
    ------
    str
        Object without color
    """

    if not isinstance(obj, str):
        return obj
    start = obj.find(LATEX_TEXT_COLOR)
    if start == -1:
        return obj

    # End of the colour name, then opening brace of the coloured content
    color_end = obj.find("}", start + len(LATEX_TEXT_COLOR))
    content_open = obj.find("{", color_end + 1) if color_end != -1 else -1
    if content_open == -1 or obj[color_end + 1:content_open].strip():
        # Not a well-formed \textcolor{...}{...}: leave the text alone
        return obj

    depth = 0
    i = content_open
    while i < len(obj):
        c = obj[i]
        if c == "\\":
            # Skip the escaped character, so that \{ and \} are not counted
            i += 2
            continue
        if c == "{":
            depth += 1
        elif c == "}":
            depth -= 1
            if depth == 0:
                return obj[:start] + obj[content_open + 1:i] + obj[i + 1:]
        i += 1

    # Unbalanced braces: only the opening of the wrapper can be removed
    return obj[:start] + obj[content_open + 1:]
def set_color(obj: Any, color: str) ‑> str

Add color to a given object.

Parameters

obj : Any
The object to which color must be added.
color : str
Must be a valid LateX color string

Return

str "\textcolor{color}{str(obj)}"

Expand source code
def set_color(obj: Any, color: str) -> str:
    """Add color to a given object.

    Parameters
    ----------
    obj : Any
        The object to which color must be added. Missing scalar values (None, NaN, NaT...) are returned
        unchanged; list-like objects are colored as a whole through their string representation.
    color: str
        Must be a valid LaTeX color string

    Return
    ------
    str
        "\\textcolor{color}{str(obj)}"
    """
    # pd.isna returns an array for list-likes, which can not be used as a condition: only test scalars
    if pd.api.types.is_scalar(obj) and pd.isna(obj):
        return obj
    return LATEX_TEXT_COLOR + color + "}{" + str(obj) + "}"
def set_color_dataframe(df: Union[pandas.core.frame.DataFrame, pandas.core.series.Series], color: str, color_index: bool = False, color_columns: bool = False) ‑> Union[pandas.core.frame.DataFrame, pandas.core.series.Series]

Sets color for the entire DataFrame's or Series's entries.

To change the color of some elements in the dataframe under some condition

Parameters

df : Union[pd.DataFrame, pd.Series]
The DataFrame or Series to change the colors of
color : str
LateX-recognized color string Default ''
color_index : bool
To color the index too Default False.
color_columns : bool
To color the columns (or Series name if df is a Series) too Default False.
color_index : bool
whether to color index or not
color_columns : bool
whether to color columns or not

Returns

Union[pd.DataFrame, pd.Series]
Colored DataFrame or Series (dtype will be str)

Examples

dff = dff.mask(dff < 0, set_color_dataframe(dff, "red"))
dff = pd.DataFrame(columns=dff.columns, index=dff.index, data=dff.values.astype(str))
dff = dff.mask(dff == "nan", "")
writer = TableWriter(data=dff)

Expand source code
def set_color_dataframe(
    df: Union[pd.DataFrame, pd.Series], color: str, color_index: bool = False, color_columns: bool = False,
) -> Union[pd.DataFrame, pd.Series]:
    r"""Sets color for the entire DataFrame's or Series's entries.

    The input is not modified: a colored copy is returned. It can be combined with ``mask`` to change the
    color of some elements only, under some condition (see Examples).

    Parameters
    ----------
    df: Union[pd.DataFrame, pd.Series]
        The DataFrame or Series to change the colors of
    color: str
        LaTeX-recognized color string
    color_index: bool
        To color the index too
        Default False.
    color_columns: bool
        To color the columns (or Series name if df is a Series) too
        Default False.

    Returns
    -------
    Union[pd.DataFrame, pd.Series]
        Colored DataFrame or Series (dtype will be str)

    Examples
    --------

    dff = dff.mask(dff < 0, set_color_dataframe(dff, "red"))
    dff = pd.DataFrame(columns=dff.columns, index=dff.index, data=dff.values.astype(str))
    dff = dff.mask(dff == "nan", "")
    writer = TableWriter(data=dff)

    """

    def colorize(x):
        return set_color(x, color)

    is_frame = isinstance(df, pd.DataFrame)
    if is_frame:
        # DataFrame.applymap was renamed DataFrame.map in pandas 2.1
        elementwise = getattr(pd.DataFrame, "map", None) or pd.DataFrame.applymap
        df_c = elementwise(df, colorize)
    else:
        df_c = df.apply(colorize)
    if color_index:
        df_c.index = [colorize(x) for x in df_c.index]
    if color_columns:
        if is_frame:
            df_c.columns = [colorize(x) for x in df_c.columns]
        else:
            df_c.name = colorize(df_c.name)
    return df_c

Classes

class TableWriter (path_output: Union[str, transparentpath.gcsutils.transparentpath.TransparentPath, NoneType] = None, data: Union[pandas.core.frame.DataFrame, dask.dataframe.core.DataFrame, NoneType] = None, path_input: Union[str, transparentpath.gcsutils.transparentpath.TransparentPath, NoneType] = None, to_latex_args: Union[Dict[str, Any], NoneType] = None, label: Union[str, NoneType] = None, caption: Union[str, NoneType] = None, packages: Dict[str, Union[NoneType, Dict[str, Union[str, NoneType]]]] = None, read_from_file_args: Dict = None, paperwidth: Union[int, float] = 0, paperheight: Union[int, float] = 0, number: int = 1, hide_numbering: bool = False)

Class used to produce a ready-to-compile .tex file containing a table from a pandas or dask DataFrame object. Can also compile the .tex to produce a .pdf.

Handles using additional LaTeX packages through the packages argument. The given DataFrame is meant to be copied, so that any modification of the said DataFrame after instantiation of the TableWriter object has no effect on the TableWriter object, and vice-versa. You should not however modify the DataFrame contained in the TableWriter object; you should just create the TableWriter once you are sure that your DataFrame is ready.

TableWriter uses pandas.DataFrame.to_latex and adds some more options to produce the .tex and the .pdf. Any option that must be given to the to_latex method can be given to TableWriter through the to_latex_args argument.

Note that the content of the DataFrame will be converted to string. If the DataFrame contains one of the following characters ("_", "^", "%", "&") outside of math mode, a '\' is put before it. Math mode using '$' is handled: text between two '$' is left untouched.

Examples

>>> from tablewriter import TableWriter  # doctest: +SKIP
>>> import pandas as pd  # doctest: +SKIP
>>> df = pd.DataFrame(columns=["$x$", "$x^2$"],  # doctest: +SKIP
>>>                   index=["$A_{00}$", "$A_{01}$"], data=[[2, 4], [3, 9]])  # doctest: +SKIP
>>> table = TableWriter(df, path="ouput")  # doctest: +SKIP
>>> table.compile()  # doctest: +SKIP

TableWriter will use os.system('pdflatex …') to create the pdf, so you need a working installation of it. In order not to flood the stdout with pdflatex output, which is quite verbose, it is silenced by default. If the compilation fails TableWriter will raise 'ValueError: Failed to compile pdf'. In that case, you can try to recompile using

>>> table.compile(silenced=False)  # doctest: +SKIP

To have the full output and try to understand what went wrong.

By default, all files produced by LaTeX are deleted except the .tex and the .pdf. You can change this default behavior :

>>> # To keep all files :  # doctest: +SKIP
>>> table.compile(clean=False)  # doctest: +SKIP
>>> # Or on the contrary, to remove also .tex :  # doctest: +SKIP
>>> table.compile(clean_tex=True)  # doctest: +SKIP

You can also do a compilation that will reuse the .tex file if it already exists:

>>> table.compile(recreate=False)

Here is a more complete example of table generation :

>>> from tablewriter import TableWriter  # doctest: +SKIP
>>> import pandas as pd  # doctest: +SKIP
>>> df = pd.DataFrame(columns=["$x$", "$x^2$"], index=["$A_{00}$", "$A_{01}$"],    # doctest: +SKIP
>>>                   data=[["2", "$2^2$"], ["3", "$3^2$"]])  # doctest: +SKIP
>>> table = TableWriter(  # doctest: +SKIP
>>>     path="path_output",  # doctest: +SKIP
>>>     data=df,  # doctest: +SKIP
>>>     to_latex_args={"column_format": "lr"},  # doctest: +SKIP
>>>     label="tab::example",  # doctest: +SKIP
>>>     caption="TableWriter example",  # doctest: +SKIP
>>>     packages={"inputenc": {"T1": ""}},  # doctest: +SKIP
>>>     hide_numbering=True
>>> )  # doctest: +SKIP
>>> table.compile()  # doctest: +SKIP

All parameters are optional and can be modified by dedicated setters.

Parameters

path_output : Union[str, TransparentPath]
Path to the .tex file to create. If the path's suffix is not .tex, it will be changed to .tex. You can set this path later using mytable.path = … or mytable.path_output = … (Default value = None)
data : Union[pd.DataFrame, dd.DataFrame]
Data to transform to table. Can not be specified alongside path_input. (Default value = None)
path_input : Union[str, TransparentPath]
Path to the file to use to read the DataFrame from. Can not be specified alongside data. (Default value = None)
to_latex_args : Dict[str, Any]
Dict of arguments to give to the DataFrame.to_latex method. See valid arguments at https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_latex.html (Default value = None)
label : str
Label to use for the table (callable by LaTeX's \ref) (Default value = None)
caption : str
Caption to use for the table (Default value = None)
packages : Dict[str, Dict[str, str]]
Packages to use. Keys of first dict are the package names. values are dict of option: value options to use with the package. Can be empty if no options are to be specified. (Default value = None)
read_from_file_args : Dict
Dict of argument to pass to the read method.
paperwidth : Union[int, float]
Width of the output table in the pdf. If 0, TableWriter will try to guess a default value from table content, but that is not very accurate. (Default value = 0)

paperheight : Union[int, float]
Height of the page of the output pdf. If table is too long to fit on the page, it will be split in several pages using longtable package. (Default value = 0)
number : int
Number LaTeX should show after 'Table'. (Default value = 1)
hide_numbering : bool
Do not show 'Table N' in the caption. (Default value = False)

Expand source code
class TableWriter(object):
    """Produce a ready-to-compile .tex file containing a table from a pandas or dask DataFrame object.

    Can also compile the .tex to produce a .pdf.

    Handles using additional latex packages through the *packages* argument. The TableWriter is meant to work on
    its own copy of the DataFrame, so that any modification of the caller's DataFrame after instantiation of the
    TableWriter object has no effect on the TableWriter object, and vice-versa
    (NOTE(review): confirm that ``__init__`` really copies *data*).
    You should not however modify the DataFrame contained in the TableWriter object, you should just create the
    TableWriter once you are sure that your DataFrame is ready.

    TableWriter uses pandas.DataFrame.to_latex and adds some more options to produce the .tex and the .pdf. Any
    option that must be given to the to_latex method can be given to TableWriter through the *to_latex_args*
    argument.

    Unless the 'escape' entry of *to_latex_args* is set to False, a '\\' is put before every "_", "^", "%" and "&"
    found in the index, the column names and the string cells of the DataFrame.
    Math mode delimited by '$' is handled: its content is left untouched.

    Examples
    --------

    >>> from tablewriter import TableWriter  # doctest: +SKIP
    >>> import pandas as pd  # doctest: +SKIP
    >>> df = pd.DataFrame(columns=["$x$", "$x^2$"],  # doctest: +SKIP
    ...                   index=["$A_{00}$", "$A_{01}$"], data=[[2, 4], [3, 9]])
    >>> table = TableWriter(path_output="output", data=df)  # doctest: +SKIP
    >>> table.compile()  # doctest: +SKIP

    TableWriter calls the pdflatex executable to create the pdf, so you need a working installation of it.
    In order not to flood the stdout with pdflatex output, which is quite verbose, it is silenced by default. If the
    compilation fails TableWriter will raise 'ValueError: Failed to compile pdf'. In that case, you can try to
    recompile using

    >>> table.compile(silenced=False)  # doctest: +SKIP

    to have the full output and try to understand what went wrong.

    By default, all files produced by LaTeX are deleted except the .tex and the .pdf. You can change this default
    behavior :

    >>> # To keep all files :
    >>> table.compile(clean=False)  # doctest: +SKIP
    >>> # Or on the contrary, to remove also .tex :
    >>> table.compile(clean_tex=True)  # doctest: +SKIP

    You can also do a compilation that will reuse the .tex file if it already exists:

    >>> table.compile(recreate=False)  # doctest: +SKIP

    Here is a more complete example of table generation :

    >>> from tablewriter import TableWriter  # doctest: +SKIP
    >>> import pandas as pd  # doctest: +SKIP
    >>> df = pd.DataFrame(columns=["$x$", "$x^2$"], index=["$A_{00}$", "$A_{01}$"],    # doctest: +SKIP
    ...                   data=[["2", "$2^2$"], ["3", "$3^2$"]])
    >>> table = TableWriter(  # doctest: +SKIP
    ...     path_output="path_output",
    ...     data=df,
    ...     to_latex_args={"column_format": "lr"},
    ...     label="tab::example",
    ...     caption="TableWriter example",
    ...     packages={"inputenc": {"T1": ""}},
    ...     hide_numbering=True
    ... )
    >>> table.compile()  # doctest: +SKIP

    """

    # //////////////////
    # // Initialisers //
    # //////////////////

    def __init__(
        self,
        path_output: Optional[Union[str, Path]] = None,
        data: Optional[Union[pd.DataFrame, dd.DataFrame]] = None,
        path_input: Optional[Union[str, Path]] = None,
        to_latex_args: Optional[Dict[str, Any]] = None,
        label: Optional[str] = None,
        caption: Optional[str] = None,
        packages: Dict[str, Union[None, Dict[str, Union[None, str]]]] = None,
        read_from_file_args: Dict = None,
        paperwidth: Union[int, float] = 0,
        paperheight: Union[int, float] = 0,
        number: int = 1,
        hide_numbering: bool = False,
    ):
        """Validate the inputs, load the data and fill in the LaTeX defaults.

        Every parameter has a default value, but exactly one of *data* and *path_input* must be given.
        The DataFrame and the *to_latex_args* / *packages* dicts given by the caller are copied, so the
        defaults added here and the escaping done at build time never leak back into the caller's objects.

        Parameters
        ----------
        path_output: Union[str, TransparentPath]
            Path to the .tex file to create. If the path's suffix is not .tex, it will be changed to .tex.
            You can set this path later using mytable.path = ... or mytable.path_output = ...
            (Default value = None)
        data: Union[pd.DataFrame, dd.DataFrame]
            Data to transform to table. Can not be specified alongside path_input. A dask DataFrame is fully
            computed into a pandas DataFrame. (Default value = None)
        path_input: Union[str, TransparentPath]
            Path to the file to use to read the DataFrame from. Can not be specified alongside data.
            (Default value = None)
        to_latex_args: Dict[str, Any]
            Dict of arguments to give to the DataFrame.to_latex method. See valid arguments at
            https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_latex.html
            (Default value = None)
        label: str
            Label to use for the table (callable by LaTeX's \\ref)
            (Default value = None)
        caption: str
            Caption to use for the table
            (Default value = None)
        packages: Dict[str, Dict[str, str]]
            Packages to use. Keys of first dict are the package names. Values are dict of option: value options to
            use with the package. Can be empty (or None) if no options are to be specified.
            (Default value = None)
        read_from_file_args: Dict
            Dict of arguments to pass to the read method of path_input. (Default value = None)
        paperwidth: Union[int, float]
            Width of the output table in the pdf. If 0, TableWriter will try to guess a default value from table
            content, but that is not very accurate. (Default value = 0)
        paperheight: Union[int, float]
            Height of the page of the output pdf. If table is too long to fit on the page, it will be split in
            several pages using longtable package. (Default value = 0)
        number: int
            Number LaTeX should show after 'Table'.  (Default value = 1)
        hide_numbering: bool
            Do not show 'Table N' in the caption. (Default value = False)

        Raises
        ------
        ValueError
            If neither or both of *data* and *path_input* are given, or if the resulting data is not a DataFrame.
        """

        if data is None and path_input is None:
            raise ValueError("You must give data or path_input argument.")
        if data is not None and path_input is not None:
            raise ValueError("You must give data or path_input argument, but not both.")

        if path_input is not None:
            if read_from_file_args is None:
                read_from_file_args = {}
            if not isinstance(path_input, Path):
                path_input = Path(path_input)
            data = path_input.read(**read_from_file_args)

        if isinstance(data, dd.DataFrame):
            # head(n) only looks into the first partition by default and would silently drop rows of a
            # multi-partition DataFrame; compute() materialises all of them.
            data = data.compute()
        elif not isinstance(data, pd.DataFrame):
            raise ValueError("Data must be a DataFrame")
        elif path_input is None:
            # build() rewrites index, columns and cells: work on a private copy of the caller's DataFrame.
            data = data.copy()

        self.header = ""
        self.body = "\\begin{document}\\end{document}"
        self.footer = ""

        self.data = data
        # Private copies: defaults are injected into both dicts below. A package whose options are None is
        # treated as a package without options.
        self.to_latex_args = dict(to_latex_args) if to_latex_args is not None else {}
        self.packages = {
            name: dict(options) if options is not None else {} for name, options in (packages or {}).items()
        }
        self.label = label
        self.caption = caption
        self.number = number
        self.hide_numbering = hide_numbering

        self.special_char = ["_", "^", "%", "&"]

        # May set to_latex_args["longtable"], so it has to run before the defaults below.
        self.paperwidth, self.paperheight = None, None
        self._get_dimensions(paperwidth, paperheight)

        if self.caption is not None:
            self.to_latex_args["caption"] = self.caption
        if self.label is not None:
            self.to_latex_args["label"] = self.label
        self.to_latex_args.setdefault("column_format", "|l|" + len(self.data.columns) * "c" + "|")
        self.to_latex_args.setdefault("escape", True)
        self.to_latex_args.setdefault("longtable", True)

        geometry = self.packages.setdefault("geometry", {})
        # A user-provided margin must win over the default one.
        geometry.setdefault("margin", "0.5cm")
        geometry.setdefault("paperwidth", f"{self.paperwidth}cm")
        geometry.setdefault("paperheight", f"{self.paperheight}cm")
        self.packages.setdefault("caption", {})
        self.packages.setdefault("xcolor", {"dvipsnames": None})
        self.packages.setdefault("booktabs", {})
        self.packages.setdefault("inputenc", {"utf8": None})
        if self.to_latex_args["longtable"] is True:
            self.packages.setdefault("longtable", {})

        # \setcounter expects the number of the *previous* table, stored as a string for the header.
        if isinstance(self.number, str):
            self.number = int(self.number)
        if self.number > 0:
            self.number -= 1
        self.number = str(int(self.number))

        # Normalise the output path (TransparentPath with a .tex suffix), or keep None.
        self.__path = path_output
        if self.__path is not None:
            if not isinstance(self.__path, Path):
                self.__path = Path(self.__path)
            if self.__path.suffix != ".tex":
                self.__path = self.__path.with_suffix(".tex")

    @property
    def path(self) -> Path:
        """Path of the .tex file to write, or None when it has not been set."""
        return self.__path

    @property
    def path_output(self) -> Path:
        """Alias of ``path``: returns the same stored output path."""
        return self.__path

    @path.setter
    def path(self, apath: Union[str, Path, None]):
        if apath is not None:
            if not isinstance(apath, Path):
                apath = Path(apath)
            if apath.suffix != ".tex":
                apath = apath.with_suffix(".tex")
        self.__path = apath

    @path_output.setter
    def path_output(self, apath: Union[str, Path, None]):
        # Delegates to the ``path`` setter so both names share the same handling of the value.
        self.path = apath

    # ////////////
    # // Makers //
    # ////////////

    def _get_dimensions(self, paperwidth, paperheight):

        self.paperwidth = paperwidth
        self.paperheight = paperheight

        if self.paperwidth != 0 and self.paperheight != 0:
            return

        # Try to guess a kind of optimal width for the table
        if not self.data.empty:
            charswidth = (
                len("".join(list(self.data.columns.dropna().astype(str))))
                + max([len(ind) for ind in self.data.index.dropna().astype(str)])
            ) * 0.178
            self.paperwidth = charswidth + 0.8 * (len(self.data.columns)) + 1
            if self.paperwidth < 9:
                self.paperwidth = 9
        # Same for height
        if not self.data.empty:
            self.paperheight = 3.5 + (len(self.data.index)) * 0.45
            if self.paperheight < 4:
                self.paperheight = 4
            if self.paperheight > 24:
                # Limit page height to A4's 24 cm
                self.paperheight = 24
                self.to_latex_args["longtable"] = True
            else:
                self.to_latex_args["longtable"] = False

    def _make_header(self) -> None:
        """Makes the header of the tex file."""

        self.header = "\\documentclass{article}\n"

        # Add specified packages if any
        for p in self.packages:
            if len(self.packages[p]) == 0:
                self.header += p.join(["\\usepackage{", "}\n"])
            else:
                self.header += "\\usepackage["
                for o in self.packages[p]:
                    if self.packages[p][o] is None:
                        self.header += o + ","

                    else:
                        self.header += o + "=" + self.packages[p][o] + ","
                self.header = self.header[:-1] + "]{" + p + "}\n"
        self.header += "\\begin{document}\n\\nonstopmode\n\\setcounter{table}{" + self.number + "}\n"

    def _make_body(self) -> None:
        """Makes the main body of tex file."""

        # Needed if you do not want long names to be truncated with "..."
        # by pandas, giving bullshit results in the .tex file
        def_max_col = pd.get_option("display.max_colwidth")
        if pd.__version__.split(".")[0] == "0":
            # pandas is older than 1.0.0
            pd.set_option("display.max_colwidth", -1)
        else:
            # pandas is 1.0.0 or newer
            pd.set_option("display.max_colwidth", None)

        if self.data.empty:
            self.body = self.caption + ": Empty Dataframe\n"
            return
        else:
            self.body = self.data.to_latex(**self.to_latex_args)
        pd.set_option("display.max_colwidth", def_max_col)

        if self.caption is not None and self.hide_numbering:
            self.body = self.body.replace("\\caption{", "\\caption*{")

        if self.caption is not None or self.label is not None:
            self.body = self.body.replace("\n\\toprule", "\\\\\n\\toprule")
        self.body = self.body.replace("\\\\\\\\", "\\\\")

    def _make_footer(self) -> None:
        """Set ``self.footer`` to the line that closes the LaTeX document."""

        self.footer = "\\end{document}\n"

    def _escape_special_chars(self, s: T) -> T:
        """Will add '\\' before special characters outside of mathmode to given
        string.

        Parameters
        ----------
        s: T
            If s is not a string, will return it without changing anything

        Returns
        -------
        T
            String with special char escaped, or unmodified non-string object
        """

        if not isinstance(s, str):
            return s
        in_math = False
        previous_c = ""
        s2 = ""
        for c in s:
            if c == "$":
                in_math = not in_math
            if in_math:
                s2 += c
                previous_c = c
                continue
            if c in self.special_char and not previous_c == "\\":
                c = "\\" + c
            previous_c = c
            s2 += c
        return s2

    # //////////////////
    # // Output files //
    # //////////////////

    def build(self):
        """build header body and footer."""
        if "escape" in self.to_latex_args and self.to_latex_args["escape"]:
            self.data.index = [self._escape_special_chars(s) for s in self.data.index]
            self.data.columns = [self._escape_special_chars(s) for s in self.data.columns]
            self.data = self.data.applymap(self._escape_special_chars)
        self.to_latex_args["escape"] = False
        self._make_header()
        self._make_body()
        self._make_footer()

    def create_tex_file(self) -> None:
        """Creates the tex file."""

        with open(self.__path, "w") as outfile:
            # escape argument only works on column names. We need to apply
            # it on entier DataFrame, so do that then set it to False
            self.build()
            outfile.write(self.header)
            outfile.write(self.body)
            outfile.write(self.footer)

    # noinspection StandardShellInjection
    def compile(
        self, silenced: bool = True, recreate: bool = True, clean: bool = True, clean_tex: bool = False,
    ) -> None:
        """Compile the pdf.

        Parameters
        ----------
        silenced: bool
            Will or will not print on terminal the pdflatex output. (Default value = True)
        recreate: bool
            If False and .tex file exists, compile from it. If True, recreate the .tex file first.
        clean: bool
            Removes all files created by the compilation which are not the .tex or the .pdf file.
        clean_tex: bool
            Also removes the .tex file, leaving only the .pdf.

        Returns
        -------
        None
        """

        if self.__path is None:
            raise ValueError("Must specify a file path.")
        if recreate or not self.__path.is_file():
            self.create_tex_file()

        if not self.__path.is_file():
            raise ValueError(f"Tex file {self.__path} not found.")

        path_to_compile = self.__path
        if self.__path.fs_kind == "gcs":
            path_to_compile = tempfile.NamedTemporaryFile(delete=False, suffix=".tex")
            path_to_compile.close()
            self.__path.get(path_to_compile.name)
            path_to_compile = Path(path_to_compile.name, fs="local")

        command = "pdflatex -synctex=1 -interaction=nonstopmode "
        parent = path_to_compile.parent
        if parent != ".":
            command = f"{command} -output-directory=\"{parent}\""

        command = f"{command} \"{path_to_compile}\""
        if silenced:  # unix
            if os.name == "posix":
                command = f"{command} > /dev/null"
            else:  # windows
                command = f"{command} > NUL"
        x1 = os.system(command)
        time.sleep(0.5)
        x2 = os.system(command)
        time.sleep(0.5)
        x3 = os.system(command)

        if self.__path.fs_kind == "gcs":
            for path in path_to_compile.with_suffix("").glob("*"):
                path_gcs = self.__path.with_suffix(path.suffix)
                path.put(path_gcs)
                path.rm()

        if x1 != 0 or x2 != 0 or x3 != 0:
            raise ValueError("Failed to compile pdf")

        if clean:
            self.clean(clean_tex)

    def clean(self, clean_tex: bool = False) -> None:
        """Remove the files produced by latex, keeping the .pdf, .csv and .excel ones.

        The .tex file is kept too unless clean_tex is True.

        Parameters
        ----------
        clean_tex: bool
            To also remove the .tex file

        Returns
        -------
        None
        """
        preserved = [".pdf", ".csv", ".excel"] if clean_tex else [".pdf", ".csv", ".excel", ".tex"]
        # NOTE(review): candidates are whatever glob("*") yields on the suffix-less output path; confirm
        # against TransparentPath.glob that only the files of this table can match.
        for candidate in self.__path.with_suffix("").glob("*"):
            if candidate.suffix in preserved:
                continue
            candidate.rm()

Instance variables

var path : transparentpath.gcsutils.transparentpath.TransparentPath
Expand source code
@property
def path(self) -> Path:
    """Path of the .tex file to write, or None when it has not been set."""
    return self.__path
var path_output : transparentpath.gcsutils.transparentpath.TransparentPath
Expand source code
@property
def path_output(self) -> Path:
    """Alias of ``path``: returns the same stored output path."""
    return self.__path

Methods

def build(self)

Build the header, the body and the footer.

Expand source code
def build(self):
    """Build the header, the body and the footer of the .tex file.

    pandas' own ``escape`` option is not relied upon: when it is requested in ``to_latex_args``, the
    index, the column names and every cell are escaped here with ``_escape_special_chars``, then the
    option is switched off so nothing gets escaped twice, by pandas or by a later call to ``build``.
    """
    if self.to_latex_args.get("escape"):
        escape = self._escape_special_chars
        self.data.index = [escape(s) for s in self.data.index]
        self.data.columns = [escape(s) for s in self.data.columns]
        # DataFrame.applymap is deprecated since pandas 2.1 in favour of DataFrame.map. The check is
        # done on the class so that a column named "map" can not be mistaken for the method.
        if hasattr(pd.DataFrame, "map"):
            self.data = self.data.map(escape)
        else:
            self.data = self.data.applymap(escape)
    self.to_latex_args["escape"] = False
    self._make_header()
    self._make_body()
    self._make_footer()
def clean(self, clean_tex: bool = False) ‑> NoneType

Clean files produced by latex. Also remove .tex if clean_tex is True.

Parameters

clean_tex : bool
To also remove the .tex file

Returns

None
 
Expand source code
def clean(self, clean_tex: bool = False) -> None:
    """Remove the files produced by latex, keeping the .pdf, .csv and .excel ones.

    The .tex file is kept too unless clean_tex is True.

    Parameters
    ----------
    clean_tex: bool
        To also remove the .tex file

    Returns
    -------
    None
    """
    preserved = [".pdf", ".csv", ".excel"] if clean_tex else [".pdf", ".csv", ".excel", ".tex"]
    # NOTE(review): candidates are whatever glob("*") yields on the suffix-less output path; confirm
    # against TransparentPath.glob that only the files of this table can match.
    for candidate in self.__path.with_suffix("").glob("*"):
        if candidate.suffix in preserved:
            continue
        candidate.rm()
def compile(self, silenced: bool = True, recreate: bool = True, clean: bool = True, clean_tex: bool = False) ‑> NoneType

Compile the pdf.

Parameters

silenced : bool
Will or will not print on terminal the pdflatex output. (Default value = True)
recreate : bool
If False and .tex file exists, compile from it. If True, recreate the .tex file first.
clean : bool
Removes all files created by the compilation which are not the .tex or the .pdf file.
clean_tex : bool
Also removes the .tex file, leaving only the .pdf.

Returns

None
 
Expand source code
def compile(
    self, silenced: bool = True, recreate: bool = True, clean: bool = True, clean_tex: bool = False,
) -> None:
    """Compile the pdf by running pdflatex three times on the .tex file.

    When the output path lives on GCS, the .tex file is first copied to a local temporary file, compiled
    there, and the files found next to the temporary file are sent back beside the .tex file.

    Parameters
    ----------
    silenced: bool
        Will or will not print on terminal the pdflatex output. (Default value = True)
    recreate: bool
        If False and .tex file exists, compile from it. If True, recreate the .tex file first.
    clean: bool
        Removes all files created by the compilation which are not the .tex or the .pdf file.
    clean_tex: bool
        Also removes the .tex file, leaving only the .pdf.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If no path is set, if the .tex file can not be found, or if any pdflatex run fails or can not
        be started.
    """
    import subprocess

    if self.__path is None:
        raise ValueError("Must specify a file path.")
    if recreate or not self.__path.is_file():
        self.create_tex_file()

    if not self.__path.is_file():
        raise ValueError(f"Tex file {self.__path} not found.")

    remote = self.__path.fs_kind == "gcs"
    path_to_compile = self.__path
    if remote:
        # pdflatex needs a local file: fetch the remote .tex into a temporary one.
        local_copy = tempfile.NamedTemporaryFile(delete=False, suffix=".tex")
        local_copy.close()
        self.__path.get(local_copy.name)
        path_to_compile = Path(local_copy.name, fs="local")

    # Argument list instead of a shell string: paths holding quotes, spaces or '$' are passed verbatim.
    command = ["pdflatex", "-synctex=1", "-interaction=nonstopmode"]
    parent = path_to_compile.parent
    if parent != ".":
        command.append(f"-output-directory={parent}")
    command.append(f"{path_to_compile}")

    # Only stdout is silenced, as with the former '> /dev/null' redirection.
    sink = subprocess.DEVNULL if silenced else None
    launch_error = None
    statuses = []
    for attempt in range(3):
        if attempt > 0:
            time.sleep(0.5)
        try:
            statuses.append(subprocess.run(command, stdout=sink).returncode)
        except OSError as error:
            # pdflatex missing or not executable: reported below like any failed run.
            launch_error = error
            statuses.append(1)

    if remote:
        # Send every file sharing the temporary stem back next to the remote .tex, then drop it.
        for produced in path_to_compile.with_suffix("").glob("*"):
            path_gcs = self.__path.with_suffix(produced.suffix)
            produced.put(path_gcs)
            produced.rm()

    if any(status != 0 for status in statuses):
        raise ValueError("Failed to compile pdf") from launch_error

    if clean:
        self.clean(clean_tex)
def create_tex_file(self) ‑> NoneType

Creates the tex file.

Expand source code
def create_tex_file(self) -> None:
    """Build the document and write it to the output path.

    Raises
    ------
    ValueError
        If no output path has been set.
    """

    if self.__path is None:
        raise ValueError("Must specify a file path.")

    # Build first: if it fails, an existing .tex file is not replaced by a truncated one.
    self.build()
    with open(self.__path, "w") as outfile:
        outfile.write(self.header)
        outfile.write(self.body)
        outfile.write(self.footer)