Skip to content

Recipes

The source code of the recipes is in the /recipes directory of the odfdo sources. Most recipes are autonomous scripts that actually modify ODF sample files; you can check the results in the recipes/recipes_output directory.

How to write hello world in a text document

Create a basic text document containing a “Hello World” paragraph.

recipes/how_to_write_hello_world_in_a_text_document.py
#!/usr/bin/env python
"""Create a basic text document containing a "Hello World" paragraph.
"""
import os
from pathlib import Path

from odfdo import Document, Paragraph

_DOC_SEQUENCE = 3
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_hello"
TARGET = "document.odt"


def save_new(document: Document, name: str):
    """Save `document` as `name` inside OUTPUT_DIR.

    The output directory is created if it is missing, and the destination
    path is printed before the document is written.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR / name
    print("Saving:", destination)
    document.save(destination, pretty=True)


def main():
    """Build a text document holding one "Hello World" paragraph and save it."""
    doc = Document("text")
    content = doc.body
    # Empty the body before adding our own paragraph.
    content.clear()
    content.append(Paragraph("Hello World"))

    test_unit(doc)
    save_new(doc, TARGET)


def test_unit(document: Document) -> None:
    """Check the body text; does nothing unless ODFDO_TESTING is set."""
    testing = "ODFDO_TESTING" in os.environ
    if not testing:
        return
    body_text = str(document.body)
    print(body_text)
    assert body_text == "Hello World\n"


if __name__ == "__main__":
    main()

How to write hello world in a spreadsheet document

Create a basic spreadsheet with “Hello World” in the first cell.

recipes/how_to_write_hello_world_in_a_spreadsheet_document.py
#!/usr/bin/env python
"""Create a basic spreadsheet with "Hello World" in the first cell.
"""
import os
from pathlib import Path

from odfdo import Document, Table

_DOC_SEQUENCE = 5
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_ods"
TARGET = "document.ods"


def save_new(document: Document, name: str):
    """Save `document` as `name` inside OUTPUT_DIR.

    The output directory is created if it is missing, and the destination
    path is printed before the document is written.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR / name
    print("Saving:", destination)
    document.save(destination, pretty=True)


def main():
    """Build a spreadsheet with "Hello World" in cell A1 and save it."""
    doc = Document("spreadsheet")
    content = doc.body
    content.clear()

    # One table, with a single value written in its first cell.
    sheet = Table("Empty Table")
    sheet.set_value("A1", "Hello World")
    content.append(sheet)
    test_unit(doc)
    save_new(doc, TARGET)


def test_unit(document: Document) -> None:
    """Check cell (0, 0) of the first table; skipped unless ODFDO_TESTING is set."""
    if "ODFDO_TESTING" not in os.environ:
        return
    first_table = document.body.get_table(0)
    first_cell = first_table.get_cell((0, 0))
    cell_text = first_cell.value.strip()
    print(cell_text)
    assert cell_text == "Hello World"


if __name__ == "__main__":
    main()

Basic presentation hello world

Write a basic “Hello World” in the middle of the first page of a presentation.

recipes/basic_presentation_hello_world.py
#!/usr/bin/env python
"""Write a basic "Hello World" in the middle of the first page of a presentation.
"""
from pathlib import Path

from odfdo import Document, DrawPage, Frame

_DOC_SEQUENCE = 7
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_odp"
TARGET = "hello.odp"


def save_new(document: Document, name: str):
    """Save `document` as `name` inside OUTPUT_DIR.

    The output directory is created if it is missing, and the destination
    path is printed before the document is written.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR / name
    print("Saving:", destination)
    document.save(destination, pretty=True)


def main():
    """Build a one-page presentation showing "Hello World" and save it."""
    doc = Document("presentation")
    content = doc.body
    content.clear()

    slide = DrawPage("page1", name="Page 1")
    # A text frame with an explicit size and position on the page.
    slide.append(
        Frame.text_frame(
            "Hello World",
            size=("7cm", "5cm"),
            position=("11cm", "8cm"),
            style="Standard",
            text_style="Standard",
        )
    )
    content.append(slide)

    save_new(doc, TARGET)


if __name__ == "__main__":
    main()

Create a basic text document

Create a basic text document with headers and paragraphs.

recipes/create_a_basic_text_document.py
#!/usr/bin/env python
"""Create a basic text document with headers and paragraphs.
"""
import os
from pathlib import Path

from odfdo import Document, Header, Paragraph

_DOC_SEQUENCE = 10
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_text"
TARGET = "document.odt"


def save_new(document: Document, name: str):
    """Save `document` as `name` inside OUTPUT_DIR.

    The output directory is created if it is missing, and the destination
    path is printed before the document is written.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR / name
    print("Saving:", destination)
    document.save(destination, pretty=True)


def create_basic_document():
    """Return a new text document made of one title, two sections and their text."""
    # Text of the two paragraphs, kept apart from the document structure.
    preparations_text = (
        "Sous le consulat de Lucius Domitius et d'Appius Claudius, "
        "César, quittant les quartiers d'hiver pour aller en Italie, "
        "comme il avait coutume de le faire chaque année, ordonne aux "
        "lieutenants qu'il laissait à la tête des légions de construire, "
        "pendant l'hiver, le plus de vaisseaux qu'il serait possible, "
        "et de réparer les anciens."
    )
    britain_text = (
        "Cette île est de forme triangulaire ; l'un des côtés regarde "
        "la Gaule. Des deux angles de ce côté, l'un est au levant, "
        "vers le pays de Cantium, où abordent presque tous les vaisseaux "
        "gaulois ; l'autre, plus bas, est au midi. La longueur de ce côté "
        "est d'environ cinq cent mille pas. "
    )

    doc = Document("text")
    content = doc.body
    content.clear()

    # Level 1 title, then two level 2 sections, each followed by a paragraph.
    content.append(Header(1, "De la Guerre des Gaules - Livre V"))
    content.append(Header(2, "Préparatifs d'expédition en Bretagne"))
    content.append(Paragraph(preparations_text))
    content.append(Header(2, "La Bretagne"))
    content.append(Paragraph(britain_text))
    return doc


def main():
    """Create the document, run the optional self-check, then save it."""
    doc = create_basic_document()
    test_unit(doc)
    save_new(doc, TARGET)


def test_unit(document: Document) -> None:
    """Check the paragraph at position 1; skipped unless ODFDO_TESTING is set."""
    if "ODFDO_TESTING" not in os.environ:
        return
    second_paragraph = document.body.get_paragraph(position=1)
    paragraph_text = str(second_paragraph)
    print(paragraph_text)
    assert paragraph_text.startswith("Cette île est de forme triangulaire")


if __name__ == "__main__":
    main()

How to add a paragraph to a text document

Minimal example of how to add a paragraph.

recipes/how_to_add_a_paragraph_to_a_text_document.py
"""Minimal example of how to add a paragraph.
"""

from odfdo import Document, Paragraph

_DOC_SEQUENCE = 12


def main():
    """Append a "Hello World" paragraph to the body of a new text document."""
    doc = Document("text")

    # A Paragraph is created with its text content, then attached to the body:
    doc.body.append(Paragraph("Hello World"))


if __name__ == "__main__":
    main()

Create a basic text document with a list

Create a basic text document with a list.

recipes/create_a_basic_text_document_with_a_list.py
#!/usr/bin/env python
"""Create a basic text document with a list.
"""
import os
from pathlib import Path

from odfdo import Document, List, ListItem

_DOC_SEQUENCE = 20
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_list"
TARGET = "document.odt"


def save_new(document: Document, name: str):
    """Save `document` as `name` inside OUTPUT_DIR.

    The output directory is created if it is missing, and the destination
    path is printed before the document is written.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR / name
    print("Saving:", destination)
    document.save(destination, pretty=True)


def main():
    """Generate the document, run the optional self-check, then save it."""
    doc = generate_document()
    test_unit(doc)
    save_new(doc, TARGET)


def generate_document():
    """Return a text document holding a four-item list; its text is printed."""
    doc = Document("text")
    content = doc.body

    # A List is built from a Python list of strings and list items.
    names = List(["Arthur", "Ford", "Trillian"])

    # The list is attached to the body now, and still modified afterwards.
    content.append(names)

    # One more entry, given as an explicit ListItem:
    names.append_item(ListItem("Marvin"))

    # Expected output:
    # - Arthur
    # - Ford
    # - Trillian
    # - Marvin
    print(doc.get_formatted_text())

    return doc


def test_unit(document: Document) -> None:
    """Check the text rendering of the list; skipped unless ODFDO_TESTING is set."""
    if "ODFDO_TESTING" not in os.environ:
        return
    expected = "- Arthur\n- Ford\n- Trillian\n- Marvin"
    assert str(document).strip() == expected


if __name__ == "__main__":
    main()

Create a basic text document with list and sublists

Create a basic text document with list and sublists.

recipes/create_a_basic_text_document_with_list_and_sublists.py
#!/usr/bin/env python
"""Create a basic text document with list and sublists.
"""
import os
from pathlib import Path

from odfdo import Document, List, ListItem

_DOC_SEQUENCE = 25
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_sublist"
TARGET = "document.odt"


def save_new(document: Document, name: str):
    """Save `document` as `name` inside OUTPUT_DIR.

    The output directory is created if it is missing, and the destination
    path is printed before the document is written.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR / name
    print("Saving:", destination)
    document.save(destination, pretty=True)


def main():
    """Generate the document, run the optional self-check, then save it."""
    doc = generate_document()
    test_unit(doc)
    save_new(doc, TARGET)


def generate_document():
    """Return a text document with a list, a sublist and inserted items.

    The formatted text is printed twice: once after the sublist is added and
    once after the extra items are inserted.
    """
    doc = Document("text")
    content = doc.body

    # Main list: three names from strings, plus "Marvin" as a ListItem.
    names = List(["Arthur", "Ford", "Trillian"])
    marvin_item = ListItem("Marvin")
    names.append_item(marvin_item)
    content.append(names)

    # Adding a sublist:
    # a sublist is simply a list appended to an item of another list.
    marvin_item.append(List(["Paranoid Android", "older than the universe"]))

    # Result so far:
    # - Arthur
    # - Ford
    # - Trillian
    # - Marvin
    #   - Paranoid Android
    #   - older than the universe
    print(doc.get_formatted_text())

    # Inserting list items.
    # In case you forgot to insert an item, give its position:
    names.insert_item("some dolphins", position=1)

    # An item can also be inserted relative to an existing one,
    # either before it or after it:
    anchor = names.get_item(content="Marvin")
    names.insert_item("Zaphod", before=anchor)
    names.insert_item("and many others", after=anchor)

    # Final result:
    # - Arthur
    # - some dolphins
    # - Ford
    # - Trillian
    # - Zaphod
    # - Marvin
    #   - Paranoid Android
    #   - older than the universe
    # - and many others
    print(doc.get_formatted_text())

    return doc


def test_unit(document: Document) -> None:
    """Check the formatted text of the document; skipped unless ODFDO_TESTING is set."""
    if "ODFDO_TESTING" not in os.environ:
        return
    # One entry per expected output line; the entry holding only two spaces
    # is the line expected between "Marvin" and its sublist.
    expected_lines = [
        "- Arthur",
        "- some dolphins",
        "- Ford",
        "- Trillian",
        "- Zaphod",
        "- Marvin",
        "  ",
        "  - Paranoid Android",
        "  - older than the universe",
        "- and many others",
    ]
    assert document.get_formatted_text().strip() == "\n".join(expected_lines)


if __name__ == "__main__":
    main()

How to add a sublist to a list

Minimal example of how to add a sublist to a list.

recipes/how_to_add_a_sublist_to_a_list.py
"""Minimal example of how to add a sublist to a list.
"""

from odfdo import Document, List, ListItem

_DOC_SEQUENCE = 27


def main():
    """Build a list whose last item carries a sublist, then print the body XML."""
    doc = Document("text")
    content = doc.body

    drinks = List(["chocolat", "café"])
    content.append(drinks)

    tea = ListItem("thé")
    drinks.append(tea)

    # A sublist is just a List appended to an item of another list.
    tea.append(List(["thé vert", "thé rouge"]))

    print(content.serialize(True))


if __name__ == "__main__":
    main()

How to insert a new item within a list

Minimal example of how to insert a new item within a list.

recipes/how_to_insert_a_new_item_within_a_list.py
"""Minimal example of how to insert a new item within a list.
"""

from odfdo import List

_DOC_SEQUENCE = 28


def main():
    """Show three ways to insert an item: by position, before and after an item."""
    drinks = List(["chocolat", "café"])

    # In case you forgot an important item, insert it at a given position:
    drinks.insert_item("Chicorée", position=1)

    # Or insert it relative to an existing item, before it or after it:
    coffee = drinks.get_item(content="café")
    drinks.insert_item("Chicorée", before=coffee)
    drinks.insert_item("Chicorée", after=coffee)


if __name__ == "__main__":
    main()

How to add an item to a list

Minimal example of how to add an item to a list.

recipes/how_to_add_an_item_to_a_list.py
"""Minimal example of how to add an item to a list.
"""

from odfdo import List, ListItem

_DOC_SEQUENCE = 28


def main():
    """Append a new ListItem to an existing list."""
    drinks = List(["chocolat", "café"])
    drinks.append(ListItem("thé"))


if __name__ == "__main__":
    main()

Get text content from odt file

Read the text content from an .odt file.

recipes/get_text_content_from_odt_file.py
#!/usr/bin/env python
"""Read the text content from an .odt file.
"""
import os
import sys
from pathlib import Path

from odfdo import Document

_DOC_SEQUENCE = 30
DATA = Path(__file__).parent / "data"
# ODF export of Wikipedia article Hitchhiker's Guide to the Galaxy (CC-By-SA) :
SOURCE = "collection2.odt"


def read_source_document():
    """Open the document named by the first command-line argument.

    Without any argument, the sample file DATA / SOURCE is opened instead.
    """
    if len(sys.argv) > 1:
        path = sys.argv[1]
    else:
        path = DATA / SOURCE
    return Document(path)


def main():
    """Print the type, the text size and the first characters of the document."""
    source_doc = read_source_document()

    # Report what kind of document was loaded:
    print("Type of document:", source_doc.get_type())

    # get_formatted_text() is a quick way to get the text content:
    content_text = source_doc.get_formatted_text()

    print("Size :", len(content_text))

    # Only the first 320 characters are shown:
    print(content_text[:320])

    # The size check runs only for the test suite:
    if "ODFDO_TESTING" in os.environ:
        assert len(content_text) == 56828


if __name__ == "__main__":
    main()

Create a basic text document with a table of content

Create a basic text document with a table of content.

recipes/create_a_basic_text_document_with_a_table_of_content.py
#!/usr/bin/env python
"""Create a basic text document with a table of content.
"""
import os
from pathlib import Path

from odfdo import TOC, Document, Header, Paragraph

_DOC_SEQUENCE = 35
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_toc"
DATA = Path(__file__).parent / "data"
LOREM = (DATA / "lorem.txt").read_text(encoding="utf8")
TARGET = "document.odt"


def save_new(document: Document, name: str):
    """Save `document` as `name` inside OUTPUT_DIR.

    The output directory is created if it is missing, and the destination
    path is printed before the document is written.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR / name
    print("Saving:", destination)
    document.save(destination, pretty=True)


def main():
    """Create a text document, fill it with a TOC and content, then save it."""
    doc = Document("text")
    make_toc(doc)
    save_new(doc, TARGET)


def make_toc(document):
    """Add a table of content, headers and paragraphs to `document`.

    The TOC is appended first, then filled once all headers are in place.
    """
    content = document.body

    # The table of content, with a title replacing the default one.
    contents_table = TOC()
    contents_table.title = "My Table of Content"

    # The component must be attached to the document:
    content.append(contents_table)

    # Content: one level 1 header, then three level 2 sections. Each section
    # title is a 70-character slice of LOREM, shifted by 5 characters per section.
    content.append(Header(1, LOREM[:70]))
    for shift in (0, 5, 10):
        content.append(Header(2, LOREM[shift : 70 + shift]))
        content.append(Paragraph(LOREM))

    # Fill the TOC from the actual content. If this is not done here, the
    # reader will need to "update the table of content" later.
    contents_table.fill()

    # The remaining check runs only for the test suite:
    if "ODFDO_TESTING" not in os.environ:
        return
    third_line = str(contents_table).split("\n")[2]
    assert third_line == (
        "1.1. Lorem ipsum dolor sit amet, consectetuer "
        "adipiscing elit. Sed non risu"
    )


if __name__ == "__main__":
    main()

How to add a table of content to a document

Adding a table of content to an existing text document.

recipes/how_to_add_a_table_of_content_to_a_document.py
#!/usr/bin/env python
"""Adding a table of content to an existing text document.
"""
from pathlib import Path

from odfdo import TOC, Document, Paragraph, Style

_DOC_SEQUENCE = 37
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "add_toc"
TARGET = "document.odt"
DATA = Path(__file__).parent / "data"
SOURCE = DATA / "collection.odt"


def save_new(document: Document, name: str):
    """Save `document` as `name` inside OUTPUT_DIR.

    The output directory is created if it is missing, and the destination
    path is printed before the document is written.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR / name
    print("Saving:", destination)
    document.save(destination, pretty=True)


def main():
    """Insert a table of content, followed by a page break, at the top of SOURCE."""
    doc = Document(SOURCE)
    content = doc.body

    # A page break is an empty paragraph whose style sets
    # "fo:break-before" to "page":
    break_style = Style("paragraph", name="page_break")
    break_style.set_properties({"fo:break-before": "page"})
    doc.insert_style(break_style)
    content.insert(Paragraph("", style="page_break"), 0)

    # The TOC is inserted at position 0, so it ends up before the page break.
    # To put the TOC at the end instead, use: content.append(contents_table)
    contents_table = TOC()
    content.insert(contents_table, 0)
    # Fill the TOC with the current content of the document:
    contents_table.fill()

    save_new(doc, TARGET)


if __name__ == "__main__":
    main()

Update a text document with a table of content

Update the table of contents of a document.

recipes/update_a_text_document_with_a_table_of_content.py
#!/usr/bin/env python
"""Update the table of contents of a document.
"""

from pathlib import Path

from odfdo import Document, Header, Paragraph

_DOC_SEQUENCE = 38
DATA = Path(__file__).parent / "data"
SOURCE = "doc_with_toc.odt"
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "modified_toc"
TARGET = "document.odt"


def save_new(document: Document, name: str) -> None:
    """Save `document` as `name` inside OUTPUT_DIR.

    The output directory is created if it is missing, and the destination
    path is printed before the document is written.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR / name
    print("Saving:", destination)
    document.save(destination, pretty=True)


def main() -> None:
    """Load the sample document, run the TOC update scenario, then save it."""
    doc = Document(DATA / SOURCE)
    update_toc(doc)
    save_new(doc, TARGET)


def update_toc(document: Document) -> None:
    """Run the whole scenario: each modification is followed by its check."""
    scenario = (
        check_toc_v1,
        add_some_header,
        check_toc_v2,
        change_toc_title,
        check_toc_v3,
        change_toc_title_to_empty,
        check_toc_v4,
        remove_second_header_1b,
        check_toc_v5,
        add_toc_title,
        check_toc_v6,
    )
    # Steps are applied in order to the same document.
    for step in scenario:
        step(document)


def check_toc_v1(document: Document) -> None:
    """Check the initial TOC: a title line and four entries."""
    lines = str(document.body.toc).split("\n")
    assert len(lines) == 5
    expected_prefixes = (
        "Table of Contents",
        "1. Lorem 1",
        "1.1. Lorem 1A",
        "1.2. Lorem 1B",
        "1.3. Lorem 1C",
    )
    for line, prefix in zip(lines, expected_prefixes):
        assert line.startswith(prefix)


def add_some_header(document: Document) -> None:
    """Append a level 1 header and a paragraph, then refresh the TOC."""
    content = document.body
    content.append(Header(1, "New header"))
    content.append(Paragraph("Some text after the new header."))
    # The table of contents is refilled from the modified document.
    content.toc.fill(document)


def check_toc_v2(document: Document) -> None:
    """Check that the TOC now ends with the "2. New header" entry."""
    lines = str(document.body.toc).split("\n")
    assert len(lines) == 6
    expected_prefixes = (
        "Table of Contents",
        "1. Lorem 1",
        "1.1. Lorem 1A",
        "1.2. Lorem 1B",
        "1.3. Lorem 1C",
        "2. New header",
    )
    for line, prefix in zip(lines, expected_prefixes):
        assert line.startswith(prefix)


def change_toc_title(document: Document) -> None:
    """Set the TOC title to "Another title" and refill the TOC."""
    contents_table = document.body.toc
    contents_table.set_toc_title("Another title")
    contents_table.fill(document)


def check_toc_v3(document: Document) -> None:
    """Check that the first TOC line is the new title and the size is unchanged."""
    lines = str(document.body.toc).split("\n")
    assert len(lines) == 6
    title_line = lines[0]
    assert title_line.startswith("Another title")


def change_toc_title_to_empty(document: Document) -> None:
    """Set an empty TOC title, which removes the title, and refill the TOC."""
    contents_table = document.body.toc
    # An empty string removes the title.
    contents_table.set_toc_title("")
    contents_table.fill(document)


def check_toc_v4(document: Document) -> None:
    """Check that the TOC has no title line anymore, only the five entries."""
    lines = str(document.body.toc).split("\n")
    assert len(lines) == 5
    expected_prefixes = (
        "1. Lorem 1",
        "1.1. Lorem 1A",
        "1.2. Lorem 1B",
        "1.3. Lorem 1C",
        "2. New header",
    )
    for line, prefix in zip(lines, expected_prefixes):
        assert line.startswith(prefix)


def remove_second_header_1b(document: Document) -> None:
    """Delete the header at position 2 of the body, then refill the TOC."""
    content = document.body
    # The header returned here is attached to the document, so deleting it
    # removes the element from the document.
    content.get_header(position=2).delete()

    content.toc.fill(document)


def check_toc_v5(document: Document) -> None:
    """Check that "Lorem 1B" is gone and "Lorem 1C" is now numbered 1.2."""
    lines = str(document.body.toc).split("\n")
    assert len(lines) == 4
    expected_prefixes = (
        "1. Lorem 1",
        "1.1. Lorem 1A",
        "1.2. Lorem 1C",
        "2. New header",
    )
    for line, prefix in zip(lines, expected_prefixes):
        assert line.startswith(prefix)


def add_toc_title(document: Document) -> None:
    """Give the TOC the title "A new title" and refill the TOC."""
    contents_table = document.body.toc
    contents_table.set_toc_title("A new title")
    contents_table.fill(document)


def check_toc_v6(document: Document) -> None:
    """Check the final TOC: the new title followed by the four remaining entries."""
    lines = str(document.body.toc).split("\n")
    assert len(lines) == 5
    expected_prefixes = (
        "A new title",
        "1. Lorem 1",
        "1.1. Lorem 1A",
        "1.2. Lorem 1C",
        "2. New header",
    )
    for line, prefix in zip(lines, expected_prefixes):
        assert line.startswith(prefix)


if __name__ == "__main__":
    main()

Create a basic text document with annotations

Create a basic text document with annotations.

recipes/create_a_basic_text_document_with_annotations.py
#!/usr/bin/env python
"""Create a basic text document with annotations.
"""
import os
from pathlib import Path

from odfdo import Document, Header, Paragraph

_DOC_SEQUENCE = 40
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_annotations"
DATA = Path(__file__).parent / "data"
LOREM = (DATA / "lorem.txt").read_text(encoding="utf8")
TARGET = "document.odt"


def save_new(document: Document, name: str):
    """Save `document` as `name` inside OUTPUT_DIR.

    The output directory is created if it is missing, and the destination
    path is printed before the document is written.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR / name
    print("Saving:", destination)
    document.save(destination, pretty=True)


def main():
    """Create a text document with annotations, self-check it, then save it."""
    doc = Document("text")
    make_annotations(doc)
    test_unit(doc)
    save_new(doc, TARGET)


def make_annotations(document):
    """Fill `document` with a title and three sections, one annotation each."""
    content = document.body
    content.append(Header(1, "Main title"))
    for index in range(3):
        content.append(Header(2, f"title {index}"))
        para = Paragraph(LOREM[:240])

        # Adding an annotation.
        # Annotations are notes that don't appear in the document itself but
        # typically on a side bar in a desktop application, so they are not
        # printed.

        # The annotation is anchored on the 4th word of the paragraph.
        anchor_word = str(para).split()[3]

        para.insert_annotation(
            after=anchor_word,  # The word after which the annotation is inserted.
            body="It's so easy!",  # The text of the annotation.
            creator="Bob",  # The author of the annotation.
            # date= xxx            A datetime value, by default datetime.now().
        )

        content.append(para)


def test_unit(document: Document) -> None:
    """Check that Bob created three annotations; skipped unless ODFDO_TESTING is set."""
    if "ODFDO_TESTING" not in os.environ:
        return
    bob_annotations = document.body.get_annotations(creator="Bob")
    assert len(bob_annotations) == 3


if __name__ == "__main__":
    main()

Create a basic text document with footnotes

Create a basic text document with footnotes.

recipes/create_a_basic_text_document_with_footnotes.py
#!/usr/bin/env python
"""Create a basic text document with footnotes.
"""
import os
from pathlib import Path

from odfdo import Document, Header, Paragraph

_DOC_SEQUENCE = 45
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_footnotes"
DATA = Path(__file__).parent / "data"
LOREM = (DATA / "lorem.txt").read_text(encoding="utf8")
TARGET = "document.odt"


def save_new(document: Document, name: str):
    """Save `document` as `name` inside OUTPUT_DIR.

    The output directory is created if it is missing, and the destination
    path is printed before the document is written.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR / name
    print("Saving:", destination)
    document.save(destination, pretty=True)


def main():
    """Create a text document with footnotes, self-check it, then save it."""
    doc = Document("text")
    make_footnotes(doc)
    test_unit(doc)
    save_new(doc, TARGET)


def make_footnotes(document):
    """Fill `document` with a title and three sections, one footnote each."""
    content = document.body

    # Same kind of content as in the basic text document recipe.
    content.append(Header(1, "Main title"))
    for index in range(3):
        content.append(Header(2, f"title {index}"))
        para = Paragraph(LOREM[:240])

        # Adding a footnote.
        # Notes are quite complex, so paragraphs have a dedicated API for them.
        # The note is anchored on the 4th word of the paragraph.
        anchor_word = str(para).split()[3]
        # The footnote text itself, shown at the end of the page:
        reference = (
            f'Author{index}, A. (2007). "How to cite references", Sample Editions.'
        )
        para.insert_note(
            after=anchor_word,  # The word after which the “¹” citation is inserted.
            note_id=f"note{index}",  # The unique identifier of the note in the document.
            citation="1",  # The symbol the user sees to follow the footnote.
            body=reference,
        )

        content.append(para)


def test_unit(document: Document) -> None:
    """Check that the body holds three notes; skipped unless ODFDO_TESTING is set."""
    if "ODFDO_TESTING" not in os.environ:
        return
    notes = document.body.get_notes()
    assert len(notes) == 3


if __name__ == "__main__":
    main()

How to add footnote to a text document

Minimal example of how to add a footnote to a text document.

recipes/how_to_add_footnote_to_a_text_document.py
"""Minimal example of how to add a footnote to a text document.
"""

from odfdo import Document, Paragraph

_DOC_SEQUENCE = 47


def main():
    """Create a one-paragraph text document and attach a footnote to it."""
    doc = Document("text")
    content = doc.body
    content.clear()

    para = Paragraph("A paragraph with a footnote about some references.")
    content.append(para)

    # Notes are quite complex, so paragraphs have a dedicated API for them.
    # The arguments in detail:
    #
    # after    =>   The word after which the “¹” citation is inserted.
    # note_id  =>   The unique identifier of the note in the document.
    # citation =>   The symbol the user sees to follow the footnote.
    # body     =>   The footnote itself, at the end of the page.
    #
    # odfdo creates footnotes by default. To create endnotes (notes
    # that appear at the end of the document), give the
    # note_class='endnote' parameter.
    para.insert_note(
        after="graph",
        note_id="note1",
        citation="1",
        body='Author, A. (2007). "How to cite references" New York: McGraw-Hill.',
    )


if __name__ == "__main__":
    main()

Create a text document with tables in it

Build a basic commercial document, with numerical values displayed in both the text and in a table.

recipes/create_a_text_document_with_tables_in_it.py
#!/usr/bin/env python
"""Build a basic commercial document, with numerical values displayed in
both the text and in a table.
"""

from pathlib import Path

# for cell style
from odfdo import (
    Cell,
    Document,
    Header,
    List,
    ListItem,
    Paragraph,
    Row,
    Table,
    create_table_cell_style,
    make_table_cell_border_string,
)

_DOC_SEQUENCE = 50
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "commercial"
TARGET = "commercial.odt"
TAX_RATE = 0.20


def save_new(document: Document, name: str):
    """Save `document` as `name` inside OUTPUT_DIR.

    The output directory is created if it is missing, and the destination
    path is printed before the document is written.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR / name
    print("Saving:", destination)
    document.save(destination, pretty=True)


class Product:
    """A catalog entry made of a display name and a unit price."""

    def __init__(self, name, price):
        # The stored name is the given label prefixed with "Product ".
        display_name = f"Product {name}"
        self.price = price
        self.name = display_name


def make_product_catalog():
    """Return five products named "A" to "E".

    Prices start at 10.0 and grow by 10.5 from one product to the next.
    """
    # chr(65) is "A", so ranks 0..4 give the letters A..E.
    return [Product(chr(65 + rank), 10.0 + 10.5 * rank) for rank in range(5)]


def main():
    """Generate the commercial document and save it."""
    commercial_doc = generate_commercial()
    save_new(commercial_doc, TARGET)


def generate_commercial():
    """Build and return the commercial text document.

    The document lists the product catalog, then shows an order as a table:
    a header row, one row per ordered product, an empty merged row, a total
    row and a total-with-tax row. The header row gets a cell style with
    borders and a grey background.
    """
    commercial = Document("text")
    body = commercial.body
    catalog = make_product_catalog()

    title1 = Header(1, "Basic commercial document")
    body.append(title1)
    title11 = Header(2, "Available products")
    body.append(title11)
    paragraph = Paragraph("Here the list:")
    body.append(paragraph)

    # List of products in a list :
    product_list = List()  # odfdo.List
    body.append(product_list)
    for product in catalog:
        # name left-aligned on 10 characters, price with 2 decimals
        item = ListItem(f"{product.name:<10}, price: {product.price:.2f} €")
        product_list.append(item)

    title12 = Header(2, "Your command")
    body.append(title12)

    # The order: index of the product in the catalog -> ordered quantity.
    command = {0: 1, 1: 12, 2: 3, 4: 5}

    # A table in the text document :
    table = Table("Table")
    body.append(table)
    # Header row, stored as the first row of the table.
    row = Row()
    row.set_values(["Product", "Price", "Quantity", "Amount"])
    table.set_row("A1", row)
    # or: table.set_row(0, row)
    row_number = 0
    # One row per ordered product: name, unit price, quantity, amount.
    for item, quantity in command.items():
        prod = catalog[item]
        row = Row()
        row.set_value("A", prod.name)
        # or : row.set_value(0, prod.name)
        # Unit price: float value with an explicit displayed text and currency.
        cell = Cell()
        cell.set_value(
            prod.price,
            text=f"{prod.price:.2f} €",
            currency="EUR",
            cell_type="float",
        )
        row.set_cell("B", cell)
        # or : row.set_cell(1, cell)
        row.set_value("C", quantity)
        # row.set_value(2, quantity)
        # Amount for this line: unit price times quantity.
        price = prod.price * quantity
        cell = Cell()
        cell.set_value(
            price,
            text=f"{price:.2f} €",
            currency="EUR",
            cell_type="float",
        )
        row.set_cell(3, cell)
        row_number += 1
        table.set_row(row_number, row)

    # Index of the last column (the "Amount" column).
    cols = table.width
    column = cols - 1

    # add merged empty row
    row = Row()
    row_number += 1
    table.set_row(row_number, row)
    table.set_span((0, row_number, 3, row_number))

    # make total
    row = Row()
    row.set_value(column - 1, "Total:")
    # [1:-1] skips the header cell and the cell of the empty row just added.
    total = sum(table.get_column_values(column)[1:-1])
    cell = Cell()
    cell.set_value(
        total,
        text=f"{total:.2f} €",
        currency="EUR",
        cell_type="float",
    )
    row.set_cell(column, cell)
    row_number += 1
    table.set_row(row_number, row)

    # merge the cells on the left of the total value
    table.set_span((column - 3, row_number, column - 1, row_number), merge=True)

    # Same layout for the total including tax.
    row = Row()
    row.set_value(column - 1, "Total with tax:")
    total *= 1 + TAX_RATE
    cell = Cell()
    cell.set_value(
        total,
        text=f"{total:.2f} €",
        currency="EUR",
        cell_type="float",
    )
    row.set_cell(column, cell)
    row_number += 1
    table.set_row(row_number, row)
    # merge the cells on the left of the total value
    table.set_span((column - 3, row_number, column - 1, row_number), merge=True)

    # Let's add some style on first row
    border = make_table_cell_border_string(thick="0.03cm", color="black")
    cell_style = create_table_cell_style(
        color="black",
        background_color=(210, 210, 210),
        border_right=border,
        border_left=border,
        border_bottom=border,
        border_top=border,
    )
    # The value returned by insert_style is used below as the cell style name.
    style_name = commercial.insert_style(style=cell_style, automatic=True)

    row = table.get_row(0)
    # for cell in row.get_cells(): #possible, but .traverse() is better
    for cell in row.traverse():
        cell.style = style_name
        # The modified cell is written back into the row,
        # and the row back into the table after the loop.
        row.set_cell(x=cell.x, cell=cell)
    table.set_row(row.y, row)

    return commercial


if __name__ == "__main__":
    main()

How to add a table to a document

Minimal example of how to add a table to a text document.

recipes/how_to_add_a_table_to_a_document.py
"""Minimal example of how to add a table to a text document.
"""
from odfdo import Document, Header, Paragraph, Table

_DOC_SEQUENCE = 55


def main():
    """Append a header, a paragraph and a 3x3 table to a new text document."""
    doc = Document("text")
    content = doc.body

    # A section title and a short introduction, to make the document clear:
    content.append(Header(1, "Tables"))
    content.append(Paragraph("A 3x3 table:"))

    # The table is created with its name and dimensions, then attached:
    content.append(Table("Table 1", width=3, height=3))


if __name__ == "__main__":
    main()

Create a text document from plain text with layout

Create a document with styles.

We want to:

  • remove standard styles from the document

  • set some styles grabbed from a styles.xml ODF file (or generated)

  • insert plain “python” text, containing some \t, \n, and spaces

recipes/create_a_text_document_from_plain_text_with_layout.py
#!/usr/bin/env python
r"""Create a document with styles.

 We want to:

  - remove standard styles from the document

  - set some styles grabbed from a styles.xml ODF file (or generated)

  - insert plain "python" text, containing some \t, \n, and spaces
"""
from pathlib import Path

from odfdo import Document, Element, Paragraph, Style

_DOC_SEQUENCE = 60
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "styled2"
TARGET = "document.odt"


# Element is the base class of all odfdo classes.
# Element.from_tag permits the creation of any ODF XML tag

# some font styles :
_style_font_1 = Element.from_tag(
    '<style:font-face style:name="OpenSymbol" svg:font-family="OpenSymbol"/>'
)

_style_font_2 = Element.from_tag(
    '<style:font-face style:name="Liberation Serif" '
    'svg:font-family="Liberation Serif" '
    'style:font-family-generic="roman" '
    'style:font-pitch="variable"/>'
)

_style_font_3 = Element.from_tag(
    '<style:font-face style:name="Liberation Sans" '
    'svg:font-family="Liberation Sans" '
    'style:font-family-generic="swiss" '
    'style:font-pitch="variable"/>'
)

# page layout style (changing margin)
_style_page = Element.from_tag(
    '<style:page-layout style:name="MyLayout">'
    '<style:page-layout-properties fo:page-width="21.00cm" '
    'fo:page-height="29.70cm" style:num-format="1" '
    'style:print-orientation="portrait" fo:margin-top="1.7cm" '
    'fo:margin-bottom="1.5cm" fo:margin-left="1.6cm" '
    'fo:margin-right="1.6cm" style:writing-mode="lr-tb" '
    'style:footnote-max-height="0cm"><style:footnote-sep '
    'style:width="0.018cm" style:distance-before-sep="0.10cm" '
    'style:distance-after-sep="0.10cm" style:line-style="solid" '
    'style:adjustment="left" style:rel-width="25%" '
    'style:color="#000000"/> </style:page-layout-properties>'
    "<style:footer-style> "
    '<style:header-footer-properties fo:min-height="0.6cm" '
    'fo:margin-left="0cm" fo:margin-right="0cm" '
    'fo:margin-top="0.3cm" style:dynamic-spacing="false"/> '
    "</style:footer-style></style:page-layout>"
)

# master style, using the precedent layout for the actual document
_style_master = Element.from_tag(
    '<style:master-page style:name="Standard" '
    'style:page-layout-name="MyLayout"><style:footer>'
    '<text:p text:style-name="Footer"> '
    "<text:tab/><text:tab/><text:page-number "
    'text:select-page="current"/> / <text:page-count '
    'style:num-format="1">15</text:page-count>'
    "</text:p></style:footer> "
    "</style:master-page>"
)

# some footer
_style_footer = Element.from_tag(
    '<style:style style:name="Footer" '
    'style:family="paragraph" style:class="extra" '
    'style:master-page-name="">'
    '<style:paragraph-properties style:page-number="auto" '
    'text:number-lines="false" text:line-number="0">'
    "<style:tab-stops>"
    '<style:tab-stop style:position="8.90cm" '
    'style:type="center"/>'
    '<style:tab-stop style:position="17.80cm" style:type="right"/>'
    "</style:tab-stops>"
    "</style:paragraph-properties>"
    "<style:text-properties "
    'style:font-name="Liberation Sans" '
    'fo:font-size="7pt"/></style:style>'
)

# some text style using Liberation Sans font
_style_description = Element.from_tag(
    '<style:style style:name="description" '
    'style:family="paragraph" '
    'style:class="text" style:master-page-name="">'
    "<style:paragraph-properties "
    'fo:margin="100%" fo:margin-left="0cm" fo:margin-right="0cm" '
    'fo:margin-top="0.35cm" fo:margin-bottom="0.10cm" '
    'style:contextual-spacing="false" '
    'fo:text-indent="0cm" '
    'style:auto-text-indent="false" '
    'style:page-number="auto"/>'
    "<style:text-properties "
    'style:font-name="Liberation Sans" '
    'fo:font-size="11pt"/>'
    "</style:style>"
)

# some text style using Liberation Serif font
_style_small_serif = Element.from_tag(
    '<style:style style:name="smallserif" '
    'style:family="paragraph" style:class="text">'
    '<style:paragraph-properties fo:margin="100%" '
    'fo:margin-left="1.20cm" '
    'fo:margin-right="0cm" fo:margin-top="0cm" '
    'fo:margin-bottom="0.10cm" '
    'style:contextual-spacing="false" '
    'fo:text-indent="0cm" '
    'style:auto-text-indent="false"/>'
    '<style:text-properties style:font-name="Liberation Serif" '
    'fo:font-size="9pt" '
    'fo:font-weight="normal"/>'
    "</style:style>"
)

# some style to have stylish line in text
_style_line = Element.from_tag(
    '<style:style style:name="line" '
    'style:family="paragraph" style:class="text">'
    '<style:paragraph-properties fo:margin="100%" '
    'fo:margin-left="0cm" '
    'fo:margin-right="0cm" fo:margin-top="0cm" '
    'fo:margin-bottom="0.15cm" '
    'style:contextual-spacing="false" fo:text-indent="0cm" '
    'style:auto-text-indent="false" fo:padding="0cm" '
    'fo:border-left="none" '
    'fo:border-right="none" fo:border-top="none" '
    'fo:border-bottom="0.06pt solid #000000"/>'
    '<style:text-properties style:font-name="Liberation Sans" '
    'fo:font-size="9pt"/>'
    "</style:style>"
)

# some odfdo generated style (for bold Span)
_style_bold = Style("text", name="bolder", bold=True)


def save_new(document: Document, name: str):
    """Save *document* as OUTPUT_DIR / *name*, creating the directory if missing.

    The destination path is printed before the document is written.
    """
    destination = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", destination)
    document.save(destination, pretty=True)


def main():
    """Create a styled text document from plain text and save it.

    The default styles are removed, the module-level styles are inserted,
    then several paragraphs are built from plain text containing tabs,
    newlines and runs of spaces. The result is written through save_new().
    """
    # Some plain text :
    text_1 = (
        "Lorem ipsum dolor sit amet,\n\t"
        "consectetuer adipiscing elit.\n\tSed"
        "non risus.\n\tSuspendisse lectus tortor,\n"
        "ndignissim sit amet, \nadipiscing nec,"
        "\nultricies sed, dolor.\n\n"
        " Cras elementum ultrices diam. Maecenas ligula massa,"
        "varius a,semper congue, euismod non,"
        " mi. Proin porttitor, orci nec nonummy"
        "molestie, enim est eleifend mi,"
        " non fermentum diam nisl sit amet erat."
    )

    text_2 = (
        "Vestibulum                 "
        "ante               "
        "ipsum             primis\n"
        "in faucibus orci luctus et ultrices "
        "posuere cubilia Curae; Aliquam nibh."
    )

    text_3 = (
        "Duis semper. \n\tDuis arcu massa,"
        " \n\t\tscelerisque vitae, \n"
        "\t\t\tconsequat in, \n"
        "\t\t\t\tpretium a, enim. \n"
        "\t\t\t\t\tPellentesque congue. \n"
        "Ut in risus volutpat libero pharetra "
        "tempor. Cras vestibulum bibendum augue."
        "Praesent egestas leo in pede. Praesent "
        "blandit odio eu enim. Pellentesque sed"
    )

    document = Document("text")
    # remove default styles
    document.delete_styles()
    # add our styles
    document.insert_style(_style_font_1, default=True)
    document.insert_style(_style_font_2, default=True)
    document.insert_style(_style_font_3, default=True)
    document.insert_style(_style_page, automatic=True)
    document.insert_style(_style_master)
    document.insert_style(_style_footer)
    document.insert_style(_style_description)
    document.insert_style(_style_small_serif)
    # The "line" style is referenced by the separator paragraphs below, so it
    # has to be registered like the other paragraph styles.
    document.insert_style(_style_line)
    document.insert_style(_style_bold)

    body = document.body

    # Since version 3.8.14, the append_plain_text() mode is the default for
    # paragraph creation, so the code is simpler. The older, equivalent form
    # is kept here for reference:

    # paragraph = Paragraph("", style="description")
    # paragraph.append_plain_text(text_1)
    # body.append(paragraph)

    # paragraph = Paragraph(style="line")
    # body.append(paragraph)

    # paragraph = Paragraph(style="smallserif")
    # paragraph.append_plain_text(text_2)
    # body.append(paragraph)

    # paragraph = Paragraph(style="line")
    # body.append(paragraph)

    # paragraph = Paragraph(style="description")
    # paragraph.append_plain_text(text_3)

    paragraph = Paragraph(text_1, style="description")
    body.append(paragraph)

    paragraph = Paragraph(style="line")
    body.append(paragraph)

    paragraph = Paragraph(text_2, style="smallserif")
    body.append(paragraph)

    paragraph = Paragraph(style="line")
    body.append(paragraph)

    paragraph = Paragraph("A: " + text_3, style="description")
    # span offsets become complex after inserting <CR> and <TAB> in a text
    paragraph.set_span("bolder", offset=5, length=6)  # find TEXT position 5 : 6
    paragraph.set_span("bolder", offset=18, length=4)  # find TEXT position 18 : 4
    paragraph.set_span("bolder", offset=49)  # find TEXT position 49 to the end
    # of the text block
    paragraph.set_span("bolder", regex=r"Praes\w+\s\w+")  # regex: Praes. + next word

    body.append(paragraph)

    paragraph = Paragraph(style="line")
    body.append(paragraph)

    # it is possible to add the content without the original layout (\n, tab, spaces)
    paragraph = Paragraph("B: " + text_3, style="description", formatted=False)
    body.append(paragraph)

    paragraph = Paragraph(style="line")
    body.append(paragraph)

    # text can also be appended after paragraph creation
    paragraph = Paragraph(style="description")
    paragraph.append("C: " + text_3)
    body.append(paragraph)

    save_new(document, TARGET)


if __name__ == "__main__":
    main()

How to add a picture to a text document

Create an empty text document and add a picture in a frame.

recipes/how_to_add_a_picture_to_a_text_document.py
#!/usr/bin/env python
"""Create an empty text document and add a picture in a frame.
"""
from pathlib import Path

from odfdo import Document, Frame, Paragraph

_DOC_SEQUENCE = 65
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_picture"
TARGET = "document.odt"
DATA = Path(__file__).parent / "data"
IMAGE = DATA / "newlogo.png"


def save_new(document: Document, name: str):
    """Write *document* to the file *name* inside OUTPUT_DIR.

    OUTPUT_DIR (and any missing parent) is created first; the target path is
    printed before saving.
    """
    target_path = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", target_path)
    document.save(target_path, pretty=True)


def main():
    """Create a text document containing one picture in a frame and save it.

    The image file IMAGE is added to the document, wrapped in an image frame
    of 6cm x 4cm positioned at (5cm, 10cm), placed in a paragraph, and the
    document is written through save_new().
    """
    document = Document("text")
    body = document.body
    # IMAGE is already built from DATA; joining DATA again would produce
    # "data/data/newlogo.png" whenever the script path is relative.
    image_path = str(IMAGE)
    uri = document.add_file(image_path)
    image_frame = Frame.image_frame(
        uri,
        size=("6cm", "4cm"),
        position=("5cm", "10cm"),
    )

    # put image frame in a paragraph:
    paragraph = Paragraph("")
    paragraph.append(image_frame)
    body.append(paragraph)

    save_new(document, TARGET)


if __name__ == "__main__":
    main()

How to add a title to a text document

Minimal example of how to add a Header of first level to a text document.

recipes/how_to_add_a_title_to_a_text_document.py
"""Minimal example of how to add a Header of first level to a text document.
"""

from odfdo import Document, Header

_DOC_SEQUENCE = 67


def main():
    """Build a text document whose body receives a single level-1 header.

    The document is only assembled in memory; this recipe does not save it.
    """
    document = Document("text")
    # Header(1, ...) creates a header of first level.
    document.body.append(Header(1, "The Title"))


if __name__ == "__main__":
    main()

Accessing a single element

Example of methods and properties to analyse a document.

These methods return a single element (or None):

- `body.get_note(position)`
- `body.get_paragraph(position)`
- `body.get_header(position)`
recipes/accessing_a_single_element.py
#!/usr/bin/env python
"""Example of methods and properties to analyse a document.

These methods return a single element (or None):

    - `body.get_note(position)`
    - `body.get_paragraph(position)`
    - `body.get_header(position)`
"""
import os
from pathlib import Path

from odfdo import Document

_DOC_SEQUENCE = 70
DATA = Path(__file__).parent / "data"
SOURCE = "collection2.odt"


def main():
    """Print a footnote, a paragraph and two titles fetched from the sample file.

    Each value is obtained with a single-element getter of the document body,
    then test_unit() is called on the document.
    """
    document = Document(DATA / SOURCE)

    # All lookups below start from the body element of the document.
    content = document.body

    # Single-element getters take one criterion. This recipe uses position=
    # (0 for the first match, -1 for the last one) and content= (text to find).
    print("- Content of the first footnote:")
    first_note = content.get_note(position=0)
    print(str(first_note))
    print()
    print("- Content of the paragraph with the word 'Fish'")
    fish_paragraph = content.get_paragraph(content="Fish")
    print(str(fish_paragraph))
    print("- Content of the first Title:")
    first_title = content.get_header(position=0)
    print(str(first_title))
    print("- Content of the last Title:")
    last_title = content.get_header(position=-1)
    print(str(last_title))

    # Reference output of the prints above, kept for documentation only.
    _expected_result = """
    - Content of the first footnote:
    1. [Gaiman, Neil](http://en.wikipedia.org/w/index.php?title=Neil_Gaiman)
    (2003). Don't Panic: Douglas Adams and the "Hitchhiker's Guide to the
    Galaxy". Titan Books. pp. 144–145. ISBN 1-84023-742-2.

    - Content of the paragraph with the word 'Fish'
    In So Long, and Thanks for All the Fish (published in 1984), Arthur
    returns home to Earth, rather surprisingly since it was destroyed when
    he left. He meets and falls in love with a girl named
    [Fenchurch](http://en.wikipedia.org/w/index.php?title=Minor_characters_from_The_Hitchhiker%27s_Guide_to_the_Galaxy%23Fenchurch), and discovers this Earth is a replacement provided by the [dolphin](http://en.wikipedia.org/w/index.php?title=Dolphin)s in their Save the Humans campaign. Eventually he rejoins Ford, who claims to have saved the Universe in the meantime, to hitch-hike one last time and see God's Final Message to His Creation. Along the way, they are joined by Marvin, the Paranoid Android, who, although 37 times older than the universe itself (what with time
    travel and all), has just enough power left in his failing body to read
    the message and feel better about it all before expiring.

    - Content of the first Title:
    The Hitchhiker's Guide to the Galaxy

    - Content of the last Title:
    Official sites
    """

    test_unit(document)


def test_unit(document: Document) -> None:
    """Check the fetched elements; active only when ODFDO_TESTING is set."""
    # only for test suite:
    if "ODFDO_TESTING" in os.environ:
        content = document.body
        print(str(content.get_note(position=0)))

        note_text = str(content.get_note(position=0))
        assert note_text.startswith(
            "1. [Gaiman, Neil](http://en.wikipedia.org/w/index.php?title=Neil_Gaiman) (2003)"
        )

        fish_text = str(content.get_paragraph(content="Fish"))
        assert fish_text.endswith("all before expiring.\n")

        first_title = str(content.get_header(position=0))
        assert first_title.startswith("The Hitchhiker's Guide")

        last_title = str(content.get_header(position=-1))
        assert last_title.startswith("Official sites")


if __name__ == "__main__":
    main()

Accessing a list of elements

Example of methods and properties to analyse a document.

These methods or properties return a list of elements:

- `body.headers`
- `body.images`
- `body.paragraphs`
- `body.get_links()`
- `body.get_notes()`
- `body.tables`
- `body.get_paragraphs(content)`
recipes/accessing_a_list_of_elements.py
#!/usr/bin/env python
"""Example of methods and properties to analyse a document.

These methods or properties return a list of elements:

    - `body.headers`
    - `body.images`
    - `body.paragraphs`
    - `body.get_links()`
    - `body.get_notes()`
    - `body.tables`
    - `body.get_paragraphs(content)`
"""
import os
import sys
from pathlib import Path

from odfdo import Document

_DOC_SEQUENCE = 75
DATA = Path(__file__).parent / "data"
SOURCE = "collection2.odt"


def read_source_document():
    """Open the document named by the first command-line argument.

    When no argument is given, the bundled sample DATA / SOURCE is used.
    """
    arguments = sys.argv
    source = arguments[1] if len(arguments) > 1 else DATA / SOURCE
    return Document(source)


def main():
    """Print counts of several kinds of elements found in the source document.

    The counts come from list-returning properties and methods of the
    document body; test_unit() is then called on the document.
    """
    document = read_source_document()

    # All queries below start from the body element of the document.
    content = document.body

    # Count the attribute names of the body that contain "get_".
    attribute_names = " ".join(dir(content))
    count_methods = attribute_names.count("get_")
    print(f"{count_methods} get methods are available")

    # Sizes of the element lists; they can be checked against the actual
    # content of the odt file.
    print("number of headings:", len(content.headers))
    print("number of images stored:", len(content.images))
    print("number of paragraphs:", len(content.paragraphs))
    print("number of links (URLs):", len(content.get_links()))
    print("number of footnotes:", len(content.get_notes()))
    # The sample document has no table:
    print("number of tables:", len(content.tables))

    # List getters accept filtering parameters; here paragraphs are filtered
    # on their content with regular expressions.
    with_fish = content.get_paragraphs(content=r"Fish")
    print("Paragraphs with 'Fish':", len(with_fish))
    with_answer = content.get_paragraphs(content=r"answer.*42")
    print(
        "Paragraphs with 'answer' and '42':",
        len(with_answer),
    )

    # Reference output of the prints above, kept for documentation only.
    _expected_result = """
    96 get methods are available
    number of headings: 29
    number of images stored: 0
    number of paragraphs: 175
    number of links (URLs): 352
    number of footnotes: 49
    number of tables: 0
    Paragraphs with 'Fish': 4
    Paragraphs with 'answer' and '42': 1
    """

    test_unit(document)


def test_unit(document: Document) -> None:
    """Check the element counts; active only when ODFDO_TESTING is set."""
    # only for test suite:
    if "ODFDO_TESTING" in os.environ:
        content = document.body
        attribute_names = " ".join(dir(content))
        assert attribute_names.count("get_") == 96
        assert len(content.headers) == 29
        assert len(content.images) == 0
        assert len(content.paragraphs) == 175
        assert len(content.get_links()) == 352
        assert len(content.get_notes()) == 49
        assert len(content.get_paragraphs(content=r"Fish")) == 4
        assert len(content.get_paragraphs(content=r"answer.*42")) == 1


if __name__ == "__main__":
    main()

Accessing other element from element like list

Accessing elements from element-like list.

Any fetched element is a XML tree context that can be queried, but only on the subtree it contains. Here are quick examples of iteration on Paragraphs and Lists from the document.

recipes/accessing_other_element_from_element_like_list.py
#!/usr/bin/env python
"""Accessing elements from element-like list.

Any fetched element is a XML tree context that can be queried, but only on the subtree it
contains. Here are quick examples of iteration on `Paragraphs` and `Lists` from the document.
"""
import os
from pathlib import Path

from odfdo import Document

_DOC_SEQUENCE = 80
DATA = Path(__file__).parent / "data"
SOURCE = DATA / "collection2.odt"


def analyse_list(document):
    """Print the number of lists in *document* and the paragraphs of one list.

    The list inspected is the one returned by get_list(position=4). When
    ODFDO_TESTING is set in the environment, the results are also asserted.
    """
    # Queries made on the body reach the whole content of the document.
    content = document.body

    # Any element can be used as a query context, limited to its own subtree.
    # The lists of the document illustrate this:
    print("Number of available lists in the document:", len(content.lists))
    print()

    list4 = content.get_list(position=4)
    print(f"The 4th list contains {len(list4.paragraphs)} paragraphs")
    print()

    # Print every paragraph of that list, numbered from 1.
    paragraphs = list4.paragraphs
    for number, paragraph in enumerate(paragraphs, start=1):
        print(number, ":", paragraph)

    # Reference output of the prints above, kept for documentation only.
    _expected_result = """
    Number of available lists in the document: 5

    The 4th list contains 9 paragraphs

    1 : [BBC Cult website](http://www.bbc.co.uk/cult/hitchhikers/),
    official website for the [TV show version](http://en.wikipedia.org/w/index.php?title=The_Hitchhiker%27s_Guide_to_the_Galaxy_%28TV_series%29)
    (includes information, links and downloads)

    2 : [BBC Radio 4 website for the 2004–2005
    series](http://www.bbc.co.uk/radio4/hitchhikers/)

    3 : [Official Movie Site](http://hitchhikers.movies.go.com/)

    4 : [The Hitchhiker's Guide to the Galaxy
    (2005 movie)](http://www.imdb.com/title/tt0371724/)at the
    [Internet Movie Database](http://en.wikipedia.org/w/index.php?title=Internet_Movie_Database)

    5 : [The Hitch Hikers Guide to the Galaxy
    (1981 TV series)](http://www.imdb.com/title/tt0081874/)at the
    [Internet Movie Database](http://en.wikipedia.org/w/index.php?title=Internet_Movie_Database)

    6 : [h2g2](http://www.bbc.co.uk/h2g2/guide/)

    7 : [Encyclopedia of Television](http://www.museum.tv/archives/etv/H/htmlH/hitch-hickers/hitch-hickers.htm)

    8 : [British Film Institute Screen Online](http://www.screenonline.org.uk/tv/id/560180/index.html)
    page devoted to the TV series

    9 : [DC Comics H2G2 site](http://www.dccomics.com/graphic_novels/?gn=1816)
    """  # noqa: RUF001

    # only for test suite:
    if "ODFDO_TESTING" not in os.environ:
        return
    assert len(content.lists) == 5
    assert len(list4.paragraphs) == 9
    assert str(paragraphs[0]).startswith("[BBC Cult website](http")
    assert str(paragraphs[8]).startswith("[DC Comics H2G2 site](http")


def main():
    """Open the sample document SOURCE and run analyse_list() on it."""
    analyse_list(Document(SOURCE))


if __name__ == "__main__":
    main()

How to add a list to a text document

Create an empty text document and add a list.

recipes/how_to_add_a_list_to_a_text_document.py
#!/usr/bin/env python
"""Create an empty text document and add a list.
"""
import os
from pathlib import Path

# Lists are a dedicated object List
from odfdo import Document, List

_DOC_SEQUENCE = 90
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "add_list"
TARGET = "document.odt"


def save_new(document: Document, name: str):
    """Store *document* under OUTPUT_DIR with the file name *name*.

    The output directory is created when absent, and the final path is
    printed before the save.
    """
    output_path = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", output_path)
    document.save(output_path, pretty=True)


def main():
    """Create a text document containing a three-item list and save it.

    The body is cleared first, test_unit() is called on the document, then
    the document is written through save_new().
    """
    document = Document("text")
    content = document.body
    content.clear()

    # The List factory accepts a Python list of strings and list items.
    items = ["chocolate", "tea", "coffee"]
    content.append(List(items))

    test_unit(document)

    save_new(document, TARGET)


def test_unit(document: Document) -> None:
    """Check the formatted text of the list; active only when ODFDO_TESTING is set."""
    # only for test suite:
    if "ODFDO_TESTING" in os.environ:
        formatted = document.get_formatted_text()
        assert formatted.strip() == "- chocolate\n- tea\n- coffee"


if __name__ == "__main__":
    main()

How to add a manual page break

Adding a manual page break to a text document.

recipes/how_to_add_a_manual_page_break.py
#!/usr/bin/env python
"""Adding a manual page break to a text document.
"""
from pathlib import Path

from odfdo import Document, PageBreak, Paragraph, Style

_DOC_SEQUENCE = 95
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "page_break"
TARGET = "document.odt"


def save_new(document: Document, name: str):
    """Save *document* in OUTPUT_DIR as *name*.

    Creates OUTPUT_DIR when it does not exist and prints the path used.
    """
    saved_path = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", saved_path)
    document.save(saved_path, pretty=True)


def main():
    """Create a document with two manual page breaks and save it.

    The first break uses the PageBreak element, the second one an empty
    paragraph carrying a style with the "fo:break-before" property.
    """
    document = Document()
    content = document.body
    content.clear()

    # First way, with the odfdo shortcuts: register the page break style,
    # then put a PageBreak element between two paragraphs.
    document.add_page_break_style()
    for element in (
        Paragraph("First paragraph"),
        PageBreak(),
        Paragraph("Second paragraph"),
    ):
        content.append(element)

    # Second way: a paragraph style that breaks the page before the paragraph,
    # applied to an empty paragraph.
    break_style = Style("paragraph", name="page_break_before")
    break_style.set_properties({"fo:break-before": "page"})
    document.insert_style(break_style)
    content.append(Paragraph("", style="page_break_before"))
    content.append(Paragraph("Third paragraph"))

    save_new(document, TARGET)


if __name__ == "__main__":
    main()

Create a basic drawing

Insert a circle and a lot of lines (a fractal) in a text document.

recipes/create_a_basic_drawing.py
#!/usr/bin/env python
"""Insert a circle and a lot of lines (a fractal) in a text document.
"""
import cmath
from pathlib import Path
from typing import Union

from odfdo import Document, EllipseShape, Header, LineShape, Paragraph

_DOC_SEQUENCE = 100
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_drawing"
TARGET = "koch.odt"

CYCLES = 4  # beware, 5 is big, 6 is too big to display...


def save_new(document: Document, name: str):
    """Write *document* to OUTPUT_DIR / *name*.

    The directory is created on demand; the destination is printed first.
    """
    file_path = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", file_path)
    document.save(file_path, pretty=True)


def cm(x: float) -> str:
    """Return *x* formatted with two decimals and the "cm" unit suffix."""
    return f"{x:.2f}cm"


# some graphic computations
class Vector:
    """Segment between two points *a* and *b* stored as complex numbers."""

    def __init__(self, a: Union[float, complex], b: Union[float, complex]):
        # Both end points are kept as given.
        self.a, self.b = a, b

    def koch_split(self) -> list:
        """Return the four consecutive Vectors replacing this segment.

        The segment is cut at one third and two thirds; a peak point is
        placed off the midpoint, and the pieces a->c, c->e, e->d, d->b are
        returned in that order.
        """
        start, end = self.a, self.b
        span = end - start
        first_cut = start + 1.0 / 3.0 * span
        second_cut = start + 2.0 / 3.0 * span
        middle = 0.5 * (start + end)
        # Multiplying by -1j turns the middle third by a quarter turn.
        peak = middle + (second_cut - first_cut) * complex(0, -1)
        return [
            Vector(start, first_cut),
            Vector(first_cut, peak),
            Vector(peak, second_cut),
            Vector(second_cut, end),
        ]

    def centimeter(self, val) -> tuple:
        """Return the (real, imag) parts of an end point as "cm" strings.

        *val* equal to 0 selects point *a*; any other value selects *b*.
        """
        point = self.a if val == 0 else self.b
        return (cm(point.real), cm(point.imag))


def koch(vector_list, cycle=2):
    """Apply koch_split() to every vector, *cycle* times in a row.

    With *cycle* <= 0 the given list is returned unchanged; otherwise each
    pass builds a new list from the pieces of the previous one.
    """
    current = vector_list
    while cycle > 0:
        current = [piece for vector in current for piece in vector.koch_split()]
        cycle -= 1
    return current


def make_fractal_coords(side, vpos):
    """Return the center and the Koch-split vectors of a triangle.

    The triangle starts at complex((17 - side) / 2, vpos), has a first side
    of length *side* along the real axis, and its three sides are refined by
    koch() with CYCLES passes.
    """
    # NOTE(review): 17 presumably stands for the usable width in cm — confirm.
    origin = complex((17 - side) / 2.0, vpos)
    base = Vector(origin, origin + complex(side, 0))
    # Third corner: at distance *side* from the origin, at an angle of pi/3.
    second = Vector(base.b, origin + cmath.rect(side, cmath.pi / 3))
    third = Vector(second.b, origin)
    # Average of the three corners.
    center = (base.a + base.b + second.b) / 3
    return center, koch([base, second, third], cycle=CYCLES)


def generate_document():
    """Build and return a text document holding a circle and the fractal lines.

    A header and one paragraph are added to the body; the paragraph receives
    an EllipseShape, a text giving the number of lines, and one LineShape per
    vector computed by make_fractal_coords().
    """
    document = Document("text")
    content = document.body

    print("Making some Koch fractal")
    content.append(Header(1, "Some Koch fractal"))

    # Adjust the "graphic" style of the document.
    # NOTE(review): the first key is spelled "svg:stroke_color" (underscore);
    # confirm odfdo maps it to the intended property.
    graphic_style = document.get_style("graphic")
    graphic_style.set_properties({"svg:stroke_color": "#0000ff"})
    graphic_style.set_properties(fill_color="#ffffcc")

    # All the shapes are anchored in this single paragraph.
    holder = Paragraph("")
    content.append(holder)

    # some computation of coordinates
    center, vector_list = make_fractal_coords(side=12.0, vpos=8.0)

    # create a circle: its position is the center shifted by the radius on
    # both axes, its size is the diameter on both axes.
    radius = 8.0
    diameter = cm(radius * 2)
    corner = center - complex(radius, radius)
    holder.append(
        EllipseShape(
            size=(diameter, cm(radius * 2)),
            position=(cm(corner.real), cm(corner.imag)),
        )
    )

    # create a drawing with a lot of lines
    holder.append(f"number of lines: {len(vector_list)}")
    for vector in vector_list:
        start_point = vector.centimeter(0)
        end_point = vector.centimeter(1)
        holder.append(LineShape(p1=start_point, p2=end_point))

    return document


def main():
    """Generate the fractal document and save it as TARGET."""
    save_new(generate_document(), TARGET)


if __name__ == "__main__":
    main()

Add private annotations to a document

Add non-printable annotations to a document.

Annotations are notes that do not appear in the document but typically on a side bar in a desktop application. So they are not printed.

recipes/add_private_annotations_to_a_document.py
#!/usr/bin/env python
"""Add not printable annotations to a document.

Annotations are notes that do not appear in the document but typically
on a side bar in a desktop application. So they are not printed.
"""
import os
from pathlib import Path

from odfdo import Document, Header, Paragraph

_DOC_SEQUENCE = 110
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "annotated"
TARGET = "annotated_document.odt"


def save_new(document: Document, name: str):
    """Save *document* to the file *name* located in OUTPUT_DIR.

    OUTPUT_DIR is created if necessary and the path is printed beforehand.
    """
    result_path = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", result_path)
    document.save(result_path, pretty=True)


def base_document():
    """Return a text document with one title, two sub-titles and two paragraphs."""
    document = Document("text")
    content = document.body

    # Elements are listed in reading order and appended one after the other.
    elements = [
        Header(1, "De la Guerre des Gaules - Livre V"),
        Header(2, "Préparatifs d'expédition en Bretagne"),
        Paragraph(
            "Sous le consulat de Lucius Domitius et d'Appius Claudius, "
            "César, quittant les quartiers d'hiver pour aller en Italie, "
            "comme il avait coutume de le faire chaque année, ordonne aux "
            "lieutenants qu'il laissait à la tête des légions de construire, "
            "pendant l'hiver, le plus de vaisseaux qu'il serait possible, "
            "et de réparer les anciens."
        ),
        Header(2, "La Bretagne"),
        Paragraph(
            "Cette île est de forme triangulaire ; l'un des côtés regarde "
            "la Gaule. Des deux angles de ce côté, l'un est au levant, "
            "vers le pays de Cantium, où abordent presque tous les vaisseaux "
            "gaulois ; l'autre, plus bas, est au midi. La longueur de ce côté "
            "est d'environ cinq cent mille pas. "
        ),
    ]
    for element in elements:
        content.append(element)
    return document


def main():
    """Insert one annotation in the base document and save the result.

    The annotation is attached to the paragraph containing "consulat". When
    ODFDO_TESTING is set, the number of annotations by "Luis" is asserted.
    """
    document = base_document()
    content = document.body
    target_paragraph = content.get_paragraph(content="consulat")

    # Annotations are inserted like notes but they are simpler.
    # Annotation arguments:
    # after   =>  The word after which the annotation is inserted.
    # body    =>  The text of the annotation itself.
    # creator =>  The author of the annotation.
    # date    =>  A datetime value, by default datetime.now().
    target_paragraph.insert_annotation(
        after="Domitius",
        body="Talking about Lucius Domitius",
        creator="Luis",
    )

    save_new(document, TARGET)

    # only for test suite:
    if "ODFDO_TESTING" not in os.environ:
        return
    assert len(content.get_annotations(creator="Luis")) == 1


if __name__ == "__main__":
    main()

Accessibility check on a document

Basic Accessibility test: check, for every picture in a document, if there is:

  • a title (svg_title),
  • a description (svg_description)

or, at least, some caption text.

See the test file planes.odt and the result of the script.

recipes/accessibility_check_on_a_document.py
#!/usr/bin/env python
"""Basic Accessibility test: check, for every picture in a document, if
there is:

  - a title (svg_title),
  - a description (svg_description)

or, at least, some caption text.

See test file `planes.odt` file and the result of the script.
"""
import os
import sys
from pathlib import Path

from odfdo import Document

_DOC_SEQUENCE = 200
DATA = Path(__file__).parent / "data"
SOURCE = "planes.odt"


def read_source_document():
    """Return the Document given as first command-line argument.

    Without argument, the sample file DATA / SOURCE is opened instead.
    """
    if len(sys.argv) > 1:
        source = sys.argv[1]
    else:
        source = DATA / SOURCE
    return Document(source)


def main():
    """Report, for every image of the document, its title, description and caption.

    For each image the parent frame provides the name, svg_title and
    svg_description; a caption is read from the frame's parent when that
    parent is a "draw:a" element. Totals are printed at the end and, when
    ODFDO_TESTING is set, asserted.
    """
    document = read_source_document()
    # We want the images of the document.
    images = document.body.images

    nb_images = len(images)
    nb_title = nb_description = nb_caption = 0

    for image in images:
        # Keep only what follows the last "/" of the image URL.
        filename = image.url.rpartition("/")[2]
        print(f"Image filename: {filename}")

        holder = image.parent
        name = holder.name
        title = holder.svg_title
        description = holder.svg_description
        if title:
            nb_title += 1
        if description:
            nb_description += 1
        print(f"Name: {name}, title: {title}, description: {description}")

        # this part requires some ODF know how:
        wrapper = holder.parent
        if wrapper.tag != "draw:a":
            continue
        caption = wrapper.get_attribute("office:name")
        if not caption:
            continue
        nb_caption += 1
        print(f"Caption: {caption}")

    print()
    print(f"The document displays {nb_images} pictures:")
    print(f" - pictures with a title: {nb_title}")
    print(f" - pictures with a description: {nb_description}")
    print(f" - pictures with a caption: {nb_caption}")

    # Reference output of the prints above, kept for documentation only.
    _expected_result = """
    Image filename: 100000000000013B000000D345859F604DCE636A.jpg
    Name: graphics2, title: Spitfire, general view, description: Green spitfire in a hall, view from left front.
    Image filename: 100000000000013B000000D3F908DA0A939D2F4B.jpg
    Name: graphics3, title: Spitfire, detail, description: None
    Image filename: 100000000000013B000000D375CEBFD6D7CB7CE9.jpg
    Name: graphics1, title: None, description: None

    The document displays 3 pictures:
     - pictures with a title: 2
     - pictures with a description: 1
     - pictures with a caption: 0
    """

    # only for test suite:
    if "ODFDO_TESTING" not in os.environ:
        return
    assert nb_images == 3
    assert nb_title == 2
    assert nb_description == 1
    assert nb_caption == 0


if __name__ == "__main__":
    main()

Add logo on presentation

Insert an image (e.g. the logo of an event, organization or a Creative Commons attribution) with size x,y at position x2,y2 on a number of slides in a presentation slide deck.

recipes/add_logo_on_presentation.py
#!/usr/bin/env python
"""Insert an image (e.g. the logo of an event, organization or a Creative Commons
attribution) with size `x,y` at position `x2,y2` on a number of slides in a
presentation slide deck.
"""
from pathlib import Path

# reading image size requires a graphic library
from PIL import Image

from odfdo import Document, Frame

_DOC_SEQUENCE = 250
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "add_logo"
TARGET = "presentation.odp"
DATA = Path(__file__).parent / "data"
SOURCE = DATA / "presentation_wo_logo.odp"
LOGO = DATA / "newlogo.png"


def save_new(document: Document, name: str):
    """Save ``document`` as ``name`` inside OUTPUT_DIR.

    OUTPUT_DIR is created when missing and the destination path is printed
    before the document is saved with ``pretty=True``.
    """
    destination = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", destination)
    document.save(destination, pretty=True)


def make_image_size(path, size):
    """Return the display size of the image at ``path`` fitted in a square.

    The pixel dimensions of the image are scaled, keeping the aspect ratio,
    so that the largest side equals ``size`` (expressed in centimetres).

    Returns:
        A ``(width, height)`` tuple of strings with two decimals and a
        ``cm`` suffix, e.g. ``("4.00cm", "2.68cm")``.
    """
    # The context manager closes the image file as soon as its size is read.
    with Image.open(path) as image:
        width, height = image.size
    ratio = max(width / size, height / size)
    return (f"{width / ratio:.2f}cm", f"{height / ratio:.2f}cm")


def add_logo(presentation):
    """Append a frame showing the LOGO image to every slide of ``presentation``.

    The logo file is added once to the document; each slide receives its own
    frame, with a title and a description, pointing to that file.
    """
    logo_size = make_image_size(LOGO, 4.0)
    body = presentation.body
    logo_uri = presentation.add_file(str(LOGO))

    # Same frame settings for all slides
    frame_options = {
        "image": logo_uri,
        "text": "",  # Text over the image object
        "size": logo_size,  # Display size of image
        "anchor_type": "page",
        "page_number": None,
        "position": ("1.50cm", "1.50cm"),
        "style": None,
    }

    for slide in body.get_draw_pages():
        logo_frame = Frame.image_frame(**frame_options)
        logo_frame.svg_title = "New Logo"
        logo_frame.svg_description = "The new logo with blue background"
        slide.append(logo_frame)


def main():
    """Open SOURCE, add the logo on its slides and save the result as TARGET."""
    presentation = Document(SOURCE)
    add_logo(presentation)
    save_new(presentation, TARGET)


if __name__ == "__main__":
    main()

Get pictures from document odt

Get all the pictures embedded in an .odt file.

recipes/get_pictures_from_document_odt.py
#!/usr/bin/env python
"""Get all the pictures embeded in an .odt file.
"""
import sys
from pathlib import Path
from pprint import pformat

from odfdo import Document

_DOC_SEQUENCE = 260
DATA = Path(__file__).parent / "data"
# ODF export of Wikipedia article Hitchhiker's Guide to the Galaxy (CC-By-SA)
# Remark: the document is badly made: the pictures are not displayed in the
# text, but are still inside the document!
SOURCE = "collection.odt"
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "found_pics"


def read_source_document():
    """Open the document given as first command line argument.

    Without argument, the sample document DATA / SOURCE is opened.
    """
    arguments = sys.argv[1:]
    source = arguments[0] if arguments else DATA / SOURCE
    return Document(source)


def main():
    """Print the parts and pictures of the document, then export the pictures.

    Each image element found in the document body is copied into OUTPUT_DIR
    under the file name of its URL. Image elements without URL, or whose
    content cannot be found in the document, are skipped instead of
    aborting the whole export.
    """
    doc = read_source_document()
    # show the list the content of the document parts
    parts = doc.parts
    print("Parts:")
    print(pformat(parts))
    print()

    # We want the images of the document.
    body = doc.body
    found_pics = body.images
    print("Pics :")
    print(pformat(found_pics))
    print()

    # we use the get_part function from odfdo to get the actual content
    # of the image, to copy the images out of the .odt file:
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    for pic in found_pics:
        # where is the image actual content in the file:
        url = pic.url
        if not url:
            # nothing to extract for an image element without link
            continue
        try:
            image_content = doc.get_part(url)
        except KeyError:
            # NOTE(review): KeyError is assumed to be what get_part raises
            # for a missing part -- confirm against the odfdo documentation
            print("- not found inside document:", url)
            continue
        if image_content is None:
            continue
        destination_path = OUTPUT_DIR / Path(url).name
        destination_path.write_bytes(image_content)

    print(f"Files in {OUTPUT_DIR}:")
    for file in OUTPUT_DIR.glob("*"):
        print(file.name)


if __name__ == "__main__":
    main()

Change image in many documents

Change an image in many ODF files.

This recipe is suitable for the scenario where an organization is moving from one company logo to another and needs to replace the logo in several hundred existing documents.

recipes/change_image_in_many_documents.py
#!/usr/bin/env python
"""Change an image in many ODF files.

This recipe is suitable for the scenario where an organization
is moving from one company logo to another and needs to replace
the logo in several hundred existing documents.
"""
from hashlib import sha256
from pathlib import Path

from odfdo import Document

counter_image = 0
counter_odf = 0
counter_hit = 0

_DOC_SEQUENCE = 270
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "new_logo"
DATA = Path(__file__).parent / "data"
OLD_PRESENTATIONS = DATA / "old_presentations"
OLD_LOGO = OLD_PRESENTATIONS / "oldlogo.png"
NEW_LOGO = DATA / "newlogo.png"


def save_modified(document: Document):
    """Save ``document`` in OUTPUT_DIR under the file name it was loaded from."""
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    original_path = Path(document.container.path)
    destination = OUTPUT_DIR / original_path.name
    print("Saving:", destination)
    document.save(destination)


def footprint(content: bytes) -> str:
    """Return the SHA-256 digest of ``content`` as a hexadecimal string."""
    hasher = sha256()
    hasher.update(content)
    return hasher.hexdigest()


def update_document(
    path: Path,
    old_hash: str,
    new_content: bytes,
    stats: dict,
):
    """Replace the old image by the new one in the ODF document at ``path``.

    Args:
        path: candidate path; anything that is not a regular file is ignored.
        old_hash: ``footprint()`` of the image content to replace.
        new_content: bytes stored in place of every matching image.
        stats: counters updated in place, keys ``"files"``, ``"odf_files"``
            and ``"updated_files"``.

    The document is saved (through ``save_modified``) only when at least one
    image was replaced.
    """
    if not path.is_file():
        # directories (also yielded by a recursive glob) are not files to scan
        return
    stats["files"] += 1
    if not path.suffix.lower().startswith(".od"):
        return
    try:
        document = Document(path)
    except Exception:
        # deliberate best effort: a file that cannot be opened as a
        # document is silently skipped
        return

    stats["odf_files"] += 1
    document_changed = False
    for image in document.body.images:
        image_url = image.url
        if not image_url:
            continue
        try:
            image_content = document.get_part(image_url)
        except KeyError:
            print("- not found inside document:", path, end=" ")
            print("  image URL:", image_url)
            continue
        if footprint(image_content) == old_hash:
            document.set_part(image_url, new_content)
            document_changed = True
    if document_changed:
        save_modified(document)
        stats["updated_files"] += 1


def update_logos():
    """Replace the old logo in every file found under OLD_PRESENTATIONS.

    Returns the dict of counters filled by ``update_document``.
    """
    reference_hash = footprint(OLD_LOGO.read_bytes())

    # making the new image content: the new logo is added to a scratch
    # document, then read back as a part
    scratch = Document("text")
    new_content = scratch.get_part(scratch.add_file(str(NEW_LOGO)))

    stats = dict.fromkeys(("files", "odf_files", "updated_files"), 0)
    for candidate in OLD_PRESENTATIONS.glob("**/*"):
        update_document(candidate, reference_hash, new_content, stats)
    return stats


def main():
    """Run the logo replacement and print the resulting counters."""
    stats = update_logos()
    report = (
        ("Files", "files"),
        ("ODF files", "odf_files"),
        ("Updated files", "updated_files"),
    )
    for label, key in report:
        print(f"{label}: {stats[key]}")


if __name__ == "__main__":
    main()

Concatenate presentations

Concatenate several presentations (including presentations found in sub directories), possibly merge styles and images. Result for style may vary.

recipes/concatenate_presentations.py
#!/usr/bin/env python
"""Concatenate several presentations (including presentations found in sub
directories), possibly merge styles and images. Result for style may vary.
"""
from pathlib import Path

from odfdo import Document

_DOC_SEQUENCE = 280
DATA = Path(__file__).parent / "data"
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "concatenate"
TARGET = "presentation.odp"


def save_new(document: Document, name: str):
    """Save ``document`` as ``name`` inside OUTPUT_DIR (created when missing)."""
    destination = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", destination)
    document.save(destination, pretty=True)


def main():
    """Concatenate the presentations and save the result as TARGET."""
    save_new(concatenate_presentations(), TARGET)


def concatenate_presentations():
    """Return a new presentation made of all ``.odp`` files found under DATA.

    The presentations (sub directories included) are processed in sorted
    path order, so that the order of the slides in the result is the same
    on every run: a raw glob yields paths in arbitrary order.
    """
    concat_presentation = Document("presentation")
    concat_presentation.body.clear()
    concat_presentation.delete_styles()

    presentation_paths = sorted(DATA.glob("**/*.odp"))
    for presentation_path in presentation_paths:
        add_presentation(concat_presentation, presentation_path)
    count = len(presentation_paths)

    nb_slides = len(concat_presentation.body.get_draw_pages())
    print(f"{count} presentations concatenated, {nb_slides} slides.")

    return concat_presentation


def add_presentation(concat_presentation, path):
    """Append the slides of the presentation at ``path`` to ``concat_presentation``.

    Using odfdo for:
    - open .odp document
    - copy content and merge styles

    A file that cannot be opened is ignored.
    """
    try:
        source = Document(path)
    except Exception:
        return
    concat_presentation.merge_styles_from(source)

    target_body = concat_presentation.body
    target_manifest = concat_presentation.manifest
    source_manifest = source.manifest
    source_slides = source.body.get_draw_pages()
    print(f"- {path.name} has {len(source_slides)} slides")

    for original_slide in source_slides:
        slide_copy = original_slide.clone
        # the images of the slide must be copied too:
        for picture in slide_copy.images:
            picture_uri = picture.url
            picture_type = source_manifest.get_media_type(picture_uri)
            target_manifest.add_full_path(picture_uri, picture_type)
            concat_presentation.set_part(picture_uri, source.get_part(picture_uri))
        # append slide, expecting nothing good about its final style
        target_body.append(slide_copy)


if __name__ == "__main__":
    main()

Make a presentation from pictures of a text document

Open a .odt file with pictures in it, find and analyse all the images, create a new .odp presentation, display all the pictures in the presentation, one image per frame.

recipes/make_a_presentation_from_pictures_of_a_text_document.py
#!/usr/bin/env python
"""Open a .odt file with pictures in it, find and analyse all the images,
create a new .odp presentation, display all the pictures in the presentation,
one image per frame.
"""
import os
from pathlib import Path
from tempfile import mkstemp

# analyzing embedded image need Pillow library
from PIL import Image

from odfdo import Document, DrawPage, Frame

_DOC_SEQUENCE = 285
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "presentation_images_in_odt"
TARGET = "presentation.odp"
DATA = Path(__file__).parent / "data"
SOURCE = DATA / "collection.odt"


def embedded_image_ratio(href, part):
    """Return the width / height ratio of an image embedded in a document.

    Args:
        href: path of the image inside the document, used only in the
            printed message.
        part: raw bytes of the image.

    The pixel size of the image is printed as a side effect.
    """
    # Local import: the image is decoded straight from memory, so no
    # temporary file can be left behind and the image is always closed.
    from io import BytesIO

    with Image.open(BytesIO(part)) as image:
        width, height = image.size
    print(f"image {href} , size : {width}x{height}")
    return width / height


def save_new(document: Document, name: str):
    """Save ``document`` as ``name`` inside OUTPUT_DIR (created when missing)."""
    destination = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", destination)
    document.save(destination, pretty=True)


def main():
    """Generate the presentation and save it as TARGET."""
    save_new(generate_document(), TARGET)

    # Output expected on the console, kept for reference only:
    _expected_result = """
    image Pictures/12918371211855030272.jpe , size : 333x386
    image Pictures/12918371212102410240.jpe , size : 200x350
    image Pictures/12918371212184750080.jpe , size : 384x552
    image Pictures/12918371212196450304.jpe , size : 373x576
    image Pictures/12918371212450449408.jpe , size : 400x596
    image Pictures/12918371212536940544.jpe , size : 800x1195
    image Pictures/12918371212580190208.jpe , size : 561x282
    image Pictures/12918371212597118976.jpe , size : 660x515
    image Pictures/12918371212741570560.jpe , size : 328x504
    """


def generate_document():
    """Return a presentation showing every picture of the SOURCE document.

    For each image element found in the body of SOURCE, a slide is created
    with a legend (text frame) and the picture (image frame), and the image
    content is copied into the new presentation.
    """
    # Open the input document
    doc_source = Document(SOURCE)

    # Making of the output Presentation document :
    presentation = Document("presentation")

    # Presentation got a body in which elements are stored
    presentation_body = presentation.body
    presentation_body.clear()
    presentation_manifest = presentation.manifest

    # First, get all image elements available in document:
    images_source = doc_source.body.images
    manifest_source = doc_source.manifest

    max_border = 16.0  # max size of the greatest border, in cm

    for image in images_source:
        # we use the get_part function from odfdo to get the actual content
        # of the images, with the URI link to the image as argument
        uri = image.url
        part = doc_source.get_part(uri)  # actual image content
        name = uri.split("/")[-1]  # lets make a file name for image

        # Compute the display size of the image on the final page: the
        # greatest border is max_border, the other one follows the ratio
        ratio = embedded_image_ratio(uri, part)
        if ratio > 1.0:
            width, height = max_border, max_border / ratio
        else:
            width, height = max_border * ratio, max_border

        # Create an underlying page for the image and the text
        page = DrawPage("page " + name)

        # Create a frame for the image
        image_frame = Frame.image_frame(
            image=uri,
            text="",  # Text over the image object
            size=(f"{width}cm", f"{height}cm"),  # Display size of image
            anchor_type="page",
            page_number=None,
            # an ODF length has no space between the value and its unit
            position=("3.5cm", "3.5cm"),
            style=None,
        )

        # Add some text object somewhere on the frame, with a text frame
        legend = f"Image {name} from Wikipedia document / {SOURCE.name}"
        text_frame = Frame.text_frame(
            legend,
            size=("26cm", "2cm"),
            position=("0.5cm", "0.5cm"),
            style="Standard",
            text_style="Standard",
        )

        # Append all the components, do not forget to add the actual image
        # file into the presentation file with set_part
        page.append(text_frame)
        page.append(image_frame)
        presentation_body.append(page)
        # for the same operation from a local filesystem image, just use:
        # presentation.add_file(path)
        media_type = manifest_source.get_media_type(uri)
        presentation_manifest.add_full_path(uri, media_type)
        # the content was already read above, no need to fetch it again
        presentation.set_part(uri, part)

    return presentation


if __name__ == "__main__":
    main()

Make presentation from images

Create a presentation from some images in a given directory, where each image is placed at the center of its own page and scaled to either the maximum available size, a preferred maximum size, or to cover the full page (losing some of the image).

recipes/make_presentation_from_images.py
#!/usr/bin/env python
"""Create a presentation from a some images in a given directory,
where each image is put on the center of its own page scaled to either
the maximum available size, prefered maximum size, or cover the full
page and lose some info.
"""
from pathlib import Path

# analyzing embedded image need Pillow library
from PIL import Image

from odfdo import Document, DrawPage, Frame

_DOC_SEQUENCE = 286
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "presentation_from_images"
TARGET = "presentation.odp"
IMAGES = Path(__file__).parent / "data" / "images"
MAX_SIZE = 15.0  # feel free to customize
CROP_SIZE = False  # feel free to customize

# Size (in cm) of a slide : (default page-layout)
SLIDE_W, SLIDE_H = 28.0, 21.0  # 4/3 screen
# FIXME: this is the default page-layout.
# - Changing the style of the page-layout by program is not done in this script
# - an other way, merging with external page-layout/master-page requires
#   extra files, out of the scope for this script.


def save_new(document: Document, name: str):
    """Save ``document`` as ``name`` inside OUTPUT_DIR (created when missing)."""
    destination = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", destination)
    document.save(destination)


def main():
    """Build the presentation from the images and save it as TARGET.

    Raises:
        SystemExit: with status 1 when no presentation could be built.
    """
    presentation = make_presentation()
    if presentation is None:
        print("Something went wrong.")
        # SystemExit does not depend on the ``exit`` helper installed by the
        # site module, and a non-zero status reports the failure.
        raise SystemExit(1)
    save_new(presentation, TARGET)


def make_presentation():
    """Return a presentation with one slide per collected image.

    Returns None (after printing a message) when no image was collected.
    """
    images_pool = collect_images()
    if not images_pool:  # unable to find images
        print("No image found !")
        return None

    # Creation of the output Presentation document, starting from an
    # empty body
    presentation = Document("presentation")
    body = presentation.body
    body.clear()

    # One page per image, the page holding a single frame with the image
    for info in images_pool:
        file_name = info.path.name
        # add the file to the document
        uri = presentation.add_file(str(info.path))
        page = DrawPage("Page " + file_name)
        page.append(
            Frame.image_frame(
                image=uri,
                name=file_name,
                text="",  # Text over the image object
                size=(info.disp_w, info.disp_h),  # Display size of image
                anchor_type="page",
                page_number=None,
                position=(info.pos_x, info.pos_y),
                style=None,
            )
        )
        body.append(page)

    return presentation


# Principle:
# - original images are left unmodified by the script
# - only the size at which they should appear is computed
# - later, the display engine (say LibreOffice) will merge this display
#   information with other information, like the size of the page
#   (page-layout), and should act like a mask against the "big" cropped image.
class ImageInfo:
    """Display geometry of one image file on a slide.

    All attributes except ``path`` are None until :meth:`adjust` is called;
    ``size`` remains None when the file cannot be read as an image.
    """

    def __init__(self, path: Path):
        self.path = path
        self.size = None  # (width, height) as reported by the image library
        self.disp_w = self.disp_h = None  # display size, strings in cm
        self.pos_x = self.pos_y = None  # position on the slide, strings in cm

    def adjust(self):
        """Read the image size and compute its display size and position.

        The scaling depends on the module settings: MAX_SIZE (when set)
        bounds the greatest side, else CROP_SIZE makes the image cover the
        slide, else the image is fitted inside the slide. The image is then
        centered on a slide of SLIDE_W x SLIDE_H.
        """
        try:
            # the context manager closes the file once the size is known
            with Image.open(self.path) as image:
                self.size = image.size
        except OSError:
            # Not an image ?
            self.size = None
            return
        width, height = self.size
        if MAX_SIZE:
            ratio = max(width / MAX_SIZE, height / MAX_SIZE)
        elif CROP_SIZE:
            ratio = min(width / SLIDE_W, height / SLIDE_H)
        else:
            ratio = max(width / SLIDE_W, height / SLIDE_H)
        display_w = width / ratio
        display_h = height / ratio
        # ".2f": two decimals (the former "2f" spec was a minimum width of 2
        # with the default six decimals)
        self.disp_w = f"{display_w:.2f}cm"
        self.disp_h = f"{display_h:.2f}cm"
        self.pos_x = f"{(SLIDE_W - display_w) / 2:.2f}cm"
        self.pos_y = f"{(SLIDE_H - display_h) / 2:.2f}cm"
        print(self.path.name, self.disp_w, self.disp_h)


def collect_images():
    """Return the ImageInfo of every readable image file found under IMAGES.

    The files (sub directories included) are visited in sorted path order,
    so that the slides come in the same order on every run: a raw glob
    yields paths in arbitrary order. Files for which ``ImageInfo.adjust``
    leaves ``size`` unset are left out.
    """
    pool = []
    for path in sorted(IMAGES.glob("**/*")):
        if not path.is_file():
            continue
        image_info = ImageInfo(path)
        image_info.adjust()
        if image_info.size:
            pool.append(image_info)
    return pool


if __name__ == "__main__":
    main()

Make a presentation from text with different styles

Each line of the text becomes a slide of the presentation; the style changes depending on the length of the line.

recipes/make_a_presentation_from_text_with_different_styles.py
#!/usr/bin/env python
"""Each line of the text becomes a slide of the presentation, we change of style
depending on the length of text line.
"""
from pathlib import Path

from odfdo import Document, DrawPage, Frame, Style

_DOC_SEQUENCE = 287
# lst = open(sys.argv[1]).readlines()
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "styled_prez"
TARGET = "presentation.odp"
CONTENT = """123
azertyuiop
azertyuiop azertyuiop
azertyuiop azertyuiop azertyuiop
azertyuiop azertyuiop azertyuiop azertyuiop
azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop
azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop
azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop
azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop
azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop
end.
""".splitlines()


def save_new(document: Document, name: str):
    """Save ``document`` as ``name`` inside OUTPUT_DIR (created when missing)."""
    destination = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", destination)
    document.save(destination, pretty=True)


def main():
    """Generate the styled presentation and save it as TARGET."""
    save_new(generate_document(), TARGET)


def generate_document():
    """Return a presentation with one slide per line of CONTENT.

    A base graphic style "Gloup48" and variants "Gloup<size>" are inserted
    in the document; the longer the line, the smaller the font size of the
    style applied to its text frame.
    """
    presentation = Document("presentation")
    body = presentation.body
    body.clear()

    # Creating a smooth style for the graphic item
    graphic_options = {
        "name": "Gloup48",
        "parent": "standard",
        "stroke": "none",
        "fill_color": "#b3b3b3",
        "textarea_vertical_align": "middle",
        "padding_top": "1cm",
        "padding_bottom": "1cm",
        "padding_left": "1cm",
        "padding_right": "1cm",
        "line_distance": "0cm",
        "guide_overhang": "0cm",
        "guide_distance": "0cm",
    }
    base_style = Style("graphic", **graphic_options)
    base_style.set_properties(area="paragraph", align="center")
    base_style.set_properties(
        area="text",
        color="#dd0000",
        text_outline="false",
        font="Liberation Sans",
        font_family="Liberation Sans",  # compatibility
        font_style_name="Bold",
        family_generic="swiss",
        size="48pt",
        weight="bold",
    )
    presentation.insert_style(base_style)

    # Making a lot of variations: a text longer than thresholds[i]
    # characters uses the font size variants[i]
    variants = [10, 11, 14, 16, 20, 24, 32, 40, 44]
    thresholds = [95, 80, 65, 50, 40, 30, 20, 10, 5]
    for point_size in variants:
        variant_style = base_style.clone
        variant_style.set_attribute("style:name", f"Gloup{point_size}")
        variant_style.set_properties(area="text", size=f"{point_size}pt")
        presentation.insert_style(variant_style)

    for number, line in enumerate(CONTENT, start=1):
        page = DrawPage(f"{number} - {line[:10]}")
        # choosing some style: first matching threshold, else the base size
        point_size = next(
            (
                variant
                for limit, variant in zip(thresholds, variants)
                if len(line) > limit
            ),
            48,
        )
        style_name = f"Gloup{point_size}"
        page.append(
            Frame.text_frame(
                line,
                size=("24cm", "2cm"),
                position=("2cm", "8cm"),
                style=style_name,
                text_style=style_name,
            )
        )
        body.append(page)

    return presentation


if __name__ == "__main__":
    main()

Extract and reorder slides

Create a new presentation from a previous one by extracting some slides, in a different order.

recipes/extract_and_reorder_slides.py
#!/usr/bin/env python
"""Create a new presentation from a previous one by extracting some slides,
in a different order.
"""
from pathlib import Path

from odfdo import Document

_DOC_SEQUENCE = 290
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "presentation_extracted"
TARGET = "presentation.odp"
DATA = Path(__file__).parent / "data"
SOURCE = DATA / "presentation_base.odp"


def save_new(document: Document, name: str):
    """Save ``document`` as ``name`` inside OUTPUT_DIR (created when missing)."""
    destination = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", destination)
    document.save(destination, pretty=True)


def main():
    """Copy some slides of SOURCE, in a new order, into a new presentation.

    The slide numbers of ``new_order`` are 1-based; a number for which no
    slide can be retrieved is ignored.
    """
    new_order = (3, 5, 2, 2)
    presentation_base = Document(SOURCE)
    extracted = Document("presentation")

    # Important, copy styles too:
    extracted.delete_styles()
    extracted.merge_styles_from(presentation_base)
    extracted.body.clear()

    for slide_number in new_order:
        try:
            slide = presentation_base.body.get_draw_page(position=slide_number - 1)
        except Exception:  # noqa: S112
            continue
        if slide is not None:
            extracted.body.append(slide.clone)

    save_new(extracted, TARGET)


if __name__ == "__main__":
    main()

Change values of a chart inside a document

Open a text document with an embedded chart and change some values.

recipes/change_values_of_a_chart_inside_a_document.py
#!/usr/bin/env python
"""Open a text document with an embedded chart and change some values.
"""

from pathlib import Path

# for cell style
from odfdo import Document

_DOC_SEQUENCE = 295
DATA = Path(__file__).parent / "data"
SOURCE = DATA / "chart.odt"
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "modified_chart"
TARGET = "modified_chart.odt"


def save_new(document: Document, name: str) -> None:
    """Save ``document`` as ``name`` inside OUTPUT_DIR (created when missing)."""
    destination = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", destination)
    document.save(destination, pretty=True)


def main() -> None:
    """Open SOURCE, change some values of its chart and save it as TARGET."""
    chart_document = Document(SOURCE)
    change(chart_document)
    save_new(chart_document, TARGET)


def change(document: Document) -> None:
    """Print the parts and chart values of ``document``, then change some cells.

    The values are read from the first table of the part
    "Object 1/content.xml".
    """
    # list the parts if needed
    print(document.parts)
    # -> ['mimetype', 'ObjectReplacements/Object 1', 'Object 1/meta.xml', 'Object 1/styles.xml', 'Object 1/content.xml', ...

    chart_content = document.get_part("Object 1/content.xml")
    table = chart_content.body.get_table(0)

    # if needed, get the values:
    print(table.get_values())
    # -> [
    #     [None, "", "Column 2", "Column 3"],
    #     ["Row 1", Decimal("NaN"), 10, 20],
    #     ["Row 2", Decimal("NaN"), 30, 40],
    #     ["Row 3", Decimal("NaN"), 50, 360],
    #     ["Row 4", Decimal("NaN"), Decimal("9.02"), Decimal("6.2")],
    # ]

    # change some values
    changes = (
        ("A2", "label changed"),
        ("D3", 4000),
        ("D4", 4321),
    )
    for coordinates, new_value in changes:
        table.set_value(coordinates, new_value)


if __name__ == "__main__":
    main()

Add text span styles

Transform an unstyled document into a multi-styled document by changing the size and color of parts of words.

recipes/add_text_span_styles.py
#!/usr/bin/env python
"""Transform a not styled document into a multi styled document,
by changing size and color of each parts of words.
"""
import os
from itertools import chain
from pathlib import Path

from odfdo import Document, Style

_DOC_SEQUENCE = 300
DATA = Path(__file__).parent / "data"
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "styled3"
TARGET = "dormeur_styled.odt"
SOURCE = DATA / "dormeur_notstyled.odt"
RANDOM_SEED = 1234


class SimpleRandom:
    """Quick and dirty reproducible random generator for tests.

    The state ``current`` is multiplied by 16807 modulo MODULUS at each draw.
    """

    MODULUS = 2**31 - 1
    MAXI = 2**31 - 2

    def __init__(self):
        self.current = 16807

    def _next_number(self):
        """Advance the internal state by one step."""
        product = 16807 * self.current
        self.current = product % self.MODULUS

    def set_seed(self, seed=16807):
        """Reset the internal state to ``seed``."""
        self.current = seed

    def randint(self, max_value):
        """Advance the state and return an integer scaled by ``max_value``."""
        self._next_number()
        scaled = self.current * max_value / self.MAXI
        return int(scaled + 1)


def save_new(document: Document, name: str):
    """Save ``document`` as ``name`` inside OUTPUT_DIR (created when missing)."""
    destination = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", destination)
    document.save(destination, pretty=True)


def color_hex(r, g, b):
    """Return the "#RRGGBB" string (uppercase hexadecimal) of the components."""
    channels = (r, g, b)
    return "#" + "".join(f"{channel:02X}" for channel in channels)


def style_name_index(index):
    """Return the name of the random style number ``index``, e.g. "rnd_12"."""
    return "rnd_{}".format(index)


def generate_random_styles(document, rnd):
    """Generate 64 random text styles and insert them in ``document``.

    The styles are named with ``style_name_index`` for the indexes 1 to 64,
    which covers the values 1..64 that ``rnd.randint(64)`` returns (65 in
    the single boundary case) when a style is picked for a word. Each style
    gets a random color drawn from ``rnd`` and a font size in points growing
    with its index.
    """
    for index in range(1, 65):
        # randint(256) can return 256, which does not fit in the two
        # hexadecimal digits of a color component: clamp it to 255
        red, green, blue = (min(rnd.randint(256), 255) for _ in range(3))
        style = Style(
            "text",
            name=style_name_index(index),
            color=color_hex(red, green, blue),
            # a font size needs a unit, like the other styles of the recipes
            size=f"{8 + index / 5}pt",
        )
        document.insert_style(style)


def main():
    """Open SOURCE, apply the random span styles and save it as TARGET."""
    styled_document = Document(SOURCE)
    add_styles(styled_document)
    save_new(styled_document, TARGET)


def add_styles(document):
    """Apply a randomly chosen style to every distinct word of ``document``.

    The random styles are first inserted in the document; then, for each
    word (in sorted order), a span with one of these styles is set on the
    matching text of all paragraphs and headers.
    """
    rnd = SimpleRandom()
    body = document.body

    generate_random_styles(document, rnd)

    unique_words = set(str(body).split())
    for word in sorted(unique_words):
        chosen_style = style_name_index(rnd.randint(64))
        for text_block in chain(body.paragraphs, body.headers):
            # the word itself is used as the regex selecting the text
            text_block.set_span(chosen_style, regex=word)

    # only for test suite:
    if "ODFDO_TESTING" in os.environ:
        span_count = len(body.spans)
        print(span_count)
        assert span_count == 157


if __name__ == "__main__":
    main()

How to copy some style from another document

Minimal example of copying a style from another document.

recipes/how_to_copy_some_style_from_another_document.py
"""Minimal example of copy of a style from another document.
"""

from odfdo import Document, Style

_DOC_SEQUENCE = 310


def main():
    """Insert in a new text document a style fetched from another document.

    The style "Yellow Highlight" is read from sample_styles.odt; when the
    document cannot be opened, or does not provide the style, a very simple
    replacement style is created instead.
    """
    document = Document("text")
    body = document.body
    body.clear()

    # Let's imagine the sample_styles.odt document contains an interesting style.
    #
    # So let's first fetch the style:
    try:
        odfdo_styles = Document("sample_styles.odt")
        highlight = odfdo_styles.get_style("text", display_name="Yellow Highlight")
    except Exception:
        highlight = None
    # NOTE(review): get_style is assumed to return None when no style
    # matches (per the odfdo documentation); that case needs the fallback too
    if highlight is None:
        # let's create some *very simple* text style.
        highlight = Style(
            "text", display_name="Yellow Highlight", color="blue", italic=True
        )

    # We made some assumptions here:
    #
    # 'text'              : The family of the style, text styles apply on
    #                       individual characters.
    # "Yellow Highlight"  : The name of the style as we see it in a desktop
    #                       application.
    # display_name        : Styles have an internal name ("Yellow_20_Highlight"
    #                       in this example) but we gave the display_name
    #                       instead.
    #
    # We now have a style object that we add to our own collection:
    document.insert_style(highlight, automatic=True)


if __name__ == "__main__":
    main()

Copy style from another document

Copy the styles from an existing document.

For a more advanced version, see the odfdo-style script.

recipes/copy_style_from_another_document.py
#!/usr/bin/env python
"""Copy the styles from an existing document.

For more advanced version, see the odfdo-style script.
"""
import sys
from pathlib import Path

from odfdo import Document

_DOC_SEQUENCE = 320
DATA = Path(__file__).parent / "data"
SOURCE = "collection2.odt"
# copied here from the odfdo package:
STYLE_SOURCE = DATA / "lpod_styles.odt"
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "styled1"
TARGET = "document.odt"


def read_source_document():
    """Open the document given as first command line argument.

    Without argument, the sample document DATA / SOURCE is opened.
    """
    arguments = sys.argv[1:]
    source = arguments[0] if arguments else DATA / SOURCE
    return Document(source)


def save_new(document: Document, name: str):
    """Save ``document`` as ``name`` inside OUTPUT_DIR (created when missing)."""
    destination = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", destination)
    document.save(destination, pretty=True)


def main():
    """Replace the styles of the source document by those of STYLE_SOURCE."""
    # The document providing the styles we are interested in:
    style_provider = Document(STYLE_SOURCE)

    # The document whose styles are changed:
    target_document = read_source_document()

    # We could change only some styles, but here we want a clean basis,
    # so the existing styles are removed before merging the new ones:
    target_document.delete_styles()
    target_document.merge_styles_from(style_provider)

    save_new(target_document, TARGET)


if __name__ == "__main__":
    main()

Create basic text styles

Create basic text styles.

recipes/create_basic_text_styles.py
#!/usr/bin/env python
"""Create basic text styles.
"""
import os
from pathlib import Path

from odfdo import Document, Header, Paragraph, Style

_DOC_SEQUENCE = 330
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_styles"
TARGET = "document.odt"


def save_new(document: Document, name: str) -> None:
    """Save ``document`` as ``name`` inside OUTPUT_DIR (created when missing)."""
    destination = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", destination)
    document.save(destination, pretty=True)


def _create_style_header_blue(document: Document) -> None:
    """Insert the paragraph style "header_blue" (bold, blue, 160%, outline level 1)."""
    options = {
        "name": "header_blue",
        "display_name": "header_blue",
        "parent_style": "Heading",
        "area": "text",
        "bold": True,
        "color": "blue",
        "size": "160%",
    }
    header_style = Style(family="paragraph", **options)
    header_style.set_attribute("style:default-outline-level", "1")
    document.insert_style(header_style)


def _create_style_header_navy(document: Document) -> None:
    """Insert the paragraph style "header_navy" (bold, navy, 120%, outline level 2)."""
    options = {
        "name": "header_navy",
        "display_name": "header_navy",
        "parent_style": "Heading",
        "area": "text",
        "bold": True,
        "color": "navy",
        "size": "120%",
    }
    header_style = Style(family="paragraph", **options)
    header_style.set_attribute("style:default-outline-level", "2")
    document.insert_style(header_style)


def _create_style_steel(document: Document) -> None:
    """Insert the "steel" paragraph style: yellow text on a dark blue fill."""
    steel = Style(
        family="paragraph",
        area="text",
        name="steel",
        display_name="steel",
        color="yellow",
        background_color="darkblue",
    )
    # Additional properties stored in the "graphic" area of the style.
    graphic_fill = {
        "draw:fill": "solid",
        "draw:fill-color": "darkblue",
    }
    steel.set_properties(area="graphic", properties=graphic_fill)
    document.insert_style(steel)


def _create_style_special(document: Document) -> None:
    """Insert the "special" paragraph style into document.

    Text part: Courier New font on an AntiqueWhite background.
    Paragraph part: 2cm side margins, 150% line height, centered text.
    """
    special = Style(
        family="paragraph",
        area="text",
        name="special",
        display_name="special",
        font="Courier New",
        font_family="Courier New",
        font_style_name="Regular",
        font_pitch="fixed",
        background_color="AntiqueWhite",
    )
    # Layout properties stored in the "paragraph" area of the style.
    layout = {
        "fo:margin-left": "2cm",
        "fo:margin-right": "2cm",
        "fo:line-height": "150%",
        "fo:text-align": "center",
    }
    special.set_properties(area="paragraph", properties=layout)
    document.insert_style(special)


def _create_style_bold_gold(document: Document) -> None:
    """Insert the "bold_gold" text style (bold, darkgoldenrod) into document."""
    settings = {
        "family": "text",
        "name": "bold_gold",
        "display_name": "bold_gold",
        "bold": True,
        "color": "darkgoldenrod",
    }
    document.insert_style(Style(**settings))


def _create_style_italic_lime(document: Document) -> None:
    """Insert the "italic_lime" text style (italic, 120%, lime) into document."""
    settings = {
        "family": "text",
        "name": "italic_lime",
        "display_name": "italic_lime",
        "italic": True,
        "size": "120%",
        "color": "lime",
    }
    document.insert_style(Style(**settings))


def add_styles(document: Document) -> None:
    """Insert all the styles of this recipe into document, in a fixed order."""
    creators = (
        _create_style_header_blue,
        _create_style_header_navy,
        _create_style_steel,
        _create_style_special,
        _create_style_bold_gold,
        _create_style_italic_lime,
    )
    for create in creators:
        create(document)


def add_content(document: Document) -> None:
    """Append a level 1 header and three styled sub sections to the body.

    Each sub section is a level 2 header followed by one paragraph; the
    paragraphs use either spans or a paragraph style created by this recipe.
    """
    first_text = (
        "Lorem ipsum dolor sit amet, consectetuer "
        "adipiscing elit. Sed non risus. "
        "Suspendisse lectus tortor, dignissim sit amet, "
        "adipiscing nec, ultricies sed, dolor."
    )
    second_text = (
        "Cras elementum ultrices diam. Maecenas ligula massa, "
        "varius a, semper congue, euismod non, mi. Proin porttitor, "
        "orci nec nonummy molestie, enim est eleifend mi, non "
        "fermentum diam nisl sit amet erat. Duis semper."
    )
    third_text = (
        "Duis arcu massa, scelerisque vitae, consequat in, pretium a, "
        "enim. Pellentesque congue. Ut in risus volutpat libero "
        "pharetra tempor. Cras vestibulum bibendum augue. Praesent "
        "egestas leo in pede. Praesent blandit odio eu enim. "
        "Pellentesque sed dui ut augue blandit sodales."
    )

    content = document.body
    content.append(Header(1, "First level header", style="header_blue"))

    # First section: default paragraph style, two kinds of spans.
    content.append(Header(2, "First sub header", style="header_navy"))
    first = Paragraph(first_text)
    first.set_span("bold_gold", regex="dolor")
    first.set_span("italic_lime", regex=r"\w+ing")
    content.append(first)

    # Second section: "steel" paragraph style plus a span.
    content.append(Header(2, "Second sub header", style="header_navy"))
    second = Paragraph(second_text, style="steel")
    second.set_span("italic_lime", regex="semper")
    content.append(second)

    # Third section: "special" paragraph style only.
    content.append(Header(2, "Third sub header", style="header_navy"))
    content.append(Paragraph(third_text, style="special"))


def create_document() -> Document:
    """Return a new document holding the recipe styles and sample content."""
    new_document = Document()
    # Start from an empty body before adding anything.
    new_document.body.clear()
    add_styles(new_document)
    add_content(new_document)
    return new_document


def main() -> None:
    """Create the styled document, run the test checks, then save it."""
    styled = create_document()
    test_unit(styled)
    save_new(styled, TARGET)


def test_unit(document: Document) -> None:
    """Check the serialized styles; does nothing unless ODFDO_TESTING is set."""
    # only for test suite:
    if "ODFDO_TESTING" not in os.environ:
        return
    # (family, style name, fragments expected in the serialized style)
    expectations = (
        (
            "paragraph",
            "header_blue",
            (
                'name="header_blue"',
                'color="#0000FF"',
                'font-weight="bold"',
                'font-size="160%"',
            ),
        ),
        (
            "paragraph",
            "header_navy",
            (
                'name="header_navy"',
                'color="#000080"',
                'font-weight="bold"',
                'font-size="120%"',
            ),
        ),
        (
            "paragraph",
            "steel",
            (
                'name="steel"',
                'color="#FFFF00"',
                "graphic-properties",
                'draw:fill-color="#00008B"',
            ),
        ),
        (
            "paragraph",
            "special",
            (
                'name="special"',
                'background-color="#FAEBD7"',
                "Courier",
                'line-height="150%"',
                'margin-left="2cm"',
                'margin-right="2cm"',
                'text-align="center"',
            ),
        ),
        (
            "text",
            "bold_gold",
            (
                'name="bold_gold"',
                'color="#B8860B"',
                'font-weight="bold"',
            ),
        ),
        (
            "text",
            "italic_lime",
            (
                'name="italic_lime"',
                'color="#00FF00"',
                'font-style="italic"',
                'font-size="120%"',
            ),
        ),
    )
    for family, style_name, fragments in expectations:
        serialized = document.get_style(family, style_name).serialize()
        for fragment in fragments:
            assert fragment in serialized


# Run the recipe only when this file is executed as a script.
if __name__ == "__main__":
    main()

How to apply a style to a paragraph

Minimal example of how to add a styled paragraph to a document.

recipes/how_to_apply_a_style_to_a_paragraph.py
"""Minimal example of how to add a styled paragraph to a document.
"""

from odfdo import Document, Paragraph

# NOTE(review): looks like the ordering index of this recipe in the
# generated documentation — confirm.
_DOC_SEQUENCE = 335


def main():
    """Append one paragraph using the "highlight" style to an empty text body."""
    document = Document("text")
    content = document.body
    content.clear()

    # We know a style named "highlight" is available:
    styled = Paragraph("Highlighting the word", style="highlight")
    content.append(styled)


# Run the recipe only when this file is executed as a script.
if __name__ == "__main__":
    main()

Change paragraph styles methods

Many examples of how to change paragraph (and in-paragraph) styles, either by changing the paragraph style itself or by using Span to select parts of the paragraph. Includes several ways to create or import styles.

recipes/change_paragraph_styles_methods.py
#!/usr/bin/env python
"""Many examples of how to change paragraph (and in-paragraph) styles, either
by changing the paragraph style itself or by using Span to select parts
of the paragraph. Includes several ways to create or import styles.
"""
import os
from collections.abc import Iterator
from itertools import cycle
from pathlib import Path

from odfdo import Document, Element, Header, Paragraph, Style

# NOTE(review): looks like the ordering index of this recipe in the
# generated documentation — confirm.
_DOC_SEQUENCE = 340
# Directory (next to this script) where save_new() writes its output.
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "change_styles"
# Directory of the input files used by the recipe.
DATA = Path(__file__).parent / "data"
# Filler text, read once at import time; iter_lorem() splits it in sentences.
LOREM = (DATA / "lorem.txt").read_text(encoding="utf8")
# Document (in DATA) from which a style is imported.
STYLED_SOURCE = "lpod_styles.odt"
# File names of the document saved before and after the style changes.
TARGET_BEFORE = "document_before.odt"
TARGET_AFTER = "document_after.odt"


def save_new(document: Document, name: str) -> None:
    """Save document under OUTPUT_DIR with the file name ``name``.

    OUTPUT_DIR is created when missing and the destination path is printed
    before saving.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR / name
    print("Saving:", destination)
    document.save(destination)


def main() -> None:
    """Build the base document, save it, restyle it and save it again."""
    document = Document("odt")

    # Unstyled version, saved for comparison.
    make_base_document(document)
    save_new(document, TARGET_BEFORE)

    # Make the styles available (three different ways), then use them.
    for prepare in (
        add_some_styles,
        add_style_from_xml,
        import_style_from_other_doc,
    ):
        prepare(document)
    apply_styles(document)

    test_unit(document)
    save_new(document, TARGET_AFTER)


def iter_lorem() -> Iterator[str]:
    """Return an endless iterator over the stripped sentences of LOREM.

    Newlines are replaced by spaces, the text is split on "." and the
    resulting pieces are repeated forever.
    """
    single_line = LOREM.replace("\n", " ")
    sentences = (piece.strip() for piece in single_line.split("."))
    return cycle(sentences)


def make_base_document(document: Document) -> None:
    """Fill document with headers and paragraphs of lorem ipsum content.

    The body receives one level 1 header, then three level 2 headers each
    followed by five paragraphs of two sentences.
    """
    content = document.body
    content.clear()
    sentences = iter_lorem()
    content.append(Header(1, next(sentences)))
    for _section in range(3):
        content.append(Header(2, next(sentences)))
        for _paragraph in range(5):
            first = next(sentences)
            second = next(sentences)
            content.append(Paragraph(f"{first}. {second}."))


def add_some_styles(document) -> None:
    """Insert six automatic styles, identified by their display name.

    Two are paragraph styles ("bold-blue", "italic-red"), four are text
    styles ("green", "bold-yellow-blue", "bold-white-black",
    "italic-red-yellow").
    """
    # Always simpler to copy styles from an actual existing .odt file, but
    # styles can also be described from scratch. Insertion order is kept.
    descriptions = (
        {
            "family": "paragraph",
            "display_name": "bold-blue",
            "color": "blue",
            "bold": True,
        },
        {
            "family": "paragraph",
            "display_name": "italic-red",
            "color": "red",
            "bold": True,
            "italic": True,
        },
        {
            "family": "text",
            "display_name": "green",
            "background_color": "green",
        },
        {
            "family": "text",
            "display_name": "bold-yellow-blue",
            "color": "yellow",
            "background_color": "blue",
            "bold": True,
        },
        {
            "family": "text",
            "display_name": "bold-white-black",
            "color": "white",
            "background_color": "black",
            "bold": True,
        },
        {
            "family": "text",
            "display_name": "italic-red-yellow",
            "color": "red",
            "background_color": "yellow",
            "bold": True,
            "italic": True,
        },
    )
    for description in descriptions:
        new_style = Style(area="text", **description)
        document.insert_style(new_style, automatic=True)


def add_style_from_xml(document: Document) -> None:
    """Insert the "custom" paragraph style, built from its XML definition."""
    # Styles can be defined by an XML definition.
    definition = (
        '<style:style style:name="custom" '
        'style:display-name="custom" '
        'style:family="paragraph" '
        'style:parent-style-name="Text">'
        '<style:paragraph-properties fo:margin-left="2cm"/>'
        '<style:text-properties fo:color="#808080" loext:opacity="100%" '
        'fo:font-size="16pt" fo:font-style="normal" '
        'style:text-underline-style="solid" '
        'style:text-underline-width="auto" '
        'style:text-underline-color="font-color" '
        'fo:font-weight="bold"/>'
        "</style:style>"
    )
    custom_style = Element.from_tag(definition)
    document.insert_style(custom_style)


def import_style_from_other_doc(document: Document) -> None:
    """Copy the "Yellow Highlight" text style of STYLED_SOURCE into document."""
    source_path = DATA / STYLED_SOURCE
    imported = Document(source_path).get_style(
        "text", display_name="Yellow Highlight"
    )
    document.insert_style(imported, automatic=True)


def apply_styles(document: Document) -> None:
    """Apply a fixed sequence of style changes to the content of document.

    Styles are looked up by family and display name; the steps run in the
    order listed at the end of the function.
    """

    def name_of(family: str, display_name: str) -> str:
        # Internal name of the style having this display name.
        found = document.get_style(family=family, display_name=display_name)
        return found.name

    def change_all_headers():
        green = name_of("text", "green")
        # Header styles should carry hints about the numbering level,
        # so here the style is applied through a span instead.
        for header in document.body.headers:
            header.set_span(green, offset=0)

    def change_all_paragraphs():
        bold_blue = name_of("paragraph", "bold-blue")
        for paragraph in document.body.paragraphs:
            paragraph.style = bold_blue

    def change_some_paragraph():
        italic_red = name_of("paragraph", "italic-red")
        for position in (3, 5, 7):
            document.body.get_paragraph(position).style = italic_red

    def apply_span_regex():
        yellow = name_of("text", "bold-yellow-blue")
        white = name_of("text", "bold-white-black")
        for paragraph in document.body.paragraphs:
            paragraph.set_span(yellow, regex=r"tortor|ipsum")
            paragraph.set_span(white, regex=r"A\w+")

    def apply_span_offset():
        red = name_of("text", "italic-red-yellow")
        document.body.get_paragraph(2).set_span(red, offset=9, length=22)

    def apply_custom_style():
        document.body.get_paragraph(13).style = "custom"

    def apply_imported_style():
        paragraph = document.body.get_paragraph(14)
        highlight = name_of("text", "Yellow Highlight")
        # Feature: to avoid highlighting spaces, create one span per word.
        for start, end in paragraph.search_all(r"\w+"):
            paragraph.set_span(highlight, offset=start, length=end - start)

    steps = (
        change_all_headers,
        change_all_paragraphs,
        change_some_paragraph,
        apply_span_regex,
        apply_span_offset,
        apply_custom_style,
        apply_imported_style,
    )
    for step in steps:
        step()


def test_unit(document: Document) -> None:
    """Check paragraphs and style usage; does nothing unless ODFDO_TESTING is set."""
    # only for test suite:
    if "ODFDO_TESTING" not in os.environ:
        return
    assert len(list(document.body.paragraphs)) == 15
    # Every one of these styles must be used by at least one element.
    used_styles = (
        ("paragraph", ("bold-blue", "italic-red", "custom")),
        (
            "text",
            ("green", "bold-yellow-blue", "bold-white-black", "Yellow Highlight"),
        ),
    )
    for family, display_names in used_styles:
        for display_name in display_names:
            style = document.get_style(family=family, display_name=display_name)
            assert document.get_styled_elements(style.name)
    highlight = document.get_style(family="text", display_name="Yellow Highlight")
    assert len(document.get_styled_elements(highlight.name)) == 21


# Run the recipe only when this file is executed as a script.
if __name__ == "__main__":
    main()

Delete parts of a text document

Idea coming from issue #49: Deleting content from one point to another in a .odt document.

recipes/delete_parts_of_a_text_document.py
#!/usr/bin/env python
"""Idea coming from issue #49:
Deleting content from one point to another in a .odt document.
"""

import os
from pathlib import Path

from odfdo import Document, Element, Header, Paragraph

# NOTE(review): looks like the ordering index of this recipe in the
# generated documentation — confirm.
_DOC_SEQUENCE = 400
# Directory (next to this script) where save_new() writes its output.
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "delete_content"
# File names of the document saved before and after the deletion.
TARGET_INITIAL = "document_initial.odt"
TARGET_FINAL = "document_final.odt"


class KeepingState:
    """Mutable holder of the current step of the filtering pass."""

    def __init__(self, initial: str) -> None:
        # Current step; keep_element() compares it with "before" and
        # "deleting", and moves it to "deleting" then "after".
        self.step = initial


def save_new(document: Document, name: str):
    """Save document under OUTPUT_DIR with the file name ``name``.

    OUTPUT_DIR is created when missing and the destination path is printed
    before saving (with ``pretty=True``).
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR / name
    print("Saving:", destination)
    document.save(destination, pretty=True)


def create_base_document() -> Document:
    """Return a text document made of a title and four parts (A to D).

    Each part is a level 2 header followed by paragraphs of filler text:
    two paragraphs for the parts A, B and C, three for the part D.
    """
    document = Document("text")
    body = document.body
    body.clear()
    content = [
        Header(1, "Some title"),
        Header(2, "part A"),
        Paragraph(
            "Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Sed non risus."
        ),
        Paragraph(
            "Suspendisse lectus tortor, dignissim sit amet, adipiscing "
            "nec, ultricies sed, dolor. Cras elementum ultrices diam. "
            "Maecenas ligula massa, varius a, semper congue, euismod non, mi."
        ),
        Header(2, "part B"),
        Paragraph(
            "Proin porttitor, orci nec nonummy molestie, enim est eleifend "
            "mi, non fermentum diam nisl sit amet erat. Duis semper. "
            "Duis arcu massa, scelerisque vitae, consequat in, pretium a, "
            "enim. Pellentesque congue. Ut in risus volutpat libero pharetra tempor."
        ),
        Paragraph(
            "Cras vestibulum bibendum augue. Praesent egestas leo in pede. "
            "Praesent blandit odio eu enim. Pellentesque sed dui ut augue "
            "blandit sodales. Vestibulum ante ipsum primis in faucibus orci "
            "luctus et ultrices posuere cubilia Curae; Aliquam nibh."
        ),
        Header(2, "part C"),
        Paragraph(
            "Mauris ac mauris sed pede pellentesque fermentum. "
            "Maecenas adipiscing ante non diam sodales hendrerit. Ut "
            "velit mauris, egestas sed, gravida nec, ornare ut, mi."
        ),
        Paragraph(
            "Aenean ut orci vel massa suscipit pulvinar. Nulla sollicitudin. "
            "Fusce varius, ligula non tempus aliquam, nunc turpis "
            "ullamcorper nibh, in tempus sapien eros vitae ligula. "
            "Pellentesque rhoncus nunc et augue. Integer id felis. Curabitur "
            "aliquet pellentesque diam. Integer quis metus vitae elit "
            "lobortis egestas."
        ),
        Header(2, "part D"),
        Paragraph(
            "Morbi vel erat non mauris convallis vehicula. Nulla et sapien. "
            "Integer tortor tellus, aliquam faucibus, convallis id, congue "
            # The trailing space keeps this sentence separated from the next.
            "eu, quam. Mauris ullamcorper felis vitae erat. "
            "Proin feugiat, augue non elementum posuere, metus purus "
            "iaculis lectus, et tristique ligula justo vitae magna. Aliquam "
            "convallis sollicitudin purus."
        ),
        Paragraph(
            "Praesent aliquam, enim at fermentum mollis, ligula massa "
            "adipiscing nisl, ac euismod nibh nisl eu lectus. Fusce "
            "vulputate sem at sapien. Vivamus leo. Aliquam euismod "
            "libero eu enim. Nulla nec felis sed leo placerat imperdiet."
        ),
        Paragraph(
            "Aenean suscipit nulla in justo. Suspendisse cursus rutrum augue. "
            "Nulla tincidunt tincidunt mi. Curabitur iaculis, lorem vel "
            "rhoncus faucibus, felis magna fermentum augue, et ultricies "
            "lacus lorem varius purus. Curabitur eu amet."
        ),
    ]
    # Elements are appended in reading order.
    for element in content:
        body.append(element)
    return document


def keep_element(state: KeepingState, elem: Element) -> bool:
    """Tell whether elem must be kept, updating state.step along the way.

    The step goes from "before" to "deleting" on the header containing
    "part B", then from "deleting" to "after" on the first paragraph whose
    text starts with "Aenean". Elements seen while "deleting" are dropped.
    """
    # Keep everything until the "part B" header.
    reached_part_b = isinstance(elem, Header) and "part B" in str(elem)
    if state.step == "before" and reached_part_b:
        state.step = "deleting"
    # Delete everything until the paragraph starting with "Aenean".
    if (
        state.step == "deleting"
        and isinstance(elem, Paragraph)
        and str(elem).startswith("Aenean")
    ):
        state.step = "after"
    return state.step != "deleting"


def main():
    """Save the full document, remove a range of elements, save it again."""
    document = create_base_document()
    save_new(document, TARGET_INITIAL)
    # Removing the part B and half of the part C: collect the children to
    # keep, then rebuild the body from them.
    state = KeepingState("before")
    kept = [
        child for child in document.body.children if keep_element(state, child)
    ]
    document.body.clear()
    document.body.extend(kept)
    save_new(document, TARGET_FINAL)
    test_unit(document)


def test_unit(document: Document) -> None:
    """Check two remaining paragraphs; does nothing unless ODFDO_TESTING is set."""
    # only for test suite:
    if "ODFDO_TESTING" not in os.environ:
        return
    # (paragraph position, expected beginning of its text)
    for position, beginning in ((0, "Lorem"), (3, "Morbi")):
        text = str(document.body.get_paragraph(position=position))
        print(text)
        assert text.startswith(beginning)


# Run the recipe only when this file is executed as a script.
if __name__ == "__main__":
    main()

Create color chart in spreadsheet

Create some color chart in a spreadsheet using cells styles. (adapted from the odfdo library test cases)

recipes/create_color_chart_in_spreadsheet.py
#!/usr/bin/env python
"""Create some color chart in a spreadsheet using cells styles.
(adapted from the odfdo library test cases)
"""

from pathlib import Path

from odfdo import (
    Cell,
    Document,
    Row,
    Style,
    Table,
    __version__,
    create_table_cell_style,
    make_table_cell_border_string,
    rgb2hex,
)

# NOTE(review): looks like the ordering index of this recipe in the
# generated documentation — confirm.
_DOC_SEQUENCE = 420
# Directory (next to this script) where save_new() writes its output.
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "chart"
# File name of the generated spreadsheet.
TARGET = "color_chart.ods"


def save_new(document: Document, name: str):
    """Save document under OUTPUT_DIR with the file name ``name``.

    OUTPUT_DIR is created when missing and the destination path is printed
    before saving.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR / name
    print("Saving:", destination)
    document.save(destination)


def hello_messages():
    """Print a short banner: odfdo version and name of the generated file."""
    banner = (
        "odfdo installation test",
        f" Version           : {__version__}",
        "",
        f"Generating color chart in {TARGET}",
        "...",
    )
    for line in banner:
        print(line)


def generate_chart():
    """Return a spreadsheet document containing a table of colored cells.

    The "chart" table has one row per ``y`` in ``range(0, 256, 8)`` and one
    cell per ``x`` in ``range(0, 256, 32)``. Each cell uses the background
    color ``(x, y, (x + y) % 256)`` and shows the hexadecimal form of that
    color as its value. All rows share one row style and all columns share
    one column style.
    """
    document = Document("spreadsheet")
    body = document.body
    body.clear()
    table = Table("chart")

    # The borders are identical for every cell: build them only once.
    border_rl = make_table_cell_border_string(thick="0.20cm", color="white")
    border_bt = make_table_cell_border_string(
        thick="0.80cm",
        color="white",
    )

    for y in range(0, 256, 8):
        row = Row()
        for x in range(0, 256, 32):
            cell_value = (x, y, (x + y) % 256)
            style = create_table_cell_style(
                color="grey",
                background_color=cell_value,
                border_right=border_rl,
                border_left=border_rl,
                border_bottom=border_bt,
                border_top=border_bt,
            )
            name = document.insert_style(style=style, automatic=True)
            cell = Cell(value=rgb2hex(cell_value), style=name)
            row.append_cell(cell)
        table.append_row(row)

    # Row and column styles are set once, after the table is complete,
    # rather than inserting new styles and restyling everything on each
    # iteration of the loop above.
    row_style = Style("table-row", height="1.80cm")
    name_style_row = document.insert_style(style=row_style, automatic=True)
    for row in table.rows:
        row.style = name_style_row
        table.set_row(row.y, row)

    col_style = Style("table-column", width="3.6cm")
    name_style_col = document.insert_style(style=col_style, automatic=True)
    for column in table.columns:
        column.style = name_style_col
        table.set_column(column.x, column)

    body.append(table)

    return document


def main():
    """Print the banner, generate the color chart and save it as TARGET."""
    hello_messages()
    chart = generate_chart()
    save_new(chart, TARGET)


# Run the recipe only when this file is executed as a script.
if __name__ == "__main__":
    main()

Get cell background color

Read the background color of a table cell.

recipes/get_cell_background_color.py
#!/usr/bin/env python
"""Read the background color of a table cell.
"""
import os
import sys
from pathlib import Path

from odfdo import Document

# NOTE(review): looks like the ordering index of this recipe in the
# generated documentation — confirm.
_DOC_SEQUENCE = 440
# Directory of the input files used by the recipe.
DATA = Path(__file__).parent / "data"
# Default input file (in DATA), used when no command line argument is given.
SOURCE = "cell_color.ods"


def read_source_document():
    """Open the document named by the first command line argument.

    Without argument, the default file DATA / SOURCE is opened.
    """
    source = sys.argv[1] if len(sys.argv) > 1 else DATA / SOURCE
    return Document(source)


def main():
    """Print the background color of several cells of the first sheet."""
    doc = read_source_document()

    def background(coord, *default):
        # Background color of a cell of the table 0 (first sheet).
        return doc.get_cell_background_color(0, coord, *default)

    color_b2 = background("b2")
    print("Color for B2", color_b2)

    color_b3 = background("b3")
    print("Color for B3", color_b3)

    color_c3 = background("c3")
    print("Color for C3", color_c3)

    # Without an explicit default, "#ffffff" is returned:
    color_d3 = background("d3")
    print("Color for D3", color_d3)

    # Another default can be given:
    color_e3 = background("e3", "#123456")
    print("Color for e3", color_e3)

    # Reading a cell very far from the used area:
    color_far = background((1000, 10000))
    print("Color for far", color_far)

    # only for test suite:
    if "ODFDO_TESTING" not in os.environ:
        return
    checks = (
        (color_b2, "#2a6099"),
        (color_b3, "#ff4000"),
        (color_c3, "#ffff00"),
        (color_d3, "#ffffff"),
        (color_e3, "#123456"),
        (color_far, "#ffffff"),
    )
    for actual, expected in checks:
        assert actual == expected


# Run the recipe only when this file is executed as a script.
if __name__ == "__main__":
    main()

Extract a sub table from some big table

Create a table of 1000 lines and 100 columns, extract a sub table of 100 lines 26 columns, save the result in a spreadsheet document.

recipes/extract_a_sub_table_from_some_big_table.py
#!/usr/bin/env python
"""Create a table of 1000 lines and 100 columns, extract a sub table
of 100 lines 26 columns, save the result in a spreadsheet document.
"""
import os
from pathlib import Path

from odfdo import Document, Row, Table

# NOTE(review): looks like the ordering index of this recipe in the
# generated documentation — confirm.
_DOC_SEQUENCE = 450
# Directory (next to this script) where save_new() writes its output.
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "extract_table"
# File name of the saved spreadsheet.
TARGET = "document.ods"


def save_new(document: Document, name: str):
    """Save document under OUTPUT_DIR with the file name ``name``.

    OUTPUT_DIR is created when missing and the destination path is printed
    before saving (with ``pretty=True``).
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR / name
    print("Saving:", destination)
    document.save(destination, pretty=True)


def syracuse(n: int) -> int:
    """Return the next term after n: n // 2 when n is even, else 3 * n + 1."""
    is_even = n % 2 == 0
    return n // 2 if is_even else 3 * n + 1


def generate_big_table(table_name) -> Document:
    """Return a spreadsheet holding one table of 1000 rows and 100 columns.

    The row number ``line`` starts with the value ``line``; each following
    cell is the result of syracuse() applied to the previous cell.
    """
    lines = 1000
    cols = 100

    spreadsheet = Document("spreadsheet")
    body = spreadsheet.body
    body.clear()
    table = Table(table_name)
    body.append(table)

    for line in range(lines):
        sequence = []
        term = line
        for _column in range(cols):
            sequence.append(term)
            term = syracuse(term)
        row = Row()
        row.set_values(sequence)
        table.append(row)

    return spreadsheet


def main():
    """Extract twice the same area of the big table, then save the document.

    The area covers the rows 800 to 899 and the columns 50 to 75. The first
    extraction copies values row by row, the second one copies cells with
    get_cells() / set_cells(). Both results are appended as new tables.
    """
    table_name = "Big Table"
    spreadsheet = generate_big_table(table_name)
    body = spreadsheet.body
    big_table = body.get_table(name=table_name)
    print("Size of Big Table :", big_table.size)

    # First method, extract 100 rows of 26 columns value by value:
    table1 = Table("Extract 1")
    for y in range(800, 900):
        source_row = big_table.get_row(y)
        new_row = Row()
        new_row.set_values([source_row.get_value(x) for x in range(50, 76)])
        table1.append(new_row)
    body.append(table1)
    print("Size of extracted table 1 :", table1.size)

    # Second method, copy the cells of the area in one call:
    table2 = Table("Extract 2")
    area_cells = big_table.get_cells(coord=(50, 800, 75, 899))
    table2.set_cells(coord=(0, 0), cells=area_cells)
    body.append(table2)
    print("Size of extracted table 2 :", table2.size)

    test_unit(spreadsheet)

    save_new(spreadsheet, TARGET)

    # Kept as a reminder of the sizes printed above.
    _expected_result = """
Size of Big Table : (100, 1000)
Size of extracted table 1 : (26, 100)
Size of extracted table 2 : (26, 100)
"""


def test_unit(spreadsheet: Document) -> None:
    """Check the size of the first two tables; needs ODFDO_TESTING to be set."""
    # only for test suite:
    if "ODFDO_TESTING" not in os.environ:
        return
    tables = spreadsheet.body
    # (table position, expected size)
    for position, expected_size in ((0, (100, 1000)), (1, (26, 100))):
        assert tables.get_table(position=position).size == expected_size


# Run the recipe only when this file is executed as a script.
if __name__ == "__main__":
    main()

Make a basic spreadsheet

Create a spreadsheet with one table and some data, strip the table and compute the table size.

recipes/make_a_basic_spreadsheet.py
#!/usr/bin/env python
"""Create a spreadsheet with one table and some data, strip the table
and compute the table size.
"""
from pathlib import Path

from odfdo import Document, Table

# NOTE(review): looks like the ordering index of this recipe in the
# generated documentation — confirm.
_DOC_SEQUENCE = 460
# Directory (next to this script) where save_new() writes its output.
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_ods"
# File name of the saved spreadsheet.
TARGET = "spreadsheet.ods"


def save_new(document: Document, name: str):
    """Save document under OUTPUT_DIR with the file name ``name``.

    OUTPUT_DIR is created when missing and the destination path is printed
    before saving (with ``pretty=True``).
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR / name
    print("Saving:", destination)
    document.save(destination, pretty=True)


def main():
    """Generate the spreadsheet and save it as TARGET."""
    save_new(generate_document(), TARGET)


def generate_document():
    """Return a spreadsheet with one filled table, printing details on the way.

    The table starts with 20 columns and 3 rows, gets 2 more rows, receives
    values in its first 10 columns and is finally stripped.
    """
    # Creating an empty spreadsheet document:
    document = Document("spreadsheet")

    # Each sheet of a spreadsheet is a table. Setting the width (columns)
    # and height (rows) from the beginning is not mandatory, but it is a
    # good practice, since odfdo does not check the actual existence of
    # cells.
    body = document.body
    body.clear()
    table = Table("First Table", width=20, height=3)
    body.append(table)

    # A table contains rows, more can be appended.
    for _extra in range(2):
        table.append_row()
    print("rows in the table (3+2):", len(table.rows))

    # A row contains cells.
    for current in table.rows:
        print("row, nb of cells ", current.y, len(current.cells))

    print("nb of cells of the last row:", len(table.get_row(-1).cells))

    # Cells can hold different kinds of values: text for the first three
    # rows, integers for the next two.
    for y in range(5):
        for x in range(10):
            content = f"cell {x} {y}" if y < 3 else x * 100 + y
            table.set_value((x, y), content)

    # Before saving the document, the unused columns can be stripped:
    print("table size:", table.size)
    table.rstrip()
    print("table size after strip:", table.size)
    print("nb of cells of the last row:", len(table.get_row(-1).cells))
    print("Content of the table:")
    print(table.to_csv())

    return document


# Run the recipe only when this file is executed as a script.
if __name__ == "__main__":
    main()

Make spreadsheet with named ranges

Create a spreadsheet with two tables, using some named ranges.

recipes/make_spreadsheet_with_named_ranges.py
#!/usr/bin/env python
"""Create a spreadsheet with two tables, using some named ranges.
"""
from pathlib import Path

from odfdo import Document, Table

# NOTE(review): looks like the ordering index of this recipe in the
# generated documentation — confirm.
_DOC_SEQUENCE = 470
# Directory (next to this script) where save_new() writes its output.
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "named_range"
# File name of the saved spreadsheet.
TARGET = "spreadsheet.ods"


def save_new(document: Document, name: str):
    """Save document under OUTPUT_DIR with the file name ``name``.

    OUTPUT_DIR is created when missing and the destination path is printed
    before saving (with ``pretty=True``).
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR / name
    print("Saving:", destination)
    document.save(destination, pretty=True)


def main():
    """Generate the spreadsheet and save it as TARGET."""
    save_new(generate_document(), TARGET)


def generate_document():
    """Return a spreadsheet with two tables sharing two named ranges.

    The first table holds ten squares and their total; the ranges
    "squares_values" and "total" are defined on it. The second table
    describes both ranges (name, range, table, content).
    """
    document = Document("spreadsheet")
    body = document.body
    body.clear()
    table = Table("First Table")
    body.append(table)
    # Populate the column B with the squares of 1..10:
    for index in range(10):
        table.set_value((1, index), (index + 1) ** 2)
    table.set_value("A11", "Total:")

    # Define a named range for the 10 values:
    table.set_named_range("squares_values", "B1:B10", table.name)

    # A single cell range can use the notation "B11" or (1, 10):
    table.set_named_range("total", (1, 10), table.name)

    # Read the values of a named range, write the value of another one:
    values = table.get_named_range("squares_values").get_values(flat=True)
    table.get_named_range("total").set_value(sum(values))

    # Use the named ranges from a second table:
    table2 = Table("Second Table")
    body.append(table2)

    def describe(named_range, label_col: str, value_col: str) -> None:
        # Write name, range and table of the named range on the rows 1 to
        # 3, and the "content:" label on the row 4.
        table2.set_value(f"{label_col}1", "name:")
        table2.set_value(f"{value_col}1", named_range.name)
        table2.set_value(f"{label_col}2", "range:")
        table2.set_value(f"{value_col}2", str(named_range.crange))
        table2.set_value(f"{label_col}3", "from table:")
        table2.set_value(f"{value_col}3", named_range.table_name)
        table2.set_value(f"{label_col}4", "content:")

    total_range = table2.get_named_range("total")
    describe(total_range, "A", "B")
    table2.set_value("B4", total_range.get_value())

    squares_range = table2.get_named_range("squares_values")
    describe(squares_range, "D", "E")
    # The "E4:4" notation is a little hack for the area starting at E4 on
    # the row 4.
    table2.set_values(values=[squares_range.get_values(flat=True)], coord="E4:4")

    print("Content of the table1:")
    for shown in (table, table2):
        print(shown.name)
        print(shown.to_csv())

    # Of course the named ranges are stored in the document:
    return document


# Run the recipe only when this file is executed as a script.
if __name__ == "__main__":
    main()

Introspecting elements

Demo of quick introspecting of a document’s elements.

recipes/introspecting_elements.py
#!/usr/bin/env python
"""Demo of quick introspecting of a document's elements.
"""
import sys
from pathlib import Path

from odfdo import Document

# NOTE(review): looks like the ordering index of this recipe in the
# generated documentation — confirm.
_DOC_SEQUENCE = 480
# Directory of the input files used by the recipe.
DATA = Path(__file__).parent / "data"
# Default input file (in DATA), used when no command line argument is given.
# ODF export of Wikipedia article Hitchhiker's Guide to the Galaxy (CC-By-SA) :
SOURCE = "collection2.odt"


def read_source_document():
    """Open the document named by the first command line argument.

    Without argument, the default file DATA / SOURCE is opened.
    """
    arguments = sys.argv[1:]
    source = arguments[0] if arguments else DATA / SOURCE
    return Document(source)


def main():
    """Print the neighbours and the serialization of a few elements."""
    # Everything is reached from the body, the XML element holding the
    # document content.
    body = read_source_document().body

    # Elements are nodes of an XML tree: each one knows its children and
    # its parent.
    paragraph = body.get_paragraph(position=42)
    print("Children of the praragraph:\n   ", paragraph.children)
    print("\nParent of the paragraph:\n   ", paragraph.parent)

    # Any element can be dumped as serialized XML, which is not the same
    # thing as its text content.
    first_link = body.get_link(position=0)
    print("\nContent of the serialization link:")
    print("   ", first_link.serialize())
    print("\nWhich is different from the text content of the link:")
    print("   ", str(first_link))


if __name__ == "__main__":
    main()

Show meta data

Print the metadata of an ODF file.

recipes/show_meta_data.py
#!/usr/bin/env python
"""Print the metadata informations of a ODF file.
"""
import sys
from pathlib import Path

from odfdo import Document

_DOC_SEQUENCE = 490
DATA = Path(__file__).parent / "data"
SOURCE = "collection2.odt"


def read_source_document():
    """Return the Document whose metadata will be displayed.

    Uses the path given as first command line argument, falling back to
    the sample file ``DATA / SOURCE`` when no argument was passed.
    """
    if len(sys.argv) > 1:
        return Document(sys.argv[1])
    return Document(DATA / SOURCE)


def main():
    """Print each metadata field of the document, then the formatted summary."""
    document = read_source_document()

    # document.meta is the shortcut for document.get_part("meta.xml").
    meta = document.meta

    # Each metadata field is exposed as an attribute of the meta part.
    # (odfdo does not increment editing cycles nor statistics when saving
    # the document.)
    # Pairs of (printed label, attribute name); attributes are read one at
    # a time, in display order.
    fields = (
        ("Title                :", "title"),
        ("creator              :", "creator"),
        ("creation date        :", "creation_date"),
        ("modification date    :", "date"),
        ("initial creator      :", "initial_creator"),
        ("subject              :", "subject"),
        ("description          :", "description"),
        ("editing cycles       :", "editing_cycles"),
        ("editing duration     :", "editing_duration"),
        ("generator            :", "generator"),
        ("language             :", "language"),
        ("keywords             :", "keyword"),
    )

    print(f"Meta data of {document.container.path}")
    for label, attribute in fields:
        print(label, getattr(meta, attribute))

    print("statistics    ")
    statistic = meta.statistic
    if statistic is not None:
        # key[5:] drops the first five characters of each key
        # NOTE(review): presumably a "meta:" prefix - confirm.
        for key, value in statistic.items():
            print(f"   {key[5:]:<18}: {value}")

    user_defined = meta.user_defined_metadata
    if user_defined:
        print("user defined metadata")
        # NOTE(review): the same five-character cut is applied to
        # user-defined keys - confirm these keys carry a prefix too.
        for key, value in user_defined.items():
            print(f"   {key[5:]:<18}: {value}")

    # A quick way to get all of this information at once:
    print("-" * 70)
    print(document.get_formated_meta())


if __name__ == "__main__":
    main()

Remove all links from a document, transforming each link's information (URL, text) into a footnote. Links that are already inside notes are simply removed, keeping their URL as plain text. (Side note: most office suites dislike notes inside notes.)

recipes/move_link_to_footnote.py
#!/usr/bin/env python
"""Remove all links from a document, transforming each link information (URL,
text) into a footnote. Of course, removing links already inside notes, just
keeping plain text URL. (Side note: most office suite dislike notes in notes)
"""

import sys
from pathlib import Path

from odfdo import Document

_DOC_SEQUENCE = 500
DATA = Path(__file__).parent / "data"
SOURCE = "collection2.odt"
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "footnote1"
TARGET = "document.odt"


def save_new(document: Document, name: str):
    """Save *document* as ``OUTPUT_DIR / name``, creating the directory first."""
    destination = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", destination)
    document.save(destination, pretty=True)


def remove_links(element):
    """Remove every ``text:a`` tag below *element*, keeping the inner content.

    *element* is modified in place by ``_tree_remove_tag``; nothing is
    returned.
    """
    tag = "text:a"
    # No tag protects its children here. A real None is used instead of the
    # string "None", which only worked because no element has that tag name.
    keep_inside_tag = None
    _tree_remove_tag(element, (tag, keep_inside_tag, False))


def main():
    """Turn every link of the document into a footnote and save the result.

    Links found inside existing notes are not moved to a new note: their
    URL is written as plain text right after the link text. Links found in
    paragraphs get a footnote holding the link text and URL. In both cases
    the ``text:a`` tags are then removed.
    """
    try:
        source = Path(sys.argv[1])
    except IndexError:
        source = DATA / SOURCE

    document = Document(str(source))
    body = document.body

    print("Moving links to footnotes from", source)
    print("links occurrences:", len(body.get_links()))
    print("footnotes occurences:", len(body.get_notes()))

    counter_links_in_notes = 0
    for note in body.get_notes():
        links = note.get_links()
        for link in links:
            counter_links_in_notes += 1
            url = link.get_attribute("xlink:href")
            # A link may have no tail at all: avoid writing the text "None".
            tail = link.tail or ""
            link.tail = f" (link: {url}) {tail}"
        # Strip the tags only once every link of the note has been
        # annotated: remove_links() rebuilds the note from a clone, so the
        # link objects collected above no longer belong to the document
        # afterwards.
        if links:
            remove_links(note)

    print("links in notes:", counter_links_in_notes)

    counter_added_note = 0  # added notes counter
    for paragraph in body.paragraphs:
        for link in paragraph.get_links():
            url = link.get_attribute("xlink:href")
            text = link.inner_text
            counter_added_note += 1
            paragraph.insert_note(
                after=link,  # citation is inserted after current link
                note_id=f"my_note_{counter_added_note}",
                citation="1",  # The symbol the user sees to follow the footnote.
                # The footnote itself, at the end of the page:
                body=(f". {text}, link: {url}"),
            )
        remove_links(paragraph)

    print("links occurrences:", len(body.get_links()))
    print("footnotes occurences:", len(body.get_notes()))

    save_new(document, TARGET)


def _tree_remove_tag(element, context):
    """Recursively strip a tag from *element*, keeping the tag content.

    - context: tuple (tag to remove, protection tag, protection flag).
      An element carrying the tag to remove is kept when the protection
      flag is set. The flag passed to children is set only when the
      current element is the protection tag, so protection covers one
      level of depth.

    The work is done on a clone; when something changed, *element* is
    cleared and refilled in place from the processed clone.

    Return a tuple (result, modified). *result* is *element* itself, or,
    when *element* is a removed tag, a flat list made of its text, its
    processed children and its tail.
    """
    tag, keep_inside_tag, protected = context
    snapshot = element.clone
    child_context = (
        tag,
        keep_inside_tag,
        bool(keep_inside_tag) and element.tag == keep_inside_tag,
    )

    changed = False
    rebuilt_children = []
    for node in snapshot.children:
        result, node_changed = _tree_remove_tag(node, child_context)
        changed = changed or node_changed
        if isinstance(result, list):
            rebuilt_children.extend(result)
        else:
            rebuilt_children.append(result)

    unwrap = not protected and element.tag == tag
    if unwrap:
        # The element itself disappears: only its content is handed back.
        target = []
    elif not changed:
        # no change in element sub tree, no change on element
        return (element, False)
    else:
        target = element
        target.clear()
        try:
            for name, value in snapshot.attributes.items():
                target.set_attribute(name, value)
        except ValueError:
            print("Incorrect attribute in", snapshot)

    text = snapshot.text
    tail = snapshot.tail
    if text is not None:
        target.append(text)
    for item in rebuilt_children:
        target.append(item)
    if tail is not None:
        if unwrap:
            target.append(tail)
        else:
            target.tail = tail
    return (target, True)


if __name__ == "__main__":
    main()

Remove the links (the text:a tag), keeping the inner text.

recipes/remove_http_links.py
#!/usr/bin/env python
"""Remove the links (the text:a tag), keeping the inner text."""

import sys
from pathlib import Path

from odfdo import Document

_DOC_SEQUENCE = 510
DATA = Path(__file__).parent / "data"
SOURCE = "collection2.odt"
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "nolink"
TARGET = "document.odt"


def save_new(document: Document, name: str):
    """Write *document* into OUTPUT_DIR under the file name *name*."""
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    target_path = OUTPUT_DIR / name
    print("Saving:", target_path)
    document.save(target_path, pretty=True)


def main():
    """Remove all links of the source document and save the result."""
    # Source is the first command line argument, or the sample document.
    source = Path(sys.argv[1]) if len(sys.argv) > 1 else DATA / SOURCE
    document = Document(str(source))
    body = document.body

    print("Removing links from", source)
    print("'text:a' occurrences:", len(body.get_links()))

    remove_links(body)
    print("'text:a' occurrences after removal:", len(body.get_links()))

    save_new(document, TARGET)


def remove_links(element):
    """Strip all ``text:a`` tags found under *element*, keeping their content.

    The work is delegated to ``_tree_remove_tag`` which modifies *element*
    in place; nothing is returned.
    """
    # (tag to remove, protection tag, protection flag). None, rather than
    # the string "None", states that no tag protects its children.
    context = ("text:a", None, False)
    _tree_remove_tag(element, context)


def _tree_remove_tag(element, context):
    """Remove a tag from the sub tree of *element*, recursively.

    - context: a tuple (tag to remove, protection tag, protection flag).
      When the protection flag is set, *element* is kept even if it
      carries the tag to remove. Direct children of an element whose tag
      is the protection tag are processed with the flag set.

    Return (result, modified): *result* is *element*, rebuilt in place
    when something changed below it, or a list holding the text, the
    processed children and the tail when *element* itself is removed.
    """
    tag, keep_inside_tag, protected = context
    buffer = element.clone
    # Only the direct children of the protection tag are protected.
    protect_below = (
        True if keep_inside_tag and element.tag == keep_inside_tag else False
    )
    child_context = (tag, keep_inside_tag, protect_below)

    modified = False
    sub_elements = []
    for child in buffer.children:
        striped, is_modified = _tree_remove_tag(child, child_context)
        modified = modified or is_modified
        if isinstance(striped, list):
            # a removed child is replaced by its own content
            sub_elements.extend(striped)
        else:
            sub_elements.append(striped)

    if protected or element.tag != tag:
        if not modified:
            # no change in element sub tree, no change on element
            return (element, False)
        # Rebuild the element from scratch, starting with its attributes.
        element.clear()
        try:
            for key, value in buffer.attributes.items():
                element.set_attribute(key, value)
        except ValueError:
            print("Bad attribute in", buffer)
    else:
        # The element is dropped, its content is collected in a list.
        element = []

    text, tail = buffer.text, buffer.tail
    if text is not None:
        element.append(text)
    for child in sub_elements:
        element.append(child)
    if tail is not None:
        if isinstance(element, list):
            element.append(tail)
        else:
            element.tail = tail
    return (element, True)


if __name__ == "__main__":
    main()

Remove span styles

Remove span styles (like some words in bold in a paragraph), except in titles.

recipes/remove_span_styles.py
#!/usr/bin/env python
"""Remove span styles (like some words in bold in a paragraph),
except in titles.
"""

import sys
from pathlib import Path

from odfdo import Document

_DOC_SEQUENCE = 520
DATA = Path(__file__).parent / "data"
SOURCE = "dormeur.odt"
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "nostyle"
TARGET = "document.odt"


def save_new(document: Document, name: str):
    """Store *document* in the recipe output directory as *name*."""
    output_path = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", output_path)
    document.save(output_path, pretty=True)


def remove_text_span(element):
    """Remove the ``text:span`` tags below *element*, in place.

    ``text:h`` is given as protection tag, so spans that are direct
    children of a heading are kept.
    """
    # (tag to remove, protection tag, protection flag)
    _tree_remove_tag(element, ("text:span", "text:h", False))


def main():
    """Remove the span styles of the source document and save the result."""
    # Source is the first command line argument, or the sample document.
    source = Path(sys.argv[1]) if len(sys.argv) > 1 else DATA / SOURCE
    document = Document(str(source))
    body = document.body

    print("Removing span styles from", source.name)
    print("'text:span' occurrences:", len(body.spans))

    remove_text_span(body)
    print("'text:span' occurrences after removal:", len(body.spans))

    save_new(document, TARGET)


def _tree_remove_tag(element, context):
    """Remove a tag from *element* and its descendants, keeping the content.

    - context: a tuple (tag to remove, protection tag, protection flag).
      An element is kept despite carrying the tag to remove when the
      protection flag is set; the flag is set for the direct children of
      an element whose tag is the protection tag.

    *element* is modified in place (cleared, then refilled from a
    processed clone) when something changed below it.

    Return (result, modified) where *result* is *element*, or a list of
    its text, processed children and tail when *element* is itself removed.
    """
    tag, keep_inside_tag, protected = context
    buffer = element.clone
    protect_below = bool(keep_inside_tag and element.tag == keep_inside_tag)

    # Process all children first, in document order.
    outcomes = [
        _tree_remove_tag(child, (tag, keep_inside_tag, protect_below))
        for child in buffer.children
    ]
    modified = any(is_modified for _striped, is_modified in outcomes)
    sub_elements = []
    for striped, _is_modified in outcomes:
        if isinstance(striped, list):
            # content of a removed child takes the place of the child
            sub_elements += striped
        else:
            sub_elements.append(striped)

    remove_self = not protected and element.tag == tag
    if remove_self:
        element = []
    else:
        if not modified:
            # no change in element sub tree, no change on element
            return (element, False)
        element.clear()
        try:
            for key, value in buffer.attributes.items():
                element.set_attribute(key, value)
        except ValueError:
            print("Bad attribute in", buffer)

    text = buffer.text
    if text is not None:
        element.append(text)
    for child in sub_elements:
        element.append(child)
    tail = buffer.tail
    if tail is not None:
        if isinstance(element, list):
            element.append(tail)
        else:
            element.tail = tail
    return (element, True)


if __name__ == "__main__":
    main()

Retrieve all pictures from odf files

Analyse a list of files and directories (recursively), open every ODF document found, and copy the pictures of these documents into a directory.

recipes/retrieve_all_pictures_from_ODF_files.py
#!/usr/bin/env python
"""Analyse a list of files and directory (recurse), open all ODF documents
and copy pictures from documents in a directory.
"""
import sys
import time
from hashlib import sha256
from pathlib import Path

from odfdo import Document

_DOC_SEQUENCE = 530
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "collected_pics"
DATA = Path(__file__).parent / "data"

# encoding = "UTF8"
known_images = set()
counter_image = 0
counter_odf = 0
counter_outside = 0


def store_image(path, name, content):
    """Write *content* in OUTPUT_DIR under a file name not used yet.

    The file name is "<document>_<n>_<name>" where <document> is the
    document file name with dots replaced by underscores and <n> the first
    integer, starting at 1, giving a name that does not exist. The global
    image counter is incremented.
    """
    global counter_image

    prefix = path.name.replace(".", "_")
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    index = 1
    while (OUTPUT_DIR / f"{prefix}_{index}_{name}").exists():
        index += 1
    (OUTPUT_DIR / f"{prefix}_{index}_{name}").write_bytes(content)
    counter_image += 1


def parse_odf_pics(path: Path):
    """Store the images of the document at *path* that were not seen before.

    Files whose suffix does not start with ".od", and files that cannot be
    opened as a Document, are skipped silently. Images whose URL is not a
    part of the document are reported and counted in counter_outside.
    """
    global counter_odf
    global counter_outside

    looks_like_odf = path.suffix.lower().startswith(".od")
    if not looks_like_odf:
        return
    try:
        document = Document(path)
    except Exception:
        # best effort: anything that fails to load is ignored
        return

    counter_odf += 1
    for image in document.body.images:
        url = image.url
        if not url:
            continue
        try:
            content = document.get_part(url)
        except KeyError:
            print("- not found inside document:", path)
            print("  image URL:", url)
            counter_outside += 1
        else:
            if not known_pic(content):
                # the image keeps the last component of its URL as name
                store_image(path, url.rsplit("/", 1)[-1], content)


def known_pic(content) -> bool:
    """Tell whether *content* was already seen, recording it otherwise.

    Contents are identified by their sha256 digest, kept in the
    module-level set known_images.
    """
    digest = sha256(content).digest()
    already_seen = digest in known_images
    if not already_seen:
        known_images.add(digest)
    return already_seen


def analyse_document(source):
    """Send *source*, or every file found below it, to parse_odf_pics.

    - source: a Path. When it is a regular file it is parsed directly
      (globbing a file yields nothing, so it would otherwise be ignored);
      when it is a directory, all the files below it are parsed,
      recursively.
    """
    if source.is_file():
        parse_odf_pics(source)
        return
    for path in source.glob("**/*"):
        if path.is_file():
            parse_odf_pics(path)


def main():
    """Collect the pictures of the ODF documents found in the given paths.

    Every command line argument is analysed in turn, not only the first
    one; without any argument the sample DATA directory is used. A summary
    of the counters and the elapsed time is printed at the end.
    """
    sources = sys.argv[1:] or [DATA]

    t0 = time.time()
    for source in sources:
        analyse_document(Path(source))
    elapsed = time.time() - t0
    print(
        f"{counter_image} images copied ({counter_outside} not found) from "
        f"{counter_odf} ODF files to {OUTPUT_DIR} in {elapsed:.2f}sec."
    )


if __name__ == "__main__":
    main()

Read document from bytesio

Read a document from BytesIO.

recipes/read_document_from_bytesio.py
#!/usr/bin/env python
"""Read a document from BytesIO.
"""
import io
from pathlib import Path

from odfdo import Document

_DOC_SEQUENCE = 600
DATA = Path(__file__).parent / "data"
SOURCE = "lorem.odt"


def main():
    """Load the sample document through a BytesIO and check its content."""
    # The bytes come from a file here; they could come from some network.
    raw_content = (DATA / SOURCE).read_bytes()
    # A BytesIO created from initial bytes is positioned at its start.
    with io.BytesIO(raw_content) as bytes_content:
        # Create the odfdo.Document from the BytesIO
        document = Document(bytes_content)
        # check :
        found = document.body.search("Lorem ipsum dolor sit amet")
        if found is None:
            raise ValueError("string not found")


if __name__ == "__main__":
    main()

Save document as bytesio

Save a document as BytesIO.

recipes/save_document_as_bytesio.py
#!/usr/bin/env python
"""Save a document as BytesIO.
"""
import io
from pathlib import Path

from odfdo import Document, Paragraph

_DOC_SEQUENCE = 605
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "bytes"
TARGET = "document.odt"


def make_document():
    """Return a new text document with a "Hello World" paragraph appended."""
    document = Document("text")
    document.body.append(Paragraph("Hello World"))
    return document


def main():
    """Save a document into a BytesIO, then dump these bytes into a file."""
    document = make_document()
    with io.BytesIO() as bytes_content:
        document.save(bytes_content)
        # Now use the BytesIO in some way:
        # In a network context, typically:
        #    response.write(bytes_content.getvalue())
        payload = bytes_content.getvalue()
        OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
        (OUTPUT_DIR / TARGET).write_bytes(payload)


if __name__ == "__main__":
    main()

Search and replace words

Search and replace words in a text document.

recipes/search_and_replace_words.py
#!/usr/bin/env python
"""Search and replace words in a text document.
"""
from pathlib import Path

from odfdo import Document

_DOC_SEQUENCE = 700
DATA = Path(__file__).parent / "data"
SOURCE = "lorem.odt"
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "replaced_text"
TARGET = "lorem_replaced.odt"


def save_new(document: Document, name: str):
    """Save *document* under OUTPUT_DIR with the file name *name*."""
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    saved_path = OUTPUT_DIR / name
    print("Saving:", saved_path)
    document.save(saved_path, pretty=True)


def search_replace(document):
    """Apply a series of sample replacements on the body of *document*."""
    body = document.body

    # Replacement on the whole document content.
    body.replace("Lorem", "(Lorem replaced)")

    # Replacement restricted to the paragraphs...
    for paragraph in body.paragraphs:
        paragraph.replace("ipsum", "(ipsum in paragraph)")

    # ...and to the headers.
    for header in body.headers:
        header.replace("ipsum", "(ipsum in header)")

    # The searched pattern is a regular expression.
    regex_replacements = (
        (r"\S+lit ", "(...lit) "),
        (r"pul[a-z]+", "(pulvinar)"),
    )
    for pattern, replacement in regex_replacements:
        body.replace(pattern, replacement)


def main():
    """Load the sample document, replace some words and save the result."""
    source_path = DATA / SOURCE
    document = Document(source_path)
    search_replace(document)
    save_new(document, TARGET)


if __name__ == "__main__":
    main()

Spreadsheet with words frequency from a text

Load an ODF text, store the frequency of words in a spreadsheet, make requests on the table, by regex or value.

recipes/spreadsheet_with_words_frequency_from_a_text.py
#!/usr/bin/env python
"""Load an ODF text, store the frequency of words in a spreadsheet,
make requests on the table, by regex or value.
"""
import sys
from pathlib import Path

from odfdo import Document, Table

_DOC_SEQUENCE = 710
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "freq"
SOURCE = "collection2.odt"
DATA = Path(__file__).parent / "data"
TARGET = "frequency.ods"


def save_new(document: Document, name: str):
    """Create OUTPUT_DIR when missing and save *document* there as *name*."""
    file_path = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", file_path)
    document.save(file_path, pretty=True)


def read_source_document():
    """Return the text Document whose words will be counted.

    The path is the first command line argument when there is one, the
    sample file ``DATA / SOURCE`` otherwise.
    """
    use_argument = len(sys.argv) > 1
    source = sys.argv[1] if use_argument else DATA / SOURCE
    return Document(source)


def main():
    """Build the word frequency spreadsheet and save it."""
    save_new(generate_document(), TARGET)
    # Output recorded for the sample document, kept for reference only
    # (the value is not used).
    _expected_result = """
    Word frequency analysis of collection2.odt
    Nb of words: 9128
    Unique words found: 2337
    Rows in the table : 2337
    Words corresponding to the regex: ^the
      word: the                   occurences: 644
      word: they                  occurences: 15
      word: their                 occurences: 11
      word: then                  occurences: 10
      word: there                 occurences: 7
      word: these                 occurences: 4
      word: them                  occurences: 4
      word: themselves            occurences: 2
      word: theme                 occurences: 2
      word: themed                occurences: 1
      word: theatrical            occurences: 1
    List of words of frequency 15: two, they, release, one, its, his, film,
    episodes, but, adaptation, UK, Radio, J, 0
"""


def frequence_count(document):
    """Count the occurrences of each word of the text of *document*.

    The text is ``str(document.body)``; a fixed set of punctuation
    characters is turned into spaces, then the text is split on
    whitespace. Counting is case sensitive.

    Return a dict mapping each word to its number of occurrences, in
    order of first appearance. The document name and the word counts are
    printed along the way.
    """
    print("Word frequency analysis of", Path(document.container.path).name)
    separators = "():;!.,[]{}#@/\\=-_+*`\"'"
    # One translate() pass replaces all separators at once, instead of one
    # full replace() pass over the text per character.
    to_space = str.maketrans(dict.fromkeys(separators, " "))
    words = str(document.body).translate(to_space).split()
    print("Nb of words:", len(words))

    frequences = {}
    for word in words:
        frequences[word] = frequences.get(word, 0) + 1

    print("Unique words found:", len(frequences))
    return frequences


def generate_document():
    """Build a spreadsheet of the word frequencies of the source document.

    The table "Frequency Table" holds one row per word (word, count),
    most frequent words first; words with the same count are in reverse
    alphabetical order. Two sample queries on the table are printed.

    Return the spreadsheet Document.
    """
    document_source = read_source_document()
    spreadsheet = Document("spreadsheet")

    frequences = frequence_count(document_source)

    # Populate the table in the spreadsheet
    body = spreadsheet.body
    body.clear()
    table = Table("Frequency Table")
    body.append(table)

    # Sort on (count, word), highest first. Reversing the unsorted list
    # would only give the words in reverse order of first appearance.
    ranked = sorted(
        ((count, word) for word, count in frequences.items()),
        reverse=True,
    )

    # All rows are written at once; the alternative is to build one Row
    # per word and append it to the table.
    table.set_values([(word, count) for count, word in ranked])

    print("Rows in the table :", len(table.rows))

    # frequency of word:
    regex_query = "^the"
    print("Words corresponding to the regex:", regex_query)
    result = table.get_rows(content=regex_query)
    for row in result:
        print(f"  word: {row.get_value(0):<20}  occurences: {row.get_value(1)}")

    # list of words of frequency = 15
    found = [word for word, freq in table.iter_values() if freq == 15]
    print("List of words of frequency 15:", ", ".join(found))
    return spreadsheet


if __name__ == "__main__":
    main()

Transpose table

Transpose a table. Create a spreadsheet table (example: 50 rows and 20 columns), and subsequently create a new table in a separate sheet where the columns and rows are now swapped (e.g. 20 rows and 50 columns).

recipes/transpose_table.py
#!/usr/bin/env python
"""Transpose a table. Create a spreadsheet table (example: 50 rows and 20
columns), and subsequently create a new table in a separate sheet where the
columns and rows are now swapped (e.g. 20 rows and 50 columns).
"""
from pathlib import Path

from odfdo import Document, Row, Table

_DOC_SEQUENCE = 800
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "transpose"
TARGET = "transposed.ods"


def save_new(document: Document, name: str):
    """Save *document* in OUTPUT_DIR (created on demand) as *name*."""
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    result_path = OUTPUT_DIR / name
    print("Saving:", result_path)
    document.save(result_path, pretty=True)


def main():
    """Generate the spreadsheet with its transposed tables and save it."""
    save_new(generate_document(), TARGET)


def generate_document():
    """Return a spreadsheet holding a table and two transposed versions of it.

    The first table has 50 rows and 20 columns, each cell holding its own
    coordinates ("A1", "B1", ...). The second one is built row by row from
    the columns of the first, the third one with Table.transpose().
    """
    spreadsheet = Document("spreadsheet")

    # Start from an empty body and add the source table.
    body = spreadsheet.body
    body.clear()
    table = Table("Table")
    body.append(table)

    lines = 50
    cols = 20

    # Column letters "A", "B", ... for the cell contents.
    letters = [chr(65 + index) for index in range(cols)]
    for number in range(1, lines + 1):
        row = Row()
        for position, letter in enumerate(letters):
            row.set_value(position, f"{letter}{number}")
        table.append(row)

    print("Size of Table :", table.size)

    # Classical method: each column of the source becomes a row.
    table2 = Table("Symetry")
    for index in range(cols):
        table2.set_row_values(index, table.get_column_values(index))
    body.append(table2)

    print("Size of symetric table 2 :", table2.size)

    # Simpler: transpose a copy of the source table.
    table3 = table.clone
    table3.transpose()
    table3.name = "Transpose"
    body.append(table3)

    print("Size of symetric table 3 :", table3.size)
    return spreadsheet


if __name__ == "__main__":
    main()