Coverage for src/pdfbaker/document.py: 89%
114 statements
« prev ^ index » next coverage.py v7.8.0, created at 2025-04-20 04:55 +1200
« prev ^ index » next coverage.py v7.8.0, created at 2025-04-20 04:55 +1200
"""PDFBakerDocument class.

Document-level processing, variants, custom bake modules.

Delegates the jobs of rendering and converting to its pages,
combines and compresses the result and reports back to its baker.
"""
import importlib
import importlib.util
import os
from pathlib import Path
from typing import Any

from .config import (
    PDFBakerConfiguration,
    deep_merge,
    render_config,
)
from .errors import (
    ConfigurationError,
    PDFBakerError,
    PDFCombineError,
    PDFCompressionError,
)
from .logging import LoggingMixin
from .page import PDFBakerPage
from .pdf import (
    combine_pdfs,
    compress_pdf,
)
# Defaults merged into the base configuration of every document before its
# own configuration file is loaded.
DEFAULT_DOCUMENT_CONFIG = {
    # Default to directories relative to the config file
    "directories": {
        "pages": "pages",
        "templates": "templates",
        "images": "images",
    },
}

# File name looked up when a document is given as a directory instead of a file.
DEFAULT_DOCUMENT_CONFIG_FILE = "config.yaml"
# Public API of this module.
__all__ = ["PDFBakerDocument"]
class PDFBakerDocument(LoggingMixin):
    """A document being processed.

    Loads its own configuration, has each of its pages processed into a PDF,
    then combines (and optionally compresses) the page PDFs into the final
    document. A ``bake.py`` next to the document configuration, if present,
    takes over processing.
    """

    class Configuration(PDFBakerConfiguration):
        """PDFBaker document-specific configuration."""

        def __init__(
            self,
            document: "PDFBakerDocument",
            base_config: "PDFBakerConfiguration",  # type: ignore # noqa: F821
            config_path: Path,
        ) -> None:
            """Initialize document configuration.

            Args:
                document: The document this configuration belongs to
                    (used for trace logging while loading)
                base_config: The PDFBaker configuration to merge with
                config_path: The document configuration (YAML file), or a
                    directory containing the default configuration file

            Raises:
                ConfigurationError: If the configuration has no "pages" key
            """
            self.document = document

            # A directory is named after itself and must contain the default
            # config file; a file is named after its stem.
            if config_path.is_dir():
                self.name = config_path.name
                config_path = config_path / DEFAULT_DOCUMENT_CONFIG_FILE
            else:
                self.name = config_path.stem

            base_config = deep_merge(base_config, DEFAULT_DOCUMENT_CONFIG)

            self.document.log_trace_section(
                "Loading document configuration: %s", config_path
            )
            super().__init__(base_config, config_path)
            self.document.log_trace(self.pretty())

            self.bake_path = self["directories"]["config"] / "bake.py"
            self.build_dir = self["directories"]["build"] / self.name
            self.dist_dir = self["directories"]["dist"] / self.name

            if "pages" not in self:
                raise ConfigurationError(
                    f'Document "{self.name}" is missing key "pages"'
                )

            self.pages = []
            for page_spec in self["pages"]:
                if isinstance(page_spec, dict) and "path" in page_spec:
                    # Path was specified: relative to the config file
                    page = self.resolve_path(
                        page_spec["path"], directory=self["directories"]["config"]
                    )
                else:
                    # Only name was specified: relative to the pages directory
                    page = self.resolve_path(
                        page_spec, directory=self["directories"]["pages"]
                    )
                    if not page.suffix:
                        page = page.with_suffix(".yaml")
                self.pages.append(page)

    def __init__(
        self,
        baker: "PDFBaker",  # type: ignore # noqa: F821
        base_config: dict[str, Any],
        config_path: Path,
    ) -> None:
        """Initialize a document.

        Args:
            baker: The baker this document belongs to
            base_config: Configuration the document configuration is merged with
            config_path: Document configuration file, or the directory
                containing it
        """
        super().__init__()
        self.baker = baker
        self.config = self.Configuration(
            document=self,
            base_config=base_config,
            config_path=config_path,
        )

    def process_document(self) -> tuple[Path | list[Path] | None, str | None]:
        """Process the document - use custom bake module if it exists.

        Creates the build and dist directories before processing.

        Returns:
            Tuple of (pdf_files, error_message) where:
            - pdf_files is a Path or list of Paths to the created PDF
              files, or None if creation failed
              FIXME: could have created SOME PDF files
            - error_message is a string describing the error, or None if successful
        """
        self.log_info_section('Processing document "%s"...', self.config.name)

        self.config.build_dir.mkdir(parents=True, exist_ok=True)
        self.config.dist_dir.mkdir(parents=True, exist_ok=True)

        try:
            if self.config.bake_path.exists():
                return self._process_with_custom_bake(self.config.bake_path), None
            return self.process(), None
        except PDFBakerError as exc:
            return None, str(exc)

    def _process_with_custom_bake(self, bake_path: Path) -> Path | list[Path]:
        """Process document using custom bake module.

        Loads the module at ``bake_path`` and calls its
        ``process_document(document=self)``.

        Raises:
            PDFBakerError: If the module cannot be loaded, or if loading or
                running it raises any exception (chained as the cause)
        """
        try:
            # importlib.util is a submodule and needs its own explicit import;
            # ``import importlib`` alone does not guarantee it is loaded.
            spec = importlib.util.spec_from_file_location(
                f"documents.{self.config.name}.bake", bake_path
            )
            if spec is None or spec.loader is None:
                raise PDFBakerError(
                    f"Failed to load bake module for document {self.config.name}"
                )
            module = importlib.util.module_from_spec(spec)
            spec.loader.exec_module(module)
            return module.process_document(document=self)
        except Exception as exc:
            # Boundary to user-supplied code: report every failure as a
            # PDFBakerError so process_document() can turn it into a message.
            raise PDFBakerError(
                f"Failed to process document with custom bake: {exc}"
            ) from exc

    def process(self) -> Path | list[Path]:
        """Process document using standard processing.

        Returns:
            A list with one PDF path per variant if the configuration has a
            "variants" key, otherwise the path of the single PDF.
        """
        if "variants" in self.config:
            # Multiple PDF documents
            pdf_files = []
            for variant in self.config["variants"]:
                self.log_info_subsection('Processing variant "%s"...', variant["name"])
                variant_config = deep_merge(self.config, variant)
                variant_config["variant"] = variant
                variant_config = render_config(variant_config)
                page_pdfs = self._process_pages(variant_config)
                pdf_files.append(self._finalize(page_pdfs, variant_config))
            return pdf_files

        # Single PDF document
        doc_config = render_config(self.config)
        page_pdfs = self._process_pages(doc_config)
        return self._finalize(page_pdfs, doc_config)

    def _process_pages(self, config: dict[str, Any]) -> list[Path]:
        """Process pages with given configuration.

        Args:
            config: Base configuration handed to every page

        Returns:
            The results of each page's ``process()``, in page order.
        """
        pdf_files = []
        self.log_debug_subsection("Pages to process:")
        self.log_debug(self.config.pages)
        # Page numbers are 1-based
        for page_num, page_config in enumerate(self.config.pages, start=1):
            page = PDFBakerPage(
                document=self,
                page_number=page_num,
                base_config=config,
                config_path=page_config,
            )
            pdf_files.append(page.process())

        return pdf_files

    def _finalize(self, pdf_files: list[Path], doc_config: dict[str, Any]) -> Path:
        """Combine PDF pages and optionally compress.

        The combined PDF is written to the build directory and the final
        document ends up in the dist directory. If compression is requested
        but fails, a warning is logged and the uncompressed PDF is used.

        Args:
            pdf_files: Page PDFs to combine
            doc_config: Configuration providing "filename" and, optionally,
                "compress_pdf"

        Returns:
            Path of the final PDF in the dist directory.

        Raises:
            PDFBakerError: If combining the pages fails
        """
        self.log_debug_subsection("Finalizing document...")
        self.log_debug("Combining PDF pages...")
        try:
            combined_pdf = combine_pdfs(
                pdf_files,
                self.config.build_dir / f"{doc_config['filename']}.pdf",
            )
        except PDFCombineError as exc:
            raise PDFBakerError(f"Failed to combine PDFs: {exc}") from exc

        output_path = self.config.dist_dir / f"{doc_config['filename']}.pdf"

        compressed = False
        if doc_config.get("compress_pdf", False):
            self.log_debug("Compressing PDF document...")
            try:
                compress_pdf(combined_pdf, output_path)
            except PDFCompressionError as exc:
                self.log_warning(
                    "Compression failed, using uncompressed PDF: %s",
                    exc,
                )
            else:
                self.log_info("PDF compressed successfully")
                compressed = True

        if not compressed:
            # os.replace overwrites an existing target on every platform;
            # os.rename raises on Windows when the target already exists,
            # e.g. when the document is baked a second time.
            os.replace(combined_pdf, output_path)

        self.log_info("Created %s", output_path.name)
        return output_path

    def teardown(self) -> None:
        """Clean up build directory after processing.

        Removes the files directly inside the build directory, then the
        directory itself. If the directory cannot be removed, a warning is
        logged and it is left in place.
        """
        self.log_debug_subsection(
            "Tearing down build directory: %s", self.config.build_dir
        )
        if not self.config.build_dir.exists():
            return

        self.log_debug("Removing files in build directory...")
        for file_path in self.config.build_dir.iterdir():
            if file_path.is_file():
                file_path.unlink()

        try:
            self.log_debug("Removing build directory...")
            self.config.build_dir.rmdir()
        except OSError:
            self.log_warning("Build directory not empty - not removing")