Coverage for src/pdfbaker/document.py: 89%

114 statements  

« prev     ^ index     » next       coverage.py v7.8.0, created at 2025-04-20 04:55 +1200

1"""PDFBakerDocument class. 

2 

3Document-level processing, variants, custom bake modules. 

4 

5Delegates the jobs of rendering and converting to its pages, 

6combines and compresses the result and reports back to its baker. 

7""" 

8 

9import importlib 

10import os 

11from pathlib import Path 

12from typing import Any 

13 

14from .config import ( 

15 PDFBakerConfiguration, 

16 deep_merge, 

17 render_config, 

18) 

19from .errors import ( 

20 ConfigurationError, 

21 PDFBakerError, 

22 PDFCombineError, 

23 PDFCompressionError, 

24) 

25from .logging import LoggingMixin 

26from .page import PDFBakerPage 

27from .pdf import ( 

28 combine_pdfs, 

29 compress_pdf, 

30) 

31 

# Defaults merged into the base configuration before a document's own
# configuration file is loaded.
DEFAULT_DOCUMENT_CONFIG = {
    # Default to directories relative to the config file
    "directories": {
        "pages": "pages",
        "templates": "templates",
        "images": "images",
    },
}

# File name looked up when a document is given as a directory rather than
# as a path to a configuration file.
DEFAULT_DOCUMENT_CONFIG_FILE = "config.yaml"

41 

42__all__ = ["PDFBakerDocument"] 

43 

44 

class PDFBakerDocument(LoggingMixin):
    """A document being processed.

    Loads the document configuration, processes each configured page
    (once per variant if variants are configured), combines the page PDFs
    into one file per document/variant and optionally compresses it.
    """

    class Configuration(PDFBakerConfiguration):
        """PDFBaker document-specific configuration."""

        def __init__(
            self,
            document: "PDFBakerDocument",
            base_config: "PDFBakerConfiguration",  # type: ignore # noqa: F821
            config_path: Path,
        ) -> None:
            """Initialize document configuration.

            Args:
                document: The document this configuration belongs to
                    (used for trace logging while loading)
                base_config: The PDFBaker configuration to merge with
                config_path: The document configuration (YAML file), or a
                    directory containing the default configuration file

            Raises:
                ConfigurationError: If the configuration has no "pages" key
            """
            self.document = document

            # A directory names the document after itself and is expected
            # to hold the default config file; a file names the document
            # after its stem.
            if config_path.is_dir():
                self.name = config_path.name
                config_path = config_path / DEFAULT_DOCUMENT_CONFIG_FILE
            else:
                self.name = config_path.stem

            base_config = deep_merge(base_config, DEFAULT_DOCUMENT_CONFIG)

            self.document.log_trace_section(
                "Loading document configuration: %s", config_path
            )
            super().__init__(base_config, config_path)
            self.document.log_trace(self.pretty())

            # NOTE(review): the "config", "build" and "dist" directory
            # entries are not set in this class - presumably provided by
            # the base configuration / parent class as Path objects.
            self.bake_path = self["directories"]["config"] / "bake.py"
            self.build_dir = self["directories"]["build"] / self.name
            self.dist_dir = self["directories"]["dist"] / self.name

            if "pages" not in self:
                raise ConfigurationError(
                    f'Document "{self.name}" is missing key "pages"'
                )

            self.pages = []
            for page_spec in self["pages"]:
                if isinstance(page_spec, dict) and "path" in page_spec:
                    # Path was specified: relative to the config file
                    page = self.resolve_path(
                        page_spec["path"],
                        directory=self["directories"]["config"],
                    )
                else:
                    # Only name was specified: relative to the pages directory
                    page = self.resolve_path(
                        page_spec, directory=self["directories"]["pages"]
                    )
                    # A bare page name defaults to a YAML file
                    if not page.suffix:
                        page = page.with_suffix(".yaml")
                self.pages.append(page)

    def __init__(
        self,
        baker: "PDFBaker",  # type: ignore # noqa: F821
        base_config: dict[str, Any],
        config_path: Path,
    ):
        """Initialize a document.

        Args:
            baker: The baker this document reports back to
            base_config: The configuration to merge the document's own
                configuration with
            config_path: The document configuration file, or a directory
                containing it
        """
        super().__init__()
        self.baker = baker
        self.config = self.Configuration(
            document=self,
            base_config=base_config,
            config_path=config_path,
        )

    def process_document(self) -> tuple[Path | list[Path] | None, str | None]:
        """Process the document - use custom bake module if it exists.

        Creates the build and dist directories if necessary.

        Returns:
            Tuple of (pdf_files, error_message) where:
            - pdf_files is a Path or list of Paths to the created PDF
              files, or None if creation failed
              FIXME: could have created SOME PDF files
            - error_message is a string describing the error, or None if successful
        """
        self.log_info_section('Processing document "%s"...', self.config.name)

        self.config.build_dir.mkdir(parents=True, exist_ok=True)
        self.config.dist_dir.mkdir(parents=True, exist_ok=True)

        try:
            if self.config.bake_path.exists():
                return self._process_with_custom_bake(self.config.bake_path), None
            return self.process(), None
        except PDFBakerError as exc:
            # Errors are reported to the caller instead of being raised
            return None, str(exc)

    def _process_with_custom_bake(self, bake_path: Path) -> Path | list[Path]:
        """Process document using custom bake module.

        Loads the Python file at ``bake_path`` as a module and calls its
        ``process_document(document=self)`` function.

        Raises:
            PDFBakerError: If the module cannot be loaded or anything it
                does raises an exception
        """
        # "import importlib" alone does not guarantee that the "util"
        # submodule is loaded, so import it explicitly.
        import importlib.util

        try:
            spec = importlib.util.spec_from_file_location(
                f"documents.{self.config.name}.bake", bake_path
            )
            if spec is None or spec.loader is None:
                raise PDFBakerError(
                    f"Failed to load bake module for document {self.config.name}"
                )
            module = importlib.util.module_from_spec(spec)
            spec.loader.exec_module(module)
            return module.process_document(document=self)
        except Exception as exc:
            # Deliberately broad: the bake module is arbitrary user code,
            # and every failure is reported as a PDFBakerError.
            raise PDFBakerError(
                f"Failed to process document with custom bake: {exc}"
            ) from exc

    def process(self) -> Path | list[Path]:
        """Process document using standard processing.

        Returns:
            A list of PDF paths (one per variant) if the configuration has
            a "variants" key, otherwise the path of the single PDF.
        """
        if "variants" in self.config:
            # Multiple PDF documents
            pdf_files = []
            for variant in self.config["variants"]:
                self.log_info_subsection(
                    'Processing variant "%s"...', variant["name"]
                )
                # Variant settings are merged over the document config and
                # the variant itself is exposed under the "variant" key.
                variant_config = deep_merge(self.config, variant)
                variant_config["variant"] = variant
                variant_config = render_config(variant_config)
                page_pdfs = self._process_pages(variant_config)
                pdf_files.append(self._finalize(page_pdfs, variant_config))
            return pdf_files

        # Single PDF document
        doc_config = render_config(self.config)
        page_pdfs = self._process_pages(doc_config)
        return self._finalize(page_pdfs, doc_config)

    def _process_pages(self, config: dict[str, Any]) -> list[Path]:
        """Process pages with given configuration.

        Args:
            config: Base configuration handed to every page

        Returns:
            The result of each page's ``process()`` call, in page order
        """
        pdf_files = []
        self.log_debug_subsection("Pages to process:")
        self.log_debug(self.config.pages)
        # Page numbers are 1-based
        for page_num, page_config in enumerate(self.config.pages, start=1):
            page = PDFBakerPage(
                document=self,
                page_number=page_num,
                base_config=config,
                config_path=page_config,
            )
            pdf_files.append(page.process())

        return pdf_files

    def _finalize(self, pdf_files: list[Path], doc_config: dict[str, Any]) -> Path:
        """Combine PDF pages and optionally compress.

        Args:
            pdf_files: The page PDFs to combine
            doc_config: Configuration providing "filename" and, optionally,
                "compress_pdf"

        Returns:
            Path of the final PDF in the dist directory

        Raises:
            PDFBakerError: If combining the page PDFs fails
        """
        self.log_debug_subsection("Finalizing document...")
        self.log_debug("Combining PDF pages...")
        pdf_name = f"{doc_config['filename']}.pdf"
        try:
            combined_pdf = combine_pdfs(
                pdf_files,
                self.config.build_dir / pdf_name,
            )
        except PDFCombineError as exc:
            raise PDFBakerError(f"Failed to combine PDFs: {exc}") from exc

        output_path = self.config.dist_dir / pdf_name

        compressed = False
        if doc_config.get("compress_pdf", False):
            self.log_debug("Compressing PDF document...")
            try:
                compress_pdf(combined_pdf, output_path)
            except PDFCompressionError as exc:
                # Best effort: fall back to the uncompressed PDF
                self.log_warning(
                    "Compression failed, using uncompressed PDF: %s",
                    exc,
                )
            else:
                compressed = True
                self.log_info("PDF compressed successfully")

        if not compressed:
            # os.replace overwrites an existing output file on every
            # platform; os.rename raises FileExistsError on Windows.
            os.replace(combined_pdf, output_path)

        self.log_info("Created %s", output_path.name)
        return output_path

    def teardown(self) -> None:
        """Clean up build directory after processing.

        Removes the files directly inside the build directory, then the
        directory itself. Subdirectories are left alone; if any remain,
        the build directory is kept and a warning is logged.
        """
        self.log_debug_subsection(
            "Tearing down build directory: %s", self.config.build_dir
        )
        if self.config.build_dir.exists():
            self.log_debug("Removing files in build directory...")
            for file_path in self.config.build_dir.iterdir():
                if file_path.is_file():
                    file_path.unlink()

            try:
                self.log_debug("Removing build directory...")
                self.config.build_dir.rmdir()
            except OSError:
                self.log_warning("Build directory not empty - not removing")