Coverage for src/pdfbaker/pdf.py: 85%
73 statements
« prev ^ index » next coverage.py v7.8.0, created at 2025-04-20 04:55 +1200
« prev ^ index » next coverage.py v7.8.0, created at 2025-04-20 04:55 +1200
1"""PDF-related functions."""
3import logging
4import os
5import select
6import subprocess
7from collections.abc import Sequence
8from pathlib import Path
10import pypdf
11from cairosvg import svg2pdf
13from .errors import (
14 PDFCombineError,
15 PDFCompressionError,
16 SVGConversionError,
17)
19__all__ = [
20 "combine_pdfs",
21 "compress_pdf",
22 "convert_svg_to_pdf",
23]
25logger = logging.getLogger(__name__)
def combine_pdfs(
    pdf_files: Sequence[Path], output_file: Path
) -> Path | PDFCombineError:
    """Combine multiple PDF files into a single PDF.

    Every input is read and merged into the writer first; the output file is
    only opened (and therefore truncated) once all inputs have been processed.
    A failure while reading an input thus never leaves an empty or partial
    output file behind, and an output path that is also one of the inputs is
    not wiped before it has been read.

    Args:
        pdf_files: Paths to the PDF files to combine, in output order
        output_file: Path where the combined PDF will be written

    Returns:
        Path to the combined PDF file

    Raises:
        PDFCombineError: If no PDF files provided or if combining fails
        OSError: If an input file or the output file cannot be opened
    """
    if not pdf_files:
        raise PDFCombineError("No PDF files provided to combine")

    pdf_writer = pypdf.PdfWriter()

    for pdf_file in pdf_files:
        with open(pdf_file, "rb") as file_obj:
            try:
                pdf_reader = pypdf.PdfReader(file_obj)
                try:
                    pdf_writer.append(pdf_reader)
                except KeyError as exc:
                    if str(exc) != "'/Subtype'":
                        # Not the known annotation problem: let the outer
                        # handler wrap it in PDFCombineError.
                        raise
                    # PDF has broken annotations with missing /Subtype
                    logger.warning(
                        "Broken annotations in PDF: %s - "
                        "falling back to page-by-page method.",
                        pdf_file,
                    )
                    for page in pdf_reader.pages:
                        pdf_writer.add_page(page)
            except Exception as exc:
                raise PDFCombineError(f"Failed to combine PDFs: {exc}") from exc

    # Open the destination last so nothing is truncated unless merging worked.
    with open(output_file, "wb") as output_stream:
        pdf_writer.write(output_stream)

    return output_file
def _run_subprocess_logged(cmd: list[str], env: dict[str, str] | None = None) -> int:
    """Run a subprocess with output redirected to logging.

    Lines written to stdout are logged at INFO level, lines written to stderr
    at WARNING level. Blank lines are not logged.

    NOTE: relies on ``select.select`` over pipe file descriptors, which the
    Python documentation states is not supported on Windows.

    Args:
        cmd: Command and arguments to run
        env: Optional environment for the child process. When omitted (or
            empty) the current process environment is used. The mapping
            passed in is never modified.

    Returns:
        0 if the command exits successfully

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            return code
    """
    # Work on a copy so that setting PYTHONUNBUFFERED never leaks into the
    # caller's mapping (or into os.environ).
    env = dict(env or os.environ)
    env["PYTHONUNBUFFERED"] = "True"

    with subprocess.Popen(
        cmd,
        bufsize=1,
        text=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        env=env,
    ) as proc:
        # Map each pipe's file descriptor to its stream and log function
        pending = {
            proc.stdout.fileno(): (proc.stdout, logger.info),
            proc.stderr.fileno(): (proc.stderr, logger.warning),
        }

        # Read until both pipes reach EOF. select() reports a pipe at EOF as
        # readable on every call, so a finished pipe is dropped as soon as
        # readline() returns an empty string instead of being polled again.
        while pending:
            ready, _, _ = select.select(list(pending), [], [])
            for fd in ready:
                stream, log = pending[fd]
                line = stream.readline()
                if not line:
                    del pending[fd]
                elif line.strip():
                    log(line.rstrip())

        # Both pipes are closed; collect the exit status.
        ret_code = proc.wait()

    if ret_code != 0:
        raise subprocess.CalledProcessError(ret_code, cmd)

    return 0
def compress_pdf(
    input_pdf: Path, output_pdf: Path, dpi: int = 300
) -> Path | PDFCompressionError:
    """Shrink a PDF by re-writing it through Ghostscript's ``pdfwrite`` device.

    Args:
        input_pdf: PDF file to read
        output_pdf: Destination for the compressed PDF
        dpi: Resolution passed to Ghostscript via ``-r`` (default: 300)

    Returns:
        ``output_pdf``, once Ghostscript has finished successfully

    Raises:
        PDFCompressionError: If the ``gs`` executable cannot be found or the
            Ghostscript run fails
    """
    gs_command = [
        "gs",
        "-sDEVICE=pdfwrite",
        "-dCompatibilityLevel=1.7",
        "-dPDFSETTINGS=/printer",
        f"-r{dpi}",
        "-dNOPAUSE",
        "-dQUIET",
        "-dBATCH",
        f"-sOutputFile={output_pdf}",
        str(input_pdf),
    ]

    try:
        _run_subprocess_logged(gs_command)
    except FileNotFoundError as exc:
        # Raised when the "gs" binary itself is missing
        raise PDFCompressionError(f"Ghostscript not found: {exc}") from exc
    except subprocess.SubprocessError as exc:
        raise PDFCompressionError(f"Ghostscript compression failed: {exc}") from exc

    return output_pdf
def convert_svg_to_pdf(
    svg_path: Path,
    pdf_path: Path,
    backend: str = "cairosvg",
) -> Path | SVGConversionError:
    """Convert an SVG file to PDF.

    Args:
        svg_path: Path to the input SVG file
        pdf_path: Path where the PDF will be written
        backend: Conversion backend to use, either "cairosvg" or "inkscape"
            (default: "cairosvg"). Any other value logs a warning and falls
            back to "cairosvg".

    Returns:
        Path to the converted PDF file

    Raises:
        SVGConversionError: If SVG conversion fails (including a missing
            inkscape executable); includes the backend actually used and
            the cause
    """
    if backend == "inkscape":
        try:
            _run_subprocess_logged(
                [
                    "inkscape",
                    f"--export-filename={pdf_path}",
                    str(svg_path),
                ]
            )
        except (FileNotFoundError, subprocess.SubprocessError) as exc:
            # FileNotFoundError: the inkscape binary is not installed. Wrap it
            # as well so both backends report failures the same way.
            raise SVGConversionError(svg_path, backend, str(exc)) from exc
        return pdf_path

    if backend != "cairosvg":
        logger.warning(
            "Unknown svg2pdf backend: %s - falling back to cairosvg",
            backend,
        )
        # Report the backend that really ran if the conversion fails below.
        backend = "cairosvg"

    try:
        with open(svg_path, "rb") as svg_file:
            svg2pdf(file_obj=svg_file, write_to=str(pdf_path))
    except Exception as exc:
        raise SVGConversionError(svg_path, backend, str(exc)) from exc

    return pdf_path