Source code for paperap.models.document.metadata.model
"""
Document metadata models for Paperless-NgX.
This module provides models for representing document metadata in Paperless-NgX,
including file information, checksums, and document properties. These models
are used to access and manipulate metadata associated with documents stored
in the Paperless-NgX system.
"""
from __future__ import annotations
import pydantic
from paperap.models.abstract import StandardModel
[docs]
class MetadataElement(pydantic.BaseModel):
    """
    A single key/value metadata entry taken from a document file.

    Each instance pairs one metadata field name (for example an author or a
    creation date) with the value recorded for that field. Both parts are
    stored as strings.

    Attributes:
        key: Name or identifier of the metadata field.
        value: Value recorded for that field.

    Examples:
        >>> metadata = MetadataElement(key="Author", value="John Doe")
        >>> print(f"{metadata.key}: {metadata.value}")
        Author: John Doe

    """

    key: str
    value: str
[docs]
class DocumentMetadata(StandardModel):
    """
    Represents comprehensive metadata for a Paperless-NgX document.

    This model bundles the file-level metadata of a document: information
    about the original upload and, when one exists, about its archived
    version. It exposes checksums, sizes, MIME type, filenames, the detected
    language, and the metadata elements extracted from each file.

    Every field is listed in ``Meta.read_only_fields``; the values are
    generated by the Paperless-NgX system during document processing.

    Attributes:
        original_checksum: The SHA256 checksum of the original document file.
        original_size: The size of the original document in bytes.
        original_mime_type: The MIME type of the original document
            (e.g., "application/pdf").
        media_filename: The filename of the document in the Paperless-NgX
            media storage.
        has_archive_version: Whether the document has an archived version
            (typically a PDF/A).
        original_metadata: List of metadata elements extracted from the
            original document.
        archive_checksum: The SHA256 checksum of the archived document version.
        archive_media_filename: The filename of the archived version in media
            storage.
        original_filename: The original filename of the document when it was
            uploaded.
        lang: The detected language code of the document content.
        archive_size: The size of the archived document version in bytes.
        archive_metadata: List of metadata elements extracted from the
            archived version.

    Examples:
        >>> # Access document metadata
        >>> metadata = client.documents.get(123).metadata
        >>> print(f"Original file: {metadata.original_filename}")
        >>> print(f"Size: {metadata.original_size} bytes")
        >>> print(f"MIME type: {metadata.original_mime_type}")
        >>>
        >>> # Iterate through extracted metadata elements
        >>> for element in metadata.original_metadata:
        ...     print(f"{element.key}: {element.value}")

    """

    # Scalar fields default to None, so a partially populated payload still
    # produces a model instance.
    # NOTE(review): the bare ``[]`` defaults below are only safe if
    # StandardModel is a pydantic model (pydantic copies mutable defaults per
    # instance) -- confirm against paperap.models.abstract.
    original_checksum: str | None = None
    original_size: int | None = None
    original_mime_type: str | None = None
    media_filename: str | None = None
    has_archive_version: bool | None = None
    original_metadata: list[MetadataElement] = []
    archive_checksum: str | None = None
    archive_media_filename: str | None = None
    original_filename: str | None = None
    lang: str | None = None
    archive_size: int | None = None
    archive_metadata: list[MetadataElement] = []

    class Meta(StandardModel.Meta):
        """
        Metadata configuration for the DocumentMetadata model.

        Declares the set of field names treated as read-only. It names every
        field defined on DocumentMetadata.
        """

        read_only_fields = {
            "original_checksum",
            "original_size",
            "original_mime_type",
            "media_filename",
            "has_archive_version",
            "original_metadata",
            "archive_checksum",
            "archive_media_filename",
            "original_filename",
            "lang",
            "archive_size",
            "archive_metadata",
        }