Source code for prs_commons.aws.s3_client

"""Asynchronous S3 client for AWS S3 storage operations.

This module provides a thread-safe singleton S3 client that implements the
StorageClient interface.
"""

import asyncio
import base64
import io
import logging
import os
from contextlib import asynccontextmanager
from threading import Lock
from typing import Any, AsyncGenerator, Dict, Optional, Type, TypeVar

import aioboto3
from botocore.exceptions import ClientError, NoCredentialsError
from typing_extensions import override

from prs_commons.storage.base import StorageClient

# Type aliases
S3Response = Dict[str, Any]

# Type variables for generic class methods
T = TypeVar("T", bound="S3Client")

# Set up logger
logger = logging.getLogger(__name__)


class S3Client(StorageClient):
    """Thread-safe singleton client for AWS S3 operations.

    This client provides an async interface to interact with AWS S3 using
    aioboto3. It implements the singleton pattern for efficient resource usage.
    """

    _instance: Optional["S3Client"] = None
    _lock: Lock = Lock()
    _session: Optional[aioboto3.Session] = None
    _client: Optional[Any] = None
    _initialized: bool = False
    _client_lock: asyncio.Lock = asyncio.Lock()

    def __new__(cls: Type[T]) -> T:
        """Create or return the singleton instance of S3Client."""
        with cls._lock:
            if cls._instance is None:
                cls._instance = super(S3Client, cls).__new__(cls)
                cls._instance._initialized = False
        return cls._instance  # type: ignore

    def __init__(self) -> None:
        """Initialize the S3 client with configuration from environment variables."""
        if getattr(self, "_initialized", False):
            return

        # Initialize session
        self._session = aioboto3.Session(
            region_name=os.getenv("S3_AWS_REGION"),
            aws_access_key_id=os.getenv("S3_AWS_ACCESS_KEY_ID"),
            aws_secret_access_key=os.getenv("S3_AWS_SECRET_ACCESS_KEY"),
        )
        self._client = None
        self._client_close = None
        self._initialized = True

    @asynccontextmanager
    async def resource(self) -> AsyncGenerator[Any, None]:
        """Async context manager for S3 resource access.

        Yields:
            An aioboto3 S3 resource instance for direct S3 operations.

        Example:
            async with s3.resource() as s3:
                bucket = await s3.Bucket('my-bucket')
                # Perform operations with the bucket
        """
        if self._session is None:
            raise RuntimeError("Session not initialized")
        async with self._session.resource("s3") as s3:
            yield s3

    @property
    @override
    async def client(self) -> Any:
        """Get the S3 client instance.

        Returns:
            aioboto3 S3 client instance for low-level operations.

        Note:
            Prefer using the higher-level methods when possible.
        """
        async with self.resource() as s3:
            return s3.meta.client

    @override
    async def upload_file(
        self, file_path: str, bucket: str, key: str, **kwargs: Any
    ) -> S3Response:
        """Upload a file to an S3 bucket.

        Args:
            file_path: Path to the local file to upload
            bucket: Target S3 bucket name
            key: S3 object key/path
            **kwargs: Additional args for boto3 upload_file
                - ExtraArgs: Dict of additional args (e.g., ContentType, ACL)
                - Callback: Progress callback function
                - Config: boto3.s3.transfer.TransferConfig

        Returns:
            Dict with status, bucket, and key

        Raises:
            NoCredentialsError: If AWS credentials are invalid
            ClientError: For S3-specific errors
            FileNotFoundError: If the local file doesn't exist
        """
        if not bucket:
            raise ValueError("Bucket name is required")

        try:
            async with self.resource() as s3:
                s3_bucket_obj = await s3.Bucket(bucket)
                # Forward kwargs (ExtraArgs, Callback, Config) to boto3
                await s3_bucket_obj.upload_file(file_path, key, **kwargs)
                return {"status": "success", "bucket": bucket, "key": key}
        except (ClientError, NoCredentialsError) as e:
            logger.error(
                "Failed to upload file %s to s3://%s/%s: %s",
                file_path,
                bucket,
                key,
                str(e),
            )
            raise

    @override
    async def download_file(
        self, bucket: str, key: str, file_path: str, **kwargs: Any
    ) -> bool:
        """Download a file from S3 to the local filesystem.

        Args:
            bucket: Source S3 bucket name
            key: S3 object key/path
            file_path: Local path to save the file (must include the filename)
            **kwargs: Additional args for boto3 download_file
                - ExtraArgs: Additional arguments for the download
                - Callback: Progress callback function
                - Config: boto3.s3.transfer.TransferConfig

        Returns:
            bool: True if the download was successful

        Raises:
            FileNotFoundError: If the file doesn't exist in S3 or the local
                path is invalid
            PermissionError: For permission issues with S3 or the local
                filesystem
            ClientError: For S3-specific errors
            IOError: For issues writing to the local filesystem
            ValueError: If the bucket name or key is not provided
        """
        if not bucket or not key:
            raise ValueError("Bucket name and key are required")

        try:
            async with self.resource() as s3:
                s3_bucket = await s3.Bucket(bucket)
                await s3_bucket.download_file(key, file_path, **kwargs)
                return True
        except ClientError as e:
            error_code = e.response.get("Error", {}).get("Code")
            if error_code == "404":
                raise FileNotFoundError(f"File not found: s3://{bucket}/{key}") from e
            if error_code in ["403", "AccessDenied"]:
                raise PermissionError(
                    f"Access denied to file: s3://{bucket}/{key}"
                ) from e
            raise  # Re-raise other ClientError instances

    @override
    async def delete_object(self, bucket: str, key: str) -> S3Response:
        """Delete an object from S3.

        Args:
            bucket: S3 bucket name
            key: S3 object key to delete

        Returns:
            Dict with operation status and details

        Raises:
            ClientError: If deletion fails
        """
        try:
            async with self.resource() as s3:
                # Use the object's delete method
                obj = await s3.Object(bucket, key)
                response = await obj.delete()
                return {
                    "status": "success",
                    "bucket": bucket,
                    "key": key,
                    "response": response,
                }
        except ClientError as e:
            logger.error("Error deleting object %s from bucket %s: %s", key, bucket, e)
            raise
        except Exception as e:
            logger.error("Unexpected error in delete_object: %s", str(e))
            raise

    @override
    async def generate_presigned_url(
        self,
        bucket: str,
        key: str,
        operation: str = "get_object",
        expiration: int = 3600,
        **kwargs: Any,
    ) -> Optional[str]:
        """Generate a pre-signed URL for an S3 object.

        Args:
            bucket: S3 bucket name
            key: S3 object key
            operation: S3 operation ('get_object', 'put_object', etc.)
            expiration: URL expiration time in seconds (default: 1 hour)
            **kwargs: Additional parameters for the S3 operation

        Returns:
            str: Pre-signed URL, or None if credentials are invalid

        Example:
            # Generate upload URL
            url = await s3.generate_presigned_url(
                bucket='my-bucket',
                key='uploads/file.txt',
                operation='put_object',
                ContentType='text/plain'
            )
        """
        try:
            params = {"Bucket": bucket, "Key": key, **kwargs}
            # Generate the URL while the underlying resource is still open;
            # using a client after its context manager exits is unreliable.
            async with self.resource() as s3:
                url: str = await s3.meta.client.generate_presigned_url(
                    ClientMethod=operation, Params=params, ExpiresIn=expiration
                )
            return url
        except Exception as e:
            logger.error("Error generating presigned URL: %s", str(e))
            raise

    @override
    async def generate_upload_url(
        self,
        bucket: str,
        key: str,
        expiration: int = 3600,
        **kwargs: Any,
    ) -> Optional[str]:
        """Generate a pre-signed URL for uploading a file to S3.

        This is a convenience wrapper around generate_presigned_url for uploads.

        Args:
            bucket: S3 bucket name
            key: S3 object key where the file will be stored
            expiration: Time in seconds until the URL expires (default: 1 hour)
            **kwargs: Additional parameters to pass to the S3 put_object
                operation. Common parameters:
                - ContentType: The content type of the file (e.g., 'image/jpeg')
                - ACL: Access control for the file (e.g., 'private', 'public-read')
                - Metadata: Dictionary of metadata to store with the object

        Returns:
            Optional[str]: The pre-signed URL as a string, or None if
                credentials are invalid

        Example:
            >>> upload_url = await s3.generate_upload_url(
            ...     bucket='my-bucket',
            ...     key='uploads/file.jpg',
            ...     ContentType='image/jpeg',
            ...     ACL='private',
            ...     Metadata={'custom': 'value'}
            ... )
        """
        # Normalize a lowercase 'contenttype' kwarg to the proper 'ContentType'
        if "contenttype" in kwargs:
            kwargs["ContentType"] = kwargs.pop("contenttype")

        return await self.generate_presigned_url(
            bucket=bucket,
            key=key,
            operation="put_object",
            expiration=expiration,
            **kwargs,
        )

    @override
    async def generate_download_url(
        self,
        bucket: str,
        key: str,
        expiration: int = 3600,
        **kwargs: Any,
    ) -> Optional[str]:
        """Generate a pre-signed URL for downloading a file from S3.

        This is a convenience wrapper around generate_presigned_url for downloads.

        Args:
            bucket: S3 bucket name
            key: S3 object key of the file to download
            expiration: Time in seconds until the URL expires (default: 1 hour)
            **kwargs: Additional parameters to pass to the S3 get_object operation

        Returns:
            Optional[str]: The pre-signed URL as a string, or None if
                credentials are invalid

        Example:
            >>> download_url = await s3.generate_download_url(
            ...     bucket='my-bucket',
            ...     key='downloads/file.txt',
            ...     ResponseContentType='application/pdf',
            ...     ResponseContentDisposition='attachment; filename=report.pdf'
            ... )
        """
        return await self.generate_presigned_url(
            bucket=bucket,
            key=key,
            operation="get_object",
            expiration=expiration,
            **kwargs,
        )

    @override
    async def download_as_base64(
        self, bucket: str, key: str, check_exists: bool = True, **kwargs: Any
    ) -> str:
        """Download a file from S3 as a base64-encoded string.

        This method is useful when you need to work with the file contents
        directly in memory without saving to disk, such as when sending files
        in API responses or processing file contents in memory.

        Args:
            bucket: S3 bucket name
            key: S3 object key
            check_exists: If True, verify the file exists first
            **kwargs: Additional args for boto3 download_fileobj
                - ExtraArgs: Dict of extra arguments (e.g., VersionId,
                  SSECustomerAlgorithm, SSECustomerKey)

        Returns:
            str: Base64-encoded file contents

        Raises:
            FileNotFoundError: If the file doesn't exist and check_exists is True
            ClientError: For S3-specific errors

        Note:
            Loads the entire file into memory - not suitable for very large files.
        """
        if check_exists and not await self.file_exists(bucket, key):
            raise FileNotFoundError(f"File {key} not found in bucket {bucket}")

        # Download the file into an in-memory buffer
        buffer = io.BytesIO()
        try:
            async with self.resource() as s3:
                s3_obj = await s3.Object(bucket, key)
                await s3_obj.download_fileobj(buffer, **kwargs)
                buffer.seek(0)
                return base64.b64encode(buffer.read()).decode("utf-8")
        except ClientError as e:
            if e.response.get("Error", {}).get("Code") == "404":
                raise FileNotFoundError(
                    f"File {key} not found in bucket {bucket}"
                ) from e
            logger.error("Error downloading %s from %s as base64: %s", key, bucket, e)
            raise

    @override
    async def file_exists(self, bucket: str, key: str) -> bool:
        """Check if a file exists in S3.

        Args:
            bucket: S3 bucket name
            key: S3 object key to check

        Returns:
            bool: True if the file exists, False otherwise

        Raises:
            ClientError: If an error occurs during the check
        """
        try:
            async with self.resource() as s3:
                # Use the underlying client directly for head_object
                client = s3.meta.client
                await client.head_object(Bucket=bucket, Key=key)
                return True
        except ClientError as e:
            if e.response.get("Error", {}).get("Code") == "404":
                return False
            logger.error(
                "Error checking if %s exists in bucket %s: %s", key, bucket, e
            )
            # Re-raise the original error: constructing a new ClientError from
            # a message string is invalid (it expects an error response dict
            # and an operation name).
            raise
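
A minimal usage sketch of the singleton client, assuming the S3_AWS_REGION,
S3_AWS_ACCESS_KEY_ID, and S3_AWS_SECRET_ACCESS_KEY environment variables are
set; the bucket and key names below are placeholders, not part of the module:

    import asyncio

    from prs_commons.aws.s3_client import S3Client


    async def main() -> None:
        # Repeated constructions return the same instance (singleton)
        s3 = S3Client()
        assert s3 is S3Client()

        # Upload a local file, then mint a short-lived download link
        await s3.upload_file("report.pdf", "my-bucket", "reports/report.pdf")
        url = await s3.generate_download_url(
            bucket="my-bucket",
            key="reports/report.pdf",
            expiration=900,  # URL valid for 15 minutes
        )
        print(url)


    asyncio.run(main())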