cloud_storage_utility.platforms.azure_cloud_storage

View Source
import os

from azure.identity import ClientSecretCredential
from azure.storage.filedatalake import DataLakeServiceClient

from ..common.base_cloud_storage import BaseCloudStorage
from ..config import config


class AzureCloudStorage(BaseCloudStorage):
    def __init__(self):
        super().__init__()
        self.service = self.__create_service_client()

    async def upload_file(self, bucket_name, local_filepath, callback=None, args=None):
        filesystem_client = self.service.get_file_system_client(file_system=bucket_name)
        cloud_file = os.path.basename(local_filepath)
        file_client = filesystem_client.get_file_client(cloud_file)
        file_client.create_file()

        with open(local_filepath, "rb") as file_handler:
            file_content = file_handler.read()
            # Append data to created file if it isn't empty
            if len(file_content) > 0:
                file_client.append_data(
                    file_content, offset=0, length=len(file_content)
                )
                file_client.flush_data(len(file_content))

        if callback is not None:
            callback(*args)

    async def remove_item(self, bucket_name, cloud_key, callback=None):
        filesystem_client = self.service.get_file_system_client(file_system=bucket_name)
        file_client = filesystem_client.get_file_client(cloud_key)
        file_client.delete_file()
        return cloud_key

    async def download_file(
        self, bucket_name, cloud_key, destination_filepath, callback=None, args=None
    ):
        base_file = os.path.basename(cloud_key)
        path = os.path.dirname(cloud_key)

        file_system_client = self.service.get_file_system_client(
            file_system=bucket_name
        )
        directory_client = file_system_client.get_directory_client(path)
        file_client = directory_client.get_file_client(base_file)

        download = file_client.download_file()
        downloaded_bytes = download.readall()

        local_file = open(destination_filepath, "wb")
        local_file.write(downloaded_bytes)
        local_file.close()

        if callback is not None:
            callback(*args)

    def get_bucket_keys(self, bucket_name):
        file_system = self.service.get_file_system_client(file_system=bucket_name)
        paths = file_system.get_paths()
        files = []
        for path in paths:
            files.append(path.name)

        return files

    @staticmethod
    def __create_service_client():
        # read the account information from the environment
        client_id = config.AZURE_CONFIG["client_id"]
        client_secret = config.AZURE_CONFIG["client_secret"]
        account_name = config.AZURE_CONFIG["storage_account_name"]
        tenant_id = config.AZURE_CONFIG["tenant_id"]
        connection_string = config.AZURE_CONFIG["connection_string"]

        # Prefer using a connection string if it's available
        if connection_string is not None:
            service_client = DataLakeServiceClient.from_connection_string(
                conn_str=connection_string
            )
        else:
            credential = ClientSecretCredential(tenant_id, client_id, client_secret)
            service_client = DataLakeServiceClient(
                account_url="{}://{}.dfs.core.windows.net".format(
                    "https", account_name
                ),
                credential=credential,
            )

        return service_client
View Source
class AzureCloudStorage(BaseCloudStorage):
    def __init__(self):
        super().__init__()
        self.service = self.__create_service_client()

    async def upload_file(self, bucket_name, local_filepath, callback=None, args=None):
        filesystem_client = self.service.get_file_system_client(file_system=bucket_name)
        cloud_file = os.path.basename(local_filepath)
        file_client = filesystem_client.get_file_client(cloud_file)
        file_client.create_file()

        with open(local_filepath, "rb") as file_handler:
            file_content = file_handler.read()
            # Append data to created file if it isn't empty
            if len(file_content) > 0:
                file_client.append_data(
                    file_content, offset=0, length=len(file_content)
                )
                file_client.flush_data(len(file_content))

        if callback is not None:
            callback(*args)

    async def remove_item(self, bucket_name, cloud_key, callback=None):
        filesystem_client = self.service.get_file_system_client(file_system=bucket_name)
        file_client = filesystem_client.get_file_client(cloud_key)
        file_client.delete_file()
        return cloud_key

    async def download_file(
        self, bucket_name, cloud_key, destination_filepath, callback=None, args=None
    ):
        base_file = os.path.basename(cloud_key)
        path = os.path.dirname(cloud_key)

        file_system_client = self.service.get_file_system_client(
            file_system=bucket_name
        )
        directory_client = file_system_client.get_directory_client(path)
        file_client = directory_client.get_file_client(base_file)

        download = file_client.download_file()
        downloaded_bytes = download.readall()

        local_file = open(destination_filepath, "wb")
        local_file.write(downloaded_bytes)
        local_file.close()

        if callback is not None:
            callback(*args)

    def get_bucket_keys(self, bucket_name):
        file_system = self.service.get_file_system_client(file_system=bucket_name)
        paths = file_system.get_paths()
        files = []
        for path in paths:
            files.append(path.name)

        return files

    @staticmethod
    def __create_service_client():
        # read the account information from the environment
        client_id = config.AZURE_CONFIG["client_id"]
        client_secret = config.AZURE_CONFIG["client_secret"]
        account_name = config.AZURE_CONFIG["storage_account_name"]
        tenant_id = config.AZURE_CONFIG["tenant_id"]
        connection_string = config.AZURE_CONFIG["connection_string"]

        # Prefer using a connection string if it's available
        if connection_string is not None:
            service_client = DataLakeServiceClient.from_connection_string(
                conn_str=connection_string
            )
        else:
            credential = ClientSecretCredential(tenant_id, client_id, client_secret)
            service_client = DataLakeServiceClient(
                account_url="{}://{}.dfs.core.windows.net".format(
                    "https", account_name
                ),
                credential=credential,
            )

        return service_client

Abstract definition of what a platform implementation needs to include. Any new platform implementation must inherit from this base class.

#   AzureCloudStorage()
View Source
    def __init__(self):
        super().__init__()
        self.service = self.__create_service_client()

Sets up platform-independent configuration and operations.

Args
  • part_size (int, optional): The size of the chunks (how to divide up large files). Defaults to 5MB.
  • file_threshold (int, optional): How large a file needs to be before performing operations in chunks. Defaults to 15MB.
#   async def upload_file(self, bucket_name, local_filepath, callback=None, args=None):
View Source
    async def upload_file(self, bucket_name, local_filepath, callback=None, args=None):
        filesystem_client = self.service.get_file_system_client(file_system=bucket_name)
        cloud_file = os.path.basename(local_filepath)
        file_client = filesystem_client.get_file_client(cloud_file)
        file_client.create_file()

        with open(local_filepath, "rb") as file_handler:
            file_content = file_handler.read()
            # Append data to created file if it isn't empty
            if len(file_content) > 0:
                file_client.append_data(
                    file_content, offset=0, length=len(file_content)
                )
                file_client.flush_data(len(file_content))

        if callback is not None:
            callback(*args)

An implementation of this must provide a way to upload a single file, with a specified prefix.

Args
  • bucket_name (str): Target bucket.
  • cloud_key (str): What to name the file in the cloud.
  • file_path (str): Where to get the file from locally.
  • prefix (str, optional): Prefix to prepend in the cloud.
  • callback (function, optional): Implementations of this method need to call this after the operation is complete. Defaults to None.
Returns

bool: Whether the upload was successful or not.

#   async def remove_item(self, bucket_name, cloud_key, callback=None):
View Source
    async def remove_item(self, bucket_name, cloud_key, callback=None):
        filesystem_client = self.service.get_file_system_client(file_system=bucket_name)
        file_client = filesystem_client.get_file_client(cloud_key)
        file_client.delete_file()
        return cloud_key

An implementation for this must provide a way to send removal requests.

Args
  • bucket_name (str): Target bucket.
  • cloud_key (str): The name of the key we want to remove.
  • callback (Callable[[str, str, str], None], optional): Implementations of this method need to call this after the operation is complete. Defaults to None.
Returns

bool: Whether the remove was successful or not.

#   async def download_file( self, bucket_name, cloud_key, destination_filepath, callback=None, args=None ):
View Source
    async def download_file(
        self, bucket_name, cloud_key, destination_filepath, callback=None, args=None
    ):
        base_file = os.path.basename(cloud_key)
        path = os.path.dirname(cloud_key)

        file_system_client = self.service.get_file_system_client(
            file_system=bucket_name
        )
        directory_client = file_system_client.get_directory_client(path)
        file_client = directory_client.get_file_client(base_file)

        download = file_client.download_file()
        downloaded_bytes = download.readall()

        local_file = open(destination_filepath, "wb")
        local_file.write(downloaded_bytes)
        local_file.close()

        if callback is not None:
            callback(*args)

An implementation for this must provide a way to download a single file.

Args
  • bucket_name (str): Target bucket.
  • cloud_key (str): The name of the item we want to download from the cloud bucket.
  • destination_filepath (str): Where to put the downloaded item.
  • prefix (str, optional): Only download files under the matching prefix.
  • callback (Callable[[str, str, str, bool], None], optional): Implementations of this method need to call this after the operation is complete. Defaults to None.
Returns

bool: Whether the download was successful or not.

#   def get_bucket_keys(self, bucket_name):
View Source
    def get_bucket_keys(self, bucket_name):
        file_system = self.service.get_file_system_client(file_system=bucket_name)
        paths = file_system.get_paths()
        files = []
        for path in paths:
            files.append(path.name)

        return files

An implementation of this must provide a way to list the contents of a bucket.

Args
  • bucket_name (str): Target bucket.
  • prefix (str, optional): Only get keys that match this prefix.
  • delimiter (str, optional): Set the delimiter, defaults to '/'. i.e, photos/image.jpeg
Returns

Dict[str, BucketKeyMetadata]: Dictionary of key name -> KeyMetadata, i.e.

{
    "image.jpeg": {
        "bytes": 32,
        "last_modified": 1619195172
    },
    "file.txt": {
        "bytes": 32,
        "last_modified": 1619195172
    }
}