cloud_storage_utility.platforms.azure_cloud_storage
View Source

```python
import os

from azure.identity import ClientSecretCredential
from azure.storage.filedatalake import DataLakeServiceClient

from ..common.base_cloud_storage import BaseCloudStorage
from ..config import config


class AzureCloudStorage(BaseCloudStorage):
    def __init__(self):
        super().__init__()
        self.service = self.__create_service_client()

    async def upload_file(self, bucket_name, local_filepath, callback=None, args=None):
        filesystem_client = self.service.get_file_system_client(file_system=bucket_name)
        cloud_file = os.path.basename(local_filepath)
        file_client = filesystem_client.get_file_client(cloud_file)
        file_client.create_file()

        with open(local_filepath, "rb") as file_handler:
            file_content = file_handler.read()

        # Append data to the created file if it isn't empty
        if len(file_content) > 0:
            file_client.append_data(file_content, offset=0, length=len(file_content))
            file_client.flush_data(len(file_content))

        if callback is not None:
            callback(*args)

    async def remove_item(self, bucket_name, cloud_key, callback=None):
        filesystem_client = self.service.get_file_system_client(file_system=bucket_name)
        file_client = filesystem_client.get_file_client(cloud_key)
        file_client.delete_file()
        return cloud_key

    async def download_file(
        self, bucket_name, cloud_key, destination_filepath, callback=None, args=None
    ):
        base_file = os.path.basename(cloud_key)
        path = os.path.dirname(cloud_key)

        file_system_client = self.service.get_file_system_client(
            file_system=bucket_name
        )
        directory_client = file_system_client.get_directory_client(path)
        file_client = directory_client.get_file_client(base_file)

        download = file_client.download_file()
        downloaded_bytes = download.readall()

        with open(destination_filepath, "wb") as local_file:
            local_file.write(downloaded_bytes)

        if callback is not None:
            callback(*args)

    def get_bucket_keys(self, bucket_name):
        file_system = self.service.get_file_system_client(file_system=bucket_name)
        paths = file_system.get_paths()
        return [path.name for path in paths]

    @staticmethod
    def __create_service_client():
        # Read the account information from the environment
        client_id = config.AZURE_CONFIG["client_id"]
        client_secret = config.AZURE_CONFIG["client_secret"]
        account_name = config.AZURE_CONFIG["storage_account_name"]
        tenant_id = config.AZURE_CONFIG["tenant_id"]
        connection_string = config.AZURE_CONFIG["connection_string"]

        # Prefer a connection string when one is available
        if connection_string is not None:
            service_client = DataLakeServiceClient.from_connection_string(
                conn_str=connection_string
            )
        else:
            credential = ClientSecretCredential(tenant_id, client_id, client_secret)
            service_client = DataLakeServiceClient(
                account_url="https://{}.dfs.core.windows.net".format(account_name),
                credential=credential,
            )

        return service_client
```
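Before the client can be constructed, `config.AZURE_CONFIG` must supply the account values that `__create_service_client` reads. A sketch of the expected shape; the key names come from the source above, while the values are hypothetical placeholders:

```python
# Hypothetical contents of config.AZURE_CONFIG; supply your own values.
AZURE_CONFIG = {
    "client_id": "<client-id>",
    "client_secret": "<client-secret>",
    "storage_account_name": "<account-name>",
    "tenant_id": "<tenant-id>",
    # When this is a string rather than None, it takes precedence over
    # the service-principal credentials above.
    "connection_string": None,
}
```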
class AzureCloudStorage(BaseCloudStorage)

The Azure Data Lake implementation of BaseCloudStorage, the abstract definition of what a platform implementation needs to include. Any new platform needs to inherit from BaseCloudStorage.
def __init__(self)

View Source

```python
def __init__(self):
    super().__init__()
    self.service = self.__create_service_client()
```
Sets up the platform-independent configuration and operations inherited from BaseCloudStorage, then creates the DataLakeServiceClient used by every operation. The constructor itself takes no arguments; the values below are the BaseCloudStorage defaults.

Args

- part_size (int, optional): The size of the chunks used to divide up large files. Defaults to 5 MB.
- file_threshold (int, optional): How large a file needs to be before operations are performed in chunks. Defaults to 15 MB.
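For context, a minimal sketch of the two authentication paths `__create_service_client` chooses between, using the same SDK calls as the source; the angle-bracket values are placeholders you would normally supply through `config.AZURE_CONFIG`:

```python
from azure.identity import ClientSecretCredential
from azure.storage.filedatalake import DataLakeServiceClient

# Path 1: build the client from a connection string, when one is configured.
service = DataLakeServiceClient.from_connection_string(
    conn_str="<your-connection-string>"  # placeholder
)

# Path 2: authenticate as a service principal with tenant/client/secret.
credential = ClientSecretCredential("<tenant-id>", "<client-id>", "<client-secret>")
service = DataLakeServiceClient(
    account_url="https://<account-name>.dfs.core.windows.net",
    credential=credential,
)
```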
async def upload_file(self, bucket_name, local_filepath, callback=None, args=None)

View Source

```python
async def upload_file(self, bucket_name, local_filepath, callback=None, args=None):
    filesystem_client = self.service.get_file_system_client(file_system=bucket_name)
    cloud_file = os.path.basename(local_filepath)
    file_client = filesystem_client.get_file_client(cloud_file)
    file_client.create_file()

    with open(local_filepath, "rb") as file_handler:
        file_content = file_handler.read()

    # Append data to the created file if it isn't empty
    if len(file_content) > 0:
        file_client.append_data(file_content, offset=0, length=len(file_content))
        file_client.flush_data(len(file_content))

    if callback is not None:
        callback(*args)
```
Uploads a single file to the given bucket. The file is stored under its basename; the directory portion of local_filepath is not reproduced in the cloud.

Args

- bucket_name (str): Target bucket.
- local_filepath (str): Path of the local file to upload.
- callback (function, optional): Called with *args after the upload completes. Defaults to None.
- args (tuple, optional): Arguments to pass to callback. Defaults to None.

Returns

None. Completion is signalled through the optional callback.
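A minimal usage sketch, assuming Azure credentials are already configured in `config.AZURE_CONFIG`; the bucket name and file path are hypothetical:

```python
import asyncio

from cloud_storage_utility.platforms.azure_cloud_storage import AzureCloudStorage


def on_uploaded(filepath):
    print(f"uploaded {filepath}")


async def main():
    storage = AzureCloudStorage()  # assumes credentials are configured
    # The file lands in the bucket under its basename, i.e. "report.csv".
    await storage.upload_file(
        "my-bucket",
        "/tmp/report.csv",
        callback=on_uploaded,
        args=("/tmp/report.csv",),
    )


asyncio.run(main())
```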
async def remove_item(self, bucket_name, cloud_key, callback=None)

View Source

```python
async def remove_item(self, bucket_name, cloud_key, callback=None):
    filesystem_client = self.service.get_file_system_client(file_system=bucket_name)
    file_client = filesystem_client.get_file_client(cloud_key)
    file_client.delete_file()
    return cloud_key
```
Removes a single item from the given bucket.

Args

- bucket_name (str): Target bucket.
- cloud_key (str): The name of the key to remove.
- callback (Callable[[str, str, str], None], optional): Defined by the interface; note that this implementation does not currently invoke it. Defaults to None.

Returns

str: The cloud_key that was removed.
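A minimal usage sketch; the bucket name and key are hypothetical, and credentials are assumed to be configured:

```python
import asyncio

from cloud_storage_utility.platforms.azure_cloud_storage import AzureCloudStorage


async def main():
    storage = AzureCloudStorage()  # assumes credentials are configured
    removed = await storage.remove_item("my-bucket", "photos/image.jpeg")
    print(removed)  # prints the key that was removed: "photos/image.jpeg"


asyncio.run(main())
```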
async def download_file(self, bucket_name, cloud_key, destination_filepath, callback=None, args=None)

View Source

```python
async def download_file(
    self, bucket_name, cloud_key, destination_filepath, callback=None, args=None
):
    base_file = os.path.basename(cloud_key)
    path = os.path.dirname(cloud_key)

    file_system_client = self.service.get_file_system_client(
        file_system=bucket_name
    )
    directory_client = file_system_client.get_directory_client(path)
    file_client = directory_client.get_file_client(base_file)

    download = file_client.download_file()
    downloaded_bytes = download.readall()

    with open(destination_filepath, "wb") as local_file:
        local_file.write(downloaded_bytes)

    if callback is not None:
        callback(*args)
```
Downloads a single file from the given bucket to a local path. The directory portion of cloud_key is resolved as a cloud directory and the basename as the file within it.

Args

- bucket_name (str): Target bucket.
- cloud_key (str): The name of the item to download from the cloud bucket.
- destination_filepath (str): Where to write the downloaded item.
- callback (function, optional): Called with *args after the download completes. Defaults to None.
- args (tuple, optional): Arguments to pass to callback. Defaults to None.

Returns

None. Completion is signalled through the optional callback.
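A minimal usage sketch; the bucket, key, and destination path are hypothetical:

```python
import asyncio

from cloud_storage_utility.platforms.azure_cloud_storage import AzureCloudStorage


async def main():
    storage = AzureCloudStorage()  # assumes credentials are configured
    # A nested key works: "photos" is resolved as the cloud directory and
    # "image.jpeg" as the file inside it.
    await storage.download_file("my-bucket", "photos/image.jpeg", "/tmp/image.jpeg")


asyncio.run(main())
```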
def get_bucket_keys(self, bucket_name)

View Source

```python
def get_bucket_keys(self, bucket_name):
    file_system = self.service.get_file_system_client(file_system=bucket_name)
    paths = file_system.get_paths()
    return [path.name for path in paths]
```
Lists every key in the given bucket. Unlike the transfer methods, this method is synchronous.

Args

- bucket_name (str): Target bucket.

Returns

List[str]: The full names of all paths in the bucket, e.g. ["photos/image.jpeg", "file.txt"]. Note that the BaseCloudStorage contract describes a Dict[str, BucketKeyMetadata] mapping; this implementation returns a flat list of key names instead.