Coverage for src/artemis_sg/gcloud.py: 25%
61 statements
« prev ^ index » next coverage.py v7.3.1, created at 2024-03-06 10:59 -0800
« prev ^ index » next coverage.py v7.3.1, created at 2024-03-06 10:59 -0800
1#!/usr/bin/env python
2"""artemis_sg.gcloud
4Interface for Google Cloud blobs."""
6import datetime
7import logging
8import os
9import time
10import typing as t
12import puremagic
13from google.cloud import storage
15from artemis_sg.config import CFG
# Bare file name of this module (directory and extension stripped); used as
# the prefix of log-message namespaces.
MODULE = os.path.splitext(os.path.split(__file__)[1])[0]
class GCloud:
    """
    Object that provides Google Cloud Bucket interaction.

    :param cloud_key_file:
        Path of file containing the authentication key for a Google Cloud.
    :param bucket_name:
        Name of the Google Cloud Bucket to be used by object instance.
    """

    def __init__(self, cloud_key_file: str, bucket_name: str = "default") -> None:
        # This environ setting needs to stay: the client below is created
        # without explicit credentials, so it must find the key file through
        # this environment variable.
        os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = cloud_key_file
        self.storage_client = storage.Client()
        self.bucket_name = bucket_name
        self.bucket = self.storage_client.bucket(self.bucket_name)

    def upload_cloud_blob(self,
                          source_file_path: str,
                          destination_blob_name: str) -> None:
        """
        Upload local file to Google Cloud Bucket.

        :param source_file_path:
            Path of file to be uploaded to Google Cloud Bucket.
        :param destination_blob_name:
            Name of Google Cloud Bucket blob to be saved.
        """
        blob = self.bucket.blob(destination_blob_name)
        blob.upload_from_filename(source_file_path)

    def generate_cloud_signed_url(self, blob_name: str) -> str:
        """Generates a v4 signed URL for downloading a blob.

        The URL allows ``GET`` requests and expires 30 minutes after it is
        generated.

        Note that this method requires a service account key file. You can not
        use this if you are using Application Default Credentials from Google
        Compute Engine or from the Google Cloud SDK.

        :param blob_name:
            Name of Google Cloud Bucket blob to obtain URL for.
        :returns: URL of blob
        """
        blob = self.bucket.blob(blob_name)
        return blob.generate_signed_url(
            version="v4",
            expiration=datetime.timedelta(minutes=30),
            method="GET",
        )

    def list_blobs(self, prefix: str) -> t.Iterator[storage.Blob]:
        """
        Get Iterator of blobs filtered by prefix.

        :param prefix:
            Name of Google Cloud Bucket prefix used to filter blobs
        :returns: Iterator of matching Blob objects
        """
        # NOTE: the iterator returned by the client fetches further result
        # pages on demand while it is consumed, so no manual page_token
        # handling is needed here.
        return self.storage_client.list_blobs(self.bucket_name, prefix=prefix)

    def list_image_blob_names(self, prefix: str) -> t.List[str]:
        """
        Get list of image blob names filtered by prefix.

        A blob counts as an image when its content type contains ``"image"``.
        Blobs without a content type are skipped.

        :param prefix:
            Name of Google Cloud Bucket prefix used to filter blobs
        :returns: List of matching Blob names
        """
        return [
            blob.name
            for blob in self.list_blobs(prefix)
            # content_type may be None; guard so the membership test cannot
            # raise TypeError.
            if blob.content_type and "image" in blob.content_type
        ]
def upload(file_source_dir: str, bucket_prefix: str, cloud_object: GCloud) -> None:
    """
    Push the image files found in a local directory to a Google Cloud Bucket.

    Every regular file in the directory is inspected.  A file whose detected
    type is not ``.jpg`` or ``.png`` is deleted from the directory.  An image
    whose blob name already exists in the bucket is skipped unless the local
    file was modified within the configured ``new_threshold_secs`` window.

    :param file_source_dir:
        Path to directory containing source files to upload.
    :param bucket_prefix:
        Name of Google Cloud Bucket prefix used to determine storage location.
    :param cloud_object:
        Instance of artemis_sg.GCloud to handle API interactions.
    """
    namespace = f"{MODULE}.{upload.__name__}"
    existing_names = cloud_object.list_image_blob_names(bucket_prefix)

    for filename in os.listdir(file_source_dir):
        filepath = os.path.join(file_source_dir, filename)
        if not os.path.isfile(filepath):
            # Sub-directories and other non-file entries are ignored.
            continue
        file_blob_name = f"{bucket_prefix}/{filename}"

        # Only images may be uploaded; anything else is removed locally.
        try:
            detected_ext = puremagic.from_file(filepath)
        except puremagic.main.PureError:
            detected_ext = None
        if detected_ext not in (".jpg", ".png"):
            logging.error(
                f"{namespace}: Err reading '{filename}', deleting '{filepath}'"
            )
            os.remove(filepath)
            continue

        # A blob that already exists is only re-uploaded while the local file
        # is still considered new.
        seconds_since_modified = time.time() - os.path.getmtime(filepath)
        if (
            file_blob_name in existing_names
            and seconds_since_modified > CFG["google"]["cloud"]["new_threshold_secs"]
        ):
            logging.info(
                f"{namespace}: File '{filename}' found in Google Cloud "
                f"bucket, not uploading."
            )
            continue

        logging.info(
            f"{namespace}: Uploading '{file_blob_name}' to Google Cloud bucket."
        )
        cloud_object.upload_cloud_blob(filepath, file_blob_name)
def main() -> None:
    """
    Read the upload settings from the configuration and run the upload.

    The source directory, bucket name, bucket prefix and key file path are
    taken from ``CFG``; a ``GCloud`` instance is built from them and handed to
    ``upload``.
    """
    source_dir = CFG["asg"]["data"]["dir"]["upload_source"]
    cloud_cfg = CFG["google"]["cloud"]
    # Resolve every setting before any client is created.
    bucket, prefix, key_file = (
        cloud_cfg[key] for key in ("bucket", "bucket_prefix", "key_file")
    )

    gcloud = GCloud(cloud_key_file=key_file, bucket_name=bucket)
    upload(source_dir, prefix, gcloud)
# Script entry point: run the upload only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()