Source code for igem_wikisync.wikisync

import os
import shutil
from hashlib import md5
from http.cookiejar import LWPCookieJar
from pathlib import Path
from datetime import date

import mechanicalsoup
import yaml

from igem_wikisync.browser import iGEM_login, iGEM_upload_file, iGEM_upload_page
from igem_wikisync.parsers import HTMLparser, CSSparser, JSparser
from igem_wikisync.files import HTMLfile, CSSfile, JSfile, OtherFile
from igem_wikisync.logger import logger

# pylint: disable=too-many-instance-attributes, fixme


def run(team: str, src_dir: str, build_dir: str, year=date.today().year, silence_warnings=False):
    '''
    Runs iGEM-WikiSync and uploads all files to iGEM servers
    while replacing relative URLs with those on the iGEM server.

    Mandatory Arguments:
        team: iGEM Team Name
        src_dir: Path to the folder where the source files are present
        build_dir: Path to the folder where the built files will be stored before uploading

    Optional Arguments:
        year: Subdomain for igem.org. Current year by default.
        silence_warnings: Broken link warnings are not printed to console if true.
            The log still contains everything.
    '''

    # * 1. CHECK AND FORMAT INPUTS
    if team is None or not isinstance(team, str):
        logger.critical('Please specify your team name.')
        raise SystemExit
    if src_dir is None or not isinstance(src_dir, str):
        logger.critical('Please specify where your code is stored ' +
                        'using the src_dir argument.')
        raise SystemExit
    if build_dir is None or not isinstance(build_dir, str):
        logger.critical('Please specify where your code should be temporarily stored ' +
                        'using the build_dir argument.')
        raise SystemExit
    if not isinstance(year, int) or len(str(year)) > 4:
        logger.critical('Year should be a four digit integer.')
        raise SystemExit
    if not isinstance(silence_warnings, bool):
        logger.critical('silence_warnings must have a boolean value.')
        raise SystemExit

    config = {
        'team': team,
        'src_dir': src_dir,
        'build_dir': build_dir,
        'year': str(year),
        'silence_warnings': silence_warnings
    }

    # * 2. Load or create upload_map
    upload_map = get_upload_map()

    # * 3. Create build directory
    if not os.path.isdir(build_dir):
        os.mkdir(build_dir)
        # ? error handling here?

    # * 4. Get iGEM credentials from environment variables
    credentials = {
        'username': os.environ.get('IGEM_USERNAME'),
        'password': os.environ.get('IGEM_PASSWORD')
    }

    # * 5. Load/create cookie file
    browser, cookiejar = get_browser_with_cookies()

    # * 6. Login to iGEM
    login = iGEM_login(browser, credentials, config)
    if not login:
        message = 'Failed to login.'
        logger.critical(message)
        raise SystemExit

    # * 7. Save cookies
    # TODO: check if this works, might not
    cookiejar.save()

    # * 8. Cache files
    files = cache_files(upload_map, config)

    # * 9. Upload all assets and create a map
    uploaded_assets = upload_and_write_assets(files['other'], browser, upload_map, config)

    # * 10. Write upload map just in case
    #       things go wrong while dealing with code
    write_upload_map(upload_map)

    # * 11. Build files and upload changed files
    uploaded_code = build_and_upload(files, browser, config, upload_map)

    # * 12. Write final upload map
    write_upload_map(upload_map)

    print_summary(uploaded_assets, uploaded_code)
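# Example invocation (an illustrative sketch, not part of the module; the team
# name and folder names are placeholders, and IGEM_USERNAME / IGEM_PASSWORD
# must already be set in the environment, since run() reads them from there):
#
#   from igem_wikisync.wikisync import run
#
#   run(
#       team='Example_Team',      # placeholder team name
#       src_dir='src',            # folder containing the wiki source files
#       build_dir='build',        # folder where processed files are written
#       year=2021,                # optional, defaults to the current year
#       silence_warnings=False,   # optional
#   )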
def get_upload_map():
    """
    Opens existing upload_map.yml or creates an empty upload map.

    Upload map is a dictionary that contains previously uploaded
    html, css, js and other files, along with their URLs and hashes.
    """

    if os.path.isfile('upload_map.yml'):
        try:
            with open('upload_map.yml', 'r') as file:
                upload_map = yaml.safe_load(file)
        except Exception:
            logger.critical('upload_map.yml exists but could not be opened. Please try again.')
            raise SystemExit

        if upload_map is None:
            upload_map = {}

        # make sure upload map has all the keys
        for key in ['assets', 'html', 'css', 'js']:
            if key not in upload_map.keys() or upload_map[key] is None:
                upload_map[key] = {}
            elif not isinstance(upload_map[key], dict):
                logger.critical('upload_map.yml has an invalid format.')
                logger.critical('Please fix/delete the file and run the program again.')
                raise SystemExit

        return upload_map
    else:
        return {
            'assets': {},
            'html': {},
            'css': {},
            'js': {}
        }
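# For illustration, an existing upload_map.yml is expected to contain the four
# top-level keys checked above; the entries below are invented placeholders:
#
#   assets:
#     assets/img/logo.png:
#       link_URL: https://2021.igem.org/wiki/images/...   # URL assigned by the iGEM server
#       md5: 0cc175b9c0f1b6a831c399e269772661
#       upload_filename: T--Example_Team--logo.png        # illustrative renamed filename
#   html:
#     index.html:
#       link_URL: https://2021.igem.org/Team:Example_Team
#       md5: 1bc29b36f623ba82aaf6724fd3b16718
#   css: {}
#   js: {}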
def write_upload_map(upload_map: dict, filename='upload_map.yml'):
    """ Writes upload map to file. """

    try:
        with open(filename, 'w') as file:
            yaml.dump(upload_map, file, sort_keys=True)
    except Exception:
        logger.error(f'Tried to write {filename} but could not.')
        # FIXME Can this be improved?
        return False

    return True
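# Minimal usage sketch (the entry shown is hypothetical): because
# write_upload_map() returns False instead of raising, callers can decide
# how to react to a failed write.
#
#   upload_map = get_upload_map()
#   upload_map['assets']['assets/img/logo.png'] = {'md5': '', 'link_URL': ''}
#   if not write_upload_map(upload_map):
#       logger.warning('upload_map.yml could not be saved.')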
def get_browser_with_cookies():
    """
    Creates a mechanicalsoup.StatefulBrowser() instance
    with cookies loaded from file, if it exists.

    Returns:
        browser: mechanicalsoup.StatefulBrowser() instance
        cookiejar: browser cookiejar that can be saved after logging in
    """

    cookie_file = 'wikisync.cookies'
    cookiejar = LWPCookieJar(cookie_file)

    if os.path.exists(cookie_file):
        try:
            cookiejar.load()
        # in case file is empty
        except Exception:
            pass

    browser = mechanicalsoup.StatefulBrowser()  # ? error handling here?
    browser.set_cookiejar(cookiejar)

    return browser, cookiejar
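# Sketch of how run() uses this helper: build the browser, log in with the
# credentials and config assembled in run(), then persist the session cookies.
#
#   browser, cookiejar = get_browser_with_cookies()
#   if iGEM_login(browser, credentials, config):   # credentials/config as built in run()
#       cookiejar.save()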
def cache_files(upload_map, config):
    """
    Loads filenames into memory, along with setting up appropriate
    objects to generate URLs and hashes as required.

    Arguments:
        upload_map: custom upload map
        config: configuration for this run

    Returns:
        cache: dictionary with html, css, js and other file objects
    """

    cache = {
        'html': {},
        'css': {},
        'js': {},
        'other': {}
    }

    # for each file in src_dir
    for root, _, files in os.walk(config['src_dir']):
        for filename in files:

            # store path and extension
            infile = (Path(root) / Path(filename)).relative_to(config['src_dir'])
            extension = infile.suffix[1:].lower()

            # create appropriate file object
            # file objects contain corresponding paths and URLs
            if extension == 'html':
                file_object = HTMLfile(infile, config)
                cache['html'][file_object.path] = file_object
            elif extension == 'css':
                file_object = CSSfile(infile, config)
                cache['css'][file_object.path] = file_object
            elif extension == 'js':
                file_object = JSfile(infile, config)
                cache['js'][file_object.path] = file_object
            elif extension in ['png', 'gif', 'jpg', 'jpeg', 'pdf', 'ppt', 'txt',
                               'zip', 'mp3', 'mp4', 'webm', 'mov', 'swf', 'xls',
                               'xlsx', 'docx', 'pptx', 'csv', 'm', 'ogg', 'gb',
                               'tif', 'tiff', 'fcs', 'otf', 'eot', 'ttf', 'woff', 'svg']:
                # make sure the file path starts with 'assets'
                if len(str(infile)) < 7 or str(infile)[:7] != 'assets/':
                    logger.error(f'{infile} is an {extension} file outside the "assets" folder. Skipping.')
                    continue
                # make sure file size is within limits
                elif (config['src_dir'] / infile).stat().st_size >= 100000000:
                    logger.error(f'{infile} is larger than the 100MB file limit. Skipping.')
                    continue
                # create OtherFile
                else:
                    file_object = OtherFile(infile, config)
                    if len(file_object.upload_filename) < 240:
                        cache['other'][file_object.path] = file_object
                    else:
                        logger.error(f'{infile}: Upload filename too long. Skipping.')
                        logger.error('Please do not nest assets too deep and take a look at our docs '
                                     'to see how WikiSync renames files.')
                        continue
            else:
                logger.error(f'{infile} has an unsupported file extension. Skipping.')
                continue

            if extension in ['html', 'css', 'js']:
                if str(file_object.path) not in upload_map[extension].keys():
                    upload_map[extension][str(file_object.path)] = {
                        'md5': '',
                        'link_URL': file_object.link_URL
                    }

    return cache
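# Shape of the returned cache, for illustration (keys are each file object's
# `path` attribute; the paths shown here are made up):
#
#   {
#       'html':  {<path 'index.html'>: HTMLfile(...)},
#       'css':   {<path 'css/style.css'>: CSSfile(...)},
#       'js':    {<path 'js/main.js'>: JSfile(...)},
#       'other': {<path 'assets/img/logo.png'>: OtherFile(...)},
#   }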
def upload_and_write_assets(other_files, browser, upload_map, config):
    """
    Uploads and writes all files and stores URLs in upload_map.

    Arguments:
        other_files: dictionary containing OtherFile objects
        browser: mechanicalsoup.StatefulBrowser instance
        upload_map: custom upload map
        config: custom configuration options

    Returns:
        Number of files uploaded

    Raises:
        SystemExit on failure
    """

    # count the number of files uploaded
    counter = 0

    # files have to be uploaded before everything else because
    # the URLs iGEM assigns are random
    for path in other_files.keys():
        file_object = other_files[path]

        # flag to see if file has already been uploaded
        uploaded = False

        # check if the file has already been uploaded
        for asset_path in upload_map['assets'].keys():
            # if current path matches stored path
            if asset_path == str(path):
                asset = upload_map['assets'][asset_path]
                # and the md5 hash is also the same
                if file_object.md5_hash == asset['md5']:
                    # the file has already been uploaded
                    uploaded = True
                    break
                else:
                    # the file path matches, but the md5 hash doesn't
                    # this means the file has changed
                    uploaded = False
                    break

        # if new file
        if not uploaded:
            # write to build_dir
            try:
                # create directory if it doesn't exist
                if not os.path.isdir(file_object.build_path.parent):
                    os.makedirs(file_object.build_path.parent)
                shutil.copyfile(file_object.src_path,
                                file_object.build_path.parent / file_object.upload_filename)
            except Exception:
                # print upload map to save the current state
                write_upload_map(upload_map)
                message = f'Failed to write {str(file_object.path)} to build_dir. ' + \
                    'The current upload map has been saved. ' + \
                    'You will not have to upload everything again.'
                logger.debug(message, exc_info=True)
                logger.critical(message)
                raise SystemExit

            successful = iGEM_upload_file(browser, file_object, config['year'])
            if not successful:
                # print upload map to save the current state
                write_upload_map(upload_map)
                message = f'Failed to upload {str(file_object.path)}. '
                message += 'The current upload map has been saved. '
                message += 'You will not have to upload everything again.'
                logger.debug(message, exc_info=True)
                logger.critical(message)
                raise SystemExit
            else:
                counter += 1

            if str(path) in upload_map['assets'].keys():
                upload_map['assets'][str(path)]['md5'] = file_object.md5_hash
                upload_map['assets'][str(path)]['link_URL'] = file_object.link_URL
            else:
                upload_map['assets'][str(path)] = {
                    'link_URL': file_object.link_URL,
                    'md5': file_object.md5_hash,
                    'upload_filename': file_object.upload_filename
                }

    return counter
def build_and_upload(files, browser, config, upload_map):
    """
    Replaces URLs in files and uploads changed files.

    Arguments:
        files: Custom file cache
        browser: mechanicalsoup.StatefulBrowser instance
        config: Configuration for this run
        upload_map: custom upload map

    Returns:
        Dictionary with no. of 'html', 'css' and 'js' files uploaded
    """

    counter = {
        'html': 0,
        'css': 0,
        'js': 0,
    }

    for file_dictionary in [files['html'], files['css'], files['js']]:
        for path in file_dictionary.keys():
            file_object = file_dictionary[path]
            path_str = str(file_object.path)
            ext = file_object.extension

            # open file
            try:
                with open(file_object.src_path, 'r') as file:
                    contents = file.read()
            except Exception:
                message = f'Could not open/read {file_object.path}. Skipping.'
                logger.error(message)
                continue  # FIXME Can this be improved?

            processed = None  # just so the linter doesn't freak out

            # parse and modify contents
            if ext == 'html':
                processed = HTMLparser(config, file_object.path, contents, upload_map)
            elif ext == 'css':
                processed = CSSparser(config, file_object.path, contents, upload_map)
            elif ext == 'js':
                processed = JSparser(contents)

            # calculate and store md5 hash of the modified contents
            build_hash = md5(processed.encode('utf-8')).hexdigest()
            if upload_map[ext][path_str]['md5'] == build_hash:
                message = f'Contents of {file_object.path} have been uploaded previously. Skipping.'
                logger.info(message)
            else:
                upload_map[ext][path_str]['md5'] = build_hash

                build_path = file_object.build_path
                try:
                    # create directory if it doesn't exist
                    if not os.path.isdir(build_path.parent):
                        os.makedirs(build_path.parent)
                    # and write the processed contents
                    with open(build_path, 'w') as file:
                        file.write(processed)
                except Exception:
                    message = f'Could not write {str(file_object.build_path)}. Skipping.'
                    logger.error(message)
                    continue  # FIXME Can this be improved?

                # upload
                successful = iGEM_upload_page(browser, processed, file_object.upload_URL)
                if not successful:
                    message = f'Could not upload {str(file_object.path)}. Skipping.'
                    logger.error(message)
                    continue  # FIXME Can this be improved?
                else:
                    counter[ext] += 1

    return counter
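# The change detection above boils down to hashing the *processed* contents and
# comparing against the hash stored in the upload map, e.g. (placeholder values):
#
#   from hashlib import md5
#
#   processed = '<html>...</html>'   # output of one of the parsers
#   build_hash = md5(processed.encode('utf-8')).hexdigest()
#   needs_upload = upload_map['html']['index.html']['md5'] != build_hash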
def print_summary(assets, code):
    total_count = assets + code['html'] + code['css'] + code['js']

    if total_count == 0:
        print('WikiSync did not find any changes from the previous run. No files were uploaded.')
    elif total_count == assets:
        print(f"Done! Successfully uploaded {assets} assets.")
    elif total_count == code['html']:
        print(f"Done! Successfully uploaded {code['html']} HTML files.")
    elif total_count == code['css']:
        print(f"Done! Successfully uploaded {code['css']} stylesheets.")
    elif total_count == code['js']:
        print(f"Done! Successfully uploaded {code['js']} JS scripts.")
    else:
        print("Done! Successfully uploaded:")
        if assets != 0:
            print(f" {assets} assets")
        if code['html'] != 0:
            print(f" {code['html']} HTML files")
        if code['css'] != 0:
            print(f" {code['css']} stylesheets")
        if code['js'] != 0:
            print(f" {code['js']} JS scripts")

    print("Please look at the log for more details.")
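# For example, print_summary(3, {'html': 2, 'css': 1, 'js': 0}) falls through to
# the final branch above and lists only the three non-zero counts (assets, HTML
# files and stylesheets), followed by the log reminder.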