# Source code for idmtools.utils.gitrepo

"""
Utilities for getting information and examples from gitrepos.

Copyright 2021, Bill & Melinda Gates Foundation. All rights reserved.
"""
import os
import sys
import json
from logging import getLogger
import requests
import urllib.request
from click import secho
from dataclasses import dataclass, field
# Module-level logger (named after this module); used below to record exception details.
logger = getLogger(__name__)
# Logger registered under the name 'user'; used below for progress and interruption
# messages emitted while downloading or listing repository content.
user_logger = getLogger('user')

# Owner and repository used when a GitRepo is created without explicit values.
REPO_OWNER = 'institutefordiseasemodeling'
REPO_NAME = 'idmtools'
# Base URLs of the GitHub web UI and of the GitHub REST API.
GITHUB_HOME = 'https://github.com'
GITHUB_API_HOME = 'https://api.github.com'
# Seconds to wait on a GitHub API call before giving up instead of hanging forever.
_REQUEST_TIMEOUT = 30


@dataclass
class GitRepo:
    """
    GitRepo allows interaction with remote git repos, mainly for examples.

    The instance remembers which owner/repo/branch/path it currently points at;
    :meth:`parse_url`, :meth:`download` and :meth:`peep` may update that state.
    """

    repo_owner: str = field(default=None)
    repo_name: str = field(default=None)
    _branch: str = field(default='main', init=False, repr=False)
    _path: str = field(default='', init=False, repr=False)
    _verbose: bool = field(default=False, init=False, repr=False)

    def __post_init__(self):
        """
        Initialize GitRepo.

        If repo_owner or repo_name is None (or empty), the defaults REPO_OWNER and
        REPO_NAME are used instead.

        Returns:
            None
        """
        self.repo_owner = self.repo_owner or REPO_OWNER
        self.repo_name = self.repo_name or REPO_NAME

    @property
    def path(self):
        """
        Path property.

        Returns:
            The path inside the repo that this instance currently points at
        """
        return self._path

    @property
    def branch(self):
        """
        Branch property.

        Returns:
            The branch this instance currently points at
        """
        return self._branch

    @property
    def verbose(self):
        """
        Return verbose property.

        Returns:
            True when download() should print its one-time header
        """
        return self._verbose

    @property
    def repo_home_url(self):
        """
        Construct repo home url.

        Returns:
            repo home url
        """
        return f'{GITHUB_HOME}/{self.repo_owner}/{self.repo_name}'

    @property
    def repo_example_url(self):
        """
        Construct repo example url.

        Returns:
            repo example url
        """
        return f'{self.repo_home_url}/tree/{self._branch}/{self._path}'

    @property
    def api_example_url(self):
        """
        Construct api url of the examples for download.

        Returns:
            api url
        """
        return f'{GITHUB_API_HOME}/repos/{self.repo_owner}/{self.repo_name}/contents/{self._path}?ref={self._branch}'

    @staticmethod
    def _build_opener():
        """Return a urllib opener that sends the User-agent header used for all GitHub requests."""
        opener = urllib.request.build_opener()
        opener.addheaders = [('User-agent', 'Mozilla/5.0')]
        return opener

    def _read_url(self, url: str) -> bytes:
        """Return the raw response body of ``url`` without leaving a temporary file behind."""
        with self._build_opener().open(url, timeout=_REQUEST_TIMEOUT) as resp:
            return resp.read()

    def _retrieve_file(self, url: str, destination: str) -> None:
        """Download ``url`` straight into the local file ``destination``."""
        # urlretrieve goes through the globally installed opener, so install ours first.
        urllib.request.install_opener(self._build_opener())
        urllib.request.urlretrieve(url, destination)

    def parse_url(self, url: str, branch: str = None, update: bool = True):
        """
        Parse url for owner, repo, branch and example path.

        Owner and repo name are normalized to lower case. Branch and path keep the
        case given in the url, because git refs and file paths are case-sensitive.

        Args:
            url: example url
            branch: user branch to replace the branch in url
            update: True/False - update repo or not

        Returns:
            None when update is True (the instance is updated in place), otherwise a
            dict with the keys 'repo_owner', 'repo_name', 'branch' and 'path'

        Raises:
            Exception: when the url does not match one of the supported formats
        """
        default_branch = 'main'
        ex_text = 'Please Verify URL Format: \nhttps://github.com/<owner>/<repo>/(tree|blob)/<branch>/<path>\nor\nhttps://github.com/<owner>/<repo>/'

        example_url = url.strip().rstrip('/')
        # Scheme and host are case-insensitive, so match the prefix on a lowered copy
        # and leave the rest of the url untouched.
        prefix = f'{GITHUB_HOME}/'
        if example_url.lower().startswith(prefix):
            example_url = example_url[len(prefix):]
        url_chunks = example_url.split('/')

        has_kind = len(url_chunks) >= 3
        if len(url_chunks) < 2 or (has_kind and url_chunks[2].lower() not in ('tree', 'blob')):
            raise Exception(f'Your Example URL: {url}\n{ex_text}')

        repo_owner = url_chunks[0].lower()
        repo_name = url_chunks[1].lower()

        if len(url_chunks) <= 3:
            # No branch segment in the url.
            _branch = branch if branch else default_branch
            _path = ''
        else:
            _branch = branch if branch else url_chunks[3] if url_chunks[3] else default_branch
            _path = '/'.join(url_chunks[4:])

        if update:
            self.repo_owner = repo_owner
            self.repo_name = repo_name
            self._branch = _branch
            self._path = _path
        else:
            return {'repo_owner': repo_owner, 'repo_name': repo_name, 'branch': _branch, 'path': _path}

    def list_public_repos(self, repo_owner: str = None, page: int = 1, raw: bool = False):
        """
        Utility method to retrieve all public repos.

        Args:
            repo_owner: the owner of the repo
            page: pagination of results
            raw: bool - return raw data or simplified list

        Returns:
            repo list

        Raises:
            Exception: when the API does not answer with status 200
        """
        # build api url
        api_url = f'{GITHUB_API_HOME}/users/{repo_owner if repo_owner else self.repo_owner}/repos'
        if page:
            api_url = f'{api_url}?page={page}'

        resp = requests.get(api_url, timeout=_REQUEST_TIMEOUT)
        if resp.status_code != 200:
            raise Exception(f'Failed to access: {api_url}')

        # get repos as json
        repo_list = resp.json()
        if raw:
            return repo_list
        return [r['full_name'] for r in repo_list]

    def list_repo_releases(self, repo_owner: str = None, repo_name: str = None, raw: bool = False):
        """
        Utility method to retrieve all releases of the repo.

        Args:
            repo_owner: the owner of the repo
            repo_name: the name of repo
            raw: bool - return raw data or simplified list

        Returns:
            the release list of the repo

        Raises:
            Exception: when the API does not answer with status 200
        """
        # build api url
        api_url = f'{GITHUB_API_HOME}/repos/{repo_owner if repo_owner else self.repo_owner}/{repo_name if repo_name else self.repo_name}/releases'

        # make api call
        resp = requests.get(api_url, timeout=_REQUEST_TIMEOUT)
        if resp.status_code != 200:
            raise Exception(f'Failed to access: {api_url}')

        # get releases as json
        repo_list = resp.json()
        if raw:
            return repo_list
        return [f"{r['tag_name']} at {r['published_at']}" for r in repo_list]

    def download(self, path: str = '', output_dir: str = "./", branch: str = 'main') -> int:
        """
        Download files with example url provided.

        Directories are walked recursively. Files are written below
        ``<output_dir>/<repo_name>/`` using their path inside the repo.

        Args:
            path: path inside the repo, or a full https://github.com/... url
            output_dir: user local folder to download files to
            branch: specify branch for files download from (ignored when path is a
                url; the branch contained in the url is used instead)

        Returns:
            total file count downloaded

        Raises:
            Exception: when output_dir does not exist
        """
        if path.startswith('https://'):
            self.parse_url(path)
        else:
            self._path = path
            self._branch = branch

        if not os.path.exists(output_dir):
            raise Exception(f"output_dir does not exist: {output_dir}")

        # First time display download url and local destination info
        if self.verbose:
            user_logger.info(f'Download Examples From: {self.repo_example_url}')
            user_logger.info(f'Local Destination: {os.path.abspath(output_dir)}')
            user_logger.info('Processing...')
            self._verbose = False

        # Snapshot the current target: the recursive calls below overwrite
        # self._path / self._branch.
        api_url = self.api_example_url
        active_branch = self._branch

        try:
            raw = self._read_url(api_url)
        except KeyboardInterrupt:
            # CTRL+C pressed while talking to the API
            user_logger.error("✘ Got interrupted")
            sys.exit()
        except Exception as ex:
            secho(f'Failed to access: {api_url}', fg="yellow")
            logger.exception(ex)
            sys.exit(1)

        # The API answers in UTF-8; decode explicitly rather than relying on the
        # platform default encoding.
        data = json.loads(raw.decode('utf-8'))
        download_dir = os.path.join(output_dir, self.repo_name)

        if isinstance(data, dict):
            # A dict describes a single object rather than a directory listing.
            if data.get("type") != "file":
                # e.g. submodule or symlink: nothing that can be fetched as a file
                logger.warning('Skipping unsupported content type %r at %s', data.get("type"), api_url)
                return 0

            target = os.path.join(download_dir, data["path"])
            # create folder when necessary
            os.makedirs(os.path.dirname(target), exist_ok=True)
            try:
                self._retrieve_file(data["download_url"], target)
                return 1
            except KeyboardInterrupt:
                user_logger.error("✘ Got interrupted", )
                sys.exit()
            except Exception as ex:
                secho(f'Failed to access: {api_url}', fg="yellow")
                user_logger.error(ex)
                sys.exit(1)

        # Directory listing: count the plain files here, sub-directories add their
        # own counts through the recursive call.
        total_files = sum(1 for entry in data if entry['type'] == 'file')
        for entry in data:
            file_url = entry["download_url"]
            entry_path = entry["path"]
            target = os.path.join(download_dir, entry_path)

            # create folder when necessary
            os.makedirs(os.path.dirname(target), exist_ok=True)

            if file_url is not None:
                try:
                    self._retrieve_file(file_url, target)
                except KeyboardInterrupt:
                    user_logger.error("✘ Got interrupted", )
                    sys.exit()
            else:
                # No download url means a directory: recurse on the branch that is
                # actually being downloaded, not on the `branch` argument default.
                total_files += self.download(entry_path, output_dir, active_branch)

        return total_files

    def peep(self, path: str = '', branch: str = 'main'):
        """
        List the content found at the given path or example url without downloading it.

        Args:
            path: path inside the repo, or a full https://github.com/... url
            branch: specify branch to look at

        Returns:
            list of dicts, one per entry, with the keys 'type', 'name', 'path' and
            'html_url'
        """
        if path.startswith('https://'):
            # NOTE(review): `branch` defaults to 'main' and is forwarded here, so it
            # replaces the branch contained in the url unless the caller passes a
            # falsy branch. Confirm whether that is intended.
            repo_meta = self.parse_url(path, branch, False)
        else:
            self._path = path
            self._branch = branch
            repo_meta = {'repo_owner': self.repo_owner, 'repo_name': self.repo_name, 'branch': branch or self.branch,
                         'path': path or self.path}

        api_example_url = f"{GITHUB_API_HOME}/repos/{repo_meta['repo_owner']}/{repo_meta['repo_name']}/contents/{repo_meta['path']}?ref={repo_meta['branch']}"
        try:
            raw = self._read_url(api_example_url)
        except KeyboardInterrupt:
            # CTRL+C pressed while talking to the API
            user_logger.error("✘ Got interrupted")
            sys.exit()

        data = json.loads(raw.decode('utf-8'))
        if isinstance(data, dict):
            # Single object. NOTE(review): 'name' is filled from data['path'] here,
            # unlike the listing branch below - kept as is for compatibility.
            return [{'type': data['type'], 'name': data['path'], 'path': data['path'], 'html_url': data['html_url']}]

        return [
            {'type': entry['type'], 'name': entry['name'], 'path': entry['path'], 'html_url': entry['html_url']}
            for entry in data
        ]