# Source code for COMPS.utils.create_asset_collection
import os
import logging
from hashlib import md5
from COMPS import Client
from COMPS.Data import AssetCollection, AssetCollectionFile
logger = logging.getLogger(__name__)
##########################
# Descriptive metadata for this utility (alias, short help, long description
# and usage epilog).  NOTE(review): presumably consumed by the command-line
# dispatcher that registers the utility -- confirm against the caller.
utility_metadata = {
    'aliases': ['createac'],
    'help': 'Create an asset collection from a local directory',
    # Adjacent string literals are joined at compile time, so this is a
    # single long sentence-per-line description with no line breaks in it.
    'description': (
        'This utility creates an asset collection from all the files in the specified directory, mirroring '
        'the directory hierarchy. Any files not already known to COMPS will be automatically identified '
        'and uploaded; files already known will not be uploaded again, but just referenced by checksum '
        'during asset collection creation (consequently, time to run can vary widely depending on count '
        'and size of new files being uploaded). When calling from script, you can also provide an optional '
        'parameter to control which files to include/exclude.'
    ),
    # '%(prog)s' is left as a placeholder for the program name.
    'epilog': '''examples:
%(prog)s c:\\path\\to\\create\\asset\\collection\\from
''',
}
def fill_parser(p):
    """Register this utility's command-line arguments on a parser.

    Adds one required positional argument, ``asset_collection_dir``, and one
    optional argument, ``--name`` / ``-n``, whose default is the empty string.

    Args:
        p: An argparse-style parser; only its ``add_argument`` method is used.
    """
    p.add_argument('asset_collection_dir', help='The path to the directory to generate an asset collection from')
    p.add_argument('--name', '-n', default='', help='Name for the asset collection (default is the target directory name)')
##########################
def _md5_hexdigest(file_path, chunk_size=8192):
    """Return the hexadecimal MD5 digest of the file at ``file_path``, read in chunks."""
    md5calc = md5()
    with open(file_path, 'rb') as f:
        # iter(callable, sentinel) keeps reading until read() returns b'' (EOF).
        for datachunk in iter(lambda: f.read(chunk_size), b''):
            md5calc.update(datachunk)
    return md5calc.hexdigest()


def create_asset_collection(path_to_ac, ac_name, include=lambda fn, rp: fn not in ['idmtools.log', 'COMPS_log.log']):
    """Create an asset collection from every included file under a directory.

    The directory tree is walked and each file is first registered by MD5
    checksum only.  If the save reports missing files, a second collection is
    built in which those files are added together with their local path (so
    their content is supplied) while all other files are still referenced by
    checksum.

    Args:
        path_to_ac: Path to the local directory to build the collection from.
        ac_name: Value for the collection's ``Name`` tag.  If empty/falsy, the
            directory's own name is used.
        include: Callable ``include(file_name, relative_path)`` returning a
            truthy value for files that should be part of the collection.
            ``relative_path`` is ``''`` for files directly in ``path_to_ac``.
            The default excludes ``idmtools.log`` and ``COMPS_log.log``.

    Returns:
        The ``id`` of the asset collection that was saved.

    Raises:
        RuntimeError: If ``path_to_ac`` does not exist or is not a directory.
    """
    path_to_ac = os.path.normpath(path_to_ac)

    # isdir() is already False for paths that don't exist.
    if not os.path.isdir(path_to_ac):
        raise RuntimeError('Path \'{0}\' doesn\'t exist or is not a directory'.format(path_to_ac))

    # Derive the default name from the absolute path so that relative inputs
    # such as '.' or '..' yield the real directory name rather than '.'/'..'.
    tags = {
        'Name': ac_name if ac_name else os.path.basename(os.path.abspath(path_to_ac))
    }

    ac = AssetCollection()
    ac.set_tags(tags)

    # First try creating it without uploading any files (just by md5sum)
    for (dirpath, _dirnames, filenames) in os.walk(path_to_ac):
        # The relative path is the same for every file in this directory.
        rp = os.path.relpath(dirpath, path_to_ac) if dirpath != path_to_ac else ''

        for fn in filenames:
            if not include(fn, rp):
                continue

            logger.info('Adding %s', os.path.join(rp, fn))

            md5_checksum_str = _md5_hexdigest(os.path.join(dirpath, fn))

            # Files with a '.exe' extension get an 'Executable' tag.
            acf_tags = {'Executable': None} if os.path.splitext(fn)[1] == '.exe' else None
            acf = AssetCollectionFile(fn, rp, md5_checksum=md5_checksum_str, tags=acf_tags)
            ac.add_asset(acf)

    missing_files = ac.save(return_missing_files=True)

    # If COMPS responds that we're missing some files, then try creating it again,
    # uploading only the files that COMPS doesn't already have.
    if missing_files:
        missing_count = len(missing_files)
        logger.info('Uploading %d missing file%s', missing_count, 's' if missing_count > 1 else '')
        logger.debug('Missing files: [%s]', ','.join(str(u) for u in missing_files))

        ac2 = AssetCollection()
        ac2.set_tags(tags)

        for acf in ac.assets:
            if acf.md5_checksum in missing_files:
                rp = acf.relative_path
                fn = acf.file_name
                # No checksum here: the asset is added with its local file path instead.
                acf2 = AssetCollectionFile(fn, rp, tags=acf.tags)
                ac2.add_asset(acf2, os.path.join(path_to_ac, rp, fn))
            else:
                ac2.add_asset(acf)

        ac2.save()
        ac = ac2

    logger.info('')
    logger.info('Done - created AC %s', ac.id)

    return ac.id
def main(args):
    """Log in to COMPS and create an asset collection from parsed arguments.

    Args:
        args: Parsed-arguments object.  Reads ``args.comps_server`` (passed to
            ``Client.login``), ``args.asset_collection_dir`` and ``args.name``.
            NOTE(review): ``comps_server`` is not registered by
            ``fill_parser`` in this file; presumably the calling framework
            adds it -- confirm.
    """
    Client.login(args.comps_server)

    create_asset_collection(args.asset_collection_dir, args.name)