Source code for idmtools.utils.file_parser
"""
File parser utility. Used to automatically load data.
Copyright 2021, Bill & Melinda Gates Foundation. All rights reserved.
"""
import json
import os
from logging import getLogger
from typing import Dict
import pandas as pd
from io import StringIO, BytesIO
logger = getLogger(__name__)
[docs]class FileParser:
"""
FileParser to load contents in analysis.
"""
[docs] @classmethod
def parse(cls, filename, content=None):
"""
Parse filename and load the content.
Args:
filename: Filename to load
content: Content to load
Returns:
Content loaded
"""
file_extension = os.path.splitext(filename)[1][1:].lower()
content = BytesIO(content)
if file_extension == 'json':
return cls.load_json_file(filename, content)
if file_extension == 'csv':
return cls.load_csv_file(filename, content)
if file_extension == 'xlsx':
return cls.load_xlsx_file(filename, content)
if file_extension == 'txt':
return cls.load_txt_file(filename, content)
if file_extension == 'bin' and 'SpatialReport' in filename:
return cls.load_bin_file(filename, content)
return cls.load_raw_file(filename, content)
[docs] @classmethod
def load_json_file(cls, filename, content) -> Dict:
"""
Load JSON File.
Args:
filename: Filename to load
content: Content
Returns:
JSOn as dict
"""
return json.load(content)
[docs] @classmethod
def load_raw_file(self, filename, content):
"""
Load content raw.
Args:
filename: Filename is none
content: Content to load
Returns:
Content as it was
"""
return content
[docs] @classmethod
def load_csv_file(cls, filename, content) -> pd.DataFrame:
"""
Load csv file.
Args:
filename: Filename to load
content: Content is loading
Returns:
Loaded csv file
"""
if not isinstance(content, StringIO) and not isinstance(content, BytesIO):
content = StringIO(content)
csv_read = pd.read_csv(content, skipinitialspace=True)
return csv_read
[docs] @classmethod
def load_xlsx_file(cls, filename, content) -> Dict[str, pd.ExcelFile]:
"""
Load excel_file.
Args:
filename: Filename to load
content: Content to load
Returns:
Loaded excel file
"""
excel_file = pd.ExcelFile(content)
return {sheet_name: excel_file.parse(sheet_name) for sheet_name in excel_file.sheet_names}
[docs] @classmethod
def load_txt_file(cls, filename, content):
"""
Load text file.
Args:
filename: Filename to load
content: Content to load
Returns:
Content
"""
return str(content.getvalue().decode())
[docs] @classmethod
def load_bin_file(cls, filename, content):
"""
Load a bin file.
Args:
filename: Filename to load
content: Content to load
Returns:
Loaded bin file
Notes:
We should move this to a plugin in emodpy. We need to figure out how to structure that.
"""
try:
from idmtools_platform_comps.utils.spatial_output import SpatialOutput
so = SpatialOutput.from_bytes(content.read(), 'Filtered' in filename)
return so.to_dict()
except ImportError as ex:
logger.exception(ex)
logger.error("Could not import item. Most likely dtk.tools is not installed")