#!/usr/bin/env python3
"""
The weather set module implements functionality for working with sets of weather files.
"""
from __future__ import annotations

from pathlib import Path
from typing import Dict, ItemsView, KeysView, List, NoReturn, Tuple, Union

import pandas as pd

from emodpy_malaria.weather.weather_utils import make_path
from emodpy_malaria.weather.weather_variable import WeatherVariable
from emodpy_malaria.weather.weather_metadata import WeatherAttributes
from emodpy_malaria.weather.weather_data import WeatherData, DataFrameInfo
class WeatherSet:
    """
    Representation of a set of weather files required by EMOD, for all or a subset of weather variables.

    Automates tasks for working with multiple weather files using WeatherData and WeatherMetadata objects.
    Internally a WeatherSet keeps a dictionary mapping weather variables to WeatherData objects; the metadata
    is reached through each WeatherData object's ``metadata`` attribute.

    Supports:
        1. Conversion from/to csv, dataframe (from_csv, to_csv, from_dataframe, to_dataframe)
        2. Conversion from/to EMOD weather files, .bin and .bin.json (from_files, to_files)
    """

    def __init__(self,
                 dir_path: Union[str, Path] = None,
                 file_names: Dict[WeatherVariable, str] = None,
                 weather_columns: Dict[WeatherVariable, str] = None):
        """
        Initializes a WeatherSet object.

        Args:
            dir_path: (Optional) Path to the directory containing weather files.
            file_names: (Optional) Dictionary of weather variables (keys) and file names (values).
            weather_columns: (Optional) Dictionary of weather variables (keys) and weather column names (values).
                By default WeatherVariable values are used: "airtemp", "humidity", "rainfall", "landtemp".
        """
        self._dir_path: Union[str, Path, None] = dir_path
        self._file_names: Dict[WeatherVariable, str] = file_names or {}
        self._weather_columns: Dict[WeatherVariable, str] = weather_columns or {}
        self._weather_dict: Dict[WeatherVariable, WeatherData] = {}

    # Dictionary methods

    def __getitem__(self, weather_variable: WeatherVariable) -> WeatherData:
        """Returns the WeatherData object stored for the given weather variable (KeyError if absent)."""
        return self._weather_dict[weather_variable]

    def __setitem__(self, weather_variable: WeatherVariable, weather_object: WeatherData) -> None:
        """Stores the WeatherData object for the given weather variable."""
        self._weather_dict[weather_variable] = weather_object

    def __len__(self) -> int:
        """Returns the number of weather variables held by the weather set."""
        return len(self._weather_dict)

    def __str__(self) -> str:
        """String representation (the list of weather variables), used to print or debug WeatherSet objects."""
        return str(self.weather_variables)

    def __eq__(self, other: object) -> bool:
        """
        Equality operator for WeatherSet objects.

        Two weather sets are equal if they hold the same weather variables, in the same order, and the
        WeatherData objects stored for each variable compare equal.
        """
        if not isinstance(other, WeatherSet):
            # Let Python try the reflected comparison instead of failing with AttributeError.
            return NotImplemented
        if self.weather_variables != other.weather_variables:
            return False
        return all(self[v] == other[v] for v in self.weather_variables)

    # Defining __eq__ already makes instances unhashable; stated explicitly since the object is mutable.
    __hash__ = None

    def keys(self) -> KeysView[WeatherVariable]:
        """Returns a view of the WeatherVariables held by the weather set."""
        return self._weather_dict.keys()

    def values(self) -> List[WeatherData]:
        """Returns the list of WeatherData objects."""
        return list(self._weather_dict.values())

    def items(self) -> ItemsView[WeatherVariable, WeatherData]:
        """Returns a view of (WeatherVariable, WeatherData) pairs of the weather dictionary."""
        return self._weather_dict.items()

    # Properties

    @property
    def dir_path(self) -> str:
        """
        Directory path containing weather files, as a string.

        Note: the stored value is passed through ``str()``, so the string "None" is returned if no
        directory has been set.
        """
        return str(self._dir_path)

    @property
    def file_names(self) -> Dict[WeatherVariable, str]:
        """Dictionary of weather variables (keys) and weather file names (values)."""
        return self._file_names

    @property
    def attributes(self) -> Union[WeatherAttributes, None]:
        """
        Weather attributes taken from the metadata of the first WeatherData object (the attributes are
        expected to be common to all weather variables), or None if the weather set is empty.
        """
        if len(self._weather_dict) == 0:
            return None
        return self.values()[0].metadata.attributes

    @property
    def weather_variables(self) -> List[WeatherVariable]:
        """The list of weather variables the weather set covers."""
        return list(self._weather_dict)

    @property
    def weather_columns(self) -> Dict[WeatherVariable, str]:
        """Dictionary of weather variables (keys) and weather column names (values)."""
        return self._weather_columns

    # Export/import

    @classmethod
    def from_dataframe(cls,
                       df: pd.DataFrame,
                       node_column: str = None,
                       step_column: str = None,
                       weather_columns: Dict[WeatherVariable, str] = None,
                       attributes: WeatherAttributes = None) -> WeatherSet:
        """
        Initializes WeatherSet object from a dataframe containing weather time series.
        The dataframe must have node ids, step and weather columns.

        Args:
            df: Dataframe containing weather data.
            node_column: (Optional) Column containing node ids. The default is "nodes".
            step_column: (Optional) Column containing time step indices of the weather time series values.
                The default is "steps".
            weather_columns: (Optional) Dictionary of weather variables (keys) and weather column names (values).
                By default WeatherVariable values are used: "airtemp", "humidity", "rainfall", "landtemp".
            attributes: (Optional) Weather attribute object containing metadata for WeatherMetadata object.

        Returns:
            WeatherSet object.
        """
        assert isinstance(df, pd.DataFrame), f"Unsupported dataframe argument type {type(df)}."
        return cls._from_csv_data(data_csv=df,
                                  node_column=node_column,
                                  step_column=step_column,
                                  weather_columns=weather_columns,
                                  attributes=attributes)

    @classmethod
    def from_csv(cls,
                 file_path: Union[str, Path],
                 node_column: str = None,
                 step_column: str = None,
                 weather_columns: Dict[WeatherVariable, str] = None,
                 attributes: WeatherAttributes = None) -> WeatherSet:
        """
        Initializes WeatherSet object from a csv file containing weather time series.
        The csv file must have node ids, step and weather columns.

        Args:
            file_path: The csv file path.
            node_column: (Optional) Column containing node ids. The default is "nodes".
            step_column: (Optional) Column containing time step indices of the weather time series values.
                The default is "steps".
            weather_columns: (Optional) Dictionary of weather variables (keys) and weather column names (values).
                By default WeatherVariable values are used: "airtemp", "humidity", "rainfall", "landtemp".
            attributes: (Optional) The weather attribute object containing metadata for WeatherMetadata object.

        Returns:
            WeatherSet object.
        """
        assert Path(file_path).is_file(), f"The csv file not found: {str(file_path)}."
        return cls._from_csv_data(data_csv=str(file_path),
                                  node_column=node_column,
                                  step_column=step_column,
                                  weather_columns=weather_columns,
                                  attributes=attributes)

    @classmethod
    def _from_csv_data(cls,
                       data_csv: Union[str, pd.DataFrame],
                       node_column: str = None,
                       step_column: str = None,
                       weather_columns: Dict[WeatherVariable, str] = None,
                       attributes: WeatherAttributes = None) -> WeatherSet:
        """
        Creates WeatherSet from a csv file or dataframe by instantiating WeatherData object for each weather variable.
        Column arguments are used to interpret input file/dataframe. Weather attribute argument is used for
        instantiating weather metadata objects.

        Args:
            data_csv: Dataframe or a csv file path (string) containing weather time series.
            node_column: (Optional) Column containing node ids. The default is "nodes".
            step_column: (Optional) Column containing time step indices of the weather time series values.
                The default is "steps".
            weather_columns: (Optional) Dictionary of weather variables (keys) and weather column names (values).
                By default WeatherVariable values are used: "airtemp", "humidity", "rainfall", "landtemp".
            attributes: (Optional) The weather attribute object containing metadata for WeatherMetadata object.

        Returns:
            WeatherSet object.

        Raises:
            TypeError: If data_csv is neither a string nor a dataframe.
        """
        # Obtain dataframe info objects (to name dataframe columns) and the final weather column dictionary
        # (relevant if weather_columns was None or had None column names).
        infos, weather_columns = cls._init_dataframe_info_dict(node_column, step_column, weather_columns)
        if not isinstance(data_csv, (str, pd.DataFrame)):
            raise TypeError(f"Unsupported argument type {type(data_csv)}. Only string or dataframe are expected.")

        attributes = attributes or WeatherAttributes()
        ws = cls(weather_columns=weather_columns)
        for v, info in infos.items():
            if isinstance(data_csv, str):
                ws[v] = WeatherData.from_csv(file_path=data_csv, info=info, attributes=attributes)
            else:
                ws[v] = WeatherData.from_dataframe(df=data_csv, info=info, attributes=attributes)

        ws.validate()
        return ws

    def to_dataframe(self,
                     node_column: str = None,
                     step_column: str = None,
                     weather_columns: Dict[WeatherVariable, str] = None) -> pd.DataFrame:
        """
        Creates a dataframe containing node ids, time steps and weather columns.
        As a side effect, the resolved weather column names are stored on the weather set.

        Args:
            node_column: (Optional) Column containing node ids. The default is "nodes".
            step_column: (Optional) Column containing time step indices of the weather time series values.
                The default is "steps".
            weather_columns: (Optional) Dictionary of weather variables (keys) and weather column names (values).
                By default WeatherVariable values are used: "airtemp", "humidity", "rainfall", "landtemp".

        Returns:
            Dataframe containing node ids and weather time series.

        Raises:
            ValueError: If weather_columns contains weather variables the weather set doesn't have.
        """
        # If no columns are given, init keys to restrict the output to the variables this set has.
        weather_columns = weather_columns or {v: None for v in self.weather_variables}
        available = {w.value for w in self.weather_variables}
        not_available = [v for v in weather_columns if v.value not in available]
        if len(not_available) > 0:
            raise ValueError(f"weather_columns contain unavailable weather variables: {not_available}")

        # Obtain dataframe info objects, to name dataframe columns
        infos, weather_columns = self._init_dataframe_info_dict(node_column, step_column, weather_columns)
        self._weather_columns = weather_columns

        df = None  # used to collect all weather columns in a single dataframe
        for v, info in infos.items():
            df2 = self[v].to_dataframe(info)  # dataframe for the current weather variable
            if df is None:
                df = df2  # the first dataframe becomes the outer dataframe
            else:
                col = info.value_column
                df[col] = df2[col]  # add the weather column to the outer dataframe

        return df

    def to_csv(self,
               file_path: Union[str, Path],
               node_column: str = None,
               step_column: str = None,
               weather_columns: Dict[WeatherVariable, str] = None) -> pd.DataFrame:
        """
        Creates a csv file containing node ids, time steps and weather columns.

        Args:
            file_path: The path of a csv file to be generated.
            node_column: (Optional) Column containing node ids. The default is "nodes".
            step_column: (Optional) Column containing time step indices of the weather time series values.
                The default is "steps".
            weather_columns: (Optional) Dictionary of weather variables (keys) and weather column names (values).
                By default WeatherVariable values are used: "airtemp", "humidity", "rainfall", "landtemp".

        Returns:
            Dataframe containing node ids and weather time series, used to create the csv file.
        """
        df = self.to_dataframe(node_column, step_column, weather_columns)
        df.to_csv(file_path, index=False)
        return df

    # Save/load DTK files

    def _load(self) -> WeatherSet:
        """
        Loads weather files based on weather set attributes (directory path and file names),
        validates the result and returns the weather set itself.
        """
        # Check the raw attribute: the dir_path property returns the truthy string "None" when unset.
        assert self._dir_path and Path(self.dir_path).is_dir(), "A valid dir is a required argument."
        assert isinstance(self.file_names, dict) and len(self.file_names) > 0, "File names dictionary is required."
        for v, n in self.file_names.items():
            bin_path = self._weather_file_path(n)
            self[v] = WeatherData.from_file(bin_path)

        self.validate()
        return self

    def _save(self) -> None:
        """
        Saves weather data and metadata into weather files based on weather set attributes.
        For each weather variable the data is written to the .bin file and the metadata to "<bin path>.json".

        Raises:
            KeyError: If a file name is missing for any of the weather variables in the set.
        """
        assert self._dir_path, "Directory is a required argument."
        assert self._file_names and len(self._file_names) > 0, "File names are required."
        # Fail before anything is written, so that a partial set of files is never produced.
        missing = [v for v in self._weather_dict if v not in self._file_names]
        if missing:
            raise KeyError(f"File names are missing for weather variables: {missing}")

        make_path(self._dir_path)
        for v, wd in self._weather_dict.items():
            bin_path = self._weather_file_path(self._file_names[v])
            wd.to_file(bin_path)
            wd.metadata.to_file(f"{bin_path}.json")

    @classmethod
    def from_files(cls,
                   dir_path: Union[str, Path],
                   prefix: str = "",
                   file_names: Dict[WeatherVariable, str] = None) -> WeatherSet:
        r"""
        Instantiates WeatherSet from weather files whose paths are determined based on the given arguments.

        Args:
            dir_path: Directory path containing weather files.
            prefix: Weather files prefix, e.g. "dtk_15arcmin\_". Only used if file_names is not given.
            file_names: Dictionary of weather variables (keys) and weather .bin file names (values).
                If not given, the files are selected from the directory (see select_weather_files).

        Returns:
            WeatherSet object.
        """
        WeatherVariable.validate_types(file_names, [str, Path])
        file_names = file_names or cls.select_weather_files(dir_path=dir_path, prefix=prefix)
        ws = cls(dir_path=dir_path, file_names=file_names)
        ws._load()
        return ws

    def to_files(self,
                 dir_path: Union[str, Path],
                 file_names: Dict[WeatherVariable, str] = None) -> None:
        """
        Saves WeatherSet to weather files whose paths are determined based on the given arguments.
        The directory path and file names are stored on the weather set before saving.

        Args:
            dir_path: Directory path where weather files are saved.
            file_names: (Optional) Dictionary of weather variables (keys) and weather .bin file names (values).
                If not given, the default names produced by make_file_paths are used.
        """
        file_names = file_names or self.make_file_paths()
        self._dir_path = Path(dir_path)
        self._file_names = file_names
        self._save()

    # Helpers

    @classmethod
    def _init_weather_columns(cls, weather_columns: Dict[WeatherVariable, Union[str, None]] = None
                              ) -> Dict[WeatherVariable, str]:
        """
        Initializes a weather_columns dictionary from defaults or a partially populated weather_columns dictionary.
        The following cases are supported in respect to weather_columns argument:
            - all column names are specified: weather_columns content is returned unchanged.
            - some column names are None: those column names are set to WeatherVariable values.
            - weather_columns is None or empty: all weather columns are set to WeatherVariable values.

        Args:
            weather_columns: (Optional) Dictionary of weather variables (keys) and weather column names (values).
                By default WeatherVariable values are used: "airtemp", "humidity", "rainfall", "landtemp".

        Returns:
            Dictionary of weather variables (keys) to weather column names (values).
        """
        WeatherVariable.validate_types(weather_columns, [str, None])
        if not weather_columns:
            # Nothing specified: every weather variable gets its default column name.
            return {v: v.value for v in WeatherVariable.list()}

        # Keep the given keys and fill in missing (None or empty) column names with defaults.
        return {v: column or v.value for v, column in weather_columns.items()}

    @classmethod
    def _init_dataframe_info_dict(cls,
                                  node_column: str = None,
                                  step_column: str = None,
                                  weather_columns: Dict[WeatherVariable, str] = None
                                  ) -> Tuple[Dict[WeatherVariable, DataFrameInfo], Dict[WeatherVariable, str]]:
        """
        Initializes dataframe info objects containing column names.

        Args:
            node_column: (Optional) Column containing node ids. The default is "nodes".
            step_column: (Optional) Column containing time step indices of the weather time series values.
                The default is "steps".
            weather_columns: (Optional) Dictionary of weather variables (keys) and weather column names (values).
                By default WeatherVariable values are used: "airtemp", "humidity", "rainfall", "landtemp".

        Returns:
            Tuple of two dictionaries mapping weather variables to dataframe info and weather columns.
        """
        weather_columns = cls._init_weather_columns(weather_columns)
        info_dict = {v: DataFrameInfo(node_column=node_column, step_column=step_column, value_column=column)
                     for v, column in weather_columns.items()}
        return info_dict, weather_columns

    @classmethod
    def _make_file_templates(cls,
                             prefix: str = "*",
                             suffix: str = "*{}*.bin",
                             weather_variables: List[WeatherVariable] = None,
                             weather_names: Dict[WeatherVariable, str] = None) -> Dict[WeatherVariable, str]:
        r"""
        Construct file name templates using weather file name prefix/suffix and weather variable names.
        The templates follow "Path.glob" pattern rules, with two adjustments, added to make their use more
        convenient for working with weather files:
            - if prefix/suffix are not specified, defaults are used (see method arguments).
            - if suffix doesn't end with ".bin" or "\*", "\*.bin" is added (since, otherwise, no matches can be found).

        Used for two scenarios:
            1. Get expected weather file paths.
            2. Select weather files from a dir, when exact names are not known, e.g. Path.glob("dtk_\*{tag}\*.bin").

        Args:
            prefix: (Optional) Weather file name prefix, usually a fixed string like "dtk\_".
            suffix: (Optional) Weather file name suffix, usually containing a weather variable name
                placeholder, like "\*{}\*.bin".
            weather_variables: (Optional) Weather variables to be used in case custom weather names are not specified.
                In this case lowercase weather variable names are used, for example: AIR_TEMPERATURE -> air_temperature.
            weather_names: (Optional) Dictionary of weather variables (keys) and custom weather variable names (values).

        Returns:
            Dictionary of weather variables (keys) and weather file name templates.
            For example, air temperature could be represented as:
            - exact name:   WeatherVariable.AIR_TEMPERATURE: "dtk\_15arcmin\_air\_temperature\_daily.bin" or
            - name pattern: WeatherVariable.AIR_TEMPERATURE: "dtk\_\*air_temperature\*.bin"

        Raises:
            ValueError: If prefix or suffix is None.
        """
        # Validate arguments
        if prefix is None:
            raise ValueError("Prefix cannot be None.")
        if suffix is None:
            raise ValueError("File pattern cannot be None.")
        WeatherVariable.validate_types(weather_names, [str])
        is_ok = weather_variables is None or (isinstance(weather_variables, list) and len(weather_variables) > 0)
        assert is_ok, "If specified weather variables must be a nonempty list."

        # Append *.bin if missing
        if not suffix.endswith((".bin", "*")):
            suffix += "*.bin"

        # Joining prefix and suffix can produce a doubled wildcard (e.g. "*" + "*{}*.bin"); collapse it.
        template = (prefix + suffix).replace("**", "*")

        # Init default weather name dictionary, if not provided.
        weather_variables = weather_variables or WeatherVariable.list()
        weather_names = weather_names or {v: v.name.lower() for v in weather_variables}

        # Create dictionary of weather variables and file name templates
        return {v: template.format(name) for v, name in weather_names.items()}

    @classmethod
    def make_file_paths(cls,
                        dir_path: Union[str, Path] = None,
                        prefix: str = "dtk_15arcmin_",
                        suffix: str = "{}_daily.bin",
                        weather_variables: List[WeatherVariable] = None,
                        weather_names: Dict[WeatherVariable, str] = None) -> Dict[WeatherVariable, str]:
        r"""
        Construct file paths using the weather directory path, file name prefix/suffix and weather variable names.
        Names are built as prefix + suffix, with the weather variable name substituted for the "{}" placeholder:
            - if prefix/suffix are not specified, defaults are used (see method arguments).
            - if suffix doesn't end with ".bin" or "\*", "\*.bin" is added.

        Args:
            dir_path: (Optional) Directory path containing weather files. If given, it is prepended to file names.
            prefix: (Optional) Weather file name prefix, usually a fixed string like "dtk\_".
            suffix: (Optional) Weather file name suffix, usually containing a weather variable name
                placeholder, like "{}\_daily.bin".
            weather_variables: (Optional) Weather variables to be used in case custom weather names are not specified.
                In this case lowercase weather variable names are used, for example: AIR_TEMPERATURE -> air_temperature.
            weather_names: (Optional) Dictionary of weather variables (keys) and custom weather variable names (values).

        Returns:
            Dictionary of weather variables (keys) and weather file paths.
            For example, air temperature could be represented as:
            WeatherVariable.AIR_TEMPERATURE: "dtk_15arcmin_air_temperature_daily.bin"
        """
        names = cls._make_file_templates(prefix=prefix,
                                         suffix=suffix,
                                         weather_names=weather_names,
                                         weather_variables=weather_variables)
        if dir_path is not None:
            names = {v: str(Path(dir_path).joinpath(n)) for v, n in names.items()}

        return names

    @classmethod
    def select_weather_files(cls,
                             dir_path: Union[str, Path],
                             prefix: str = "*",
                             suffix: str = "*{}*.bin",
                             weather_variables: List[WeatherVariable] = None,
                             weather_names: Dict[WeatherVariable, str] = None) -> Dict[WeatherVariable, str]:
        r"""
        Select a set of weather files using the weather directory path, file name prefix/suffix and weather variable
        names. File name patterns are matched with "Path.glob", with two adjustments, added to make its use more
        convenient for working with weather files:
            - if prefix/suffix are not specified, defaults are used (see method arguments).
            - if suffix doesn't end with ".bin" or "\*", "\*.bin" is added (since, otherwise, no matches can be found).

        Weather variables for which no file matches are left out of the result.

        Args:
            dir_path: Directory path containing weather files.
            prefix: (Optional) Weather file name prefix, usually a fixed string like "dtk\_".
            suffix: (Optional) Weather file name suffix, usually containing a weather variable name
                placeholder, like "\*{}\*.bin".
            weather_variables: (Optional) Weather variables to be used in case custom weather names are not specified.
                In this case lowercase weather variable names are used, for example: AIR_TEMPERATURE -> air_temperature.
            weather_names: (Optional) Dictionary of weather variables (keys) and custom weather variable names (values).

        Returns:
            Dictionary of weather variables (keys) and weather file names.
            For example, WeatherVariable.AIR_TEMPERATURE\: "dtk_15arcmin_air_temperature_daily.bin"
        """
        assert dir_path is not None, "Directory path cannot be None."
        templates = cls._make_file_templates(prefix=prefix,
                                             suffix=suffix,
                                             weather_names=weather_names,
                                             weather_variables=weather_variables)
        names = {}
        # Use name patterns to pick up files via Path.glob().
        for v, pattern in templates.items():
            files = list(Path(dir_path).glob(pattern))
            assert len(files) < 2, f"More than one weather file matches name pattern {pattern}"
            if len(files) == 1:
                names[v] = files[0].name

        return names

    def _weather_file_path(self, file_name: Union[str, Path]) -> Path:
        """Construct a weather file path by joining the weather set directory path and the file name."""
        return Path(self.dir_path).joinpath(str(file_name))

    def validate(self) -> None:
        """
        Validate WeatherSet object.

        Validates each WeatherData object and its metadata, then checks that series length, node count,
        id reference, spatial resolution and data years are the same for all weather variables. If weather
        columns are specified, also checks that they cover exactly the weather variables held by the set.

        Raises:
            AssertionError: If weather objects are inconsistent or weather columns don't match the variables.
        """
        # Label used in error messages -> metadata attribute that must agree across all weather variables.
        consistency_fields = {
            "series_len": "series_len",
            "node_count": "node_count",
            "id_reference": "id_reference",
            "resolution": "spatial_resolution",
            "data years": "data_years",
        }
        reference = None  # metadata values of the first weather variable, all others must match them
        for v, wd in self._weather_dict.items():
            wm = wd.metadata
            # Validate each weather data and metadata object
            wd.validate()
            wm.validate()

            # Validate weather objects consistency
            current = {label: getattr(wm, attr) for label, attr in consistency_fields.items()}
            if reference is None:
                # Capture the reference once; falsy values ("" or 0) must not be replaced by later ones.
                reference = current
                continue

            file_name = f": {self.file_names[v]}(.json)" if v in self.file_names else ""
            for label, value in current.items():
                assert reference[label] == value, f"WeatherSet {label} mismatch for {v!s}{file_name}"

        # Validate that if weather columns are specified they match weather set variables.
        if len(self._weather_columns) > 0:
            for v in WeatherVariable.list():
                in_data = v in self._weather_dict
                in_columns = v in self._weather_columns
                assert in_data == in_columns, f"Weather columns don't match weather set variables for {v!s}."