Source code for eta_ctrl.timeseries.scenarios

from __future__ import annotations

from typing import TYPE_CHECKING

import pandas as pd

from eta_ctrl import timeseries
from eta_ctrl.util.utils import timestep_to_timedelta

if TYPE_CHECKING:
    from collections.abc import Mapping
    from datetime import datetime

    import numpy as np

    from eta_ctrl.timeseries.scenario_manager import ConfigCsvScenario
    from eta_ctrl.util.type_annotations import TimeStep


def scenario_from_csv(
    scenario_configs: list[ConfigCsvScenario],
    *,
    start_time: datetime,
    end_time: datetime | None = None,
    total_time: TimeStep | None = None,
    random: np.random.Generator | bool | None = False,
    resample_time: TimeStep | None = None,
    prefix_renamed: bool = True,
) -> pd.DataFrame:
    """Import (possibly multiple) scenario data files from csv files and return them as a single pandas
    data frame. The import function supports column renaming and will slice and resample data as specified.

    :raises ValueError: If start and/or end times are outside the scope of the imported scenario files, or
        if the combined data does not contain any valid (non-NA) value.

    .. note::
        The ValueError will only be raised when this is true for all files. If only one file is outside
        the range, an empty series will be returned for that file.

    :param scenario_configs: Configurations of the csv files to import. Each configuration provides the file
        path (``abs_path``), the datetime parsing options (``infer_datetime_cols``, ``time_conversion_str``),
        the ``interpolation_method`` used when resampling, and the optional ``prefix``, ``rename_cols`` and
        ``scale_factors`` applied to the columns.
    :param start_time: Starting time for the scenario import.
    :param end_time: Latest ending time for the scenario import (default: inferred from start_time and
        total_time).
    :param total_time: Total duration of the imported scenario. If given as int this will be
        interpreted as seconds (default: inferred from start_time and end_time).
    :param random: Set to true if a random starting point (within the interval determined by
        start_time and end_time) should be chosen. This will use the environments' random generator.
    :param resample_time: Resample the scenario data to the specified interval. If given as an int,
        this will be interpreted as seconds. If resample_time is None, it will be treated as 0
        (default: None).
    :param prefix_renamed: Should prefixes be applied to renamed columns as well? When setting this to
        false make sure that all columns in all loaded scenario files have different names. Otherwise,
        there is a risk of overwriting data.
    :return: Imported and processed data as pandas.DataFrame.
    """
    # Set defaults and convert values where necessary
    if total_time is not None:
        total_time = timestep_to_timedelta(total_time)

    # If resample_time is None, default to 0
    _resample_time = timestep_to_timedelta(resample_time if resample_time is not None else 0)
    _random = random if random is not None else False

    slice_begin, slice_end = timeseries.find_time_slice(
        start_time,
        end_time,
        total_time=total_time,
        random=_random,
        round_to_interval=_resample_time,
    )

    def import_scenario(
        scenario_config: ConfigCsvScenario,
    ) -> pd.DataFrame:
        """Load one csv file, resample it, cut it to the time slice, then scale and rename its columns."""
        data = timeseries.df_from_csv(
            scenario_config.abs_path,
            infer_datetime_from=scenario_config.infer_datetime_cols,
            time_conversion_str=scenario_config.time_conversion_str,
        )
        data = timeseries.df_resample(data, _resample_time, missing_data=scenario_config.interpolation_method)
        data = data[slice_begin:slice_end].copy()  # type: ignore[misc]

        # The scale factors are the same for every column, so look them up only once.
        scaling = scenario_config.scale_factors

        col_names = {}
        for col in data.columns:
            col_names[col] = _fix_col_name(
                name=col,
                prefix=scenario_config.prefix,
                prefix_renamed=prefix_renamed,
                rename_cols=scenario_config.rename_cols,
            )
            # Scale factors are keyed by the original (not yet renamed) column name.
            if scaling is not None and col in scaling:
                data[col] = data[col].multiply(scaling[col])

        # rename all columns with the name mapping determined above
        return data.rename(columns=col_names)

    scenario = pd.DataFrame()
    for scenario_config in scenario_configs:
        data = import_scenario(scenario_config=scenario_config)
        # The newly imported frame is placed first, so columns of later configs precede earlier ones.
        scenario = pd.concat((data, scenario), axis=1)

    # Make sure that the resulting file corresponds to the requested time slice.
    # first_valid_index/last_valid_index return None when there is no non-NA value at all; this must be
    # checked explicitly because comparing None with a timestamp would raise a TypeError instead of the
    # documented ValueError.
    has_rows = len(scenario) > 0
    first_valid = scenario.first_valid_index() if has_rows else None
    last_valid = scenario.last_valid_index() if has_rows else None
    if (
        first_valid is None
        or last_valid is None
        or first_valid > slice_begin + _resample_time
        or last_valid < slice_end - _resample_time
    ):
        msg = (
            "The loaded scenario file does not contain enough data for the entire selected time slice. Or the set "
            "scenario times do not correspond to the provided data."
        )
        raise ValueError(msg)

    return scenario
def _fix_col_name(
    name: str,
    *,
    prefix: str | None = None,
    prefix_renamed: bool = False,
    rename_cols: Mapping[str, str] | None = None,
) -> str:
    """Determine the final label for a scenario column.

    The column is first renamed according to ``rename_cols`` (if it has an entry there) and the
    result is converted to a string. Afterwards the prefix is prepended, separated by an underscore,
    unless no prefix is given or the column was renamed and ``prefix_renamed`` is False.

    :param name: Original column name.
    :param prefix: Prefix to prepend to the name (no prefix is applied if None).
    :param prefix_renamed: Should the prefix also be prepended to names that were renamed?
    :param rename_cols: Mapping of old names to new names.
    :return: The resulting column name.
    """
    # Without a mapping (or without an entry for this column) the original name is kept.
    target = str(name if rename_cols is None else rename_cols.get(name, name))

    if prefix is None:
        return target

    # A column counts as renamed only if the resulting name actually differs from the original.
    keep_unprefixed = name != target and not prefix_renamed
    return target if keep_unprefixed else f"{prefix}_{target}"