Source code for ai4water.utils.visualizations

__all__ = ["Plot", "murphy_diagram", "edf_plot", "fdc_plot"]

from typing import Union, Callable

from ai4water.backend import os, np, pd, plt, plotly
from ai4water.backend import easy_mpl as em

from .plotting_tools import save_or_show, to_1d_array


class Plot(object):

    def __init__(self, path=None, backend='plotly', save=True, dpi=300):
        self.path = path
        self.backend = backend
        self.save = save
        self.dpi = dpi

    @property
    def backend(self):
        return self._backend

    @backend.setter
    def backend(self, x):

        _backend = x
        assert x in ['plotly', 'matplotlib'], f"unknown backend {x}. Allowed values are `plotly` and `matplotlib`"

        if x == 'plotly':
            if plotly is None:
                _backend = 'matplotlib'

        self._backend = _backend

    @property
    def path(self):
        return self._path

    @path.setter
    def path(self, x):
        if x is None:
            x = os.getcwd()
        self._path = x

    def save_or_show(self, save: bool = None, fname=None, where='', dpi=None,
                     bbox_inches='tight',
                     close=True, show=False):

        if save is None:
            save = self.save

        if dpi is None:
            dpi = self.dpi

        return save_or_show(self.path, save, fname, where, dpi, bbox_inches, close,
                            show=show)


def linear_model(
        model_name: str,
        inputs,
        target
):
    import sklearn
    from ai4water.backend import get_attributes

    models = get_attributes(sklearn, "linear_model", case_sensitive=True)
    if model_name not in models:
        raise ValueError(f"Can not find {model_name} in sklearn.linear_model")
    model = models[model_name]
    reg = model().fit(inputs, target)

    return reg.predict(inputs)


[docs]def murphy_diagram( observed: Union[list, np.ndarray, pd.Series, pd.DataFrame], predicted: Union[list, np.ndarray, pd.Series, pd.DataFrame], reference: Union[list, np.ndarray, pd.Series, pd.DataFrame] = None, reference_model: Union[str, Callable] = None, inputs=None, plot_type: str = "scores", xaxis: str = "theta", ax: plt.Axes = None, line_colors: tuple = None, fill_color: str = "lightgray", show: bool = True ) -> plt.Axes: """Murphy diagram as introducted by Ehm_ et al., 2015 and illustrated by Rob Hyndman_ Arguments: observed: observed or true values predicted: model's prediction reference: reference prediction reference_model: The model for reference prediction. Only relevent if `reference` is None and `plot_type` is `diff`. It can be callable or a string. If it is a string, then it can be any model name from sklearn.linear_model_ inputs: inputs for reference model. Only relevent if `reference_model` is not None and `plot_type` is `diff` plot_type: either of `scores` or `diff` xaxis: either of `theta` or `time` ax: the axis to use for plotting line_colors: colors of line fill_color: color to fill confidence interval show: whether to show the plot or not Returns: matplotlib axes Example: >>> import numpy as np >>> from ai4water.utils.visualizations import murphy_diagram >>> yy = np.random.randint(1, 1000, 100) >>> ff1 = np.random.randint(1, 1000, 100) >>> ff2 = np.random.randint(1, 1000, 100) >>> murphy_diagram(yy, ff1, ff2) ... >>> murphy_diagram(yy, ff1, ff2, plot_type="diff") .. _Ehm: https://arxiv.org/pdf/1503.08195.pdf .. _Hyndman: https://robjhyndman.com/hyndsight/murphy-diagrams/ .. _sklearn.linear_model: https://scikit-learn.org/stable/modules/classes.html#module-sklearn.linear_model """ assert plot_type in ("scores", "diff") assert xaxis in ("theta", "time") y = to_1d_array(observed) f1 = to_1d_array(predicted) if reference is None: if plot_type == "diff": assert reference_model is not None if callable(reference_model): reference = reference_model(inputs) else: assert inputs is not None, f"You must specify the inputs for {reference_model}" reference = linear_model(reference_model, inputs, predicted) f2 = to_1d_array(reference) else: f2 = None else: f2 = to_1d_array(reference) line_colors = line_colors or ["dimgrey", "tab:orange"] n = len(y) _min, _max = np.nanmin(np.hstack([y, f1, f2])), np.nanmax(np.hstack([y, f1, f2])) tmp = _min - 0.2 * (_max - _min), _max + 0.2 * (_max - _min) theta = np.linspace(tmp[0], tmp[1], 501) s1 = np.full((501, n), np.nan) s2 = np.full((501, n), np.nan) max1 = np.maximum(f1, y) max2 = np.maximum(f2, y) min1 = np.minimum(f1, y) min2 = np.minimum(f2, y) for j in range(n): s1[:, j] = abs(y[j] - theta) * (max1[j] > theta) * (min1[j] <= theta) s2[:, j] = abs(y[j] - theta) * (max2[j] > theta) * (min2[j] <= theta) # grab the axes if ax is None: ax = plt.gca() if xaxis == "theta": s1ave, s2ave = _data_for_theta(s1, s2) else: raise NotImplementedError if plot_type == "scores": _plot_scores(theta, s1ave, s2ave, ax, line_colors) ax.set_ylabel("Empirical Scores", fontsize=16) else: _plot_diff(theta, s1, s2, n, ax, line_colors[0], fill_color) ax.set_ylabel("Difference in scores", fontsize=16) ax.set_xlabel(xaxis, fontsize=16) ax.set_title("Murphy Diagram", fontsize=16) if show: plt.show() return ax
def last_nonzero(arr, axis, invalid_val=-1): mask = arr != 0 val = arr.shape[axis] - np.flip(mask, axis=axis).argmax(axis=axis) - 1 return np.where(mask.any(axis=axis), val, invalid_val) def _plot_diff(theta, s1, s2, n, ax, line_color="black", fill_color="lightgray"): se = np.std(s1 - s2) / np.sqrt(n) diff = np.mean(s1 - s2, axis=1) upper = diff + 1.96 * se lower = diff - 1.96 * se ax.plot(theta, diff, color=line_color) # first_nonzero occurence st = (diff != 0).argmax(axis=0) en = last_nonzero(diff, axis=0).item() ax.fill_between(theta[st:en], upper[st:en], lower[st:en], # alpha=0.2, color=fill_color) return ax
[docs]def fdc_plot( sim: Union[list, np.ndarray, pd.Series, pd.DataFrame], obs: Union[list, np.ndarray, pd.Series, pd.DataFrame], ax: plt.Axes = None, legend: bool = True, xlabel: str = "Exceedence [%]", ylabel: str = "Flow", show: bool = True ) -> plt.Axes: """Plots flow duration curve Arguments: sim: simulated flow obs: observed flow ax: axis on which to plot legend: whether to apply legend or not xlabel: label to set on x-axis. set to None for no x-label ylabel: label to set on y-axis show: whether to show the plot or not Returns: matplotlib axes Example: >>> import numpy as np >>> import matplotlib.pyplot as plt >>> from ai4water.utils.visualizations import fdc_plot >>> simulated = np.random.random(100) >>> observed = np.random.random(100) >>> fdc_plot(simulated, observed) >>> plt.show() """ sim = to_1d_array(sim) obs = to_1d_array(obs) sort_obs = np.sort(sim)[::-1] exceedence_obs = np.arange(1., len(sort_obs) + 1) / len(sort_obs) sort_sim = np.sort(obs)[::-1] exceedence_sim = np.arange(1., len(sort_sim) + 1) / len(sort_sim) if ax is None: ax = plt.gca() ax.plot(exceedence_obs * 100, sort_obs, color='b', label="Observed") ax.plot(exceedence_sim * 100, sort_sim, color='r', label="Simulated") if legend: ax.legend() if xlabel is not None: ax.set_xlabel(xlabel) if ylabel is not None: ax.set_ylabel(ylabel) if show: plt.show() return ax
def _plot_scores(theta, s1ave, s2ave, ax, line_colors): ax.plot(theta, s1ave, color=line_colors[0]) ax.plot(theta, s2ave, color=line_colors[1]) return ax def _data_for_time(s1, s2): s1ave, s2ave = np.mean(s1, axis=0), np.mean(s2, axis=0) return s1ave, s2ave def _data_for_theta(s1, s2): return np.mean(s1, axis=1), np.mean(s2, axis=1) def init_subplots(width=None, height=None, nrows=1, ncols=1, **kwargs): """Initializes the fig for subplots""" plt.close('all') fig, ax = plt.subplots(nrows=nrows, ncols=ncols, **kwargs) if width is not None: fig.set_figwidth(width) if height is not None: fig.set_figheight(height) return fig, ax
[docs]def edf_plot( y: np.ndarray, num_points: int = 100, xlabel="Objective Value", marker: str = '-', ax: plt.Axes = None, show:bool = True, **kwargs ) -> plt.Axes: """ Plots the empirical distribution function. Parameters ---------- y : np.ndarray array of values num_points : int xlabel : str marker : str ax : plt.Axes, optional show : bool, optional (default=True) whether to show the plot or not **kwargs : key word arguments for plot Returns ------- plt.Axes """ x = np.linspace(np.min(y), np.max(y), num_points) y_values = np.sum(y[:, np.newaxis] <= x, axis=0) / y.size y_values = y_values.reshape(-1, ) if ax is None: _, ax = plt.subplots() ax.grid() ax_kws = dict(title="Empirical Distribution Function Plot", ylabel="Cumulative Probability", xlabel=xlabel) if 'ax_kws' in kwargs: ax_kws.update(ax_kws) kwargs.pop('ax_kws') ax = em.plot( x, y_values, marker, show=False, ax_kws=ax_kws, ax=ax, **kwargs ) if show: plt.show() return ax