# Source code for ai4water.utils.visualizations

# Names exported by a star-import of this module.
__all__ = ["Plot", "murphy_diagram", "edf_plot", "fdc_plot"]

from typing import Union, Callable

from ai4water.backend import os, np, pd, plt, plotly
from ai4water.backend import easy_mpl as em

from .plotting_tools import save_or_show, to_1d_array


class Plot:
    """Container for common plotting settings.

    Stores the output path, the plotting backend, the default `save` flag
    and the default `dpi`, and forwards them to the module level
    `save_or_show` helper.
    """

    def __init__(self, path=None, backend='plotly', save=True, dpi=300):
        # `path` and `backend` are routed through the property setters below
        self.path = path
        self.backend = backend
        self.dpi = dpi
        self.save = save

    @property
    def backend(self):
        """Name of the plotting backend in use."""
        return self._backend

    @backend.setter
    def backend(self, x):
        assert x in ('plotly', 'matplotlib'), f"unknown backend {x}. Allowed values are `plotly` and `matplotlib`"

        # plotly was requested but is not available: use matplotlib instead
        self._backend = 'matplotlib' if (x == 'plotly' and plotly is None) else x

    @property
    def path(self):
        """Path handed over to `save_or_show`."""
        return self._path

    @path.setter
    def path(self, x):
        # default to the current working directory when no path is given
        self._path = os.getcwd() if x is None else x

    def save_or_show(self, save: bool = None, fname=None, where='', dpi=None,
                     bbox_inches='tight',
                     close=True, show=False):
        """Calls the module level `save_or_show` with the settings of this object.

        `save` and `dpi` default to the values given at construction time
        when they are None. All other arguments are passed on unchanged.
        """
        effective_save = self.save if save is None else save
        effective_dpi = self.dpi if dpi is None else dpi

        return save_or_show(self.path, effective_save, fname, where,
                            effective_dpi, bbox_inches, close,
                            show=show)


def linear_model(
        model_name: str,
        inputs,
        target
):
    """Fits a model from sklearn.linear_model and returns its predictions.

    Arguments:
        model_name:
            name of the model, looked up (case sensitive) among the
            attributes of `sklearn.linear_model`
        inputs:
            data used both for fitting and for prediction
        target:
            target values used for fitting

    Returns:
        predictions of the fitted model on `inputs`

    Raises:
        ValueError: if `model_name` is not found in sklearn.linear_model
    """
    import sklearn
    from ai4water.backend import get_attributes

    available = get_attributes(sklearn, "linear_model", case_sensitive=True)

    if model_name not in available:
        raise ValueError(f"Can not find {model_name} in sklearn.linear_model")

    # instantiate with default arguments and fit on the given data
    fitted = available[model_name]().fit(inputs, target)
    return fitted.predict(inputs)


def murphy_diagram(
        observed: Union[list, np.ndarray, pd.Series, pd.DataFrame],
        predicted: Union[list, np.ndarray, pd.Series, pd.DataFrame],
        reference: Union[list, np.ndarray, pd.Series, pd.DataFrame] = None,
        reference_model: Union[str, Callable] = None,
        inputs=None,
        plot_type: str = "scores",
        xaxis: str = "theta",
        ax: plt.Axes = None,
        line_colors: tuple = None,
        fill_color: str = "lightgray",
        show: bool = True
) -> plt.Axes:
    """Murphy diagram as introduced by Ehm_ et al., 2015
     and illustrated by Rob Hyndman_

    Arguments:
        observed:
            observed or true values
        predicted:
             model's prediction
        reference:
             reference prediction. If it is None and `plot_type` is `scores`,
             only the scores of `predicted` are drawn.
        reference_model:
             The model for reference prediction. Only relevant if `reference` is
             None and `plot_type` is `diff`. It can be callable or a string. If it is a
             string, then it can be any model name from sklearn.linear_model_
        inputs:
             inputs for reference model. Only relevant if `reference_model` is not
             None and `plot_type` is `diff`
        plot_type:
             either of `scores` or `diff`
        xaxis:
             either of `theta` or `time`. Only `theta` is implemented.
        ax:
             the axis to use for plotting
        line_colors:
             colors of line
        fill_color:
             color to fill confidence interval
        show:
             whether to show the plot or not

    Returns:
         matplotlib axes

    Raises:
        NotImplementedError: if `xaxis` is `time`

    Example:
        >>> import numpy as np
        >>> from ai4water.utils.visualizations import murphy_diagram
        >>> yy = np.random.randint(1, 1000, 100)
        >>> ff1 = np.random.randint(1, 1000, 100)
        >>> ff2 = np.random.randint(1, 1000, 100)
        >>> murphy_diagram(yy, ff1, ff2)
        ...
        >>> murphy_diagram(yy, ff1, ff2, plot_type="diff")

    .. _Ehm:
        https://arxiv.org/pdf/1503.08195.pdf

    .. _Hyndman:
        https://robjhyndman.com/hyndsight/murphy-diagrams/

    .. _sklearn.linear_model:
        https://scikit-learn.org/stable/modules/classes.html#module-sklearn.linear_model
    """
    assert plot_type in ("scores", "diff"), "plot_type must be either `scores` or `diff`"
    assert xaxis in ("theta", "time"), "xaxis must be either `theta` or `time`"

    y = to_1d_array(observed)
    f1 = to_1d_array(predicted)

    if reference is not None:
        f2 = to_1d_array(reference)
    elif plot_type == "diff":
        # a difference plot needs a second forecast, so build it from the reference model
        assert reference_model is not None, \
            "either `reference` or `reference_model` is required when plot_type is `diff`"
        if callable(reference_model):
            reference = reference_model(inputs)
        else:
            assert inputs is not None, f"You must specify the inputs for {reference_model}"
            # NOTE(review): the reference model is fitted against `predicted`,
            # not `observed` -- confirm that this is intended.
            reference = linear_model(reference_model, inputs, predicted)
        f2 = to_1d_array(reference)
    else:
        # scores plot without any reference: only `predicted` is drawn
        f2 = None

    line_colors = line_colors or ["dimgrey", "tab:orange"]

    n = len(y)

    # the threshold grid spans all available values, widened by 20 % on both sides
    values = np.hstack([y, f1] if f2 is None else [y, f1, f2])
    _min, _max = np.nanmin(values), np.nanmax(values)
    margin = 0.2 * (_max - _min)
    theta = np.linspace(_min - margin, _max + margin, 501)

    s1 = _elementary_scores(y, f1, theta)
    s2 = None if f2 is None else _elementary_scores(y, f2, theta)

    if xaxis != "theta":
        raise NotImplementedError("only xaxis='theta' is implemented")

    # grab the axes
    if ax is None:
        ax = plt.gca()

    if plot_type == "scores":
        if s2 is None:
            ax.plot(theta, np.mean(s1, axis=1), color=line_colors[0])
        else:
            s1ave, s2ave = _data_for_theta(s1, s2)
            _plot_scores(theta, s1ave, s2ave, ax, line_colors)
        ax.set_ylabel("Empirical Scores", fontsize=16)
    else:
        _plot_diff(theta, s1, s2, n, ax, line_colors[0], fill_color)
        ax.set_ylabel("Difference in scores", fontsize=16)

    ax.set_xlabel(xaxis, fontsize=16)
    ax.set_title("Murphy Diagram", fontsize=16)

    if show:
        plt.show()

    return ax


def _elementary_scores(y, f, theta):
    """Returns an array of shape (len(theta), len(y)) holding, for every
    threshold in `theta` and every sample, ``|y - theta|`` where
    ``min(f, y) <= theta < max(f, y)`` and 0 elsewhere."""
    grid = theta[:, np.newaxis]
    inside = (np.maximum(f, y) > grid) * (np.minimum(f, y) <= grid)
    return np.abs(y - grid) * inside


def last_nonzero(arr, axis, invalid_val=-1):
    """Index of the last non-zero entry of `arr` along `axis`.

    Positions along which every entry is zero get `invalid_val` instead.
    """
    nonzero = arr != 0
    # argmax on the flipped mask gives the distance of the last hit from the end
    distance_from_end = np.flip(nonzero, axis=axis).argmax(axis=axis)
    last_index = arr.shape[axis] - distance_from_end - 1
    has_nonzero = nonzero.any(axis=axis)
    return np.where(has_nonzero, last_index, invalid_val)


def _plot_diff(theta, s1, s2, n, ax, line_color="black", fill_color="lightgray"):
    se = np.std(s1 - s2) / np.sqrt(n)

    diff = np.mean(s1 - s2, axis=1)

    upper = diff + 1.96 * se
    lower = diff - 1.96 * se

    ax.plot(theta, diff, color=line_color)

    # first_nonzero occurence
    st = (diff != 0).argmax(axis=0)
    en = last_nonzero(diff, axis=0).item()

    ax.fill_between(theta[st:en], upper[st:en], lower[st:en],  # alpha=0.2,
                    color=fill_color)

    return ax


def fdc_plot(
        sim: Union[list, np.ndarray, pd.Series, pd.DataFrame],
        obs: Union[list, np.ndarray, pd.Series, pd.DataFrame],
        ax: plt.Axes = None,
        legend: bool = True,
        xlabel: str = "Exceedence [%]",
        ylabel: str = "Flow",
        show: bool = True
) -> plt.Axes:
    """Plots flow duration curve

    Both series are sorted in descending order and plotted against their
    rank expressed in percent.

    Arguments:
        sim:
            simulated flow
        obs:
            observed flow
        ax:
            axis on which to plot
        legend:
            whether to apply legend or not
        xlabel:
            label to set on x-axis. set to None for no x-label
        ylabel:
            label to set on y-axis. set to None for no y-label
        show:
            whether to show the plot or not

    Returns:
        matplotlib axes

    Example:
        >>> import numpy as np
        >>> import matplotlib.pyplot as plt
        >>> from ai4water.utils.visualizations import fdc_plot
        >>> simulated = np.random.random(100)
        >>> observed = np.random.random(100)
        >>> fdc_plot(simulated, observed)
        >>> plt.show()
    """

    sim = to_1d_array(sim)
    obs = to_1d_array(obs)

    # the observed curve is built from `obs` and the simulated curve from
    # `sim`, so that the legend labels match the data
    sort_obs = np.sort(obs)[::-1]
    exceedence_obs = np.arange(1., len(sort_obs) + 1) / len(sort_obs)
    sort_sim = np.sort(sim)[::-1]
    exceedence_sim = np.arange(1., len(sort_sim) + 1) / len(sort_sim)

    if ax is None:
        ax = plt.gca()

    ax.plot(exceedence_obs * 100, sort_obs, color='b', label="Observed")
    ax.plot(exceedence_sim * 100, sort_sim, color='r', label="Simulated")

    if legend:
        ax.legend()

    if xlabel is not None:
        ax.set_xlabel(xlabel)
    if ylabel is not None:
        ax.set_ylabel(ylabel)

    if show:
        plt.show()

    return ax


def _plot_scores(theta, s1ave, s2ave, ax, line_colors):
    ax.plot(theta, s1ave, color=line_colors[0])
    ax.plot(theta, s2ave, color=line_colors[1])

    return ax


def _data_for_time(s1, s2):
    s1ave, s2ave = np.mean(s1, axis=0), np.mean(s2, axis=0)

    return s1ave, s2ave


def _data_for_theta(s1, s2):
    return np.mean(s1, axis=1), np.mean(s2, axis=1)


def init_subplots(width=None, height=None, nrows=1, ncols=1, **kwargs):
    """Closes all open figures and creates a new figure with subplots.

    `width` and `height`, when given, are applied to the new figure.
    Remaining keyword arguments go to `plt.subplots`. Returns the figure
    and whatever `plt.subplots` returned as axes.
    """
    plt.close('all')
    fig, ax = plt.subplots(nrows=nrows, ncols=ncols, **kwargs)

    # apply only those dimensions which were explicitly requested
    for size, apply_size in ((width, fig.set_figwidth), (height, fig.set_figheight)):
        if size is not None:
            apply_size(size)

    return fig, ax


def edf_plot(
        y: np.ndarray,
        num_points: int = 100,
        xlabel="Objective Value",
        marker: str = '-',
        ax: plt.Axes = None,
        show:bool = True,
        **kwargs
) -> plt.Axes:
    """
    Plots the empirical distribution function.

    Parameters
    ----------
        y : np.ndarray
            array of values. It is converted to a flat numpy array.
        num_points : int
            number of evenly spaced points between minimum and maximum
            of `y` at which the distribution function is evaluated
        xlabel : str
            label for x-axis
        marker : str
            third positional argument given to the plot function
        ax : plt.Axes, optional
            axes to draw on. A new one is created if not given.
        show : bool, optional (default=True)
            whether to show the plot or not
        **kwargs :
            key word arguments for plot. If `ax_kws` is among them, it is
            merged into the default title and axis labels and takes
            precedence over them.

    Returns
    -------
    plt.Axes

    """
    y = np.asarray(y).reshape(-1)

    x = np.linspace(np.min(y), np.max(y), num_points)

    # fraction of values in y which are <= each point in x
    y_values = np.sum(y[:, np.newaxis] <= x, axis=0) / y.size

    y_values = y_values.reshape(-1, )

    if ax is None:
        _, ax = plt.subplots()

    ax.grid()

    ax_kws = dict(title="Empirical Distribution Function Plot",
        ylabel="Cumulative Probability",
        xlabel=xlabel)

    # merge the caller's settings into the defaults; `ax_kws` is removed from
    # kwargs so that it is not passed twice to the plot function
    user_ax_kws = kwargs.pop('ax_kws', None)
    if user_ax_kws:
        ax_kws.update(user_ax_kws)

    ax = em.plot(
        x,
        y_values,
        marker,
        show=False,
        ax_kws=ax_kws,
        ax=ax,
        **kwargs
    )

    if show:
        plt.show()

    return ax