# Source code for nueramic_mathml.visualize.ml_animation

from __future__ import annotations

import gc
from typing import Optional

import plotly.express as px
import plotly.graph_objs as go
import torch
from sklearn.manifold import TSNE

from .one_animation import standard_layout, COLOR3, COLOR5
from ..ml.classification import LogisticRegressionRBF
from ..ml.metrics import roc_curve_plot
from ..ml.regression import LinearRegression


def gen_classification_plot(x_tensor: torch.Tensor,
                            y_true: torch.Tensor,
                            model: Optional[torch.nn.Module] = None,
                            threshold: float = 0.5,
                            cnt_points: int = 1000,
                            k: float = 0.1,
                            title: Optional[str] = None,
                            epsilon: float = 1e-4,
                            insert_na: bool = False) -> go.Figure:
    """
    Returns a scatter plot of the distribution with an optional separating line. If dim(x) = 2, a model can be
    passed and its decision boundary is drawn. If dim(x) > 2, returns a scatter of the TSNE embedding
    (sklearn.manifold.TSNE with default settings) and the model is ignored. dim(x) = 1 is not supported.

    .. note::

        if the model is linear with a single layer and a simple activation function, the visualization is faster,
        because the separating line is computed analytically instead of on a grid

    .. warning::

        if the model is heavy, then you should reduce cnt_points, but the probability of missing points is higher,
        and the visualization will be rather incorrect. You can increase the gap by increasing the epsilon.

    :param x_tensor: training tensor
    :param y_true: target tensor. array with true values of binary classification
    :param model: some model that returns a torch tensor with class 1 probabilities using the call: model(x)
    :param threshold: if model(xi) >= threshold, then yi = 1
    :param cnt_points: number of points on each of the two axes when dim(x) = 2
    :param k: constant for draw on section: [x.min() - (x.max() - x.min()) * k, x.max() + (x.max() - x.min()) * k]
    :param title: title of plots
    :param epsilon: contour line points: :math:`\\{x\\in \\mathbb{R}^2 \\, | \\,
                \\text{threshold} - \\text{epsilon} \\le \\text{model}(x) \\le \\text{threshold} + \\text{epsilon}\\}`
    :param insert_na: na insertion flag when two points too far away
    :return: scatter plot go.Figure
    :raises AssertionError: if x_tensor has fewer than two columns

    .. code-block:: python3

        >>> from sklearn.datasets import make_moons
        >>> torch.random.manual_seed(7)
        >>> x, y = make_moons(1000, noise=0.15, random_state=7)
        >>> x, y = torch.tensor(x), torch.tensor(y)

        >>> lr_rbf = LogisticRegressionRBF(x[:50])
        >>> lr_rbf.fit(x, y, epochs=5000)

        >>> lr_rbf.metrics_tab(x, y)

    .. code-block:: python3

        {'recall': 0.9980000257492065,
         'precision': 0.9842209219932556,
         'accuracy': 0.9909999966621399,
         'f1': 0.9910625822119956,
         'auc_roc': 0.9995800006320514}

    .. code-block:: python3

        >>> gen_classification_plot(x, y, lr_rbf, threshold=0.5, epsilon=0.001)

    """
    # Class labels are turned into strings so plotly treats them as discrete categories.
    colors = [str(int(label)) for label in y_true]

    if x_tensor.shape[1] < 2:
        raise AssertionError('x.shape[1] must be >= 2')

    if x_tensor.shape[1] == 2:
        title = '<b>Initial Distribution</b>' if title is None else f'<b>{title}</b>'
        fig = px.scatter(x=x_tensor[:, 0], y=x_tensor[:, 1], title=title, color=colors)

        if model is not None:
            dx = x_tensor[:, 0].max() - x_tensor[:, 0].min()
            dy = x_tensor[:, 1].max() - x_tensor[:, 1].min()

            # Lower-left and upper-right corners of the drawing area, padded by a fraction k of the data range.
            x1 = torch.tensor([x_tensor[:, 0].min() - dx * k,
                               x_tensor[:, 1].min() - dy * k])

            x2 = torch.tensor([x_tensor[:, 0].max() + dx * k,
                               x_tensor[:, 1].max() + dy * k])

            # Fast path: a two-parameter linear model has an analytic separating line.
            flag, grid = _make_line_linear((x1[0], x2[0]), model, threshold)

            if flag:
                # Removing the padding again clips the line to the bounding box of the data.
                # The upper y bound must use dy (the y range), not dx.
                mask = (
                        (grid[:, 0] >= x1[0] + dx * k) &
                        (grid[:, 0] <= x2[0] - dx * k) &
                        (grid[:, 1] >= x1[1] + dy * k) &
                        (grid[:, 1] <= x2[1] - dy * k)
                )

                grid = grid[mask, :]
            else:
                # General path: evaluate the model on a grid and keep points near the threshold.
                grid = _make_line(x1, x2, model, threshold, cnt_points, epsilon, insert_na)

            # reshape(-1, 2) accepts both an (n, 2) line and a bare [nan, nan] placeholder.
            line_x, line_y = grid.detach().cpu().reshape(-1, 2).T

            fig.add_scatter(x=line_x, y=line_y, name='sep plane', mode='lines')

    else:
        # Same bold formatting as the 2-d branch, so a custom title looks the same in both cases.
        title = '<b>TSNE of Distribution</b>' if title is None else f'<b>{title}</b>'
        tsne_x = TSNE().fit_transform(x_tensor.detach().cpu().numpy())
        fig = px.scatter(x=tsne_x[:, 0], y=tsne_x[:, 1], title=title, color=colors)

    fig.update_layout(**standard_layout)
    fig.update_layout({'xaxis_title': r'<b>x1</b>', 'yaxis_title': r'<b>x2</b>'})
    gc.collect()
    return fig


def _sort_points(line: torch.Tensor, epsilon: float = 1e-3, metric: int = 2, insert_na: bool = True) -> torch.Tensor:
    """
    Orders points greedily: start from the first row and repeatedly jump to the nearest row not yet visited.
    When the jump is longer than ``epsilon ** 0.3`` and ``insert_na`` is not ``False``, a ``[nan, nan]`` row is
    placed before the next point so that a plotted line is broken there.

    :param line: tensor n x 2
    :param epsilon: closeness parameter; the actual gap limit is epsilon ** 0.3
    :param metric: order of the norm used as distance (1 for l1, 2 for l2, ...)
    :param insert_na: na insertion flag
    :return: tensor with the rows of line in visiting order, possibly with nan rows in between;
             allocated with torch.zeros, so it has the default dtype
    """
    current = line[0, :]
    ordered = [current]

    n_points = line.shape[0]
    unvisited = torch.ones(n_points, dtype=torch.bool)
    unvisited[0] = False

    gap_limit = epsilon ** 0.3

    for _ in range(n_points - 1):
        distances = torch.norm(line - current, p=metric, dim=1)
        # Visited rows must never be selected again.
        distances[~unvisited] = torch.inf

        min_d = distances.min()
        nearest = distances.argmin()

        # `not (a <= b)` rather than `a > b`, so a nan distance still counts as a gap.
        if insert_na is not False and not (min_d <= gap_limit):
            ordered.append(torch.tensor([torch.nan, torch.nan]))

        current = line[nearest]
        ordered.append(current)
        unvisited[nearest] = False

    result = torch.zeros(len(ordered), 2)
    for row, point in enumerate(ordered):
        result[row] = point
    return result


# Self-assignment of the name imported from ..ml.metrics at the top of the file: it keeps
# `roc_curve_plot` bound in this module's namespace. Presumably a deliberate re-export that also
# stops linters from flagging the import as unused -- TODO confirm.
roc_curve_plot = roc_curve_plot


def _make_line(x1: torch.Tensor, x2: torch.Tensor, model: torch.nn.Module, threshold: float = 0.5,
               cnt_points: int = 25, epsilon: float = 1e-3, insert_na: bool = True) -> torch.Tensor:
    """
    Returns x in [x1, x2] : threshold - epsilon <= model(x) <= threshold + epsilon

    The rectangle spanned by x1 and x2 is covered with a cnt_points x cnt_points grid, the model is evaluated on
    every grid point, and the points whose prediction lies within epsilon of the threshold are kept and ordered
    with _sort_points. At most 1000 evenly spaced points are kept.

    :param x1: 2-dim tensor start
    :param x2: 2-dim tensor end
    :param model: some model that returns a torch tensor with class 1 probabilities using the call: model(x)
    :param threshold: if model(xi) >= threshold, then yi = 1
    :param cnt_points: number of points on each of the two axes
    :param epsilon: contour line points: :math:`\\{x\\in \\mathbb{R}^2 \\, | \\,
                \\text{threshold} - \\text{epsilon} \\le \\text{model}(x) \\le \\text{threshold} + \\text{epsilon}\\}`
    :param insert_na: na insertion flag
    :return: tensor m x 2 with the contour points; a single [nan, nan] row when the bounds contain nan or no
             grid point falls inside the epsilon band
    """
    max_points = 1000

    if torch.isnan(x1[:2]).any() or torch.isnan(x2[:2]).any():
        return torch.tensor([[torch.nan, torch.nan]])

    # Plain floats keep torch.linspace independent of the dtype of the bounds.
    low_1, high_1 = float(min(x1[0], x2[0])), float(max(x1[0], x2[0]))
    low_2, high_2 = float(min(x1[1], x2[1])), float(max(x1[1], x2[1]))

    grid = torch.cartesian_prod(torch.linspace(low_1, high_1, cnt_points),
                                torch.linspace(low_2, high_2, cnt_points))

    with torch.no_grad():
        grid_pred = model(grid)

    # Flatten so that both (n,) and (n, 1) model outputs give a row mask.
    mask = ((threshold - epsilon <= grid_pred) & (grid_pred <= threshold + epsilon)).flatten()

    if not mask.any():
        # Same (1, 2) shape as the nan-bounds case above, so callers always get an m x 2 tensor.
        return torch.tensor([[torch.nan, torch.nan]])

    grid = grid[mask, :]
    if grid.shape[0] > max_points:
        # Evenly spaced subsample. The last index must be shape[0] - 1: using shape[0] is out of range.
        keep = torch.linspace(0, grid.shape[0] - 1, max_points, dtype=torch.int64)
        grid = grid[keep, :]

    return _sort_points(grid, epsilon=epsilon, insert_na=insert_na)


def _make_line_linear(bounds_x: tuple[float, float],
                      model: torch.nn.Module,
                      threshold: float = 0.5) -> tuple[bool, torch.Tensor | None]:
    """
    Returns the separating line on the plane for a linear model, or a linear model with a sigmoid activation.

    The model qualifies when it has exactly two parameters: a weight with two elements and a bias with one.
    The line is the solution of w[0] * x + w[1] * y + b = level, where level is logit(threshold) if the model has
    an attribute named ``Sigmoid`` and threshold otherwise.

    :param bounds_x: bounds for x. tuple with two numbers
    :param model: linear model. e.g. SVM, Sigmoid
    :param threshold: decision threshold of the model
    :return: (True, tensor 100 x 2 with the points of the line) on success, (False, None) when the model does
             not qualify or the line cannot be written as y(x); the caller should then fall back to _make_line
    """
    params = list(model.parameters())
    if len(params) != 2:
        return False, None

    w, b = (p.flatten() for p in params)

    # Exactly two weights (one per plane axis) and a scalar bias are required.
    if w.numel() != 2 or b.numel() != 1:
        return False, None

    # w[1] == 0 means a vertical boundary: dividing by it would give an all-inf/nan line.
    if w[1] == 0:
        return False, None

    x = torch.linspace(*bounds_x, 100)

    try:
        if hasattr(model, 'Sigmoid'):
            # sigmoid(z) = threshold  <=>  z = log(threshold / (1 - threshold))
            level = torch.log(torch.tensor(threshold / (1 - threshold)))
        else:
            level = threshold

        y = (level - b - w[0] * x) / w[1]

    except (ArithmeticError, RuntimeError, TypeError, ValueError) as e:
        print(e, 'non-linear model. is used basic _make_line')
        return False, None

    return True, torch.stack([x, y]).T


def gen_regression_plot(x_tensor: torch.Tensor,
                        y_tensor: torch.Tensor,
                        model: Optional[torch.nn.Module] = None,
                        title: Optional[str] = '<b>Scatter plot</b>') -> go.Figure:
    """
    Returns a graph with a scatter of the initial distribution and, if a model is given, its regression line.

    .. note::
        Support 1d x_tensor. If x_tensor is n-d, t-SNE is applied to project it onto one dimension. The model
        is always evaluated on the original x_tensor, before the projection.

    :param x_tensor: training tensor
    :param y_tensor: target tensor. array with true regression values
    :param model: some model that returns a torch tensor with predicted values using the call: model(x)
    :param title: title of plots; None falls back to the default title. The title is wrapped in <b></b>
                  unless it already is
    :return: scatter plot go.Figure and line of regression

    .. code-block:: python3

        >>> from sklearn.datasets import make_regression
        >>> x, y = make_regression(200, 1, noise=20, random_state=21)
        >>> x, y = torch.tensor(x), torch.tensor(y)
        >>> regression = LinearRegression().fit(x, y)
        >>> gen_regression_plot(x, y, regression)

    """
    flag_tsne = False
    y_tensor = y_tensor.flatten()

    predictions = None
    if model is not None:
        # Predict on the original features; x_tensor may be replaced by its t-SNE projection below.
        predictions = model(x_tensor.float()).flatten().detach()

    if len(x_tensor.shape) > 1 and x_tensor.shape[1] > 1:
        tsne = TSNE(1, init='random', learning_rate=200, random_state=21)
        x_tensor = torch.tensor(tsne.fit_transform(x_tensor.detach().numpy()))
        flag_tsne = True
        print('x_tensor is not 1d. TSNE applied')

    x_tensor = x_tensor.flatten()

    # Fewer points are drawn with larger markers.
    n_points = x_tensor.shape[0]
    if n_points < 10:
        p_size = 8
    elif n_points < 50:
        p_size = 6
    else:
        p_size = 4

    data = [go.Scatter(x=x_tensor, y=y_tensor, name='initial values', mode='markers',
                       marker={'size': p_size, 'color': COLOR3})]

    if predictions is not None:
        # Sort by x so the prediction line is drawn left to right. The predictions are reordered as a
        # tensor and only then converted to numpy.
        x_sorted, order = torch.sort(x_tensor)
        y_axis = predictions[order].numpy()
        data.append(go.Scatter(x=x_sorted, y=y_axis, name='predictions', mode='lines',
                               line={'width': 2.5, 'color': COLOR5}))

    # None is allowed by the signature; without this it would be rendered as the text "None".
    if title is None:
        title = 'Scatter plot'
    # Do not wrap a title that is already bold, such as the default one.
    if not (title.startswith('<b>') and title.endswith('</b>')):
        title = f'<b>{title}</b>'
    if flag_tsne:
        title += '<b> after t-SNE</b>'

    fig = go.Figure(data=data)
    fig.update_layout(**standard_layout)
    fig.update_layout(title={'text': title, 'font': {'size': 24}})

    return fig