Source code for nueramic_mathml.visualize.ml_animation

from __future__ import annotations

import gc
from typing import Optional

import plotly.express as px
import plotly.graph_objs as go
import torch
from sklearn.manifold import TSNE

from .one_animation import standard_layout, COLOR3, COLOR5
from ..ml.classification import LogisticRegressionRBF
from ..ml.metrics import roc_curve_plot
from ..ml.regression import LinearRegression


def gen_classification_plot(x_tensor: torch.Tensor,
                            y_true: torch.Tensor,
                            model: Optional[torch.nn.Module] = None,
                            threshold: float = 0.5,
                            cnt_points: int = 1000,
                            k: float = 0.1,
                            title: Optional[str] = None,
                            epsilon: float = 1e-4,
                            insert_na: bool = False) -> go.Figure:
    """
    Returns a scatter plot of the distribution with an optional separating line.

    If dim(x) = 2, the points are plotted as is and, when a model is given, its decision line is added.
    If dim(x) > 2, a t-SNE projection (sklearn.manifold.TSNE with default settings) is plotted and the
    model is ignored. dim(x) < 2 is not supported.

    .. note:: if the model is linear, has a single layer and a simple activation function, the line is
        computed analytically and the visualization is faster

    .. warning:: if the model is heavy, you should reduce cnt_points, but then the probability of missing
        points of the line is higher and the visualization may be rather incorrect. You can widen the
        accepted band by increasing epsilon.

    :param x_tensor: training tensor
    :param y_true: target tensor. array with true values of binary classification
    :param model: some model that returns a torch tensor with class 1 probabilities using the call: model(x)
    :param threshold: if model(xi) >= threshold, then yi = 1
    :param cnt_points: number of points on each of the two axes when dim(x) = 2
    :param k: constant for draw on section: [x.min() - (x.max() - x.min()) * k, x.max() + (x.max() - x.min()) * k]
    :param title: title of plots
    :param epsilon: contour line points: :math:`\\{x\\in \\mathbb{R}^2 \\, | \\,
        \\text{threshold} - \\text{epsilon} \\le \\text{model}(x) \\le \\text{threshold} + \\text{epsilon}\\}`
    :param insert_na: na insertion flag when two points too far away
    :return: scatter plot go.Figure
    :raises AssertionError: if x_tensor has fewer than two columns

    .. code-block:: python3

        >>> from sklearn.datasets import make_moons
        >>> torch.random.manual_seed(7)
        >>> x, y = make_moons(1000, noise=0.15, random_state=7)
        >>> x, y = torch.tensor(x), torch.tensor(y)
        >>> lr_rbf = LogisticRegressionRBF(x[:50])
        >>> lr_rbf.fit(x, y, epochs=5000)
        >>> lr_rbf.metrics_tab(x, y)

    .. code-block:: python3

        {'recall': 0.9980000257492065,
         'precision': 0.9842209219932556,
         'accuracy': 0.9909999966621399,
         'f1': 0.9910625822119956,
         'auc_roc': 0.9995800006320514}

    .. code-block:: python3

        >>> gen_classification_plot(x, y, lr_rbf, threshold=0.5, epsilon=0.001)

    """
    # plotly express treats string labels as discrete categories
    colors = [str(int(label)) for label in y_true]

    if x_tensor.shape[1] < 2:
        raise AssertionError('x.shape[1] must be >= 2')

    if x_tensor.shape[1] == 2:
        title = '<b>Initial Distribution</b>' if title is None else f'<b>{title}</b>'
        fig = px.scatter(x=x_tensor[:, 0], y=x_tensor[:, 1], title=title, color=colors)

        if model is not None:
            dx = x_tensor[:, 0].max() - x_tensor[:, 0].min()
            dy = x_tensor[:, 1].max() - x_tensor[:, 1].min()

            # corners of the search rectangle: the data bounding box padded by a fraction k on every side
            x1 = torch.tensor([x_tensor[:, 0].min() - dx * k, x_tensor[:, 1].min() - dy * k])
            x2 = torch.tensor([x_tensor[:, 0].max() + dx * k, x_tensor[:, 1].max() + dy * k])

            # fast path: closed-form line for a single-layer linear model
            flag, grid = _make_line_linear((x1[0], x2[0]), model, threshold)

            if flag:
                # keep only the part of the analytic line that lies inside the (unpadded) data bounding box;
                # the vertical padding is dy * k on both the lower and the upper bound
                mask = (
                    (grid[:, 0] >= x1[0] + dx * k) &
                    (grid[:, 0] <= x2[0] - dx * k) &
                    (grid[:, 1] >= x1[1] + dy * k) &
                    (grid[:, 1] <= x2[1] - dy * k)
                )
                grid = grid[mask, :]
            else:
                # generic path: scan a grid and keep the points whose prediction is close to the threshold
                grid = _make_line(x1, x2, model, threshold, cnt_points, epsilon, insert_na)

            # reshape guards against a flat [nan, nan] placeholder, so the unpacking always yields two columns
            line_x, line_y = grid.detach().cpu().reshape(-1, 2).T
            fig.add_scatter(x=line_x, y=line_y, name='sep plane', mode='lines')

    else:
        title = '<b>TSNE of Distribution</b>' if title is None else title
        tsne_x = TSNE().fit_transform(x_tensor)
        fig = px.scatter(x=tsne_x[:, 0], y=tsne_x[:, 1], title=title, color=colors)

    fig.update_layout(**standard_layout)
    fig.update_layout({'xaxis_title': r'<b>x1</b>', 'yaxis_title': r'<b>x2</b>'})

    # the scanned grid can be large; release it before returning
    gc.collect()

    return fig
def _sort_points(line: torch.Tensor,
                 epsilon: float = 1e-3,
                 metric: int = 2,
                 insert_na: bool = True) -> torch.Tensor:
    """
    Returns the points ordered greedily by closeness: each next point is the nearest not yet used one.
    If the nearest remaining point is farther than ``epsilon ** 0.3`` and ``insert_na`` is set, a
    ``[nan, nan]`` row is inserted before it (a nan row makes plotly break the drawn line there).

    :param line: tensor n x 2
    :param epsilon: closeness parameter; the actual gap limit is ``epsilon ** 0.3``
    :param metric: order of the norm used as distance (1 for l1, 2 for l2, ...)
    :param insert_na: na insertion flag
    :return: sorted tensor line (default dtype) with possibly added nan rows
    """
    n_points = line.shape[0]
    if n_points == 0:
        return torch.zeros(0, 2)

    max_gap = epsilon ** 0.3
    ordered = [line[0].reshape(2)]

    # True for the points that are still waiting to be placed
    unvisited = torch.ones(n_points, dtype=torch.bool, device=line.device)
    unvisited[0] = False

    for _ in range(n_points - 1):
        # ordered[-1] is always a real point: a nan row is immediately followed by a point
        distances = torch.norm(line - ordered[-1], p=metric, dim=1)
        distances[~unvisited] = float('inf')
        nearest = distances.argmin()

        if insert_na and not distances[nearest] <= max_gap:
            ordered.append(torch.tensor([torch.nan, torch.nan]))

        ordered.append(line[nearest].reshape(2))
        unvisited[nearest] = False

    out_dtype = torch.get_default_dtype()
    return torch.stack([point.to(device='cpu', dtype=out_dtype) for point in ordered])


# explicit re-export of the imported helper under this module's namespace
roc_curve_plot = roc_curve_plot


def _make_line(x1: torch.Tensor,
               x2: torch.Tensor,
               model: torch.nn.Module,
               threshold: float = 0.5,
               cnt_points: int = 25,
               epsilon: float = 1e-3,
               insert_na: bool = True) -> torch.Tensor:
    """
    Returns x in [x1, x2] : threshold - epsilon <= model(x) <= threshold + epsilon

    The rectangle spanned by x1 and x2 is covered with a cnt_points x cnt_points grid, the model is
    evaluated on it and the grid nodes whose prediction falls into the band are returned, ordered by
    :func:`_sort_points`. At most 1000 evenly spaced matching nodes are kept.

    :param x1: 2-dim tensor start
    :param x2: 2-dim tensor end
    :param model: some model that returns a torch tensor with class 1 probabilities using the call: model(x)
    :param threshold: if model(xi) >= threshold, then yi = 1
    :param cnt_points: number of points on each of the two axes
    :param epsilon: contour line points: :math:`\\{x\\in \\mathbb{R}^2 \\, | \\,
        \\text{threshold} - \\text{epsilon} \\le \\text{model}(x) \\le \\text{threshold} + \\text{epsilon}\\}`
    :param insert_na: na insertion flag
    :return: tensor m x 2 with the points of the line; a single ``[[nan, nan]]`` row if the bounds are nan
        or no grid node falls into the band
    """
    max_line_points = 1000

    if torch.isnan(x1[0]) or torch.isnan(x1[1]) or torch.isnan(x2[0]) or torch.isnan(x2[1]):
        return torch.tensor([[torch.nan, torch.nan]])

    axis_1 = torch.linspace(float(min(x1[0], x2[0])), float(max(x1[0], x2[0])), cnt_points)
    axis_2 = torch.linspace(float(min(x1[1], x2[1])), float(max(x1[1], x2[1])), cnt_points)
    grid = torch.cartesian_prod(axis_1, axis_2)

    with torch.no_grad():
        grid_pred = model(grid)

    # flatten so that both (n,) and (n, 1) model outputs index the grid rows
    mask = ((threshold - epsilon <= grid_pred) & (grid_pred <= threshold + epsilon)).flatten()
    n_hits = int(mask.sum())

    if n_hits == 0:
        # same 2-d shape as the nan-bounds case, so callers can always unpack two columns
        return torch.tensor([[torch.nan, torch.nan]])

    grid = grid[mask, :]

    if n_hits > max_line_points:
        # evenly spaced sub-sample; the last valid row index is n_hits - 1
        keep = torch.linspace(0, n_hits - 1, max_line_points, dtype=torch.int64)
        grid = grid[keep, :]

    return _sort_points(grid, epsilon=epsilon, insert_na=insert_na)


def _make_line_linear(bounds_x: tuple[float, float],
                      model: torch.nn.Module,
                      threshold: float = 0.5) -> tuple[bool, Optional[torch.Tensor]]:
    """
    Returns the separating line on the plane for a linear model or a linear model with a sigmoid activation.

    The model is treated as linear when it has exactly two parameters (weights and bias) and two weights.
    The line is :math:`w_0 x + w_1 y + b = c`, where c is the threshold, or the logit of the threshold
    when the model has an attribute ``Sigmoid``.

    :param bounds_x: bounds for x. tuple with two numbers
    :param model: linear model. e.g. SVM, Sigmoid
    :param threshold: decision threshold of the model output
    :return: (True, tensor 100 x 2 with the points of the line) if the line was built analytically,
        otherwise (False, None) and the caller should fall back to :func:`_make_line`
    """
    # Check linear model
    params = list(model.parameters())
    if len(params) > 2:
        return False, None

    try:
        w, b = params
    except ValueError as e:
        print(e, 'non-linear model. is used basic _make_line')
        return False, None

    w, b = w.flatten(), b.flatten()

    if len(w) != 2 or len(b) != 1:
        return False, None

    # a zero second weight means a vertical line, which cannot be written as y(x)
    if w[1] == 0:
        return False, None

    if hasattr(model, 'Sigmoid'):
        # the logit is defined only for thresholds strictly between 0 and 1
        if not 0 < threshold < 1:
            return False, None
        level = torch.log(torch.tensor(threshold / (1 - threshold)))
    else:
        level = threshold

    x = torch.linspace(float(bounds_x[0]), float(bounds_x[1]), 100)
    y = (level - b - w[0] * x) / w[1]

    return True, torch.stack([x, y]).T
def gen_regression_plot(x_tensor: torch.Tensor,
                        y_tensor: torch.Tensor,
                        model: Optional[torch.nn.Module] = None,
                        title: Optional[str] = '<b>Scatter plot</b>') -> go.Figure:
    """
    Returns a graph with a regression line and a scatter of the initial distribution.

    .. note:: Supports 1d x_tensor. If x_tensor has more than one feature, it is projected to one
        dimension with t-SNE before plotting (predictions are still computed on the original features)

    :param x_tensor: training tensor
    :param y_tensor: target tensor. array with true regression values
    :param model: some model that returns a torch tensor with predicted values using the call: model(x)
    :param title: title of plots. None means the default title
    :return: scatter plot go.Figure and line of regression

    .. code-block:: python3

        >>> from sklearn.datasets import make_regression
        >>> x, y = make_regression(200, 1, noise=20, random_state=21)
        >>> x, y = torch.tensor(x), torch.tensor(y)
        >>> regression = LinearRegression().fit(x, y)
        >>> gen_regression_plot(x, y, regression)

    """
    y_tensor = y_tensor.flatten()

    # predictions are made on the original features, before any projection of x
    predictions = None
    if model is not None:
        predictions = model(x_tensor.float()).flatten().detach().cpu()

    flag_tsne = len(x_tensor.shape) > 1 and x_tensor.shape[1] > 1
    if flag_tsne:
        tsne = TSNE(1, init='random', learning_rate=200, random_state=21)
        x_tensor = torch.tensor(tsne.fit_transform(x_tensor.detach().cpu().numpy()))
        print('x_tensor is not 1d. TSNE applied')

    x_tensor = x_tensor.flatten().detach().cpu()

    # fewer points -> bigger markers
    n_points = x_tensor.shape[0]
    if n_points < 10:
        p_size = 8
    elif n_points < 50:
        p_size = 6
    else:
        p_size = 4

    data = [
        go.Scatter(x=x_tensor.numpy(), y=y_tensor.detach().cpu().numpy(), name='initial values',
                   mode='markers', marker={'size': p_size, 'color': COLOR3})
    ]

    if predictions is not None:
        # the line must be drawn left to right, so sort x and reorder the predictions accordingly
        sorted_x, indices = torch.sort(x_tensor)
        data.append(
            go.Scatter(x=sorted_x.numpy(), y=predictions[indices].numpy(), name='predictions',
                       mode='lines', line={'width': 2.5, 'color': COLOR5})
        )

    if title is None:
        title = '<b>Scatter plot</b>'
    # do not wrap a title that is already bold (e.g. the default one) a second time
    if not (title.startswith('<b>') and title.endswith('</b>')):
        title = f'<b>{title}</b>'
    if flag_tsne:
        title += '<b> after t-SNE</b>'

    fig = go.Figure(data=data)
    fig.update_layout(**standard_layout)
    fig.update_layout(title={'text': title, 'font': {'size': 24}})

    return fig