Source code for nueramic_mathml.multi_optimize

# Multi dimensional optimization algorithms for function optimization
from __future__ import annotations

from typing import Callable, Tuple, Sequence

import numpy
import pandas as pd
import torch

from .calculus import gradient, hessian, jacobian
from .one_optimize import brent
from .support import HistoryGD, update_history_gd, HiddenPrints, print_verbose


def initialize(function: Callable[[torch.Tensor], torch.Tensor],
               x0: torch.Tensor,
               epsilon: float = 1e-5,
               keep_history: bool = False) -> Tuple:
    """
    Returns initial x_k with double dtype, func_k, grad_k, round_precision, history

    :param function: callable that depends on the first positional argument
    :param x0: numpy ndarray which is initial approximation
    :param epsilon: optimization accuracy
    :param keep_history: flag of return history
    :return: tuple with x_k, func_k, grad_k, history, round_precision

    """
    x_k = x0.double().flatten()
    func_k = function(x_k)
    grad_k = gradient(function, x_k)
    round_precision = -int(numpy.log10(epsilon))  # variable to determine the rounding accuracy

    # if keep_history=True, we will save history. here is initial step
    if keep_history:
        history: HistoryGD = {
            'iteration': [0],
            'f_value': [func_k],
            'f_grad_norm': [grad_k.norm(2)],
            'x': [x_k.flatten()],
            'message': ''
        }

    else:
        history: HistoryGD = {'iteration': [], 'f_value': [], 'x': [], 'f_grad_norm': [], 'message': ''}

    return x_k, func_k, grad_k, history, round_precision


[docs]def bfgs(function: Callable[[torch.Tensor], torch.Tensor], x0: torch.Tensor, tolerance: float = 1e-8, max_iter: int = 500, verbose: bool = False, keep_history: bool = False) -> Tuple[torch.Tensor, HistoryGD]: """ Returns a tensor n x 1 with optimal point and history using the BFGS method [1]_ Broyden–Fletcher–Goldfarb–Shanno algorithm The algorithm does not use Wolfe conditions. Instead of wolfe, alg uses the optimal step. .. note:: The algorithm only works for a flat x0, and the functions should depend on a flat array .. rubric:: References .. [1] Wright and Nocedal, 'Numerical Optimization', 1999; pp.136-140 BFGS algorithm. :math:`\\rule{125mm}{0.2pt} \\\\` :param function: callable that depends on the first positional argument. Other arguments are passed through kwargs :param x0: start minimization point :param tolerance: criterion of stop os l2 norm(grad f) < tolerance :param max_iter: maximum number of iterations :param verbose: flag of printing iteration logs :param keep_history: flag of return history :return: tuple with point and history. .. rubric:: Examples .. code-block:: python3 >>> def func(x): return 10 * x[0] ** 2 + x[1] ** 2 / 5 >>> x_0 = torch.tensor([1, 2]) >>> solution = bfgs(func, x_0) >>> print(solution[0]) tensor([3.4372e-14, 1.8208e-14], dtype=torch.float64) """ # initialization x_k, func_k, grad_k, history, round_precision = initialize(function, x0, tolerance, keep_history) h_k = torch.eye(x_k.shape[0], dtype=torch.float64) * tolerance ** 0.1 grad_k = grad_k.reshape(-1, 1) x_k = x_k.reshape(-1, 1) s_k = x_k + 1 # first verbose print_verbose(x_k, func_k, verbose, 0, round_precision) try: for i in range(max_iter): # stop criterion if grad_k.norm(2) < tolerance or s_k.norm(2) < tolerance: history['message'] = 'Searching finished. Successfully. code 0' return x_k.reshape(-1), history p_k = -h_k @ grad_k with HiddenPrints(): def optimization_f(alpha: float | torch.Tensor) -> float | torch.Tensor: return function(x_k + alpha * p_k) alpha_k = brent(optimization_f, (0, 1))[0] # step x_k_plus1 = x_k + alpha_k * p_k grad_f_k_plus1 = gradient(function, x_k_plus1, delta_x=1e-1).reshape(-1, 1) s_k = x_k_plus1 - x_k y_k = grad_f_k_plus1 - grad_k h_k = calc_h_new(h_k, s_k, y_k) grad_k = grad_f_k_plus1 x_k = x_k_plus1 func_k = function(x_k.flatten()) # check divergence if torch.isnan(x_k).any(): history['message'] = f'The method has diverged. code 2' return x_k.flatten(), history # verbose print_verbose(x_k.flatten(), func_k, verbose, i + 1, round_precision) # history if keep_history: history = update_history_gd(history, values=[i + 1, func_k, grad_k.norm(2), x_k.flatten()]) else: history['message'] = 'Searching finished. Max iterations have been reached. code 1' except Exception as e: history['message'] = f'Optimization failed. {e}. code 2' return x_k.flatten(), history
def calc_h_new(h: torch.Tensor, s: torch.Tensor, y: torch.Tensor) -> torch.Tensor: """ Calculates a new approximation of the inverse Hessian matrix :param h: The previous approximation of the H matrix :param s: the difference x_{i+1} - x_{i} :param y: the difference f'_{i+1} - f'_{i} :return: The new approximation of inverse Hessian matrix """ ro = 1 / (y.T @ s) i = torch.eye(h.shape[0]).double() h_new = (i - ro * s @ y.T) @ h @ (i - ro * s @ y.T) + ro * s @ s.T return h_new
[docs]def gd_constant(function: Callable[[torch.Tensor], torch.Tensor], x0: torch.Tensor, epsilon: float = 1e-5, gamma: float = 0.1, max_iter: int = 500, verbose: bool = False, keep_history: bool = False) -> Tuple[torch.Tensor, HistoryGD]: """ Returns a tensor n x 1 with optimal point and history using Algorithm with constant step. The gradient of the function shows us the direction of increasing the function. The idea is to move in the opposite direction to :math:`\\displaystyle x_{k + 1} \\text{ where } f(x_{k + 1}) < f(x_{k}) \\text{ .}` But, if we add a gradient to :math:`\\displaystyle x_{k}` without changes, our method will often diverge. So we need to add a gradient with some weight :math:`\\displaystyle \\gamma \\text{ .}\\\\` :param function: callable that depends on the first positional argument :param x0: Torch tensor which is initial approximation :param epsilon: optimization accuracy :param gamma: gradient step :param max_iter: maximum number of iterations :param verbose: flag of printing iteration logs :param keep_history: flag of return history :return: tuple with point and history. .. rubric:: Examples .. code-block:: python3 >>> def func(x): return x[0] ** 2 + x[1] ** 2 >>> x_0 = torch.tensor([1, 2]) >>> solution = gd_constant(func, x_0) >>> print(solution[0]) tensor([1.9156e-06, 3.8312e-06], dtype=torch.float64) """ # initialization x_k, func_k, grad_k, history, round_precision = initialize(function, x0, epsilon, keep_history) # first verbose print_verbose(x_k, func_k, verbose, 0, round_precision) try: for i in range(max_iter - 1): if grad_k.norm(2) < epsilon: # comparing of norm 2 with optimization accuracy history['message'] = 'Optimization terminated successfully. code 0' break else: x_k = x_k - gamma * grad_k # updating the point for next iter and repeat grad_k = gradient(function, x_k) func_k = function(x_k) # verbose print_verbose(x_k, func_k, verbose, i + 1, round_precision) # history if keep_history: history = update_history_gd(history, values=[i + 1, func_k, (grad_k ** 2).sum() ** 0.5, x_k]) else: history['message'] = 'Optimization terminated. Max steps. code 1' except Exception as e: history['message'] = f'Optimization failed. {e}. code 2' return x_k, history
[docs]def gd_frac(function: Callable[[torch.Tensor], torch.Tensor], x0: torch.Tensor, epsilon: float = 1e-5, gamma: float = 0.1, delta: float = 0.1, lambda0: float = 0.1, max_iter: int = 500, verbose: bool = False, keep_history: bool = False) -> Tuple[torch.Tensor, HistoryGD]: """ Returns a tensor n x 1 with optimal point and history using Algorithm with fractional step. Requirements: :math:`\\ 0 < \\lambda_0 < 1` is the step multiplier, :math:`0 < \\delta < 1` influence on step size. :param function: callable that depends on the first positional argument :param x0: Torch tensor which is initial approximation :param epsilon: optimization accuracy :param gamma: gradient step :param delta: value of the crushing parameter :param lambda0: initial step :param max_iter: maximum number of iterations :param verbose: flag of printing iteration logs :param keep_history: flag of return history :return: tuple with point and history. .. rubric:: Examples .. code-block:: python3 >>> def func(x): return x[0] ** 2 + x[1] ** 2 >>> x_0 = torch.tensor([1, 2]) >>> solution = gd_frac(func, x_0) >>> print(solution[0]) tensor([1.9156e-06, 3.8312e-06], dtype=torch.float64) """ # initialization x_k, func_k, grad_k, history, round_precision = initialize(function, x0, epsilon, keep_history) # first verbose print_verbose(x_k, func_k, verbose, 0, round_precision) try: for i in range(max_iter - 1): # point for first comparison, first gradient step t = x_k - gamma * grad_k func_t = function(t) # will divide the gradient step until condition is met while not func_t - func_k <= - gamma * delta * sum(grad_k ** 2): gamma = gamma * lambda0 t = x_k - gamma * grad_k func_t = function(t) x_k = t func_k = func_t grad_k = gradient(function, x_k) # comparing of norm 2 with optimization accuracy if grad_k.norm(2) < epsilon: history['message'] = 'Optimization terminated successfully. code 0' break # verbose print_verbose(x_k, func_k, verbose, i + 1, round_precision) # history if keep_history: history = update_history_gd(history, values=[i + 1, func_k, (grad_k ** 2).sum() ** 0.5, x_k]) else: history['message'] = 'Optimization terminated. Max steps. code 1' except Exception as e: history['message'] = f'Optimization failed. {e}. code 2' return x_k, history
[docs]def gd_optimal(function: Callable[[torch.Tensor], torch.Tensor], x0: torch.Tensor, epsilon: float = 1e-5, max_iter: int = 500, verbose: bool = False, keep_history: bool = False) -> Tuple[torch.Tensor, HistoryGD]: """ Returns a tensor n x 1 with optimal point and history using Algorithm with optimal step. The idea is to choose a gamma that minimizes the function in the direction :math:`\\ f'(x_k)` :param function: callable that depends on the first positional argument :param x0: Torch tensor which is initial approximation :param epsilon: optimization accuracy :param max_iter: maximum number of iterations :param verbose: flag of printing iteration logs :param keep_history: flag of return history :return: tuple with point and history. .. rubric:: Examples .. code-block:: python3 >>> def func(x): return -torch.exp(- x[0] ** 2 - x[1] ** 2) >>> x_0 = torch.tensor([1, 2]) >>> solution = gd_optimal(func, x_0) >>> print(solution[0]) tensor([9.2070e-08, 1.8405e-07], dtype=torch.float64) """ # initialization x_k, func_k, grad_k, history, round_precision = initialize(function, x0, epsilon, keep_history) # first verbose print_verbose(x_k, func_k, verbose, 0, round_precision) try: for i in range(max_iter - 1): # comparing of norm 2 with optimization accuracy if grad_k.norm(2) < epsilon: history['message'] = 'Optimization terminated successfully. code 0' break with HiddenPrints(): # hiding the prints of the results of the brent algorithm def optimization_f(gam: float | torch.Tensor) -> float | torch.Tensor: return function(x_k - gam * grad_k) gamma = brent(optimization_f, (0, 1))[0] x_k = x_k - gamma * grad_k grad_k = gradient(function, x_k) func_k = function(x_k) # verbose print_verbose(x_k, func_k, verbose, i + 1, round_precision) # history if keep_history: history = update_history_gd(history, values=[i + 1, func_k, (grad_k ** 2).sum() ** 0.5, x_k]) else: history['message'] = 'Optimization terminated. Max steps. code 1' except Exception as e: history['message'] = f'Optimization failed. {e}. code 2' return x_k, history
[docs]def nonlinear_cgm(function: Callable[[torch.Tensor], torch.Tensor], x0: torch.Tensor, epsilon: float = 1e-5, max_iter: int = 500, verbose: bool = False, keep_history: bool = False) -> Tuple[torch.Tensor, HistoryGD]: """ Returns a tensor n x 1 with optimal point and history. Algorithm works when the function is approximately quadratic near the minimum, which is the case when the function is twice differentiable at the minimum and the second derivative is non-singular there [1]_ .. rubric:: References .. [1] Nocedal, J., &amp; Wright, S. J. (2006). 5.2 NONLINEAR CONJUGATE GRADIENT METHOD In Numerical optimization (pp. 121). essay, Springer. :param function: callable that depends on the first positional argument :param x0: Torch tensor which is initial approximation :param epsilon: optimization accuracy :param max_iter: maximum number of iterations :param verbose: flag of printing iteration logs :param keep_history: flag of return history :return: tuple with point and history. .. rubric:: Examples Example for :math:`f(x, y) = (x + 0.5)^2 + (y - 0.5)^2, \\quad x = 1` .. code-block:: python3 >>> def func(x): return 10 * x[0] ** 2 + x[1] ** 2 / 5 >>> x_0 = torch.tensor([1, 2]) >>> solution = nonlinear_cgm(func, x_0) >>> print(solution[0]) tensor([6.9846e+25, 4.2454e+26], dtype=torch.float64) """ # initialization x_k, func_k, grad_k, history, round_precision = initialize(function, x0, epsilon, keep_history) p_k = grad_k # first verbose print_verbose(x_k, func_k, verbose, 0, round_precision) try: for i in range(max_iter - 1): if grad_k.norm(2) < epsilon: history['message'] = 'Optimization terminated successfully. code 0' break else: with HiddenPrints(): def optimization_f(gam: float | torch.Tensor) -> float | torch.Tensor: return function(x_k - gam * grad_k) gamma = brent(optimization_f, (0, 1))[0] x_k = x_k - gamma * p_k grad_k_new = gradient(function, x_k) beta_fr = (grad_k_new @ grad_k_new.reshape(-1, 1)) / (grad_k @ grad_k.reshape(-1, 1)) p_k = grad_k_new + beta_fr * p_k grad_k = grad_k_new func_k = function(x_k) # verbose print_verbose(x_k, func_k, verbose, i + 1, round_precision) # history if keep_history: history = update_history_gd(history, values=[i + 1, func_k, grad_k.norm(2), x_k]) else: history['message'] = 'Optimization terminated. Max steps. code 1' except Exception as e: history['message'] = f'Optimization failed. {e}. code 2' return x_k, history
def log_barrier_function(function: Callable[[torch.Tensor], torch.Tensor], x0: torch.Tensor, mu: float, inequality_constraints: Sequence[Callable[[torch.Tensor], torch.Tensor]] ) -> torch.Tensor: """ Support function. Compute log-barrier function .. math:: P(x, \\mu) = f(x) - \\mu \\sum_{i\\in\\mathcal{I}}\\ln c_i(x) :param function: callable that depends on the first positional argument :param x0: some specific point x(Torch tensor) :param mu: parameter weighing constraints :param inequality_constraints: :math:`\\mathcal{I}` is set of inequality functions :return: tuple with point and history. """ m = len(inequality_constraints) # Amount of inequality constraints output_lb = function(x0) for j in range(m): const_function = inequality_constraints[j](x0) if 0 <= const_function < 1e-8: output_lb += mu * 0 elif const_function > 1e-8: output_lb -= mu * torch.log(const_function) else: output_lb += 10 ** 10 return output_lb
[docs]def primal_dual_interior(function: Callable[[torch.Tensor], torch.Tensor], x0: torch.Tensor, inequality_constraints: Sequence[Callable[[torch.Tensor], torch.Tensor]], mu: float = 1e-4, epsilon: float = 1e-12, alpha: float = 1e-1, max_iter: int = 200, verbose: bool = False, keep_history: bool = False) -> Tuple[torch.Tensor, HistoryGD]: """ Returns point and history of minimization. [1]_ AIM: minimize :math:`\\ f(x)` subject to :math:`\\ c(x) \\geqslant 0`; c from inequality_constraints :math:`\\ B(x,\\mu) = f(x) - \\mu \\sum_{i=1}^m \\log(c_i(x)) \\rightarrow \\min` Here :math:`\\mu` is a small positive scalar, :math:`\\mu \\rightarrow 0` :param function: callable that depends on the first positional argument :param x0: some specific point x(Torch tensor) :param inequality_constraints: :math:`\\mathcal{I}` is set of inequality functions :param mu: is a small positive scalar, sometimes called the "barrier parameter" :param epsilon: optimization accuracy :param alpha: step length :param max_iter: maximum number of iterations :param verbose: flag of printing iteration logs :param keep_history: flag of return history :return: tuple with point and history. :raises ArithmeticError: if x0 is not in trust region. .. rubric:: Reference .. [1] https://en.wikipedia.org/wiki/Interior-point_method .. rubric:: Examples .. code-block:: python3 >>> primal_dual_interior(lambda x: (x[0] + 0.5) ** 2 + (x[1] - 0.5) ** 2, torch.tensor([0.9, 0.1]), >>> [lambda x: x[0], lambda x: 1 - x[0], lambda x: x[1], lambda x: 1 - x[1]])[0] tensor([1.9910e-04, 5.0000e-01], dtype=torch.float64) """ # initialization def lb_function(x: torch.Tensor, _mu: float = mu) -> torch.Tensor | float: """ log barrier function -- main function. therefore, instead of a function, we use lb_function """ return log_barrier_function(function, x, _mu, inequality_constraints) x_k, func_k, grad_k, history, round_precision = initialize(function, x0, epsilon, keep_history) m = len(inequality_constraints) # Amount of inequality constraints n = x_k.shape[0] # Amount of variables p = torch.ones(m + n) # init of p lambdas = torch.rand(m, dtype=torch.float64) try: function(x0) for i in range(m): if inequality_constraints[i](x0) < 0: raise ArithmeticError except ArithmeticError: history['message'] = 'Point out of domain' return x_k, history # first verbose print_verbose(x_k, func_k, verbose, 0, round_precision) try: for i in range(max_iter): # terminate condition if p[:n].norm(2) < epsilon: history['message'] = 'Optimization terminated successfully. code 0' break w = hessian(lb_function, x_k, 1e-4) # hessian of lb function a = jacobian(inequality_constraints, x_k) c = torch.diag(torch.tensor([c(x_k) for c in inequality_constraints])).double() left_matrix = torch.zeros(n + m, n + m, dtype=torch.float64) # left matrix equation left_matrix[:n, :n] = w left_matrix[:n, n:] = -a.T left_matrix[n:, :n] = torch.diag(lambdas) @ a left_matrix[n:, n:] = c right_matrix = torch.zeros(n + m, 1, dtype=torch.float64) # right matrix # - g + A.T @ lambdas right_matrix[:n] = - gradient(lb_function, x_k).reshape(-1, 1) + a.T @ lambdas.reshape(-1, 1) # mu 1 - c @ lambdas right_matrix[n:] = mu * torch.ones(m, 1) - c @ lambdas.reshape(-1, 1) try: # check singularity p = torch.linalg.solve(left_matrix, right_matrix.flatten()) # direction except torch.linalg.LinAlgError: left_matrix[:n, :n] += 1e-2 * torch.eye(n) left_matrix[n:, n:] += 1e-4 * torch.eye(m) p = torch.linalg.solve(left_matrix, right_matrix.flatten()) # direction x_k += alpha * p[:n] lambdas += alpha * p[n:] # verbose if verbose or keep_history: func_k = function(x_k) print_verbose(x_k, func_k, verbose, i + 1, round_precision) # history if keep_history: history = update_history_gd(history, values=[i + 1, func_k, p[:n].norm(2), x_k.clone()]) mu *= 0.99 else: history['message'] = 'Optimization terminated. Max steps. code 1' except Exception as e: history['message'] = f'Optimization failed. {e}. code 2' return x_k, history
[docs]def log_barrier_solver(function: Callable[[torch.Tensor], torch.Tensor], x0: torch.Tensor, inequality_constraints: Sequence[Callable[[torch.Tensor], torch.Tensor]], epsilon: float = 1e-5, max_iter: int = 1000, keep_history: bool = False, verbose: bool = False) -> Tuple[torch.Tensor, HistoryGD]: """ Returns optimal point of optimization with inequality constraints by Log Barrier method [1]_ .. rubric:: References .. [1] Nocedal, J., &amp; Wright, S. J. (2006). 19.6 THE PRIMAL LOG-BARRIER METHOD. In Numerical optimization (pp. 583–584). essay, Springer. :math:`\\rule{125mm}{0.2pt} \\\\` :param function: callable that depends on the first positional argument :param x0: some specific point x(Torch tensor) :param epsilon: optimization accuracy :param inequality_constraints: :math:`\\mathcal{I}` is set of inequality functions :param max_iter: maximum number of iterations :param keep_history: flag of return history :param verbose: flag of printing iteration logs :return: tuple with point and history. .. rubric:: Examples Example for :math:`f(x, y) = (x + 0.5)^2 + (y - 0.5)^2, \\quad 0 \\le x \\le 1, 0 \\le y \\le 1` .. code-block:: python3 >>> log_barrier_solver(lambda x: (x[0] + 0.5) ** 2 + (x[1] - 0.5) ** 2, torch.tensor([0.9, 0.1]), >>> [lambda x: x[0], lambda x: 1 - x[0], lambda x: x[1], lambda x: 1 - x[1]]) tensor([0.0032, 0.5000], dtype=torch.float64) """ m = len(inequality_constraints) # Amount of inequality constraints x_k, func_k, grad_k, history, round_precision = initialize(function, x0, epsilon, keep_history) try: function(x0) for i in range(m): if inequality_constraints[i](x0) < 0: raise ArithmeticError except ArithmeticError: history['message'] = 'Point out of domain' return x_k, history tau = 1 # The tau sequence will be geometric try: for i in range(max_iter): mu_k = tau ** 0.5 x_k, history_step = gd_frac(lambda x: log_barrier_function(function, x, mu_k, inequality_constraints), x_k, gamma=mu_k, epsilon=tau, keep_history=True, max_iter=5) tau *= 0.9 if tau <= epsilon: history['message'] = 'Optimization terminated successfully. code 0' break # verbose if verbose or keep_history: func_k = function(x_k) grad_k = gradient(function, x_k) print_verbose(x_k, func_k, verbose, i + 1, round_precision) # history if keep_history: history = update_history_gd(history, values=[i + 1, func_k, grad_k.norm(2), x_k.clone()]) except Exception as e: history['message'] = f'Optimization failed. {e}. code 2' return x_k, history
[docs]def constrained_lagrangian_solver(function: Callable[[float | torch.Tensor], torch.Tensor], x0: torch.Tensor, constraints: Sequence[Callable[[float | torch.Tensor], torch.Tensor]], x_bounds: Sequence[Tuple[float, float]] | None | torch.Tensor = None, epsilon: float = 1e-4, max_iter: int = 250, keep_history: bool = False, verbose: bool = False) -> Tuple[torch.Tensor, HistoryGD]: """ Returns a tensor n x 1 with optimal point and history of minimization by newton_eq_const. Alias of ''Newton’s method under equality constrains'' [1]_ Example for :math:`f(x, y) = (x + 0.5)^2 + (y - 0.5)^2, \\quad x = 1` .. rubric:: References .. [1] Nocedal, J., &amp; Wright, S. J. (2006). 17.4 PRACTICAL AUGMENTED LAGRANGIAN METHODS. In Numerical optimization (pp. 519–521). essay, Springer. :math:`\\rule{125mm}{0.2pt} \\\\` :param function: callable that depends on the first positional argument :param x0: some specific point x(Torch tensor) :param constraints: list of equality constraints :param x_bounds: bounds on x. e.g. 0 <= x[i] <= 1, then x_bounds[i] = (0, 1) :param epsilon: optimization accuracy :param max_iter: maximum number of iterations :param keep_history: flag of return history :param verbose: flag of printing iteration logs :return: tuple with point and history. .. rubric:: Examples .. code-block:: python3 >>> constrained_lagrangian_solver(lambda x: (x[0] + 0.5) ** 2 + (x[1] - 0.5) ** 2, >>> torch.tensor([0.1, 0.1]),[lambda x: x[0] - 1])) tensor([1.0540, 0.5000], dtype=torch.float64) """ m = len(constraints) if x_bounds is None: x_bounds = torch.zeros(len(x0), 2).double() for i in range(len(x0)): x_bounds[i, :] = torch.tensor([-torch.inf, torch.inf]) elif isinstance(x0, torch.Tensor): assert x0.shape[0] == x_bounds.shape[0], 'the boundaries should be n x 2 tensor' assert len(x_bounds.shape) == 2, 'the boundaries should be n x 2 tensor' assert x_bounds.shape[1] == 2, 'the boundaries should be n x 2 tensor' else: assert len(x_bounds) == x0.shape[0], 'the boundaries should be for each variable' if not isinstance(x0, torch.Tensor): _x_bounds = torch.zeros(x0.shape[0], 2).double() for i, const in enumerate(x_bounds): _x_bounds[i, :] = const x_bounds = _x_bounds def c(x: torch.Tensor) -> torch.Tensor: """Returns vector of constraints at specific x""" _c = torch.zeros(m).double() for j in range(m): _c[j] = constraints[j](x) return _c def lagrangian_a(x: torch.Tensor, lam: torch.Tensor, mu: float) -> torch.Tensor: """ Returns :math:`\\mathcal{L}_a` Nocedal, J., &amp; Wright, S. J. (2006). Numerical optimization (p. 520) """ output = function(x) for j in range(m): output += -lam[j] * constraints[j](x) + mu / 2 * constraints[j](x) ** 2 return output def p_function(g: torch.Tensor, u_l_bounds: torch.Tensor): """ P(g, l, u) is the projection of the vector g :math:`\\in` IRn onto the rectangular box :math:`[l, u]` Nocedal, J., &amp; Wright, S. J. (2006). Numerical optimization (p. 520) """ # for j in range(len(g)): # if g[j] >= u_l_bounds[j][1]: # g[j] = u_l_bounds[j][1] # elif g[j] <= u_l_bounds[j][0]: # g[j] = u_l_bounds[j][0] # replaced by g = torch.minimum(torch.maximum(g, u_l_bounds[:, 0]), u_l_bounds[:, 1]) return g x_k, func_k, grad_k, history, round_precision = initialize(function, x0, epsilon, keep_history) try: function(x0) for i in range(m): constraints[i](x0) except ArithmeticError: history['message'] = 'Point out of domain' return x_k, history lambdas_k = torch.rand(m) eta = epsilon # Main tolerance for constraints omega = eta # Main tolerance for lagrange function mu_k = 10 omega_k = 1 / mu_k eta_k = 1 / mu_k ** 0.1 for i in range(max_iter): def local_min_function(x): grad_lagrangian = x - gradient(lambda y: lagrangian_a(y, lambdas_k, mu_k), x) p = p_function(grad_lagrangian, x_bounds) return (x - p).norm(2) x_k = nonlinear_cgm(local_min_function, x_k, epsilon=omega_k, max_iter=4)[0] if verbose or keep_history: func_k = function(x_k) grad_k = gradient(function, x_k) print_verbose(x_k, func_k, verbose, i + 1, round_precision) # history if keep_history: history = update_history_gd(history, values=[i + 1, func_k, grad_k.norm(2), x_k.clone()]) c_k = c(x_k) if c_k.norm(2) <= eta_k: # test for convergence if c_k.norm(2) <= eta and local_min_function(x_k) <= omega: break # update multipliers, tighten tolerances lambdas_k = lambdas_k - mu_k * c_k else: # increase penalty parameter, tighten tolerances lambdas_k = lambdas_k - mu_k * c_k mu_k = mu_k * 100 eta_k = 1 / mu_k ** 0.1 omega = 1 / mu_k return x_k, history
def branch_bound(function: Callable[[float | torch.Tensor], torch.Tensor], x0: torch.Tensor, eq_constraints: Sequence[Callable[[float | torch.Tensor], torch.Tensor]] | None = None, ineq_constraints: Sequence[Callable[[float | torch.Tensor], torch.Tensor]] | None = None, tol: float = 1e-6, return_dataframe_history: bool = False ) -> Tuple[torch.Tensor | None, dict | pd.DataFrame]: """ Returns :math:`x \\in \\mathbf{Z}^{n}`, which minimizes the function. :param function: target function. depends on torch tensor :param x0: argmin function. :math:`x0 \\in \\mathbf{R}^{n}`. torch tensor :param eq_constraints: equality constraints for function [g(x), ...] = 0 :param ineq_constraints: inequality constraint for function: [h(x), ...] >= 0 :param tol: tolerance for eq_constraint: if |eq(x)| <= tol than eq satisfied :param return_dataframe_history: return history as pandas dataframe :return: returns integer solution of minimization and history. If constraint is not satisfied return None. .. code-block:: python3 >>> def f(x): return ((x - torch.cos(x) - 1) ** 2).sum() >>> x_0 = torch.tensor([4., 5.]) >>> x_opt = gd_optimal(f, x0)[0] >>> x_int, hist = branch_bound(f, x_opt, return_dataframe_history=True) >>> print(x_int) >>> hist +---+-------------+-----------+---------------------------+ | | x | func | message | +===+=============+===========+===========================+ | 0 | (1.0, 1.0) | 0.583853 | constraints is satisfied | +---+-------------+-----------+---------------------------+ | 1 | (1.0, 2.0) | 2.297398 | constraints is satisfied | +---+-------------+-----------+---------------------------+ | 2 | (2.0, 1.0) | 2.297398 | constraints is satisfied | +---+-------------+-----------+---------------------------+ | 3 | (2.0, 2.0) | 4.010944 | constraints is satisfied | +---+-------------+-----------+---------------------------+ """ history = {'x': [], 'func': [], 'message': []} min_f = torch.inf min_x = x0.clone() def check_constraints(_x: torch.Tensor) -> [bool, str]: """ Check constraints """ flag_satisfaction = True message = 'constraints is satisfied' _x = _x.float() try: function(_x) if eq_constraints is not None: for ind, eq in enumerate(eq_constraints): assert abs(eq(_x)) <= tol, f'{ind} equation is not satisfied' if ineq_constraints is not None: for ind, ineq in enumerate(ineq_constraints): assert ineq(_x) >= 0 - tol, f'{ind} inequality is not satisfied' except Exception as e: flag_satisfaction = False message = f'constraint is not satisfied: {e}' return flag_satisfaction, message n = x0.flatten().shape[0] # amount of variables for i in range(2 ** n): x_rounded = x0.clone() round_rules = f'{i:0{n}b}' # Number at binary format with n symbols for j, rule in enumerate(round_rules): x_rounded[j] = torch.floor(x_rounded[j]) if rule == '0' else torch.ceil(x_rounded[j]) flag, mess = check_constraints(x_rounded) func = function(x_rounded) if func < min_f: min_f = func min_x = x_rounded history['x'].append(x_rounded) history['func'].append(func) history['message'].append(mess) if return_dataframe_history: history['x'] = list(map(lambda x: tuple(list(x.numpy())), history['x'])) history['func'] = list(map(lambda x: float(x), history['func'])) history = pd.DataFrame(history) return min_x, history