# Multi dimensional optimization algorithms for function optimization
from __future__ import annotations
from typing import Callable, Tuple, Sequence
import numpy
import pandas as pd
import torch
from .calculus import gradient, hessian, jacobian
from .one_optimize import brent
from .support import HistoryGD, update_history_gd, HiddenPrints, print_verbose
def initialize(function: Callable[[torch.Tensor], torch.Tensor],
x0: torch.Tensor,
epsilon: float = 1e-5,
keep_history: bool = False) -> Tuple:
"""
Returns initial x_k with double dtype, func_k, grad_k, round_precision, history
:param function: callable that depends on the first positional argument
:param x0: numpy ndarray which is initial approximation
:param epsilon: optimization accuracy
:param keep_history: flag of return history
:return: tuple with x_k, func_k, grad_k, history, round_precision
"""
x_k = x0.double().flatten()
func_k = function(x_k)
grad_k = gradient(function, x_k)
round_precision = -int(numpy.log10(epsilon)) # variable to determine the rounding accuracy
# if keep_history=True, we will save history. here is initial step
if keep_history:
history: HistoryGD = {
'iteration': [0],
'f_value': [func_k],
'f_grad_norm': [grad_k.norm(2)],
'x': [x_k.flatten()],
'message': ''
}
else:
history: HistoryGD = {'iteration': [], 'f_value': [], 'x': [], 'f_grad_norm': [], 'message': ''}
return x_k, func_k, grad_k, history, round_precision
[docs]def bfgs(function: Callable[[torch.Tensor], torch.Tensor],
x0: torch.Tensor,
tolerance: float = 1e-8,
max_iter: int = 500,
verbose: bool = False,
keep_history: bool = False) -> Tuple[torch.Tensor, HistoryGD]:
"""
Returns a tensor n x 1 with optimal point and history using the BFGS method [1]_
Broyden–Fletcher–Goldfarb–Shanno algorithm
The algorithm does not use Wolfe conditions. Instead of wolfe, alg uses the optimal step.
.. note::
The algorithm only works for a flat x0, and the functions should depend on a flat array
.. rubric:: References
.. [1] Wright and Nocedal, 'Numerical Optimization', 1999; pp.136-140 BFGS algorithm.
:math:`\\rule{125mm}{0.2pt} \\\\`
:param function: callable that depends on the first positional argument. Other arguments are passed through kwargs
:param x0: start minimization point
:param tolerance: criterion of stop os l2 norm(grad f) < tolerance
:param max_iter: maximum number of iterations
:param verbose: flag of printing iteration logs
:param keep_history: flag of return history
:return: tuple with point and history.
.. rubric:: Examples
.. code-block:: python3
>>> def func(x): return 10 * x[0] ** 2 + x[1] ** 2 / 5
>>> x_0 = torch.tensor([1, 2])
>>> solution = bfgs(func, x_0)
>>> print(solution[0])
tensor([3.4372e-14, 1.8208e-14], dtype=torch.float64)
"""
# initialization
x_k, func_k, grad_k, history, round_precision = initialize(function, x0, tolerance, keep_history)
h_k = torch.eye(x_k.shape[0], dtype=torch.float64) * tolerance ** 0.1
grad_k = grad_k.reshape(-1, 1)
x_k = x_k.reshape(-1, 1)
s_k = x_k + 1
# first verbose
print_verbose(x_k, func_k, verbose, 0, round_precision)
try:
for i in range(max_iter):
# stop criterion
if grad_k.norm(2) < tolerance or s_k.norm(2) < tolerance:
history['message'] = 'Searching finished. Successfully. code 0'
return x_k.reshape(-1), history
p_k = -h_k @ grad_k
with HiddenPrints():
def optimization_f(alpha: float | torch.Tensor) -> float | torch.Tensor:
return function(x_k + alpha * p_k)
alpha_k = brent(optimization_f, (0, 1))[0]
# step
x_k_plus1 = x_k + alpha_k * p_k
grad_f_k_plus1 = gradient(function, x_k_plus1, delta_x=1e-1).reshape(-1, 1)
s_k = x_k_plus1 - x_k
y_k = grad_f_k_plus1 - grad_k
h_k = calc_h_new(h_k, s_k, y_k)
grad_k = grad_f_k_plus1
x_k = x_k_plus1
func_k = function(x_k.flatten())
# check divergence
if torch.isnan(x_k).any():
history['message'] = f'The method has diverged. code 2'
return x_k.flatten(), history
# verbose
print_verbose(x_k.flatten(), func_k, verbose, i + 1, round_precision)
# history
if keep_history:
history = update_history_gd(history, values=[i + 1, func_k, grad_k.norm(2), x_k.flatten()])
else:
history['message'] = 'Searching finished. Max iterations have been reached. code 1'
except Exception as e:
history['message'] = f'Optimization failed. {e}. code 2'
return x_k.flatten(), history
def calc_h_new(h: torch.Tensor,
s: torch.Tensor,
y: torch.Tensor) -> torch.Tensor:
"""
Calculates a new approximation of the inverse Hessian matrix
:param h: The previous approximation of the H matrix
:param s: the difference x_{i+1} - x_{i}
:param y: the difference f'_{i+1} - f'_{i}
:return: The new approximation of inverse Hessian matrix
"""
ro = 1 / (y.T @ s)
i = torch.eye(h.shape[0]).double()
h_new = (i - ro * s @ y.T) @ h @ (i - ro * s @ y.T) + ro * s @ s.T
return h_new
[docs]def gd_constant(function: Callable[[torch.Tensor], torch.Tensor],
x0: torch.Tensor,
epsilon: float = 1e-5,
gamma: float = 0.1,
max_iter: int = 500,
verbose: bool = False,
keep_history: bool = False) -> Tuple[torch.Tensor, HistoryGD]:
"""
Returns a tensor n x 1 with optimal point and history using
Algorithm with constant step.
The gradient of the function shows us the direction of increasing the function.
The idea is to move in the opposite direction to :math:`\\displaystyle x_{k + 1} \\text{ where }
f(x_{k + 1}) < f(x_{k}) \\text{ .}`
But, if we add a gradient to :math:`\\displaystyle x_{k}` without changes, our method will often diverge.
So we need to add a gradient with some weight :math:`\\displaystyle \\gamma \\text{ .}\\\\`
:param function: callable that depends on the first positional argument
:param x0: Torch tensor which is initial approximation
:param epsilon: optimization accuracy
:param gamma: gradient step
:param max_iter: maximum number of iterations
:param verbose: flag of printing iteration logs
:param keep_history: flag of return history
:return: tuple with point and history.
.. rubric:: Examples
.. code-block:: python3
>>> def func(x): return x[0] ** 2 + x[1] ** 2
>>> x_0 = torch.tensor([1, 2])
>>> solution = gd_constant(func, x_0)
>>> print(solution[0])
tensor([1.9156e-06, 3.8312e-06], dtype=torch.float64)
"""
# initialization
x_k, func_k, grad_k, history, round_precision = initialize(function, x0, epsilon, keep_history)
# first verbose
print_verbose(x_k, func_k, verbose, 0, round_precision)
try:
for i in range(max_iter - 1):
if grad_k.norm(2) < epsilon: # comparing of norm 2 with optimization accuracy
history['message'] = 'Optimization terminated successfully. code 0'
break
else:
x_k = x_k - gamma * grad_k # updating the point for next iter and repeat
grad_k = gradient(function, x_k)
func_k = function(x_k)
# verbose
print_verbose(x_k, func_k, verbose, i + 1, round_precision)
# history
if keep_history:
history = update_history_gd(history, values=[i + 1, func_k, (grad_k ** 2).sum() ** 0.5, x_k])
else:
history['message'] = 'Optimization terminated. Max steps. code 1'
except Exception as e:
history['message'] = f'Optimization failed. {e}. code 2'
return x_k, history
[docs]def gd_frac(function: Callable[[torch.Tensor], torch.Tensor],
x0: torch.Tensor,
epsilon: float = 1e-5,
gamma: float = 0.1,
delta: float = 0.1,
lambda0: float = 0.1,
max_iter: int = 500,
verbose: bool = False,
keep_history: bool = False) -> Tuple[torch.Tensor, HistoryGD]:
"""
Returns a tensor n x 1 with optimal point and history using
Algorithm with fractional step.
Requirements: :math:`\\ 0 < \\lambda_0 < 1` is the step multiplier, :math:`0 < \\delta < 1` influence on step size.
:param function: callable that depends on the first positional argument
:param x0: Torch tensor which is initial approximation
:param epsilon: optimization accuracy
:param gamma: gradient step
:param delta: value of the crushing parameter
:param lambda0: initial step
:param max_iter: maximum number of iterations
:param verbose: flag of printing iteration logs
:param keep_history: flag of return history
:return: tuple with point and history.
.. rubric:: Examples
.. code-block:: python3
>>> def func(x): return x[0] ** 2 + x[1] ** 2
>>> x_0 = torch.tensor([1, 2])
>>> solution = gd_frac(func, x_0)
>>> print(solution[0])
tensor([1.9156e-06, 3.8312e-06], dtype=torch.float64)
"""
# initialization
x_k, func_k, grad_k, history, round_precision = initialize(function, x0, epsilon, keep_history)
# first verbose
print_verbose(x_k, func_k, verbose, 0, round_precision)
try:
for i in range(max_iter - 1):
# point for first comparison, first gradient step
t = x_k - gamma * grad_k
func_t = function(t)
# will divide the gradient step until condition is met
while not func_t - func_k <= - gamma * delta * sum(grad_k ** 2):
gamma = gamma * lambda0
t = x_k - gamma * grad_k
func_t = function(t)
x_k = t
func_k = func_t
grad_k = gradient(function, x_k)
# comparing of norm 2 with optimization accuracy
if grad_k.norm(2) < epsilon:
history['message'] = 'Optimization terminated successfully. code 0'
break
# verbose
print_verbose(x_k, func_k, verbose, i + 1, round_precision)
# history
if keep_history:
history = update_history_gd(history, values=[i + 1, func_k, (grad_k ** 2).sum() ** 0.5, x_k])
else:
history['message'] = 'Optimization terminated. Max steps. code 1'
except Exception as e:
history['message'] = f'Optimization failed. {e}. code 2'
return x_k, history
[docs]def gd_optimal(function: Callable[[torch.Tensor], torch.Tensor],
x0: torch.Tensor,
epsilon: float = 1e-5,
max_iter: int = 500,
verbose: bool = False,
keep_history: bool = False) -> Tuple[torch.Tensor, HistoryGD]:
"""
Returns a tensor n x 1 with optimal point and history using
Algorithm with optimal step.
The idea is to choose a gamma that minimizes the function in the direction :math:`\\ f'(x_k)`
:param function: callable that depends on the first positional argument
:param x0: Torch tensor which is initial approximation
:param epsilon: optimization accuracy
:param max_iter: maximum number of iterations
:param verbose: flag of printing iteration logs
:param keep_history: flag of return history
:return: tuple with point and history.
.. rubric:: Examples
.. code-block:: python3
>>> def func(x): return -torch.exp(- x[0] ** 2 - x[1] ** 2)
>>> x_0 = torch.tensor([1, 2])
>>> solution = gd_optimal(func, x_0)
>>> print(solution[0])
tensor([9.2070e-08, 1.8405e-07], dtype=torch.float64)
"""
# initialization
x_k, func_k, grad_k, history, round_precision = initialize(function, x0, epsilon, keep_history)
# first verbose
print_verbose(x_k, func_k, verbose, 0, round_precision)
try:
for i in range(max_iter - 1):
# comparing of norm 2 with optimization accuracy
if grad_k.norm(2) < epsilon:
history['message'] = 'Optimization terminated successfully. code 0'
break
with HiddenPrints(): # hiding the prints of the results of the brent algorithm
def optimization_f(gam: float | torch.Tensor) -> float | torch.Tensor:
return function(x_k - gam * grad_k)
gamma = brent(optimization_f, (0, 1))[0]
x_k = x_k - gamma * grad_k
grad_k = gradient(function, x_k)
func_k = function(x_k)
# verbose
print_verbose(x_k, func_k, verbose, i + 1, round_precision)
# history
if keep_history:
history = update_history_gd(history, values=[i + 1, func_k, (grad_k ** 2).sum() ** 0.5, x_k])
else:
history['message'] = 'Optimization terminated. Max steps. code 1'
except Exception as e:
history['message'] = f'Optimization failed. {e}. code 2'
return x_k, history
[docs]def nonlinear_cgm(function: Callable[[torch.Tensor], torch.Tensor],
x0: torch.Tensor,
epsilon: float = 1e-5,
max_iter: int = 500,
verbose: bool = False,
keep_history: bool = False) -> Tuple[torch.Tensor, HistoryGD]:
"""
Returns a tensor n x 1 with optimal point and history.
Algorithm works when the function is approximately quadratic near the minimum, which is the case when the
function is twice differentiable at the minimum and the second derivative is non-singular there [1]_
.. rubric:: References
.. [1] Nocedal, J., & Wright, S. J. (2006). 5.2 NONLINEAR CONJUGATE GRADIENT METHOD
In Numerical optimization (pp. 121). essay, Springer.
:param function: callable that depends on the first positional argument
:param x0: Torch tensor which is initial approximation
:param epsilon: optimization accuracy
:param max_iter: maximum number of iterations
:param verbose: flag of printing iteration logs
:param keep_history: flag of return history
:return: tuple with point and history.
.. rubric:: Examples
Example for :math:`f(x, y) = (x + 0.5)^2 + (y - 0.5)^2, \\quad x = 1`
.. code-block:: python3
>>> def func(x): return 10 * x[0] ** 2 + x[1] ** 2 / 5
>>> x_0 = torch.tensor([1, 2])
>>> solution = nonlinear_cgm(func, x_0)
>>> print(solution[0])
tensor([6.9846e+25, 4.2454e+26], dtype=torch.float64)
"""
# initialization
x_k, func_k, grad_k, history, round_precision = initialize(function, x0, epsilon, keep_history)
p_k = grad_k
# first verbose
print_verbose(x_k, func_k, verbose, 0, round_precision)
try:
for i in range(max_iter - 1):
if grad_k.norm(2) < epsilon:
history['message'] = 'Optimization terminated successfully. code 0'
break
else:
with HiddenPrints():
def optimization_f(gam: float | torch.Tensor) -> float | torch.Tensor:
return function(x_k - gam * grad_k)
gamma = brent(optimization_f, (0, 1))[0]
x_k = x_k - gamma * p_k
grad_k_new = gradient(function, x_k)
beta_fr = (grad_k_new @ grad_k_new.reshape(-1, 1)) / (grad_k @ grad_k.reshape(-1, 1))
p_k = grad_k_new + beta_fr * p_k
grad_k = grad_k_new
func_k = function(x_k)
# verbose
print_verbose(x_k, func_k, verbose, i + 1, round_precision)
# history
if keep_history:
history = update_history_gd(history, values=[i + 1, func_k, grad_k.norm(2), x_k])
else:
history['message'] = 'Optimization terminated. Max steps. code 1'
except Exception as e:
history['message'] = f'Optimization failed. {e}. code 2'
return x_k, history
def log_barrier_function(function: Callable[[torch.Tensor], torch.Tensor],
x0: torch.Tensor,
mu: float,
inequality_constraints: Sequence[Callable[[torch.Tensor], torch.Tensor]]
) -> torch.Tensor:
"""
Support function. Compute log-barrier function
.. math::
P(x, \\mu) = f(x) - \\mu \\sum_{i\\in\\mathcal{I}}\\ln c_i(x)
:param function: callable that depends on the first positional argument
:param x0: some specific point x(Torch tensor)
:param mu: parameter weighing constraints
:param inequality_constraints: :math:`\\mathcal{I}` is set of inequality functions
:return: tuple with point and history.
"""
m = len(inequality_constraints) # Amount of inequality constraints
output_lb = function(x0)
for j in range(m):
const_function = inequality_constraints[j](x0)
if 0 <= const_function < 1e-8:
output_lb += mu * 0
elif const_function > 1e-8:
output_lb -= mu * torch.log(const_function)
else:
output_lb += 10 ** 10
return output_lb
[docs]def primal_dual_interior(function: Callable[[torch.Tensor], torch.Tensor],
x0: torch.Tensor,
inequality_constraints: Sequence[Callable[[torch.Tensor], torch.Tensor]],
mu: float = 1e-4,
epsilon: float = 1e-12,
alpha: float = 1e-1,
max_iter: int = 200,
verbose: bool = False,
keep_history: bool = False) -> Tuple[torch.Tensor, HistoryGD]:
"""
Returns point and history of minimization. [1]_
AIM: minimize :math:`\\ f(x)` subject to :math:`\\ c(x) \\geqslant 0`; c from inequality_constraints
:math:`\\ B(x,\\mu) = f(x) - \\mu \\sum_{i=1}^m \\log(c_i(x)) \\rightarrow \\min`
Here :math:`\\mu` is a small positive scalar, :math:`\\mu \\rightarrow 0`
:param function: callable that depends on the first positional argument
:param x0: some specific point x(Torch tensor)
:param inequality_constraints: :math:`\\mathcal{I}` is set of inequality functions
:param mu: is a small positive scalar, sometimes called the "barrier parameter"
:param epsilon: optimization accuracy
:param alpha: step length
:param max_iter: maximum number of iterations
:param verbose: flag of printing iteration logs
:param keep_history: flag of return history
:return: tuple with point and history.
:raises ArithmeticError: if x0 is not in trust region.
.. rubric:: Reference
.. [1] https://en.wikipedia.org/wiki/Interior-point_method
.. rubric:: Examples
.. code-block:: python3
>>> primal_dual_interior(lambda x: (x[0] + 0.5) ** 2 + (x[1] - 0.5) ** 2, torch.tensor([0.9, 0.1]),
>>> [lambda x: x[0], lambda x: 1 - x[0], lambda x: x[1], lambda x: 1 - x[1]])[0]
tensor([1.9910e-04, 5.0000e-01], dtype=torch.float64)
"""
# initialization
def lb_function(x: torch.Tensor, _mu: float = mu) -> torch.Tensor | float:
""" log barrier function -- main function. therefore, instead of a function, we use lb_function """
return log_barrier_function(function, x, _mu, inequality_constraints)
x_k, func_k, grad_k, history, round_precision = initialize(function, x0, epsilon, keep_history)
m = len(inequality_constraints) # Amount of inequality constraints
n = x_k.shape[0] # Amount of variables
p = torch.ones(m + n) # init of p
lambdas = torch.rand(m, dtype=torch.float64)
try:
function(x0)
for i in range(m):
if inequality_constraints[i](x0) < 0:
raise ArithmeticError
except ArithmeticError:
history['message'] = 'Point out of domain'
return x_k, history
# first verbose
print_verbose(x_k, func_k, verbose, 0, round_precision)
try:
for i in range(max_iter):
# terminate condition
if p[:n].norm(2) < epsilon:
history['message'] = 'Optimization terminated successfully. code 0'
break
w = hessian(lb_function, x_k, 1e-4) # hessian of lb function
a = jacobian(inequality_constraints, x_k)
c = torch.diag(torch.tensor([c(x_k) for c in inequality_constraints])).double()
left_matrix = torch.zeros(n + m, n + m, dtype=torch.float64) # left matrix equation
left_matrix[:n, :n] = w
left_matrix[:n, n:] = -a.T
left_matrix[n:, :n] = torch.diag(lambdas) @ a
left_matrix[n:, n:] = c
right_matrix = torch.zeros(n + m, 1, dtype=torch.float64) # right matrix
# - g + A.T @ lambdas
right_matrix[:n] = - gradient(lb_function, x_k).reshape(-1, 1) + a.T @ lambdas.reshape(-1, 1)
# mu 1 - c @ lambdas
right_matrix[n:] = mu * torch.ones(m, 1) - c @ lambdas.reshape(-1, 1)
try: # check singularity
p = torch.linalg.solve(left_matrix, right_matrix.flatten()) # direction
except torch.linalg.LinAlgError:
left_matrix[:n, :n] += 1e-2 * torch.eye(n)
left_matrix[n:, n:] += 1e-4 * torch.eye(m)
p = torch.linalg.solve(left_matrix, right_matrix.flatten()) # direction
x_k += alpha * p[:n]
lambdas += alpha * p[n:]
# verbose
if verbose or keep_history:
func_k = function(x_k)
print_verbose(x_k, func_k, verbose, i + 1, round_precision)
# history
if keep_history:
history = update_history_gd(history, values=[i + 1, func_k, p[:n].norm(2), x_k.clone()])
mu *= 0.99
else:
history['message'] = 'Optimization terminated. Max steps. code 1'
except Exception as e:
history['message'] = f'Optimization failed. {e}. code 2'
return x_k, history
[docs]def log_barrier_solver(function: Callable[[torch.Tensor], torch.Tensor],
x0: torch.Tensor,
inequality_constraints: Sequence[Callable[[torch.Tensor], torch.Tensor]],
epsilon: float = 1e-5,
max_iter: int = 1000,
keep_history: bool = False,
verbose: bool = False) -> Tuple[torch.Tensor, HistoryGD]:
"""
Returns optimal point of optimization with inequality constraints by Log Barrier method [1]_
.. rubric:: References
.. [1] Nocedal, J., & Wright, S. J. (2006). 19.6 THE PRIMAL LOG-BARRIER METHOD.
In Numerical optimization (pp. 583–584). essay, Springer.
:math:`\\rule{125mm}{0.2pt} \\\\`
:param function: callable that depends on the first positional argument
:param x0: some specific point x(Torch tensor)
:param epsilon: optimization accuracy
:param inequality_constraints: :math:`\\mathcal{I}` is set of inequality functions
:param max_iter: maximum number of iterations
:param keep_history: flag of return history
:param verbose: flag of printing iteration logs
:return: tuple with point and history.
.. rubric:: Examples
Example for :math:`f(x, y) = (x + 0.5)^2 + (y - 0.5)^2, \\quad 0 \\le x \\le 1, 0 \\le y \\le 1`
.. code-block:: python3
>>> log_barrier_solver(lambda x: (x[0] + 0.5) ** 2 + (x[1] - 0.5) ** 2, torch.tensor([0.9, 0.1]),
>>> [lambda x: x[0], lambda x: 1 - x[0], lambda x: x[1], lambda x: 1 - x[1]])
tensor([0.0032, 0.5000], dtype=torch.float64)
"""
m = len(inequality_constraints) # Amount of inequality constraints
x_k, func_k, grad_k, history, round_precision = initialize(function, x0, epsilon, keep_history)
try:
function(x0)
for i in range(m):
if inequality_constraints[i](x0) < 0:
raise ArithmeticError
except ArithmeticError:
history['message'] = 'Point out of domain'
return x_k, history
tau = 1 # The tau sequence will be geometric
try:
for i in range(max_iter):
mu_k = tau ** 0.5
x_k, history_step = gd_frac(lambda x: log_barrier_function(function, x, mu_k, inequality_constraints),
x_k, gamma=mu_k, epsilon=tau, keep_history=True, max_iter=5)
tau *= 0.9
if tau <= epsilon:
history['message'] = 'Optimization terminated successfully. code 0'
break
# verbose
if verbose or keep_history:
func_k = function(x_k)
grad_k = gradient(function, x_k)
print_verbose(x_k, func_k, verbose, i + 1, round_precision)
# history
if keep_history:
history = update_history_gd(history, values=[i + 1, func_k, grad_k.norm(2), x_k.clone()])
except Exception as e:
history['message'] = f'Optimization failed. {e}. code 2'
return x_k, history
[docs]def constrained_lagrangian_solver(function: Callable[[float | torch.Tensor], torch.Tensor],
x0: torch.Tensor,
constraints: Sequence[Callable[[float | torch.Tensor], torch.Tensor]],
x_bounds: Sequence[Tuple[float, float]] | None | torch.Tensor = None,
epsilon: float = 1e-4,
max_iter: int = 250,
keep_history: bool = False,
verbose: bool = False) -> Tuple[torch.Tensor, HistoryGD]:
"""
Returns a tensor n x 1 with optimal point and history of minimization by newton_eq_const.
Alias of ''Newton’s method under equality constrains'' [1]_
Example for :math:`f(x, y) = (x + 0.5)^2 + (y - 0.5)^2, \\quad x = 1`
.. rubric:: References
.. [1] Nocedal, J., & Wright, S. J. (2006). 17.4 PRACTICAL AUGMENTED LAGRANGIAN METHODS.
In Numerical optimization (pp. 519–521). essay, Springer.
:math:`\\rule{125mm}{0.2pt} \\\\`
:param function: callable that depends on the first positional argument
:param x0: some specific point x(Torch tensor)
:param constraints: list of equality constraints
:param x_bounds: bounds on x. e.g. 0 <= x[i] <= 1, then x_bounds[i] = (0, 1)
:param epsilon: optimization accuracy
:param max_iter: maximum number of iterations
:param keep_history: flag of return history
:param verbose: flag of printing iteration logs
:return: tuple with point and history.
.. rubric:: Examples
.. code-block:: python3
>>> constrained_lagrangian_solver(lambda x: (x[0] + 0.5) ** 2 + (x[1] - 0.5) ** 2,
>>> torch.tensor([0.1, 0.1]),[lambda x: x[0] - 1]))
tensor([1.0540, 0.5000], dtype=torch.float64)
"""
m = len(constraints)
if x_bounds is None:
x_bounds = torch.zeros(len(x0), 2).double()
for i in range(len(x0)):
x_bounds[i, :] = torch.tensor([-torch.inf, torch.inf])
elif isinstance(x0, torch.Tensor):
assert x0.shape[0] == x_bounds.shape[0], 'the boundaries should be n x 2 tensor'
assert len(x_bounds.shape) == 2, 'the boundaries should be n x 2 tensor'
assert x_bounds.shape[1] == 2, 'the boundaries should be n x 2 tensor'
else:
assert len(x_bounds) == x0.shape[0], 'the boundaries should be for each variable'
if not isinstance(x0, torch.Tensor):
_x_bounds = torch.zeros(x0.shape[0], 2).double()
for i, const in enumerate(x_bounds):
_x_bounds[i, :] = const
x_bounds = _x_bounds
def c(x: torch.Tensor) -> torch.Tensor:
"""Returns vector of constraints at specific x"""
_c = torch.zeros(m).double()
for j in range(m):
_c[j] = constraints[j](x)
return _c
def lagrangian_a(x: torch.Tensor, lam: torch.Tensor, mu: float) -> torch.Tensor:
"""
Returns :math:`\\mathcal{L}_a`
Nocedal, J., & Wright, S. J. (2006). Numerical optimization (p. 520)
"""
output = function(x)
for j in range(m):
output += -lam[j] * constraints[j](x) + mu / 2 * constraints[j](x) ** 2
return output
def p_function(g: torch.Tensor, u_l_bounds: torch.Tensor):
"""
P(g, l, u) is the projection of the vector g :math:`\\in` IRn onto the rectangular box :math:`[l, u]`
Nocedal, J., & Wright, S. J. (2006). Numerical optimization (p. 520)
"""
# for j in range(len(g)):
# if g[j] >= u_l_bounds[j][1]:
# g[j] = u_l_bounds[j][1]
# elif g[j] <= u_l_bounds[j][0]:
# g[j] = u_l_bounds[j][0]
# replaced by
g = torch.minimum(torch.maximum(g, u_l_bounds[:, 0]), u_l_bounds[:, 1])
return g
x_k, func_k, grad_k, history, round_precision = initialize(function, x0, epsilon, keep_history)
try:
function(x0)
for i in range(m):
constraints[i](x0)
except ArithmeticError:
history['message'] = 'Point out of domain'
return x_k, history
lambdas_k = torch.rand(m)
eta = epsilon # Main tolerance for constraints
omega = eta # Main tolerance for lagrange function
mu_k = 10
omega_k = 1 / mu_k
eta_k = 1 / mu_k ** 0.1
for i in range(max_iter):
def local_min_function(x):
grad_lagrangian = x - gradient(lambda y: lagrangian_a(y, lambdas_k, mu_k), x)
p = p_function(grad_lagrangian, x_bounds)
return (x - p).norm(2)
x_k = nonlinear_cgm(local_min_function, x_k, epsilon=omega_k, max_iter=4)[0]
if verbose or keep_history:
func_k = function(x_k)
grad_k = gradient(function, x_k)
print_verbose(x_k, func_k, verbose, i + 1, round_precision)
# history
if keep_history:
history = update_history_gd(history, values=[i + 1, func_k, grad_k.norm(2), x_k.clone()])
c_k = c(x_k)
if c_k.norm(2) <= eta_k:
# test for convergence
if c_k.norm(2) <= eta and local_min_function(x_k) <= omega:
break
# update multipliers, tighten tolerances
lambdas_k = lambdas_k - mu_k * c_k
else:
# increase penalty parameter, tighten tolerances
lambdas_k = lambdas_k - mu_k * c_k
mu_k = mu_k * 100
eta_k = 1 / mu_k ** 0.1
omega = 1 / mu_k
return x_k, history
def branch_bound(function: Callable[[float | torch.Tensor], torch.Tensor],
x0: torch.Tensor,
eq_constraints: Sequence[Callable[[float | torch.Tensor], torch.Tensor]] | None = None,
ineq_constraints: Sequence[Callable[[float | torch.Tensor], torch.Tensor]] | None = None,
tol: float = 1e-6,
return_dataframe_history: bool = False
) -> Tuple[torch.Tensor | None, dict | pd.DataFrame]:
"""
Returns :math:`x \\in \\mathbf{Z}^{n}`, which minimizes the function.
:param function: target function. depends on torch tensor
:param x0: argmin function. :math:`x0 \\in \\mathbf{R}^{n}`. torch tensor
:param eq_constraints: equality constraints for function [g(x), ...] = 0
:param ineq_constraints: inequality constraint for function: [h(x), ...] >= 0
:param tol: tolerance for eq_constraint: if |eq(x)| <= tol than eq satisfied
:param return_dataframe_history: return history as pandas dataframe
:return: returns integer solution of minimization and history. If constraint is not satisfied return None.
.. code-block:: python3
>>> def f(x): return ((x - torch.cos(x) - 1) ** 2).sum()
>>> x_0 = torch.tensor([4., 5.])
>>> x_opt = gd_optimal(f, x0)[0]
>>> x_int, hist = branch_bound(f, x_opt, return_dataframe_history=True)
>>> print(x_int)
>>> hist
+---+-------------+-----------+---------------------------+
| | x | func | message |
+===+=============+===========+===========================+
| 0 | (1.0, 1.0) | 0.583853 | constraints is satisfied |
+---+-------------+-----------+---------------------------+
| 1 | (1.0, 2.0) | 2.297398 | constraints is satisfied |
+---+-------------+-----------+---------------------------+
| 2 | (2.0, 1.0) | 2.297398 | constraints is satisfied |
+---+-------------+-----------+---------------------------+
| 3 | (2.0, 2.0) | 4.010944 | constraints is satisfied |
+---+-------------+-----------+---------------------------+
"""
history = {'x': [], 'func': [], 'message': []}
min_f = torch.inf
min_x = x0.clone()
def check_constraints(_x: torch.Tensor) -> [bool, str]:
""" Check constraints """
flag_satisfaction = True
message = 'constraints is satisfied'
_x = _x.float()
try:
function(_x)
if eq_constraints is not None:
for ind, eq in enumerate(eq_constraints):
assert abs(eq(_x)) <= tol, f'{ind} equation is not satisfied'
if ineq_constraints is not None:
for ind, ineq in enumerate(ineq_constraints):
assert ineq(_x) >= 0 - tol, f'{ind} inequality is not satisfied'
except Exception as e:
flag_satisfaction = False
message = f'constraint is not satisfied: {e}'
return flag_satisfaction, message
n = x0.flatten().shape[0] # amount of variables
for i in range(2 ** n):
x_rounded = x0.clone()
round_rules = f'{i:0{n}b}' # Number at binary format with n symbols
for j, rule in enumerate(round_rules):
x_rounded[j] = torch.floor(x_rounded[j]) if rule == '0' else torch.ceil(x_rounded[j])
flag, mess = check_constraints(x_rounded)
func = function(x_rounded)
if func < min_f:
min_f = func
min_x = x_rounded
history['x'].append(x_rounded)
history['func'].append(func)
history['message'].append(mess)
if return_dataframe_history:
history['x'] = list(map(lambda x: tuple(list(x.numpy())), history['x']))
history['func'] = list(map(lambda x: float(x), history['func']))
history = pd.DataFrame(history)
return min_x, history