
A dimension error appears when trying to call SciPy's minimize function.

import numpy as np
import math
from scipy.optimize import minimize

x1=[1,2,1] ; y1=0
x2=[1,1,2] ; y2=0
x3=[2,3,3] ; y3=1
x4=[2,2,1] ; y4=1
x5=[1,2,3] ; y5=0
x6=[1,3,1] ; y6=1
x7=[1,1,1] ; y7=0
x8=[1,2,2] ; y8=0
x9=[1,2,1] ; y9=0
x10=[1,1,1] ; y10=0
x11=[2,2,2] ; y11=1
x12=[1,2,2] ; y12=0

X= np.array([x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12])
y=np.array([y1,y2,y3,y4,y5,y6,y7,y8,y9,y10,y11,y12])
n=len(y)

def h(x):
    # logistic (sigmoid) function
    return 1 / (1 + np.exp(-x))

def r(beta):
    # objective: average logistic loss over the n samples
    f = 0
    for i in range(n):
        f = f + (1 - y[i]) * np.dot(X[i], beta) + np.log(1 + np.exp(-np.dot(X[i], beta)))
    return np.array([f / n])

# gradient of r
def gradr(beta):
    f = 0
    for i in range(n):
        mu = h(np.dot(X[i], beta))
        f = f + (mu - y[i]) * X[i]
    return (f / n).reshape(3, 1)

def exactsearch(beta_0, d):
    # exact line search: pick the step alfa >= 0 that minimizes r along direction d
    phi_aux = lambda alfa: r(beta_0 + alfa * d)
    alfa_0 = np.array([1])
    bds = [(0, None)]
    res = minimize(phi_aux, alfa_0, bounds=bds)
    alfa = np.array([res.x])
    return alfa

def GradientMethod(beta, f):
    # gradient descent, with the step size chosen by the line-search function f
    N = 0
    e = 10**(-5)
    p = -gradr(beta)
    alfa = f(beta, p)
    while True:
        if r(beta) == r(beta + alfa * p): break   # no progress
        if N == 10000: break                      # iteration cap
        if alfa <= e: break                       # step too small
        else:
            N = N + 1
            beta = beta + alfa * p
            p = -gradr(beta)
            alfa = f(beta, p)
    return [beta, r(beta), N]

GradientMethod(np.array([1,1,1]),exactsearch)

X is a 12 by 3 matrix (twelve samples, three features); r is a function that takes a size-3 vector and combines it with the rows of X.
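A quick shape check (illustrative, using the definitions above) confirms this:

print(X.shape)                              # (12, 3)
print(gradr(np.array([1, 1, 1])).shape)     # (3, 1), because of the reshape in gradr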

When changing np.exp to math.exp, the error changes to TypeError: only size-1 arrays can be converted to Python scalars, since math.exp accepts only scalars while np.exp works element-wise on arrays. Also, previously I encountered ValueError: shapes (3,) and (1,3) not aligned: 3 (dim 0) != 1 (dim 0), but it went away after reshaping the output of gradr.
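A minimal sketch of that difference:

import math
import numpy as np

v = np.array([1.0, 2.0, 3.0])
print(np.exp(-v))   # works: element-wise exponential
# math.exp(-v)      # raises TypeError: only size-1 arrays can be converted to Python scalars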

I should add that I don't fully understand the exactsearch function, since it was given to me.

Full error

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-20-bb9e6dc26271> in <module>
     62     return [beta,r(beta),N]
     63 
---> 64 GradientMethod(np.array([1,1,1]),exactsearch)

<ipython-input-20-bb9e6dc26271> in GradientMethod(beta, f)
     50     e=10**(-5)
     51     p=-gradr(beta)
---> 52     alfa=f(beta,p)
     53     while True:
     54         if r(beta)==r(beta+alfa*p):break

<ipython-input-20-bb9e6dc26271> in exactsearch(beta_0, d)
     42     alfa_0=np.array([1])
     43     bds=[(0,None)]
---> 44     res =  minimize(phi_aux, alfa_0, bounds=bds)
     45     alfa=np.array([res.x])
     46     return alfa

~/anaconda3/lib/python3.8/site-packages/scipy/optimize/_minimize.py in minimize(fun, x0, args, method, jac, hess, hessp, bounds, constraints, tol, callback, options)
    615                                   **options)
    616     elif meth == 'l-bfgs-b':
--> 617         return _minimize_lbfgsb(fun, x0, args, jac, bounds,
    618                                 callback=callback, **options)
    619     elif meth == 'tnc':

~/anaconda3/lib/python3.8/site-packages/scipy/optimize/lbfgsb.py in _minimize_lbfgsb(fun, x0, args, jac, bounds, disp, maxcor, ftol, gtol, eps, maxfun, maxiter, iprint, callback, maxls, finite_diff_rel_step, **unknown_options)
    304             iprint = disp
    305 
--> 306     sf = _prepare_scalar_function(fun, x0, jac=jac, args=args, epsilon=eps,
    307                                   bounds=new_bounds,
    308                                   finite_diff_rel_step=finite_diff_rel_step)

~/anaconda3/lib/python3.8/site-packages/scipy/optimize/optimize.py in _prepare_scalar_function(fun, x0, jac, args, bounds, epsilon, finite_diff_rel_step, hess)
    259     # ScalarFunction caches. Reuse of fun(x) during grad
    260     # calculation reduces overall function evaluations.
--> 261     sf = ScalarFunction(fun, x0, args, grad, hess,
    262                         finite_diff_rel_step, bounds, epsilon=epsilon)
    263 

~/anaconda3/lib/python3.8/site-packages/scipy/optimize/_differentiable_functions.py in __init__(self, fun, x0, args, grad, hess, finite_diff_rel_step, finite_diff_bounds, epsilon)
     93 
     94         self._update_grad_impl = update_grad
---> 95         self._update_grad()
     96 
     97         # Hessian Evaluation

~/anaconda3/lib/python3.8/site-packages/scipy/optimize/_differentiable_functions.py in _update_grad(self)
    169     def _update_grad(self):
    170         if not self.g_updated:
--> 171             self._update_grad_impl()
    172             self.g_updated = True
    173 

~/anaconda3/lib/python3.8/site-packages/scipy/optimize/_differentiable_functions.py in update_grad()
     89                 self._update_fun()
     90                 self.ngev += 1
---> 91                 self.g = approx_derivative(fun_wrapped, self.x, f0=self.f,
     92                                            **finite_diff_options)
     93 

~/anaconda3/lib/python3.8/site-packages/scipy/optimize/_numdiff.py in approx_derivative(fun, x0, method, rel_step, abs_step, f0, bounds, sparsity, as_linear_operator, args, kwargs)
    386         f0 = np.atleast_1d(f0)
    387         if f0.ndim > 1:
--> 388             raise ValueError("`f0` passed has more than 1 dimension.")
    389 
    390     if np.any((x0 < lb) | (x0 > ub)):

ValueError: `f0` passed has more than 1 dimension.

1 Answer


So apparently the problem was with the dimensions of the arrays: somewhere along the way an array went from being like [1,2,3] to [[1,2,3]], i.e. its shape changed from (3,) to (1,3). I had tried to fix this by reshaping the output of gradr to (3,1), but that only made things worse: adding a (3,1) direction p to a (3,) beta broadcasts to a (3,3) matrix, so r ends up returning a 2-D array, which is exactly the "f0 passed has more than 1 dimension" that minimize complains about. Instead, the solution was to remove the reshape in gradr and to reshape every beta to (3,) before it is operated on.
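Here is a minimal, standalone sketch of the broadcasting pitfall (made-up values):

import numpy as np

beta = np.array([1.0, 1.0, 1.0])   # shape (3,), like the initial beta
p = np.ones((3, 1))                # shape (3, 1), like the reshaped gradient
print((beta + 0.5 * p).shape)      # (3, 3): NumPy broadcasts instead of raising an error

Evaluating r on that (3, 3) array then yields shape (1, 3), the 2-D f0 that minimize rejects.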

def r(beta):
    f = 0
    beta = beta.reshape(3,)   # force beta back to shape (3,) before it is used
    for i in range(n):
        f = f + (1 - y[i]) * np.dot(X[i], beta) + np.log(1 + np.exp(-np.dot(X[i], beta)))
    return np.array([f / n])

# gradient of r
def gradr(beta):
    f = 0
    beta = beta.reshape(3,)   # same fix here
    for i in range(n):
        mu = h(np.dot(X[i], beta))
        f = f + (mu - y[i]) * X[i]
    return f / n              # no reshape: keep the gradient as shape (3,)
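With those two changes, a quick sanity check (illustrative; it relies on the data and functions defined above) runs without the dimension error:

beta0 = np.array([1, 1, 1])
print(r(beta0).shape)       # (1,): 1-D, so minimize's finite differencing accepts it as f0
print(gradr(beta0).shape)   # (3,): matches beta, so beta + alfa*p keeps compatible shapes
GradientMethod(beta0, exactsearch)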