I want to run the GRAM algorithm from MKLpy on a list of dataframes mrna_df
, methylation_df
, and protein_df
. MKLpy uses cvxopt
under the hood.
from . import Solution, Cache, TwoStepMKL
from ..arrange import average, summation
from ..utils.misc import uniform_vector
from sklearn.svm import SVC
from cvxopt import matrix, spdiag, solvers
import numpy as np
import time,sys
import torch
from ..metrics import ratio
def opt_radius(K, init_sol=None):
n = K.shape[0]
K = matrix(K.numpy())
P = 2 * K
p = -matrix([K[i,i] for i in range(n)])
G = -spdiag([1.0] * n)
h = matrix([0.0] * n)
A = matrix([1.0] * n).T
b = matrix([1.0])
solvers.options['show_progress']=False
sol = solvers.qp(P,p,G,h,A,b,initvals=init_sol)
radius2 = (-p.T * sol['x'])[0] - (sol['x'].T * K * sol['x'])[0]
return sol, radius2
def opt_margin(K, YY, init_sol=None):
'''optimized margin evaluation'''
n = K.shape[0]
P = 2 * (YY * matrix(K.numpy()) * YY)
p = matrix([0.0]*n)
G = -spdiag([1.0]*n)
h = matrix([0.0]*n)
A = matrix([[1.0 if YY[i,i]==+1 else 0 for i in range(n)],
[1.0 if YY[j,j]==-1 else 0 for j in range(n)]]).T
b = matrix([[1.0],[1.0]],(2,1))
solvers.options['show_progress']=False
sol = solvers.qp(P,p,G,h,A,b,initvals=init_sol)
margin2 = sol['primal objective']
return sol, margin2
class GRAM(TwoStepMKL):
direction = 'min'
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.func_form = summation
def get_params(self, deep=True):
return super().get_params()
def initialize_optimization(self):
YY = spdiag([1 if y==self.classes_[1] else -1 for y in self.Y])
Y = torch.tensor([1 if y==self.classes_[1] else -1 for y in self.Y])
weights = uniform_vector(self.n_kernels)
ker_matrix = self.func_form(self.KL, weights)
alpha,r2 = opt_radius(ker_matrix)
gamma,m2 = opt_margin(ker_matrix, YY)
obj = (r2 / m2) / len(self.Y)
#caching
self.cache.YY = YY
self.cache.Y = Y
self.cache.alpha = alpha
self.cache.gamma = gamma
dual_coef = torch.Tensor(np.array(gamma['x'])).double().T[0]
bias = 0.5 * (dual_coef @ ker_matrix @ (dual_coef.T * Y)).item()
return Solution(
weights=weights,
objective=obj,
ker_matrix=ker_matrix,
dual_coef = dual_coef,
bias = bias,
)
def do_step(self):
YY = self.cache.YY
alpha = self.cache.alpha
gamma = self.cache.gamma
beta = np.log(self.solution.weights)
beta = self._update_grad(self.solution.ker_matrix, YY, beta, alpha, gamma)
w = np.exp(beta)
w /= w.sum()
ker_matrix = self.func_form(self.KL, w)
try :
# try incremental radius/margin optimization
new_alpha,r2 = opt_radius(ker_matrix ,init_sol=alpha)
new_gamma,m2 = opt_margin(ker_matrix,YY,init_sol=gamma)
except :
new_alpha,r2 = opt_radius(ker_matrix )
new_gamma,m2 = opt_margin(ker_matrix,YY)
obj = (r2 / m2) / len(self.Y)
self.cache.alpha = new_alpha
self.cache.gamma = new_gamma
ker_matrix = self.func_form(self.KL, w)
dual_coef = torch.Tensor(np.array(new_gamma['x'])).double().T[0]
yg = dual_coef.T * self.cache.Y
bias = 0.5 * (dual_coef @ ker_matrix @ yg).item()
return Solution(
weights=w,
objective=obj,
ker_matrix=ker_matrix,
dual_coef = dual_coef,
bias = bias,
)
def _update_grad(self,Kc, YY, beta, alpha, gamma):
n = self.n_kernels
gammaY = gamma['x'].T * YY
gammaY = torch.DoubleTensor(np.array(gammaY))
gamma = torch.DoubleTensor(np.array(gamma['x']))
alpha = torch.DoubleTensor(np.array(alpha['x']))
eb = torch.exp(torch.tensor(beta))
a,b = [], []
for K in self.KL: #optimized for generators
#K = matrix(K.numpy().astype(np.double))
#a.append( 1.-(alpha['x'].T*matrix(K)*alpha['x'])[0] )
#b.append( (gammaY*matrix(K)*gammaY.T)[0] )
a.append(1. - (alpha.T @ K @ alpha).item())
b.append( (gammaY @ K @ gammaY.T).item() )
a, b = torch.tensor(a, dtype=torch.double), torch.tensor(b, dtype=torch.double)
ebb, eba = (eb @ b).item(), (eb @ a).item()
den = ((eb @ b)**2).item()
num = [eb[r] * (a[r]*ebb - b[r]*eba) for r in range(n)]
new_beta = np.array([beta[k] - self.learning_rate * (num[k]/den) for k in range(n)])
return new_beta
Apply GRAM algorithm:
from MKLpy.callbacks import EarlyStopping, Monitor
from MKLpy.scheduler import ReduceOnWorsening
from MKLpy.model_selection import train_test_split
from MKLpy.metrics import pairwise
from MKLpy.generators import Multiview_generator
from MKLpy.algorithms import GRAM
train_ratio = 0.75
validation_ratio = 0.15
test_ratio = 0.10
K = Multiview_generator([X_mrna, X_meth, X_protein], kernel=pairwise.linear_kernel)
KLtr, KLte, Ytr, Yte = train_test_split(K, y, test_size=1 - train_ratio)
KLtr, KLva, Ytr, Yva = train_test_split(KLte, Yte, test_size=test_ratio/(test_ratio + validation_ratio))
monitor = Monitor()
scheduler = ReduceOnWorsening()
earlystop = EarlyStopping(KLva, Yva,patience=5,cooldown=1,metric='roc_auc',)
mkl_gram = GRAM(
max_iter=1000,
learning_rate=.01,
callbacks=[earlystop, monitor],
scheduler=scheduler).fit(KLtr, Ytr)
Traceback:
---------------------------------------------------------------------------
ArithmeticError Traceback (most recent call last)
File ~/.local/lib/python3.9/site-packages/cvxopt/misc.py:1429, in kkt_chol2.<locals>.factor(W, H, Df)
1428 if type(F['S']) is matrix:
-> 1429 lapack.potrf(F['S'])
1430 else:
ArithmeticError: 2
During handling of the above exception, another exception occurred:
ArithmeticError Traceback (most recent call last)
File ~/.local/lib/python3.9/site-packages/cvxopt/coneprog.py:2065, in coneqp(P, q, G, h, dims, A, b, initvals, kktsolver, xnewcopy, xdot, xaxpy, xscal, ynewcopy, ydot, yaxpy, yscal, **kwargs)
2064 for rti in W['rti']: rti[::rti.size[0]+1 ] = 1.0
-> 2065 try: f = kktsolver(W)
2066 except ArithmeticError:
File ~/.local/lib/python3.9/site-packages/cvxopt/coneprog.py:1981, in coneqp.<locals>.kktsolver(W)
1980 def kktsolver(W):
-> 1981 return factor(W, P)
File ~/.local/lib/python3.9/site-packages/cvxopt/misc.py:1444, in kkt_chol2.<locals>.factor(W, H, Df)
1443 if type(F['S']) is matrix:
-> 1444 lapack.potrf(F['S'])
1445 else:
ArithmeticError: 2
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
Input In [31], in <cell line: 1>()
----> 1 mkl_gram = GRAM(
2 max_iter=1000,
3 learning_rate=.01,
4 callbacks=[earlystop, monitor],
5 scheduler=scheduler).fit(KLtr, Ytr)
File ~/.local/lib/python3.9/site-packages/MKLpy/algorithms/base.py:91, in MKL.fit(self, KL, Y)
89 self.solution = self.clf.solution
90 else :
---> 91 self._fit() # fit the model
93 self.is_fitted = True
94 return self
File ~/.local/lib/python3.9/site-packages/MKLpy/algorithms/base.py:98, in MKL._fit(self)
97 def _fit(self):
---> 98 self.solution = self._combine_kernels() # call combine_kernels without re-preprocess
99 if self.learner:
100 self.learner.fit(self.solution.ker_matrix, self.Y)
File ~/.local/lib/python3.9/site-packages/MKLpy/algorithms/base.py:217, in TwoStepMKL._combine_kernels(self)
214 def _combine_kernels(self):
216 self.learning_rate = self.initial_lr
--> 217 self.solution = self.initialize_optimization()
218 for callback in self.callbacks: callback.on_train_begin()
219 self.convergence = False
File ~/.local/lib/python3.9/site-packages/MKLpy/algorithms/GRAM.py:77, in GRAM.initialize_optimization(self)
75 weights = uniform_vector(self.n_kernels)
76 ker_matrix = self.func_form(self.KL, weights)
---> 77 alpha,r2 = opt_radius(ker_matrix)
78 gamma,m2 = opt_margin(ker_matrix, YY)
79 obj = (r2 / m2) / len(self.Y)
File ~/.local/lib/python3.9/site-packages/MKLpy/algorithms/GRAM.py:35, in opt_radius(K, init_sol)
33 b = matrix([1.0])
34 solvers.options['show_progress']=False
---> 35 sol = solvers.qp(P,p,G,h,A,b,initvals=init_sol)
36 radius2 = (-p.T * sol['x'])[0] - (sol['x'].T * K * sol['x'])[0]
37 return sol, radius2
File ~/.local/lib/python3.9/site-packages/cvxopt/coneprog.py:4485, in qp(P, q, G, h, A, b, solver, kktsolver, initvals, **kwargs)
4475 pinfres, dinfres = None, None
4477 return {'status': status, 'x': x, 's': s, 'y': y, 'z': z,
4478 'primal objective': pcost, 'dual objective': dcost,
4479 'gap': gap, 'relative gap': relgap,
(...)
4482 'residual as primal infeasibility certificate': pinfres,
4483 'residual as dual infeasibility certificate': dinfres}
-> 4485 return coneqp(P, q, G, h, None, A, b, initvals, kktsolver = kktsolver, options = options)
File ~/.local/lib/python3.9/site-packages/cvxopt/coneprog.py:2067, in coneqp(P, q, G, h, dims, A, b, initvals, kktsolver, xnewcopy, xdot, xaxpy, xscal, ynewcopy, ydot, yaxpy, yscal, **kwargs)
2065 try: f = kktsolver(W)
2066 except ArithmeticError:
-> 2067 raise ValueError("Rank(A) < p or Rank([P; A; G]) < n")
2070 # Solve
2071 #
2072 # [ P A' G' ] [ x ] [ -q ]
2073 # [ A 0 0 ] * [ y ] = [ b ].
2074 # [ G 0 -I ] [ z ] [ h ]
2076 xcopy(q, x)
ValueError: Rank(A) < p or Rank([P; A; G]) < n
Input:
mrna_df.iloc[0:4,0:4]
pd.DataFrame({'TCGA.2Z.A9J1.01': {'AADAT': 6.22565668754941,
'ABCB6': 8.88994408176988,
'ABCB9': 7.63002607924622,
'ABHD1': 4.44668632713815},
'TCGA.2Z.A9J3.01': {'AADAT': 6.170389794965,
'ABCB6': 9.09364811306327,
'ABCB9': 7.76961109298212,
'ABHD1': 5.9260824660449},
'TCGA.2Z.A9J6.01': {'AADAT': 5.52284042813955,
'ABCB6': 8.7728136979997,
'ABCB9': 7.20326283373569,
'ABHD1': 5.53843883525076},
'TCGA.2Z.A9J7.01': {'AADAT': 6.72337288854289,
'ABCB6': 10.2092692396731,
'ABCB9': 7.07511328163257,
'ABHD1': 7.37872402365434}}
)
methylation_df.iloc[0:4,0:4]
pd.DataFrame({'TCGA.2Z.A9J1.01': {'cg00001349': 2.91982935150322,
'cg09506600': -0.266195780430997,
'cg18093372': -2.21886912223455,
'cg23182840': 2.61132640570935},
'TCGA.2Z.A9J3.01': {'cg00001349': 3.18178018117433,
'cg09506600': 3.66127491467573,
'cg18093372': 1.54037692823511,
'cg23182840': -1.79602557853906},
'TCGA.2Z.A9J6.01': {'cg00001349': 4.18499151289161,
'cg09506600': -1.53498211627565,
'cg18093372': -2.36944533302082,
'cg23182840': -0.853529780223778},
'TCGA.2Z.A9J7.01': {'cg00001349': 2.23780927076746,
'cg09506600': -0.230680016194668,
'cg18093372': -0.213643485846856,
'cg23182840': -1.04586272492208}})
protein_df.iloc[0:4,0:4]
pd.DataFrame({'TCGA.2Z.A9J1.01': {'Smad4': 0.53031080975,
'14-3-3_epsilon': 0.10028027475,
'IGFBP2': -0.27061545625,
'S6_pS235_S236': 0.30110602225},
'TCGA.2Z.A9J3.01': {'Smad4': -0.096913032,
'14-3-3_epsilon': -0.0086484960000001,
'IGFBP2': -0.335268579,
'S6_pS235_S236': -0.3745811985},
'TCGA.2Z.A9J6.01': {'Smad4': 0.330359758,
'14-3-3_epsilon': 0.355386269,
'IGFBP2': -0.670854249,
'S6_pS235_S236': -0.6540221245},
'TCGA.2Z.A9J7.01': {'Smad4': -0.0341280585,
'14-3-3_epsilon': -0.0887603205,
'IGFBP2': -0.1709274405,
'S6_pS235_S236': -0.805935077}})
Potential solution:
A google group discussion suggested that removing redundant constraints may resolve the issue. However, the error persisted even with:
GRAM().fit(KLtr, Ytr)