The code below computes the ROC curve and its accuracy (AUC) value, and gnuplot is used to display the plot. However, the output is set to 'onscreen', so the plot appears for only a few seconds and then closes. I want to direct the plot output to a folder as a file instead. How should I modify the code?
Thanks for the assistance.
The program is plotroc.py, taken from https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/#roc_curve_for_binary_svm
#!/usr/bin/env python
#This tool allows users to plot an SVM-prob ROC curve from data
from svmutil import *
from sys import argv, platform
from os import path, popen
from random import randrange , seed
from operator import itemgetter
from time import sleep
# Candidate locations of the gnuplot executable.
# On Windows, surround a path that uses LONG file names with double quotes,
# and prefix the literal with r to make it a raw string; otherwise every
# backslash has to be doubled (\\).
gnuplot_exe_list = [r'"C:\Users\Lakshmi\Documents\gnuplot\bin\pgnuplot.exe"', "/usr/bin/gnuplot","/usr/local/bin/gnuplot"]
def get_pos_deci(train_y, train_x, test_y, test_x, param):
    """Train a model and return sign-adjusted decision values for the test set.

    The model is built with ``svm_train(train_y, train_x, param)`` and applied
    to the test data with ``svm_predict``.  The first decision value of every
    test instance is multiplied by ``model.get_labels()[0]``; the intent stated
    by the original author is that a value > 0 then indicates the +1 label.

    Returns:
        A tuple ``(deci, model)``: the list of adjusted decision values (one
        per test instance) and the trained model.
    """
    model = svm_train(train_y, train_x, param)
    labels = model.get_labels()
    # Only the decision values are needed; predicted labels and the
    # evaluation results returned by svm_predict are ignored.
    _pred_labels, _evals, raw_deci = svm_predict(test_y, test_x, model)
    deci = [labels[0] * val[0] for val in raw_deci]
    return deci, model
def get_cv_deci(prob_y, prob_x, param, nr_fold):
    """Collect decision values for every instance via cross validation.

    Args:
        prob_y: list of labels.  Shuffled IN PLACE (see below).
        prob_x: list of instances, parallel to ``prob_y``.  Shuffled IN PLACE.
        param: training options, forwarded unchanged to ``get_pos_deci``.
        nr_fold: number of folds.  With 0 or 1 no folding is done: the model
            is trained and evaluated on the full data set and nothing is
            shuffled.

    Returns:
        The list of decision values, ordered like the (shuffled) ``prob_y``.

    The permutation is drawn with ``random.randrange``; call ``seed(...)``
    beforehand for reproducible folds.  Labels and instances are swapped in
    lockstep and the caller's lists are modified on purpose, so that the
    returned values line up with ``prob_y`` after the call.
    """
    if nr_fold == 1 or nr_fold == 0:
        deci, _model = get_pos_deci(prob_y, prob_x, prob_y, prob_x, param)
        return deci

    prob_l = len(prob_y)

    # random permutation: swap instance i with a random instance j >= i
    for i in range(prob_l):
        j = randrange(i, prob_l)
        prob_x[i], prob_x[j] = prob_x[j], prob_x[i]
        prob_y[i], prob_y[j] = prob_y[j], prob_y[i]

    # cross training: fold i is held out for testing, the rest is for training
    deci = []
    for i in range(nr_fold):
        begin = i * prob_l // nr_fold
        end = (i + 1) * prob_l // nr_fold
        train_x = prob_x[:begin] + prob_x[end:]
        train_y = prob_y[:begin] + prob_y[end:]
        test_x = prob_x[begin:end]
        test_y = prob_y[begin:end]
        subdeci, _submodel = get_pos_deci(train_y, train_x, test_y, test_x, param)
        deci += subdeci
    return deci
#a simple gnuplot object
class gnuplot:
    """Minimal wrapper that pipes commands to a gnuplot process.

    Assigning a string to any attribute of an instance does NOT store it on
    the object; it sends ``set <attr> "<value>"`` to gnuplot instead (see
    ``__setattr__``).  Internal state is therefore written straight into
    ``self.__dict__``:

    * ``iface``       - writable pipe to the gnuplot process
    * ``screen_term`` - terminal name used for on-screen output
    * ``term``        - the target passed to ``set_term`` (used by ``__repr__``)
    """

    def __init__(self, term='onscreen'):
        """Start gnuplot and select the output target.

        Reads the module-level name ``gnuplot_exe`` for the command line, so
        that name must have been assigned before an instance is created.

        Args:
            term: ``'onscreen'`` or an output file name ending in ``.ps`` or
                ``.png``.
        """
        # -persist leaves the plot window on screen after gnuplot terminates
        if platform == 'win32':
            cmdline = gnuplot_exe
            self.__dict__['screen_term'] = 'windows'
        else:
            cmdline = gnuplot_exe + ' -persist'
            self.__dict__['screen_term'] = 'x11'
        self.__dict__['iface'] = popen(cmdline, 'w')
        self.set_term(term)

    def set_term(self, term):
        """Select the gnuplot terminal (and output file) for ``term``.

        Raises:
            SystemExit: if ``term`` is neither ``'onscreen'`` nor a name
                ending in ``.ps`` / ``.png``.
        """
        # Remember the target for __repr__; written through __dict__ so that
        # __setattr__ does not turn it into a gnuplot command.
        self.__dict__['term'] = term
        if term == 'onscreen':
            self.writeln("set term %s" % self.screen_term)
            return
        # Decide by file extension.  A substring search would misclassify a
        # name such as "data.psv-roc.png" as PostScript.
        if term.endswith('.ps'):
            self.writeln("set term postscript eps color 22")
        elif term.endswith('.png'):
            self.writeln("set term png")
        else:
            print("You must set term to either *.ps or *.png")
            raise SystemExit
        # goes through __setattr__: sends  set output "<term>"
        self.output = term

    def writeln(self, cmdline):
        """Send one command line to gnuplot."""
        self.iface.write(cmdline + '\n')

    def __setattr__(self, attr, val):
        """Translate ``obj.attr = "text"`` into the command ``set attr "text"``.

        Raises:
            SystemExit: if ``val`` is not a string.
        """
        if isinstance(val, str):
            self.writeln('set %s \"%s\"' % (attr, val))
        else:
            print("Unsupport format:", attr, val)
            raise SystemExit

    #terminate gnuplot
    def __del__(self):
        """Tell gnuplot to quit and close the pipe."""
        iface = self.__dict__.get('iface')
        if iface is None:
            # __init__ failed before the pipe was opened: nothing to shut down
            return
        self.writeln("quit")
        iface.flush()
        iface.close()

    def __repr__(self):
        return "<gnuplot instance: output=%s>" % self.__dict__.get('term')

    #data is a list of [x,y]
    def plotline(self, data):
        """Plot ``data`` (a sequence of ``[x, y]`` pairs) as a single line."""
        self.writeln("plot \"-\" notitle with lines linewidth 1")
        for point in data:
            self.writeln("%f %f" % (point[0], point[1]))
            sleep(0) #delay
        self.writeln("e")
        # Push the buffered commands to gnuplot now, so the plot is really
        # being drawn during the delay below rather than only at shutdown.
        self.iface.flush()
        if platform == 'win32':
            sleep(3)
def proc_argv(argv = argv):
    """Parse the command line.

    Expected form:
        ./plotroc.py [-v cv_fold | -T testing_file] [libsvm-options] training_file

    The last argument is always taken as the training file.  Among the
    arguments between the program name and the training file, ``-T <file>``
    selects a testing file and ``-v <n>`` sets the number of folds; everything
    else is collected as an option.

    Returns:
        A tuple ``(options, fold, train_file, test_file)`` where ``options``
        is the collected arguments joined by single spaces, ``fold`` defaults
        to 5 and ``test_file`` is ``None`` when no ``-T`` was given.

    Raises:
        ValueError: if the value following ``-v`` is not an integer.
    """
    train_file = argv[-1]
    test_file = None
    fold = 5
    options = []
    i = 1
    while i < len(argv) - 1:
        if argv[i] == '-T':
            test_file = argv[i + 1]
            i += 1  # skip the value that belongs to -T
        elif argv[i] == '-v':
            fold = int(argv[i + 1])
            i += 1  # skip the value that belongs to -v
        else:
            options.append(argv[i])
        i += 1
    return ' '.join(options), fold, train_file, test_file
def plot_roc(deci, label, output, title):
    """Compute the ROC curve and its area, then plot it to a file and on screen.

    Args:
        deci: decision values, one per instance.
        label: labels parallel to ``deci``; a label > 0 counts as positive,
            anything else as negative.
        output: output file name handed to ``gnuplot`` (``*.ps`` or ``*.png``).
        title: plot title; ``output`` is used when this is ``None``.

    Raises:
        ValueError: if ``label`` has no positive or no negative entry, because
            the true/false positive rates are undefined in that case.
    """
    # count positive and negative labels, pairing each with its decision value
    db = []
    pos, neg = 0, 0
    for i, lab in enumerate(label):
        if lab > 0:
            pos += 1
        else:
            neg += 1
        db.append([deci[i], lab])

    if pos == 0 or neg == 0:
        raise ValueError("ROC needs at least one positive and one negative label")

    # sort by decision value, largest first
    db.sort(key=itemgetter(0), reverse=True)

    # calculate ROC: one (false positive rate, true positive rate) point per instance
    xy_arr = []
    tp, fp = 0., 0.  # floats, to assure float division
    for _value, lab in db:
        if lab > 0:  # positive
            tp += 1
        else:
            fp += 1
        xy_arr.append([fp / neg, tp / pos])

    # area under curve: add a rectangle each time x advances
    aoc = 0.
    prev_x = 0
    for x, y in xy_arr:
        if x != prev_x:
            aoc += (x - prev_x) * y
            prev_x = x

    # begin gnuplot
    if title is None:
        title = output
    # also write to file
    g = gnuplot(output)
    g.xlabel = "False Positive Rate"
    g.ylabel = "True Positive Rate"
    g.title = "ROC curve of %s (AUC = %.4f)" % (title, aoc)
    g.plotline(xy_arr)
    # display on screen
    s = gnuplot('onscreen')
    s.xlabel = "False Positive Rate"
    s.ylabel = "True Positive Rate"
    s.title = "ROC curve of %s (AUC = %.4f)" % (title, aoc)
    s.plotline(xy_arr)
def check_gnuplot_exe():
    """Pick the first existing entry of ``gnuplot_exe_list``.

    The chosen entry (still carrying any surrounding double quotes) is stored
    in the module-level ``gnuplot_exe``.  The quotes are stripped only for the
    existence check.

    Raises:
        SystemExit: if none of the listed paths exists.
    """
    global gnuplot_exe
    gnuplot_exe = None
    for g in gnuplot_exe_list:
        if path.exists(g.replace('"', '')):
            gnuplot_exe = g
            break
    if gnuplot_exe is None:
        print("You must add correct path of 'gnuplot' into gnuplot_exe_list")
        raise SystemExit
def main():
    """Command-line entry point: read the data, obtain decision values, plot.

    With ``-T testing_file`` the model is trained on the training file and
    evaluated on the testing file; otherwise decision values come from cross
    validation on the training file.  The plot file is named after the
    training file's base name plus ``-roc.png`` (a relative name, with no
    directory part).

    Raises:
        SystemExit: if gnuplot is not found, no arguments are given, or a data
            file's label set is not exactly {1, -1}.
    """
    check_gnuplot_exe()
    if len(argv) <= 1:
        print("Usage: %s [-v cv_fold | -T testing_file] [libsvm-options] training_file" % argv[0])
        raise SystemExit
    param, fold, train_file, test_file = proc_argv()
    output_file = path.split(train_file)[1] + '-roc.png'

    # read data
    train_y, train_x = svm_read_problem(train_file)
    if set(train_y) != {1, -1}:
        print("ROC is only applicable to binary classes with labels 1, -1")
        raise SystemExit

    # get decision value, with positive = label+
    seed(0)  # reset random seed so the cross-validation shuffle is repeatable
    if test_file:  # go with test_file
        output_title = "%s on %s" % (path.split(test_file)[1], path.split(train_file)[1])
        test_y, test_x = svm_read_problem(test_file)
        if set(test_y) != {1, -1}:
            print("ROC is only applicable to binary classes with labels 1, -1")
            raise SystemExit
        deci, model = get_pos_deci(train_y, train_x, test_y, test_x, param)
        plot_roc(deci, test_y, output_file, output_title)
    else:  # single file -> CV
        output_title = path.split(train_file)[1]
        # get_cv_deci shuffles train_y in place, so it still matches deci
        deci = get_cv_deci(train_y, train_x, param, fold)
        plot_roc(deci, train_y, output_file, output_title)
# Run only when executed as a script, not when imported.
if __name__ == '__main__':
    main()