-3

The code below computes the ROC accuracy values and uses gnuplot to display the plot. However, the output is set to 'onscreen', so the plot appears for only a few seconds and then closes. I want to direct the plot output to a folder as a file. How should I modify the code?

Thanks for the assistance.

The program was taken from plotroc.py https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/#roc_curve_for_binary_svm

#!/usr/bin/env python
#This tool allows users to plot an SVM-prob ROC curve from data
from svmutil import *
from sys import argv, platform
from os import path, popen
from random import randrange , seed
from operator import itemgetter
from time import sleep

# Search path for the gnuplot executable: candidates are tried in list order
# and the first one that exists on disk is used.
# Be careful with Windows LONG filenames: surround the path with double quotes
# and prefix the literal with 'r' to make it a raw string; otherwise every
# backslash must be doubled (\\).
gnuplot_exe_list = [r'"C:\Users\Lakshmi\Documents\gnuplot\bin\pgnuplot.exe"', "/usr/bin/gnuplot","/usr/local/bin/gnuplot"]


def get_pos_deci(train_y, train_x, test_y, test_x, param):
    """Train an SVM and return signed decision values for the test set.

    train_y, train_x -- labels and instances passed to svm_train
    test_y, test_x   -- labels and instances passed to svm_predict
    param            -- training options forwarded to svm_train

    Returns a pair (decision_values, model). Every raw decision value is
    multiplied by the model's first label; per the original author's note
    this makes a positive value correspond to the positive class.
    """
    model = svm_train(train_y, train_x, param)
    labels = model.get_labels()
    # Only the third element (the decision values) of the prediction is used.
    _predicted, _evaluation, raw_values = svm_predict(test_y, test_x, model)
    signed_values = []
    for row in raw_values:
        signed_values.append(labels[0] * row[0])
    return signed_values, model

#get_cv_deci(prob_y[], prob_x[], svm_parameter param, nr_fold)
#input raw attributes, labels, param, cv_fold in decision value building
#output list of decision value, remember to seed(0)
def get_cv_deci(prob_y, prob_x, param, nr_fold):
    """Collect decision values for every instance via cross validation.

    prob_y, prob_x -- labels and instances; both lists are shuffled IN PLACE
                      (in lockstep), so after the call they are in the same
                      order as the returned decision values
    param          -- training options forwarded to get_pos_deci
    nr_fold        -- number of folds; 0 or 1 means train and predict on the
                      complete data set without shuffling

    Returns the list of decision values. The shuffle draws from the global
    random generator, so call seed() first for reproducible results.
    """
    if nr_fold in (0, 1):
        whole_deci, _model = get_pos_deci(prob_y, prob_x, prob_y, prob_x, param)
        return whole_deci

    total = len(prob_y)

    # Random permutation: swap each position with a random later position.
    for src in range(total):
        dst = randrange(src, total)
        prob_x[src], prob_x[dst] = prob_x[dst], prob_x[src]
        prob_y[src], prob_y[dst] = prob_y[dst], prob_y[src]

    # Each fold is held out once while the remainder is used for training.
    collected = []
    for fold in range(nr_fold):
        lo = fold * total // nr_fold
        hi = (fold + 1) * total // nr_fold
        fold_deci, _fold_model = get_pos_deci(
            prob_y[:lo] + prob_y[hi:],
            prob_x[:lo] + prob_x[hi:],
            prob_y[lo:hi],
            prob_x[lo:hi],
            param,
        )
        collected += fold_deci
    return collected

#a simple gnuplot object
class gnuplot:
    """Minimal write-only wrapper around a gnuplot process.

    Commands are sent to the process through a pipe opened with popen().
    Assigning a string to any attribute (for example ``g.xlabel = "x"``) is
    translated into the gnuplot command ``set <attr> "<value>"``. Because of
    that, the object's own state is stored directly in ``__dict__`` so that it
    bypasses ``__setattr__``.
    """

    def __init__(self, term='onscreen'):
        """Start gnuplot and select the terminal.

        term -- 'onscreen' to use the platform's screen terminal, or a file
                name containing '.ps' or '.png' to write the plot to a file.
        """
        # -persist leaves the plot window on screen after gnuplot terminates
        if platform == 'win32':
            cmdline = gnuplot_exe
            self.__dict__['screen_term'] = 'windows'
        else:
            cmdline = gnuplot_exe + ' -persist'
            self.__dict__['screen_term'] = 'x11'
        self.__dict__['iface'] = popen(cmdline, 'w')
        self.set_term(term)

    def set_term(self, term):
        """Select the screen terminal or a '.ps' / '.png' output file.

        Raises SystemExit when term is neither 'onscreen' nor a name that
        contains '.ps' or '.png' after its first character.
        """
        if term == 'onscreen':
            self.writeln("set term %s" % self.screen_term)
            self.__dict__['term'] = term
            return

        # term must be either x.ps or x.png (substring match, not at index 0)
        if term.find('.ps') > 0:
            self.writeln("set term postscript eps color 22")
        elif term.find('.png') > 0:
            self.writeln("set term png")
        else:
            print("You must set term to either *.ps or *.png")
            raise SystemExit
        # Remember the target for __repr__ without emitting a gnuplot command.
        self.__dict__['term'] = term
        # The value is written inside a double-quoted gnuplot string, where a
        # backslash starts an escape sequence; double the backslashes so that
        # Windows paths such as C:\plots\new.png reach gnuplot unchanged.
        self.output = term.replace('\\', '\\\\')

    def writeln(self, cmdline):
        """Send one command line to gnuplot."""
        self.iface.write(cmdline + '\n')

    def __setattr__(self, attr, val):
        """Translate ``obj.attr = "text"`` into ``set attr "text"``.

        Raises SystemExit for non-string values.
        """
        if isinstance(val, str):
            self.writeln('set %s \"%s\"' % (attr, val))
        else:
            print("Unsupport format:", attr, val)
            raise SystemExit

    #terminate gnuplot
    def __del__(self):
        """Ask gnuplot to quit and close the pipe, if one was opened."""
        iface = self.__dict__.get('iface')
        if iface is None:
            # __init__ failed before the pipe existed; nothing to clean up.
            return
        try:
            self.writeln("quit")
            iface.flush()
        finally:
            iface.close()

    def __repr__(self):
        return "<gnuplot instance: output=%s>" % (self.__dict__.get('term'),)

    #data is a list of [x,y]
    def plotline(self, data):
        """Plot data, a sequence of [x, y] pairs, as a connected line."""
        self.writeln("plot \"-\" notitle with lines linewidth 1")
        for point in data:
            self.writeln("%f %f" % (point[0], point[1]))
            sleep(0) #delay
        # "e" terminates the inline data block started by plot "-"
        self.writeln("e")
        if platform == 'win32':
            sleep(3)

#processing argv and set some global variables
def proc_argv(argv = argv):
    """Split the command line into libsvm options and tool settings.

    Expected form:
        plotroc.py [-v cv_fold | -T testing_file] [libsvm-options] training_file

    The last argument is always the training file. '-T <file>' selects a
    testing file, '-v <n>' sets the number of folds (default 5), and every
    other argument is passed through as a libsvm option.

    Returns (options_string, fold, train_file, test_file); test_file is None
    when no '-T' option was given.
    """
    train_file = argv[-1]
    test_file, fold = None, 5
    passthrough = []
    last = len(argv) - 1
    pos = 1
    while pos < last:
        token = argv[pos]
        if token == '-T':
            # the option's value is the next argument
            pos += 1
            test_file = argv[pos]
        elif token == '-v':
            pos += 1
            fold = int(argv[pos])
        else:
            passthrough.append(token)
        pos += 1

    return ' '.join(passthrough), fold, train_file, test_file

def plot_roc(deci, label, output, title):
    """Compute the ROC curve and draw it to a file and on screen.

    deci   -- decision values, one per instance
    label  -- labels aligned with deci; a value > 0 counts as positive
    output -- target passed to the first gnuplot object (plot file name)
    title  -- name shown in the plot title; None falls back to output
    """
    # Pair every decision value with its label and count both classes.
    pos = neg = 0
    scored = []
    for idx, lab in enumerate(label):
        if lab > 0:
            pos += 1
        else:
            neg += 1
        scored.append([deci[idx], lab])

    # Highest decision value first.
    scored.sort(key=itemgetter(0), reverse=True)

    # Walk down the ranking; every instance adds one point to the curve.
    curve = []
    tp = fp = 0.         # floats, to assure float division
    for _value, lab in scored:
        if lab > 0:
            tp += 1
        else:
            fp += 1
        curve.append([fp/neg, tp/pos])

    # Area under the curve: add a rectangle whenever x advances.
    auc = 0.
    last_x = 0
    for x, y in curve:
        if x == last_x:
            continue
        auc += (x - last_x) * y
        last_x = x

    if title is None:
        title = output
    caption = "ROC curve of %s (AUC = %.4f)" % (title, auc)

    # write to file
    file_plot = gnuplot(output)
    file_plot.xlabel = "False Positive Rate"
    file_plot.ylabel = "True Positive Rate"
    file_plot.title = caption
    file_plot.plotline(curve)

    # display on screen
    screen_plot = gnuplot('onscreen')
    screen_plot.xlabel = "False Positive Rate"
    screen_plot.ylabel = "True Positive Rate"
    screen_plot.title = caption
    screen_plot.plotline(curve)

def check_gnuplot_exe():
    """Set the global gnuplot_exe to the first existing candidate path.

    Candidates come from gnuplot_exe_list; surrounding double quotes are
    ignored for the existence check but kept in the stored value.
    Raises SystemExit when none of the candidates exists.
    """
    global gnuplot_exe
    gnuplot_exe = None
    existing = (
        candidate
        for candidate in gnuplot_exe_list
        if path.exists(candidate.replace('"', ''))
    )
    gnuplot_exe = next(existing, None)
    if gnuplot_exe is None:
        print("You must add correct path of 'gnuplot' into gnuplot_exe_list")
        raise SystemExit

def main():
    """Command-line entry point: read data, get decision values, plot ROC.

    With '-T testing_file' the model is trained on the training file and
    evaluated on the testing file; otherwise cross validation is run on the
    training file. The plot file is named '<training file name>-roc.png' and
    has no directory part, so it is created relative to the current working
    directory.
    """
    check_gnuplot_exe()
    if len(argv) <= 1:
        print("Usage: %s [-v cv_fold | -T testing_file] [libsvm-options] training_file" % argv[0])
        raise SystemExit

    param, fold, train_file, test_file = proc_argv()
    train_name = path.basename(train_file)
    output_file = train_name + '-roc.png'

    # read data
    train_y, train_x = svm_read_problem(train_file)
    if set(train_y) != {1, -1}:
        print("ROC is only applicable to binary classes with labels 1, -1")
        raise SystemExit

    # get decision value, with positive = label+
    seed(0)  # reset random seed

    if not test_file:
        # single file -> cross validation
        deci = get_cv_deci(train_y, train_x, param, fold)
        plot_roc(deci, train_y, output_file, train_name)
        return

    # separate testing file
    output_title = "%s on %s" % (path.basename(test_file), train_name)
    test_y, test_x = svm_read_problem(test_file)
    if set(test_y) != {1, -1}:
        print("ROC is only applicable to binary classes with labels 1, -1")
        raise SystemExit
    deci, _model = get_pos_deci(train_y, train_x, test_y, test_x, param)
    plot_roc(deci, test_y, output_file, output_title)

# Run the tool only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()

1 Answers1

1

I downloaded code and data file heart_scale and after running

python script.py -v 5 -c 10 heart_scale

I found file heart_scale-roc.png in folder in which I executed script.

furas
  • 134,197
  • 12
  • 106
  • 148
  • But I don't get the output should I use python 3x? – Lakshmi KrishnaKumaar Dec 12 '17 at 08:29
  • Maybe you got it in a different folder. The script creates the file in the folder where you run it - that doesn't have to be the folder with the code or with the data file. But in the code you can always assign a filename with a full path to `output_file` (instead of `output_file = path.split(train_file)[1] + '-roc.png'`), and then you don't have to search for it in different folders. – furas Dec 12 '17 at 08:55
  • how can I re direct the output to a folder I need – Lakshmi KrishnaKumaar Dec 12 '17 at 08:57
  • as I said - you have to change `output_file` in code - in place of `output_file = path.split(train_file)[1] + '-roc.png'`. Put ie. `output_file = "/full/path/to/your/folder/output.png"` – furas Dec 12 '17 at 09:02
  • Adding the path I get the following error SyntaxError: 'unicodeescape' codec can't decode bytes in position 2-4: truncated \UXXXXXXXX escape – Lakshmi KrishnaKumaar Dec 12 '17 at 09:23
  • do you use Windows ? Char \ has special meaning in string - ie. `\t`, `\n` - even if it is string with path. Probably you have `\U` in your path - ie. `\Users` - but `\U` is used to put unicode char in text. In Windows paths better use \\ or / - `"C:\\Your\\folder\\output.png"` or `"C:/Your/folder/output.png"`. Or you can use prefix `r` to create **raw** string `r"C:\Your\folder\output.png"` – furas Dec 12 '17 at 09:29