I'm using this tutorial to build an Adaboost.SAMME classifier for object recognition, using HoG features. This is my code below, mostly only the top part is customized according to my problem, otherwise most of it is the same as in the tutorial. This is a very small test I'm doing, with only 17 images in all, 10 for training, 7 for testing. Once I get this up and running, I'll add loads of more images for proper training.
import sys from scipy
import misc, ndimage from skimage
import data, io, filter, color, exposure
from skimage.viewer import ImageViewer
from skimage.feature import hog from skimage.transform
import resize import matplotlib.pyplot as plt
from sklearn.datasets import make_gaussian_quantiles
from sklearn.ensemble import AdaBoostClassifier
from sklearn.externals.six.moves import xrange
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
import pylab as pl from sklearn.externals.six.moves
import zip
f = open("PATH_TO_LIST_OF_SAMPLES\\samples.txt",'r')
out = f.read().splitlines() import numpy as np
imgs = [] tmp_hogs = []
#tmp_hogs = np.zeros((17,1728)) labels = [1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0]
i=0 for file in out:
filepath = "PATH_TO_IMAGES\\imgs\\"
readfile = filepath + file
curr_img = color.rgb2gray(io.imread(readfile))
imgs.append(curr_img)
fd, hog_image = hog(curr_img, orientations=8, pixels_per_cell=(16, 16),
cells_per_block=(1, 1), visualise=True, normalise=True)
tmp_hogs.append(fd)
i+=1
img_hogs = np.array(tmp_hogs, dtype =float)
n_split = 10
X_train, X_test = np.array(img_hogs[:n_split]), np.array(img_hogs[n_split:])
y_train, y_test = np.array(labels[:n_split]), np.array(labels[n_split:])
#now all the code below is straight off the example on scikit-learn's website
bdt_real = AdaBoostClassifier(
DecisionTreeClassifier(max_depth=2),
n_estimators=600,
learning_rate=1)
bdt_discrete = AdaBoostClassifier(
DecisionTreeClassifier(max_depth=2),
n_estimators=600,
learning_rate=1.5,
algorithm="SAMME")
bdt_real.fit(X_train, y_train)
bdt_discrete.fit(X_train, y_train)
real_test_errors = []
discrete_test_errors = []
for real_test_predict, discrete_train_predict in zip(
bdt_real.staged_predict(X_test), bdt_discrete.staged_predict(X_test)):
real_test_errors.append(
1. - accuracy_score(real_test_predict, y_test))
discrete_test_errors.append(
1. - accuracy_score(discrete_train_predict, y_test))
n_trees = xrange(1, len(bdt_discrete) + 1)
pl.figure(figsize=(15, 5))
pl.subplot(131)
pl.plot(n_trees, discrete_test_errors, c='black', label='SAMME')
pl.plot(n_trees, real_test_errors, c='black',
linestyle='dashed', label='SAMME.R')
pl.legend()
pl.ylim(0.18, 0.62)
pl.ylabel('Test Error')
pl.xlabel('Number of Trees')
pl.subplot(132)
pl.plot(n_trees, bdt_discrete.estimator_errors_, "b", label='SAMME', alpha=.5)
pl.plot(n_trees, bdt_real.estimator_errors_, "r", label='SAMME.R', alpha=.5)
pl.legend()
pl.ylabel('Error')
pl.xlabel('Number of Trees')
pl.ylim((.2,
max(bdt_real.estimator_errors_.max(),
bdt_discrete.estimator_errors_.max()) * 1.2))
pl.xlim((-20, len(bdt_discrete) + 20))
pl.subplot(133)
pl.plot(n_trees, bdt_discrete.estimator_weights_, "b", label='SAMME')
pl.legend()
pl.ylabel('Weight')
pl.xlabel('Number of Trees')
pl.ylim((0, bdt_discrete.estimator_weights_.max() * 1.2))
pl.xlim((-20, len(bdt_discrete) + 20))
# prevent overlapping y-axis labels
pl.subplots_adjust(wspace=0.25)
pl.show()
But I'm getting the following error:
Traceback (most recent call last):
File "C:\Users\app\Documents\Python Scripts\carclassify.py", line 101, in <module>
pl.plot(n_trees, bdt_discrete.estimator_errors_, "b", label='SAMME', alpha=.5)
File "C:\Users\app\Anaconda\lib\site-packages\matplotlib\pyplot.py", line 2987, in plot
ret = ax.plot(*args, **kwargs)
File "C:\Users\app\Anaconda\lib\site-packages\matplotlib\axes.py", line 4137, in plot
for line in self._get_lines(*args, **kwargs):
File "C:\Users\app\Anaconda\lib\site-packages\matplotlib\axes.py", line 317, in _grab_next_args
for seg in self._plot_args(remaining, kwargs):
File "C:\Users\app\Anaconda\lib\site-packages\matplotlib\axes.py", line 295, in _plot_args
x, y = self._xy_from_xy(x, y)
File "C:\Users\app\Anaconda\lib\site-packages\matplotlib\axes.py", line 237, in _xy_from_xy
raise ValueError("x and y must have same first dimension")
ValueError: x and y must have same first dimension
So I added these lines before the tutorial section of code, in order to see the dimensions of the X and Y arrays:
print X_train.shape
print y_train.shape
print X_test.shape
print y_test.shape
and the output was:
(10L, 48L)
(10L,)
(7L, 48L)
(7L,)
But I'm not sure if the x and y in the error are referring to my X and y... because surely it's normal for the training and testing datasets to have different sizes. What am I doing wrong?