"ValueError: x and y must have same first dimension", error in plotting graph in pylab

Question

I'm using this tutorial to build an Adaboost.SAMME classifier for object recognition, using HoG features. This is my code below, mostly only the top part is customized according to my problem, otherwise most of it is the same as in the tutorial. This is a very small test I'm doing, with only 17 images in all, 10 for training, 7 for testing. Once I get this up and running, I'll add loads of more images for proper training.

import sys from scipy 
import misc, ndimage from skimage 
import data, io, filter, color, exposure 
from skimage.viewer import ImageViewer 
from skimage.feature import hog from skimage.transform 
import resize import matplotlib.pyplot as plt 
from sklearn.datasets import make_gaussian_quantiles 
from sklearn.ensemble import AdaBoostClassifier 
from sklearn.externals.six.moves import xrange 
from sklearn.metrics import accuracy_score 
from sklearn.tree import DecisionTreeClassifier 
import pylab as pl from sklearn.externals.six.moves 
import zip

f = open("PATH_TO_LIST_OF_SAMPLES\\samples.txt",'r') 
out = f.read().splitlines() import numpy as np

imgs = [] tmp_hogs = []
#tmp_hogs = np.zeros((17,1728)) labels = [1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0]

i=0 for file in out:
        filepath = "PATH_TO_IMAGES\\imgs\\"
        readfile = filepath + file
        curr_img = color.rgb2gray(io.imread(readfile))
        imgs.append(curr_img)
        fd, hog_image = hog(curr_img, orientations=8, pixels_per_cell=(16, 16),
                 cells_per_block=(1, 1), visualise=True, normalise=True)
        tmp_hogs.append(fd) 
        i+=1
        img_hogs = np.array(tmp_hogs, dtype =float) 

n_split = 10 
X_train, X_test = np.array(img_hogs[:n_split]), np.array(img_hogs[n_split:]) 
y_train, y_test = np.array(labels[:n_split]), np.array(labels[n_split:])


#now all the code below is straight off the example on scikit-learn's website

bdt_real = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=2),
    n_estimators=600,
    learning_rate=1)

bdt_discrete = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=2),
    n_estimators=600,
    learning_rate=1.5,
    algorithm="SAMME")

bdt_real.fit(X_train, y_train)
bdt_discrete.fit(X_train, y_train)

real_test_errors = []
discrete_test_errors = []

for real_test_predict, discrete_train_predict in zip(
        bdt_real.staged_predict(X_test), bdt_discrete.staged_predict(X_test)):
    real_test_errors.append(
        1. - accuracy_score(real_test_predict, y_test))
    discrete_test_errors.append(
        1. - accuracy_score(discrete_train_predict, y_test))

n_trees = xrange(1, len(bdt_discrete) + 1)

pl.figure(figsize=(15, 5))

pl.subplot(131)
pl.plot(n_trees, discrete_test_errors, c='black', label='SAMME')
pl.plot(n_trees, real_test_errors, c='black',
        linestyle='dashed', label='SAMME.R')
pl.legend()
pl.ylim(0.18, 0.62)
pl.ylabel('Test Error')
pl.xlabel('Number of Trees')

pl.subplot(132)
pl.plot(n_trees, bdt_discrete.estimator_errors_, "b", label='SAMME', alpha=.5)
pl.plot(n_trees, bdt_real.estimator_errors_, "r", label='SAMME.R', alpha=.5)
pl.legend()
pl.ylabel('Error')
pl.xlabel('Number of Trees')
pl.ylim((.2,
        max(bdt_real.estimator_errors_.max(),
            bdt_discrete.estimator_errors_.max()) * 1.2))
pl.xlim((-20, len(bdt_discrete) + 20))

pl.subplot(133)
pl.plot(n_trees, bdt_discrete.estimator_weights_, "b", label='SAMME')
pl.legend()
pl.ylabel('Weight')
pl.xlabel('Number of Trees')
pl.ylim((0, bdt_discrete.estimator_weights_.max() * 1.2))
pl.xlim((-20, len(bdt_discrete) + 20))

# prevent overlapping y-axis labels
pl.subplots_adjust(wspace=0.25)
pl.show()

But I'm getting the following error:

Traceback (most recent call last):
  File "C:\Users\app\Documents\Python Scripts\carclassify.py", line 101, in <module>
    pl.plot(n_trees, bdt_discrete.estimator_errors_, "b", label='SAMME', alpha=.5)
  File "C:\Users\app\Anaconda\lib\site-packages\matplotlib\pyplot.py", line 2987, in plot
    ret = ax.plot(*args, **kwargs)
  File "C:\Users\app\Anaconda\lib\site-packages\matplotlib\axes.py", line 4137, in plot
    for line in self._get_lines(*args, **kwargs):
  File "C:\Users\app\Anaconda\lib\site-packages\matplotlib\axes.py", line 317, in _grab_next_args
    for seg in self._plot_args(remaining, kwargs):
  File "C:\Users\app\Anaconda\lib\site-packages\matplotlib\axes.py", line 295, in _plot_args
    x, y = self._xy_from_xy(x, y)
  File "C:\Users\app\Anaconda\lib\site-packages\matplotlib\axes.py", line 237, in _xy_from_xy
    raise ValueError("x and y must have same first dimension")
ValueError: x and y must have same first dimension

So I added these lines before the tutorial section of code, in order to see the dimensions of the X and Y arrays:

print X_train.shape 
print y_train.shape
print X_test.shape 
print y_test.shape

and the output was:

(10L, 48L)
(10L,)
(7L, 48L)
(7L,)

But I'm not sure if the x and y in the error are referring to my X and y... because surely it's normal for the training and testing datasets to have different sizes. What am I doing wrong?

If you look at the traceback provided, it is saying that the first dimension of `n_trees` and `bdt_discrete.estimator_errors_` are not the same. Print out those shapes and see what they are. They will most likely be `(N,)` and `(M,)` where `N!=M`. — wflynny, Apr 15 '14 at 15:31
I did `print n_trees` and `print bdt_discrete.estimator_errors_.shape`. The output was `xrange(1,2)` for n_trees, and and `(600L)` for bdt_discrete.estimator_errors_. Does that mean I'm doing something wrong in the classification? How can this be fixed? — user961627, Apr 16 '14 at 12:44

"ValueError: x and y must have same first dimension", error in plotting graph in pylab

0 Answers0