I'm trying to perform dimensionality reduction on a classification problem with two classes.
I have 6 csv files. My code is here:
def linear_discrimination_analysis(files):
with open(os.path.join("/Users", "byname", "PycharmProjects", "sensorLogProject", "Data", files[0]),
'rU') as my_file_0:
df1 = sd.sample_difference(my_file_0)
for f in files[1:len(files) - 2]:
with open(os.path.join("/Users", "myname", "PycharmProjects", "sensorLogProject", "Data", f),
'rU') as my_file_1:
df1.append(sd.sample_difference(my_file_1))
with open(os.path.join("/Users", "myname", "PycharmProjects", "sensorLogProject", "Data", files[len(files) - 2]),
'rU') as my_file_2:
df2 = sd.sample_difference(my_file_2)
with open(os.path.join("/Users", "myname", "PycharmProjects", "sensorLogProject", "Data", files[len(files) - 1]),
'rU') as my_file_3:
df2.append(sd.sample_difference(my_file_3))
X = df1[['x', 'y', 'z']].values
y = df2['label'].values
lda = LDA(n_components=1)
lda.fit_transform(X, y.ravel())
plt.show()
linear_discrimination_analysis(files)
I suppose that could be the issue.
Here's the error I get:
RuntimeWarning: invalid value encountered in divide S**2 [:self._max_components]
Each file has 100's of rows and 5 columns. I want to use the 3rd, 4th, and 5th column for feature extraction which are called 'x', 'y', and 'z'.