0

I am planning on conducting kruskal-wallis test for multiple variables between groups(e.g. disease differences in out1, out2, and out3 in ordinal level) and getting the sorted variables [e.g. out2, out3, out1] based on the decreasing KW test results. However, getting the error message: "ValueError: If using all scalar values, you must pass an index". So how to fix it?

from scipy import stats
import pandas as pd
import numpy as np

data = pd.DataFrame({ 'ID': list(range(1,21)), 
                  'Disease': ["Cancer","Healthy","Adenoma","Cancer","Cancer","Healthy","Cancer","Healthy","Cancer","Cancer","Adenoma","Cancer","Healthy","Cancer","Healthy","Healthy","Healthy","Inflammation","Healthy","Inflammation"],
                   'Out1': np.random.randn(20),
                  'Out2': np.random.randn(20),
                  'Out3': np.random.randn(20)})

def KW_test(y, x):
  y = np.array(y)
  label, idx = np.unique(list(x), return_inverse=True)
  groups = [y[idx == i] for i, l in enumerate(label)]
  H, p = stats.kruskal(*groups)
  return H

results=[]
outcomes= list(data.columns)[2:]
for i in outcomes: 
   y=data[i]
   x=data.iloc[:, 1]
   KW_test=KW_test(y,x)
   results=pd.DataFrame({'outcomes': i, 'KW_test': KW_test}, columns=['outcomes', 'KW_test']) 
   results.append(results)
   # sorted the results decreasingly using KW_test results
   results=results.sort_value(["KW_test"], ascending=False)
   # return the sorted outcomes 
   print(results[outcomes]) 
zeno Zeng
  • 173
  • 7

1 Answers1

0

The solution is to add an index with 'enumerate' function

for index, i in enumerate(features): 
    y=data[i]
    x=data.iloc[:, 4]
    KW_test_result=KW_test(y,x)
    if index == 0:
    results = pd.DataFrame({'features': [i], 'KW_test': [KW_test_result]}, columns=['features', 'KW_test']) 
    else: 
    new_result=pd.DataFrame({'features': [i], 'KW_test': [KW_test_result]}, columns=['features', 'KW_test']) 
    results = pd.concat([results, new_result])
 
results=results.sort_values(["KW_test"], ascending=False)
zeno Zeng
  • 173
  • 7