
I am getting an AttributeError but I couldn't figure out the reason. I am attaching my code; please have a look and let me know how to fix it.

The error I am getting is:

AttributeError                            Traceback (most recent call last)
<ipython-input-1-f3085e096816> in <module>
    164     my_tree = build_tree(training_data)
    165 
--> 166     print_tree(my_tree)
    167 
    168     testing_data = [

<ipython-input-1-f3085e096816> in print_tree(node, spacing)
    134         return
    135 
--> 136     print(spacing + str(node.question))
    137 
    138     print(spacing + "--> True")

AttributeError: 'NoneType' object has no attribute 'question'

# Generating data
training_data = [
    ["Green", 3, "Mango"],
    ["Yellow", 3, "Mango"],
    ["Red", 1, "Grape"],
    ["Red", 1, "Grape"],
    ["Yellow", 3, "Lemon"],
]


# Column Labels
header = ["color", "diameter", "label"]

def unique_vals(rows, col):
    """Find unique values for a column in dataset."""
    return set([row[col] for row in rows])

def class_counts(rows):
    """Counts # of each typre of eg. in datset."""
    counts = {}
    for row in rows:
        label = row[-1]
        if label not in counts:
            counts[label] = 0
        counts[label] += 1
    return counts

def is_numeric(value):
    """Check if a value is numeric"""
    return isinstance(value, int) or isinstance(value, float)


# Splitting the dataset based on questions
class Question:

    def __init__(self, column, value):
        self.column = column
        self.value = value

    def match(self, example):
        # compare feature values
        val = example[self.column]
        if is_numeric(val):
            return val >= self.value
        else:
            return val == self.value


    def __repr__(self):
        condition = "=="
        if is_numeric(self.value):
            condition = ">="
        return "Is %s %s %s?" % (
            header[self.column], condition, str(self.value))


def partition(rows, question):
    true_rows, false_rows = [], []
    for row in rows:
        if question.match(row):
            true_rows.append(row)
        else:
            false_rows.append(row)
    return true_rows, false_rows

# Gini impurity
def gini(rows):

    counts = class_counts(rows)
    impurity = 1
    for lbl in counts:
        prob_if_lbl = counts[lbl]/float(len(rows))
        impurity -= prob_if_lbl**2
    return impurity

def info_gain(left, right, current_uncertainity):
    p = float(len(left))/len(left) + len(right)
    return current_uncertainity - p * gini(left) - (1-p) * gini(right)


def find_best_split(rows):
    best_gain = 0
    best_question = None
    current_uncertainity = gini(rows)
    n_features = len(rows[0]) - 1

    for col in range(n_features):
        values = set([row[col] for row in rows])

        for val in values:
            question = Question(col, val)
            true_rows, false_rows = partition(rows, question)

            if len(true_rows) == 0 or len(false_rows) == 0:
                continue

            gain = info_gain(true_rows, false_rows, current_uncertainity)

            if gain >= best_gain:
                best_gain, best_question = gain, question
    return best_gain, best_question


# Defining tree

class leaf:

    def __init__(self, rows):
        self.predicitons = class_counts(rows)

class Decision_Node:

    def __init__(self,
                 question,
                 true_branch,
                 false_branch):
        self.question = question
        self.true_branch = true_branch
        self.false_branch = false_branch

def build_tree(rows):
    gain, question = find_best_split(rows)

    if gain == 0:
        return leaf(rows)

    true_rows, false_rows = partition(rows, question)
    true_branch = build_tree(true_rows)
    false_branch = build_tree(false_rows)

def print_tree(node, spacing=""):
    if isinstance(node, leaf):
        print (spacing + "Predict", node.predicitons)
        return

    print(spacing + str(node.question))

    print(spacing + "--> True")
    print_tree(node.true_branch, spacing + " ")
    print(spacing + "--> False")
    print_tree(node.false_branch, spacing + " ")  


def classify(row,node):

    if isinstance(node,leaf):
          return node.predicitons

    if node.question.match(row):
          return classify(row, node.true_branch)
    else:
          return classify(row, node.false_branch)

def print_leaf(counts):
    total = sum(counts.values()) * 1.0
    probs = {}
    for lbl in counts.keys():
          probs[lbl] = str(int(counts[lbl] / total * 100)) + "%"
    return probs


if __name__ == "__main__":

    my_tree = build_tree(training_data)

    print_tree(my_tree)

    testing_data = [
    ["Green", 3, "Mango"],
    ["Yellow", 4, "Mango"],
    ["Red", 2, "Grape"],
    ["Red", 1, "Grape"],
    ["Yellow", 3, "Lemon"],
]

    for row in testing_data:
          print("Actual: %s. Predicted: %s" %
               (row[-1], print_leaf(classify(row, my_tree))))




2 Answers


The error message is telling you that `node` is `None`. `node` doesn't change in `print_tree`, so that means `my_tree` is `None`. It looks like the problem is that `build_tree` doesn't return anything when `gain != 0`, so it returns `None` implicitly.

If you want advice about fixing it, you'll need to make a minimal, reproducible example.
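
In the meantime, based on the code you posted, the likely fix is an explicit return at the end of `build_tree`. A minimal sketch, assuming the rest of the code stays as it is:

def build_tree(rows):
    gain, question = find_best_split(rows)

    # Base case: no further information gain, so this becomes a leaf
    if gain == 0:
        return leaf(rows)

    # Recursive case: split the rows and build both subtrees
    true_rows, false_rows = partition(rows, question)
    true_branch = build_tree(true_rows)
    false_branch = build_tree(false_rows)

    # Without this return, the function falls off the end and returns None
    return Decision_Node(question, true_branch, false_branch)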

– wjandrea

For some reason you are getting a NoneType to process.

You could print every `node` and every `node.question` to see where things go wrong! For instance, a temporary debugging print at the top of `print_tree` (just a sketch of that idea, using the code from the question) would show where the `None` first appears:
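
def print_tree(node, spacing=""):
    # Temporary debugging aid: shows what was actually passed in;
    # here the very first call already receives None
    print(spacing + "DEBUG node =", repr(node))

    if isinstance(node, leaf):
        print(spacing + "Predict", node.predicitons)
        return

    print(spacing + str(node.question))
    print(spacing + "--> True")
    print_tree(node.true_branch, spacing + " ")
    print(spacing + "--> False")
    print_tree(node.false_branch, spacing + " ")

Since `my_tree` is the value passed to the first call, that points the search back at `build_tree`.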

– JaFizz
  • It's `None` - `None` is the only instance of `NoneType`. But the fact that `node = None` isn't helpful on its own -- you need to figure out where the value is coming from, which I did in [my answer](https://stackoverflow.com/a/59474854/4518341). (I didn't downvote btw.) – wjandrea Dec 25 '19 at 02:58