i was following this tutorial on decision trees and I tried to recreate it on my own as python project, instead of a notebook, using spyder.
I create different py files where I put different methods and classes, specifically I create a file named tree_structure with the following classes: Leaf, DecisionNode and Question. (I hope it's correct to put more classes in a single py file)
When I tried to use isinstance()
in method "classify" in another py file, I was expecting True instead it returned False:
>>>leaf0
<tree_structure.Leaf at 0x11b7c3450>
>>>leaf0.__class__
tree_structure.Leaf
>>>isinstance(leaf0,Leaf)
False
>>>isinstance(leaf0,tree_structure.Leaf)
False
leaf0 was created iteratively from "build_tree" method (I just saved it to t0 for debugging.. during execution is not saved as a variable) :
t0 = build_tree(train_data)
leaf0 = t0.false_branch.false_branch
I tried also using type(leaf0) is Leaf
instead of isinstance but it still returns False.
Can someone explain me why this happen?
tree_structure.py
class Question:
def __init__(self,header, column, value):
self.column = column
self.value = value
self.header = header
def match(self, example):
# Compare the feature value in an example to the
# feature value in this question.
val = example[self.column]
if is_numeric(val):
return val >= self.value
else:
return val == self.value
def __repr__(self):
# This is just a helper method to print
# the question in a readable format.
condition = "=="
if is_numeric(self.value):
condition = ">="
return "Is %s %s %s?" % (
self.header[self.column], condition, str(self.value))
class Leaf:
def __init__(self, rows):
self.predictions = class_counts(rows)
class DecisionNode:
def __init__(self,
question,
true_branch,
false_branch):
self.question = question
self.true_branch = true_branch
self.false_branch = false_branch
classifier.py
from tree_structure import Question,Leaf,DecisionNode
def classify(row, node):
# Base case: we've reached a leaf
if isinstance(node, Leaf):
print("----")
return node.predictions
# Decide whether to follow the true-branch or the false-branch.
# Compare the feature / value stored in the node,
# to the example we're considering.
if node.question.match(row):
print("yes")
return classify(row, node.true_branch)
else:
print("no")
return classify(row, node.false_branch)
build_tree
def build_tree(rows,header):
"""Builds the tree.
Rules of recursion: 1) Believe that it works. 2) Start by checking
for the base case (no further information gain). 3) Prepare for
giant stack traces.
"""
gain, question = find_best_split(rows,header)
print("--best question is ''{}'' with information gain: {}".format(question,round(gain,2)))
# Base case: no further info gain
# Since we can ask no further questions,
# we'll return a leaf.
if isinstance(rows,pd.DataFrame):
rows = rows.values.tolist()
if gain == 0:
return Leaf(rows)
# If we reach here, we have found a useful feature / value
# to partition on.
true_rows, false_rows = partition(rows, question)
# Recursively build the true branch.
print("\n----TRUE BRANCH----")
true_branch = build_tree(true_rows,header)
# Recursively build the false branch.
print("\n----FALSE BRANCH----")
false_branch = build_tree(false_rows,header)
# Return a Question node.
# This records the best feature / value to ask at this point,
# as well as the branches to follow
# dependingo on the answer.
return DecisionNode(question, true_branch, false_branch)