Convert scikit-learn decision trees to JSON

SKLearn has a function to convert decision trees to “graphviz” (for rendering) but I find JSON more helpful, as you can read it more easily, as well as use it in web apps. The function below will give you JSON.

The reason this is necessary (vs. the `json.dumps` function) is that the decision tree objects don’t support the interfaces the JSON library needs in order to serialize them. Additionally, even if they did, the JSON library in Python dies on very small floating point numbers, which is why it’s not used at all in my version.


def treeToJson(decision_tree, feature_names=None):
  """Render a fitted scikit-learn decision tree as a JSON-formatted string.

  The text is assembled by hand (no ``json`` module) so numpy scalars and
  arrays can be written out directly.

  Args:
    decision_tree: Either an estimator exposing ``tree_`` and ``criterion``
      (e.g. ``DecisionTreeClassifier``) or a bare tree object exposing
      ``value``, ``n_outputs``, ``children_left``, ``children_right``,
      ``feature``, ``threshold``, ``impurity`` and ``n_node_samples``.
    feature_names: Optional sequence indexed by feature number. When omitted,
      the numeric feature index is used in the rule text.

  Returns:
    A string holding one nested JSON object. Split nodes carry ``id``,
    ``rule``, the impurity stored under the criterion name, ``samples``,
    ``left`` and ``right``. Leaves carry ``id``, ``criterion``, ``impurity``,
    ``samples`` and ``value``. Scalar fields are emitted as JSON strings.
  """
  # Sentinel scikit-learn stores in children_left / children_right when a node
  # has no child (sklearn.tree._tree.TREE_LEAF). Kept local so the function
  # does not depend on private scikit-learn module paths.
  TREE_LEAF = -1

  # Duck-type the input instead of isinstance() against sklearn.tree.tree.Tree,
  # a module path that no longer exists in current scikit-learn releases.
  if hasattr(decision_tree, "tree_"):
    tree = decision_tree.tree_
    criterion = decision_tree.criterion
  else:
    tree = decision_tree
    criterion = "impurity"

  # A criterion may be passed as an object rather than a name; fall back to a
  # generic label in that case.
  if not isinstance(criterion, str):
    criterion = "impurity"

  def format_value(node_id):
    """Return the comma-separated contents of the node's JSON value array."""
    value = tree.value[node_id]
    if tree.n_outputs == 1:
      return ', '.join(str(x) for x in value[0, :])
    # Multi-output: one bracketed sub-array per output, so the result stays
    # valid JSON instead of embedding numpy's "[1. 2.]" repr.
    return ', '.join(
        '[' + ', '.join(str(x) for x in row) + ']' for row in value)

  def node_to_str(node_id):
    """Return the key/value text for one node, without the enclosing braces."""
    if tree.children_left[node_id] == TREE_LEAF:
      return '"id": "%s", "criterion": "%s", "impurity": "%s", "samples": "%s", "value": [%s]' \
             % (node_id,
                criterion,
                tree.impurity[node_id],
                tree.n_node_samples[node_id],
                format_value(node_id))

    if feature_names is not None:
      feature = feature_names[tree.feature[node_id]]
    else:
      feature = tree.feature[node_id]
    # Without feature_names this is a numpy integer; the "=" membership test
    # below needs a string.
    feature = str(feature)

    # NOTE(review): names containing "=" are presumably one-hot encoded
    # columns, so the split is shown as "<name> = false" rather than as a
    # numeric threshold -- confirm against the data preparation code.
    if "=" in feature:
      ruleType = "="
      ruleValue = "false"
    else:
      ruleType = "<="
      ruleValue = "%.4f" % tree.threshold[node_id]

    return '"id": "%s", "rule": "%s %s %s", "%s": "%s", "samples": "%s"' \
           % (node_id,
              feature,
              ruleType,
              ruleValue,
              criterion,
              tree.impurity[node_id],
              tree.n_node_samples[node_id])

  def recurse(node_id, depth=0):
    """Return the JSON object text for node_id and everything below it."""
    tabs = "  " * depth
    js = "\n" + tabs + "{\n" + tabs + "  " + node_to_str(node_id)

    left_child = tree.children_left[node_id]
    if left_child != TREE_LEAF:
      right_child = tree.children_right[node_id]
      js = js + ",\n" + \
           tabs + '  "left": ' + recurse(left_child, depth + 1) + ",\n" + \
           tabs + '  "right": ' + recurse(right_child, depth + 1)

    return js + tabs + "\n" + tabs + "}"

  return recurse(0)

3 Replies to “Convert scikit-learn decision trees to JSON”

Here’s my code using your function. I cannot get it to run. Please help, thanks.
——————————————————
# #-*- coding:utf-8 -*-
# import sys
# reload(sys)
# sys.setdefaultencoding(‘utf-8’)
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
import sklearn
import numpy
from IPython.display import display, Image
import pydotplus
from sklearn import tree
from sklearn.tree import _tree
from sklearn import tree
import collections
import drawtree
import os
from sklearn.tree._tree import TREE_LEAF

def treeToJson(decision_tree, feature_names=None):
    """Render a fitted scikit-learn decision tree as a JSON-formatted string.

    This copy repairs the pasted version: typographic quotes are replaced by
    real string delimiters, indentation is restored, the final ``return``
    returns the built string, the stray debug ``print`` is gone, and numeric
    feature indices no longer break the ``"="`` check.

    Args:
        decision_tree: Either an estimator exposing ``tree_`` and
            ``criterion`` (e.g. ``DecisionTreeClassifier``) or a bare tree
            object exposing ``value``, ``n_outputs``, ``children_left``,
            ``children_right``, ``feature``, ``threshold``, ``impurity`` and
            ``n_node_samples``.
        feature_names: Optional sequence indexed by feature number. When
            omitted, the numeric feature index is used in the rule text.

    Returns:
        A string holding one nested JSON object. Split nodes carry ``id``,
        ``rule``, the impurity stored under the criterion name, ``samples``,
        ``left`` and ``right``. Leaves carry ``id``, ``criterion``,
        ``impurity``, ``samples`` and ``value``. Scalar fields are emitted
        as JSON strings.
    """
    # Sentinel scikit-learn stores in children_left / children_right when a
    # node has no child. Kept local so the function does not depend on
    # private scikit-learn module paths.
    leaf_marker = -1

    # Duck-type the input instead of isinstance() against
    # sklearn.tree.tree.Tree, which no longer exists in current scikit-learn.
    if hasattr(decision_tree, "tree_"):
        tree = decision_tree.tree_
        criterion = decision_tree.criterion
    else:
        tree = decision_tree
        criterion = "impurity"

    # A criterion may be passed as an object rather than a name; fall back to
    # a generic label in that case.
    if not isinstance(criterion, str):
        criterion = "impurity"

    def format_value(node_id):
        """Return the comma-separated contents of the node's value array."""
        value = tree.value[node_id]
        if tree.n_outputs == 1:
            return ', '.join(str(x) for x in value[0, :])
        # Multi-output: one bracketed sub-array per output keeps the JSON
        # valid instead of embedding numpy's "[1. 2.]" repr.
        return ', '.join(
            '[' + ', '.join(str(x) for x in row) + ']' for row in value)

    def node_to_str(node_id):
        """Return the key/value text for one node, without the braces."""
        if tree.children_left[node_id] == leaf_marker:
            return '"id": "%s", "criterion": "%s", "impurity": "%s", "samples": "%s", "value": [%s]' \
                   % (node_id,
                      criterion,
                      tree.impurity[node_id],
                      tree.n_node_samples[node_id],
                      format_value(node_id))

        if feature_names is not None:
            feature = feature_names[tree.feature[node_id]]
        else:
            feature = tree.feature[node_id]
        # Without feature_names this is a numpy integer; the "=" membership
        # test below needs a string.
        feature = str(feature)

        # NOTE(review): names containing "=" are presumably one-hot encoded
        # columns, so the split is shown as "<name> = false" -- confirm.
        if "=" in feature:
            ruleType = "="
            ruleValue = "false"
        else:
            ruleType = "<="
            ruleValue = "%.4f" % tree.threshold[node_id]

        return '"id": "%s", "rule": "%s %s %s", "%s": "%s", "samples": "%s"' \
               % (node_id,
                  feature,
                  ruleType,
                  ruleValue,
                  criterion,
                  tree.impurity[node_id],
                  tree.n_node_samples[node_id])

    def recurse(node_id, depth=0):
        """Return the JSON object text for node_id and everything below it."""
        tabs = "  " * depth
        js = "\n" + tabs + "{\n" + tabs + "  " + node_to_str(node_id)

        left_child = tree.children_left[node_id]
        if left_child != leaf_marker:
            right_child = tree.children_right[node_id]
            js = js + ",\n" + \
                tabs + '  "left": ' + recurse(left_child, depth + 1) + ",\n" + \
                tabs + '  "right": ' + recurse(right_child, depth + 1)

        return js + tabs + "\n" + tabs + "}"

    return recurse(0)

def train():
    """Fit a decision tree classifier on a synthetic two-class dataset.

    The data comes from ``make_classification`` (1000 samples, 6 features,
    3 of them informative, fixed seed) and is wrapped in a DataFrame with
    columns ``Feature 1`` .. ``Feature 6`` plus ``Class``.

    Returns:
        A ``(dt, X_train)`` tuple: the fitted ``DecisionTreeClassifier`` and
        the feature DataFrame (without the ``Class`` column) it was fitted on.
    """
    feature_count = 6
    X, y = make_classification(
        n_samples=1000,
        n_features=feature_count,
        n_informative=3,
        n_classes=2,
        random_state=0,
        shuffle=False,
    )

    # One column per generated feature, followed by the label column.
    columns = {
        'Feature %d' % (i + 1): X[:, i] for i in range(feature_count)
    }
    columns['Class'] = y
    df = pd.DataFrame(columns)

    y_train = df['Class']
    X_train = df.drop('Class', axis=1)

    dt = DecisionTreeClassifier(random_state=42)
    dt.fit(X_train, y_train)
    return dt, X_train
# ------ The code above builds the decision tree model ------
# os.environ["PATH"] += os.pathsep + 'C:\\Anaconda3\\Library\\bin\\graphviz'
def draw_file(model, dot_file, png_file, X_train):
    """Export a fitted tree to Graphviz, display it, and save it as a PNG.

    Args:
        model: Fitted tree estimator passed to ``tree.export_graphviz``.
        dot_file: Path the Graphviz DOT description is written to.
        png_file: Path the ``dot`` command writes the rendered PNG to.
        X_train: Object whose ``columns`` attribute supplies feature names.

    Raises:
        subprocess.CalledProcessError: If the ``dot`` command exits non-zero.
    """
    from subprocess import check_call

    tree.export_graphviz(
        model,
        out_file=dot_file,
        feature_names=X_train.columns,
        filled=True,
        rounded=True,
        special_characters=True,
    )

    # Render once in memory for inline display ...
    png_bytes = pydotplus.graph_from_dot_file(dot_file).create_png()
    display(Image(png_bytes))

    # ... and once more through the Graphviz CLI to produce the file on disk.
    check_call(['dot', '-Tpng', dot_file, '-o', png_file])

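# Pruning function (this is not the well-known CCP pruning; instead it checks whether the number of samples remaining in the current subtree reaches a threshold, and prunes the subtree when it falls below that threshold)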
def prune_index(inner_tree, index, threshold):
    """Recursively turn nodes into leaves when their smallest value is low.

    A node is collapsed in place, by setting both of its child links to
    ``TREE_LEAF``, when the minimum entry of ``inner_tree.value[index]`` is
    below ``threshold``. Nodes that keep their children are descended into.

    Args:
        inner_tree: Tree object exposing ``value``, ``children_left`` and
            ``children_right`` arrays; modified in place.
        index: Id of the node to examine.
        threshold: Lower bound for the minimum of the node's ``value``.
    """
    left_links = inner_tree.children_left
    right_links = inner_tree.children_right

    if inner_tree.value[index].min() < threshold:
        # Unlink both children so this node now acts as a leaf.
        left_links[index] = TREE_LEAF
        right_links[index] = TREE_LEAF

    # Nothing below a leaf (whether original or just pruned) to visit.
    if left_links[index] == TREE_LEAF:
        return

    for child in (left_links[index], right_links[index]):
        prune_index(inner_tree, child, threshold)

#***************************************************************

if __name__ == '__main__':
    # Demo run: fit the synthetic-data tree, then print its JSON rendering.
    model, X_train = train()
    model_json = treeToJson(model)
    print("model_json=", model_json)

i had runed it 100000000 times got 000000000000 errors

—————————- no working at all———————
dont waste your time here folks

3 Replies to “Convert scikit-learn decision trees to JSON”

Leave a Reply Cancel reply