from sklearn.ensemble import GradientBoostingClassifier,GradientBoostingRegressor
from sklearn.datasets import load_boston,load_wine
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn import metrics
import math
import numpy as np
def rmse(x, y):
    """Return the root-mean-squared error between ``x`` and ``y``.

    ``x - y`` must support ``** 2`` and expose a ``.mean()`` method
    (NumPy arrays do). The result comes from ``math.sqrt``.
    """
    squared_error = (x - y) ** 2
    return math.sqrt(squared_error.mean())
def print_score(m):
    """Print a list of fit diagnostics for the fitted model ``m``.

    The list holds, in order: train RMSE, test RMSE, ``m.score`` on the
    training split and ``m.score`` on the test split. If the model has an
    ``oob_score_`` attribute, it is appended as a fifth entry.

    Reads the module-level ``X_train``, ``y_train``, ``X_test`` and
    ``y_test`` rather than taking them as arguments.
    """
    splits = ((X_train, y_train), (X_test, y_test))
    # RMSE for both splits first, then the model's own score() for both,
    # keeping the same evaluation and output order as before.
    res = [rmse(m.predict(features), target) for features, target in splits]
    res.extend(m.score(features, target) for features, target in splits)
    if hasattr(m, 'oob_score_'):
        res.append(m.oob_score_)
    print(res)
# Build a regressor with default settings purely to inspect its hyper-parameters.
# NOTE(review): the name `gbc` reads like "classifier", but this is a
# GradientBoostingRegressor.
gbc=GradientBoostingRegressor()
# Notebook-style bare expression: the returned parameter dict is what the
# captured output on the next line shows.
gbc.get_params()
{'alpha': 0.9, 'criterion': 'friedman_mse', 'init': None, 'learning_rate': 0.1, 'loss': 'ls', 'max_depth': 3, 'max_features': None, 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_impurity_split': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 100, 'presort': 'auto', 'random_state': None, 'subsample': 1.0, 'verbose': 0, 'warm_start': False}
# Load the Boston house-price data as a dict-like bunch (not an (X, y) tuple).
# The keys used below are 'data', 'feature_names' and 'target'.
# NOTE(review): load_boston is deprecated/removed in newer scikit-learn
# releases -- confirm the installed version still provides it.
house_price = load_boston()
# Feature matrix as a DataFrame with named columns; target kept as returned.
X_df = pd.DataFrame(house_price['data'], columns=house_price['feature_names'])
y = house_price['target']
# Notebook-style bare expression: preview the first ten rows.
X_df.head(10)
 | CRIM | ZN | INDUS | CHAS | NOX | RM | AGE | DIS | RAD | TAX | PTRATIO | B | LSTAT |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.00632 | 18.0 | 2.31 | 0.0 | 0.538 | 6.575 | 65.2 | 4.0900 | 1.0 | 296.0 | 15.3 | 396.90 | 4.98 |
1 | 0.02731 | 0.0 | 7.07 | 0.0 | 0.469 | 6.421 | 78.9 | 4.9671 | 2.0 | 242.0 | 17.8 | 396.90 | 9.14 |
2 | 0.02729 | 0.0 | 7.07 | 0.0 | 0.469 | 7.185 | 61.1 | 4.9671 | 2.0 | 242.0 | 17.8 | 392.83 | 4.03 |
3 | 0.03237 | 0.0 | 2.18 | 0.0 | 0.458 | 6.998 | 45.8 | 6.0622 | 3.0 | 222.0 | 18.7 | 394.63 | 2.94 |
4 | 0.06905 | 0.0 | 2.18 | 0.0 | 0.458 | 7.147 | 54.2 | 6.0622 | 3.0 | 222.0 | 18.7 | 396.90 | 5.33 |
5 | 0.02985 | 0.0 | 2.18 | 0.0 | 0.458 | 6.430 | 58.7 | 6.0622 | 3.0 | 222.0 | 18.7 | 394.12 | 5.21 |
6 | 0.08829 | 12.5 | 7.87 | 0.0 | 0.524 | 6.012 | 66.6 | 5.5605 | 5.0 | 311.0 | 15.2 | 395.60 | 12.43 |
7 | 0.14455 | 12.5 | 7.87 | 0.0 | 0.524 | 6.172 | 96.1 | 5.9505 | 5.0 | 311.0 | 15.2 | 396.90 | 19.15 |
8 | 0.21124 | 12.5 | 7.87 | 0.0 | 0.524 | 5.631 | 100.0 | 6.0821 | 5.0 | 311.0 | 15.2 | 386.63 | 29.93 |
9 | 0.17004 | 12.5 | 7.87 | 0.0 | 0.524 | 6.004 | 85.9 | 6.5921 | 5.0 | 311.0 | 15.2 | 386.71 | 17.10 |
# Split into train/test with the library defaults.
# NOTE(review): no random_state is passed, so the split (and every score
# below) presumably changes from run to run -- confirm this is intended.
X_train, X_test, y_train, y_test = train_test_split(X_df, y)
# A deliberately small ensemble (10 stages) so individual trees are easy to
# inspect afterwards; fit() returns the estimator itself.
m = GradientBoostingRegressor(n_estimators=10).fit(X_train, y_train)
print_score(m)
[4.446339600725093, 4.433590194075118, 0.7742061036836994, 0.7379744675817033]
# Notebook-style bare expression: show the single regression tree fitted at
# boosting stage 5 (row 5, column 0 of the estimators_ array).
m.estimators_[5, 0]
DecisionTreeRegressor(criterion='friedman_mse', max_depth=3, max_features=None, max_leaf_nodes=None, min_impurity_decrease=0.0, min_impurity_split=None, min_samples_leaf=1, min_samples_split=2, min_weight_fraction_leaf=0.0, presort='auto', random_state=<mtrand.RandomState object at 0x00000000058201B0>, splitter='best')
# Raw output of every stage's tree on the test set, stacked one row per stage.
preds = np.stack([tree.predict(X_test) for tree in m.estimators_[:, 0]])
# For the first test sample: each stage's raw output, their plain sum, and
# the true target.
# NOTE(review): the plain sum is not expected to match the target or
# m.predict(); the ensemble prediction presumably also includes the initial
# estimate and learning-rate scaling -- see the GradientBoostingRegressor docs.
preds[:, 0], preds[:, 0].sum(), y_test[0]
(array([ 0.43779031, -1.09636469, 0.27496642, -1.57691115, -2.57801691, -1.57575104, 0.6471021 , -0.11316941, -0.44972887, -1.11537016]), -7.145453410517382, 19.3)
# Ensemble prediction for the whole test set.
pred=m.predict(X_test)
# Overlay true targets and predictions on one axes, indexed by test-row position.
plt.plot(y_test,label='orig')
plt.plot(pred,label='pred')
plt.legend()
plt.show()
# Notebook-style bare expression: prediction for the first test sample, for
# comparison with the per-stage values inspected earlier.
pred[0]
21.762499513829525
import pydotplus
from sklearn.tree import export_graphviz
from IPython.display import Image,HTML,SVG
from io import StringIO
# Export the first boosting stage's tree (m.estimators_[0][0]) as Graphviz
# DOT text into an in-memory buffer, then parse it into a pydotplus graph.
dot_data = StringIO()
export_graphviz(
    m.estimators_[0][0],
    out_file=dot_data,
    feature_names=house_price['feature_names'],
    filled=True,
    rounded=True,
    special_characters=True,
)
graph = pydotplus.graph_from_dot_data(dot_data.getvalue())
# PNG rendering left disabled; the JPEG path further down is used instead.
#Image(graph.create_png())
import os
import sys
def conda_fix(graph, bin_dir=None):
    """Point ``graph`` at the Graphviz executables bundled with conda on Windows.

    Args:
        graph: Object exposing ``set_graphviz_executables(mapping)``; in this
            file it is the pydotplus graph built from the exported DOT text.
        bin_dir: Directory expected to hold ``dot.exe``, ``twopi.exe``,
            ``neato.exe``, ``circo.exe`` and ``fdp.exe``. Defaults to
            ``<sys.base_exec_prefix>/Library/bin/graphviz``.

    Returns:
        None. ``graph`` is updated in place when executables are found.
    """
    if bin_dir is None:
        bin_dir = os.path.join(sys.base_exec_prefix, "Library", "bin", "graphviz")
    candidates = {
        prog: os.path.join(bin_dir, "{}.exe".format(prog))
        for prog in ("dot", "twopi", "neato", "circo", "fdp")
    }
    # Register only executables that really exist. Handing the graph paths to
    # missing files (e.g. on a non-Windows or non-conda install) would break
    # rendering; leaving it untouched lets the library use its own lookup.
    found = {prog: exe for prog, exe in candidates.items() if os.path.isfile(exe)}
    if found:
        graph.set_graphviz_executables(found)
# Register the conda-bundled Graphviz executables on this graph before rendering.
conda_fix(graph)
# Notebook-style bare expression: render the tree to JPEG bytes and wrap them
# for inline display.
Image(graph.create_jpg())
# Render the tree from boosting stage 1 (m.estimators_[1][0]).
# StringIO, export_graphviz, pydotplus, Image and conda_fix are already in
# scope from the first tree-rendering cell, so the copy-pasted re-imports and
# the identical re-definition of conda_fix are not repeated here.
dot_data = StringIO()
export_graphviz(
    m.estimators_[1][0],
    out_file=dot_data,
    filled=True,
    rounded=True,
    special_characters=True,
    feature_names=house_price['feature_names'],
)
graph = pydotplus.graph_from_dot_data(dot_data.getvalue())
# PNG rendering left disabled; JPEG is used below.
#Image(graph.create_png())
conda_fix(graph)
# Notebook-style bare expression: display the rendered tree inline.
Image(graph.create_jpg())
# Render the tree from boosting stage 2 (m.estimators_[2][0]).
# StringIO, export_graphviz, pydotplus, Image and conda_fix are already in
# scope from the first tree-rendering cell, so the copy-pasted re-imports and
# the identical re-definition of conda_fix are not repeated here.
dot_data = StringIO()
export_graphviz(
    m.estimators_[2][0],
    out_file=dot_data,
    filled=True,
    rounded=True,
    special_characters=True,
    feature_names=house_price['feature_names'],
)
graph = pydotplus.graph_from_dot_data(dot_data.getvalue())
# PNG rendering left disabled; JPEG is used below.
#Image(graph.create_png())
conda_fix(graph)
# Notebook-style bare expression: display the rendered tree inline.
Image(graph.create_jpg())
# Render the tree from the last boosting stage, index 9 (m.estimators_[9][0]).
# StringIO, export_graphviz, pydotplus, Image and conda_fix are already in
# scope from the first tree-rendering cell, so the copy-pasted re-imports and
# the identical re-definition of conda_fix are not repeated here.
dot_data = StringIO()
export_graphviz(
    m.estimators_[9][0],
    out_file=dot_data,
    filled=True,
    rounded=True,
    special_characters=True,
    feature_names=house_price['feature_names'],
)
graph = pydotplus.graph_from_dot_data(dot_data.getvalue())
# PNG rendering left disabled; JPEG is used below.
#Image(graph.create_png())
conda_fix(graph)
# Notebook-style bare expression: display the rendered tree inline.
Image(graph.create_jpg())