TreeDumper.py
import pandas as pd
import numpy as np
from xgboost import XGBClassifier
import sys
def dump_model(model, num_trees=None, output_file=None):
    """Dump the first `num_trees` trees of a fitted XGBClassifier.

    If `output_file` is given, write the dump there; otherwise return it as a string.
    """
    booster = model.get_booster()
    trees = booster.get_dump()
    if num_trees is None:
        num_trees = len(trees)
    tree_separator = "booster[%i]:\n"
    model_str = ""
    for i in range(num_trees):
        # Prefix each tree with the same header xgboost uses in its own dumps.
        model_str += tree_separator % i
        model_str += trees[i]
    if output_file is not None:
        with open(output_file, 'w') as f:
            f.write(model_str)
    else:
        return model_str
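
# Example (a sketch, assuming a fitted XGBClassifier named `fitted_model`):
#   first_three = dump_model(fitted_model, num_trees=3)        # returned as a string
#   dump_model(fitted_model, output_file="trees.txt")          # all trees written to disk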

# Command-line arguments: sys.argv[1] = max_depth, sys.argv[2] = n_estimators,
# sys.argv[3] = path to the feature CSV.
IN_name = sys.argv[3]
X_train = pd.read_csv(IN_name, dtype=np.float32)
# Labels are read from the second column of aging_labels.csv.
y_train = np.ravel(pd.read_csv("aging_labels.csv", usecols=[1], sep=",", dtype=np.float32))
max_d = int(sys.argv[1])
n_est = int(sys.argv[2])

# Fit the gradient-boosted tree classifier.
model = XGBClassifier(learning_rate=0.3, max_depth=max_d, nthread=10, n_estimators=n_est)
model.fit(X_train, y_train)

# Print the non-zero feature importances, highest first.
d = {}
imp_scores = list(model.feature_importances_)
for i in range(len(imp_scores)):
    d[i] = imp_scores[i]
for k in sorted(d, key=d.get, reverse=True):
    if d[k] > 0:
        print(X_train.columns[k], round(d[k], 3))

# Dump all trees of the fitted model to a text file named after the input CSV.
OUT_name = IN_name + "_Trees-n_est" + str(n_est) + "-max_d" + str(max_d) + ".txt"
print(OUT_name)
dump_model(model, num_trees=None, output_file=OUT_name)
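
# Usage sketch (assumed invocation, inferred from the sys.argv indices above;
# features.csv is a hypothetical file name):
#   python TreeDumper.py <max_depth> <n_estimators> <features_csv>
#   e.g.  python TreeDumper.py 6 100 features.csv
# The script expects aging_labels.csv in the working directory and writes the
# tree dump to <features_csv>_Trees-n_est<n_estimators>-max_d<max_depth>.txt.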