forked from dmlc/xgboost
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathplotting.py
More file actions
360 lines (296 loc) · 9.81 KB
/
plotting.py
File metadata and controls
360 lines (296 loc) · 9.81 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
# pylint: disable=too-many-locals, too-many-arguments
# pylint: disable=too-many-branches
"""Plotting Library."""
import json
import warnings
from io import BytesIO
from typing import Any, Optional, Union
import numpy as np
from ._typing import PathLike
from .core import Booster, _deprecate_positional_args
from .sklearn import XGBModel
# matplotlib and graphviz are optional: the functions in this module import
# them lazily inside ``try``/``except ImportError`` blocks.  Their types are
# therefore aliased to ``Any`` here instead of being imported at module level.
Axes = Any  # real type is matplotlib.axes.Axes
GraphvizSource = Any  # real type is graphviz.Source
@_deprecate_positional_args
def plot_importance(
    booster: Union[XGBModel, Booster, dict],
    *,
    ax: Optional[Axes] = None,
    height: float = 0.2,
    xlim: Optional[tuple] = None,
    ylim: Optional[tuple] = None,
    title: str = "Feature importance",
    xlabel: str = "Importance score",
    ylabel: str = "Features",
    fmap: PathLike = "",
    importance_type: str = "weight",
    max_num_features: Optional[int] = None,
    grid: bool = True,
    show_values: bool = True,
    values_format: str = "{v}",
    **kwargs: Any,
) -> Axes:
    """Draw a horizontal bar chart of feature importance scores.

    Parameters
    ----------
    booster :
        Booster or XGBModel instance, or a dict mapping feature name to score
        (such a dict is plotted as-is, without querying any model).
    ax : matplotlib Axes
        Axes to draw on. When None, a new figure and axes are created.
    grid :
        Turn the axes grid on or off. Default is True (on).
    importance_type :
        How the importance is calculated: either "weight", "gain", or "cover"

        * "weight" is the number of times a feature appears in a tree
        * "gain" is the average gain of splits which use the feature
        * "cover" is the average coverage of splits which use the feature
          where coverage is defined as the number of samples affected by the split

        Ignored when `booster` is a dict.
    max_num_features :
        Keep only this many of the highest-scoring features. If None, all
        features are displayed.
    height :
        Bar height, passed to ax.barh()
    xlim :
        2-tuple passed to ax.set_xlim(). When None, the range runs from 0 to
        110% of the largest score.
    ylim :
        2-tuple passed to ax.set_ylim(). When None, the range is
        ``(-1, number_of_bars)``.
    title :
        Axes title. To disable, pass None.
    xlabel :
        X axis title label. To disable, pass None.
    ylabel :
        Y axis title label. To disable, pass None.
    fmap :
        The name of feature map file. Ignored when `booster` is a dict.
    show_values :
        Annotate every bar with its score. Only the literal ``True`` enables
        the annotations.
    values_format :
        Format string for the annotations; ``v`` is replaced by the score.
        e.g. pass "{v:.2f}" to print two digits after the decimal point.
    kwargs :
        Other keywords passed to ax.barh()

    Returns
    -------
    ax : matplotlib Axes

    Raises
    ------
    ImportError
        If matplotlib is not installed.
    ValueError
        If `booster` has an unsupported type, the importance mapping is empty,
        or `xlim` / `ylim` is not a tuple of two elements.
    """
    try:
        import matplotlib.pyplot as plt
    except ImportError as e:
        raise ImportError("You must install matplotlib to plot importance") from e

    # Resolve the input to a plain ``{feature: score}`` mapping.
    if isinstance(booster, XGBModel):
        scores = booster.get_booster().get_score(
            importance_type=importance_type, fmap=fmap
        )
    elif isinstance(booster, Booster):
        scores = booster.get_score(importance_type=importance_type, fmap=fmap)
    elif isinstance(booster, dict):
        scores = booster
    else:
        raise ValueError("tree must be Booster, XGBModel or dict instance")

    if not scores:
        raise ValueError(
            "Booster.get_score() results in empty. "
            "This maybe caused by having all trees as decision dumps."
        )

    # Ascending order puts the most important feature on the top bar, because
    # bar positions grow upwards along the y axis.
    ranked = sorted(scores.items(), key=lambda item: item[1])
    if max_num_features is not None:
        # pylint: disable=invalid-unary-operand-type
        ranked = ranked[-max_num_features:]
    labels, values = zip(*ranked)

    if ax is None:
        _, ax = plt.subplots(1, 1)

    positions = np.arange(len(values))
    ax.barh(positions, values, align="center", height=height, **kwargs)

    if show_values is True:
        for score, position in zip(values, positions):
            # The annotation sits one data unit to the right of the bar end.
            ax.text(
                score + 1,
                float(position),
                values_format.format(v=score),
                va="center",
            )

    ax.set_yticks(positions)
    ax.set_yticklabels(labels)

    if xlim is None:
        xlim = (0, max(values) * 1.1)
    elif not (isinstance(xlim, tuple) and len(xlim) == 2):
        raise ValueError("xlim must be a tuple of 2 elements")
    ax.set_xlim(xlim)

    if ylim is None:
        ylim = (-1, len(values))
    elif not (isinstance(ylim, tuple) and len(ylim) == 2):
        raise ValueError("ylim must be a tuple of 2 elements")
    ax.set_ylim(ylim)

    if title is not None:
        ax.set_title(title)
    if xlabel is not None:
        ax.set_xlabel(xlabel)
    if ylabel is not None:
        ax.set_ylabel(ylabel)
    ax.grid(grid)
    return ax
@_deprecate_positional_args
def to_graphviz(
    booster: Union[Booster, XGBModel],
    *,
    fmap: PathLike = "",
    num_trees: Optional[int] = None,
    rankdir: Optional[str] = None,
    yes_color: Optional[str] = None,
    no_color: Optional[str] = None,
    condition_node_params: Optional[dict] = None,
    leaf_node_params: Optional[dict] = None,
    with_stats: bool = False,
    tree_idx: int = 0,
    **kwargs: Any,
) -> GraphvizSource:
    """Convert specified tree to graphviz instance. IPython can automatically plot
    the returned graphviz instance. Otherwise, you should call .render() method
    of the returned graphviz instance.

    Parameters
    ----------
    booster :
        Booster or XGBModel instance
    fmap :
        The name of feature map file
    num_trees :

        .. deprecated:: 3.0

        Specify the ordinal number of target tree. Use `tree_idx` instead.

    rankdir :
        Passed to graphviz via graph_attr
    yes_color :
        Edge color when meets the node condition.
    no_color :
        Edge color when doesn't meet the node condition.
    condition_node_params :
        Condition node configuration for graphviz.  Example:

        .. code-block:: python

            {'shape': 'box',
             'style': 'filled,rounded',
             'fillcolor': '#78bceb'}

    leaf_node_params :
        Leaf node configuration for graphviz. Example:

        .. code-block:: python

            {'shape': 'box',
             'style': 'filled',
             'fillcolor': '#e48038'}

    with_stats :

        .. versionadded:: 3.0

        Controls whether the split statistics should be included.

    tree_idx :

        .. versionadded:: 3.0

        Specify the ordinal index of target tree.

    kwargs :
        Other keywords passed to graphviz graph_attr, e.g. ``graph [ {key} = {value} ]``

    Returns
    -------
    graph: graphviz.Source

    Raises
    ------
    ImportError
        If graphviz is not installed.
    ValueError
        If both `num_trees` and a non-default `tree_idx` are given and they
        disagree.
    """
    try:
        from graphviz import Source
    except ImportError as e:
        raise ImportError("You must install graphviz to plot tree") from e
    if isinstance(booster, XGBModel):
        booster = booster.get_booster()

    # Every extra keyword is a graph attribute.  Collect them in a fresh dict:
    # the previous in-place rewrite of ``kwargs`` silently dropped the first
    # extra keyword whenever ``rankdir`` was not given.
    graph_attrs: dict = {}
    if rankdir is not None:
        graph_attrs["rankdir"] = rankdir
    graph_attrs.update(kwargs)

    # Assemble the JSON configuration appended to the "dot" dump format.  Keys
    # are inserted in a fixed order so the serialized string is deterministic.
    config: dict = {}
    if graph_attrs:
        config["graph_attrs"] = graph_attrs

    edge = {}
    if yes_color is not None:
        edge["yes_color"] = yes_color
    if no_color is not None:
        edge["no_color"] = no_color
    if edge:
        config["edge"] = edge

    if condition_node_params is not None:
        config["condition_node_params"] = condition_node_params
    if leaf_node_params is not None:
        config["leaf_node_params"] = leaf_node_params

    parameters = "dot"
    if config:
        parameters += ":" + json.dumps(config)

    if num_trees is not None:
        warnings.warn(
            "The `num_trees` parameter is deprecated, use `tree_idx` instead. ",
            FutureWarning,
        )
        # ``tree_idx`` left at its default (0) or equal to ``num_trees`` is not
        # a conflict; anything else means the caller gave two different trees.
        if tree_idx not in (0, num_trees):
            raise ValueError(
                "Both `num_trees` and `tree_idx` are used, prefer `tree_idx` instead."
            )
        tree_idx = num_trees

    dumps = booster.get_dump(
        fmap=fmap, dump_format=parameters, with_stats=with_stats
    )
    return Source(dumps[tree_idx])
@_deprecate_positional_args
def plot_tree(
    booster: Union[Booster, XGBModel],
    *,
    fmap: PathLike = "",
    num_trees: Optional[int] = None,
    rankdir: Optional[str] = None,
    ax: Optional[Axes] = None,
    with_stats: bool = False,
    tree_idx: int = 0,
    **kwargs: Any,
) -> Axes:
    """Render one tree of the model as an image on a matplotlib axes.

    The tree is converted with :py:func:`to_graphviz`, piped to PNG and shown
    with ``ax.imshow``; the axes frame and ticks are switched off.

    Parameters
    ----------
    booster :
        Booster or XGBModel instance
    fmap: str (optional)
        The name of feature map file
    num_trees :

        .. deprecated:: 3.0

        Forwarded to :py:func:`to_graphviz`.

    rankdir : str, default "TB"
        Passed to graphviz via graph_attr
    ax : matplotlib Axes, default None
        Target axes instance. If None, new figure and axes will be created.
    with_stats :

        .. versionadded:: 3.0

        See :py:func:`to_graphviz`.

    tree_idx :

        .. versionadded:: 3.0

        See :py:func:`to_graphviz`.

    kwargs :
        Other keywords passed to :py:func:`to_graphviz`

    Returns
    -------
    ax : matplotlib Axes

    Raises
    ------
    ImportError
        If matplotlib is not installed.
    """
    try:
        import matplotlib.image as mpl_image
        import matplotlib.pyplot as plt
    except ImportError as e:
        raise ImportError("You must install matplotlib to plot tree") from e

    if ax is None:
        _, ax = plt.subplots(1, 1)

    graph = to_graphviz(
        booster,
        fmap=fmap,
        num_trees=num_trees,
        rankdir=rankdir,
        with_stats=with_stats,
        tree_idx=tree_idx,
        **kwargs,
    )

    # Decode the rendered PNG from an in-memory buffer; nothing touches disk.
    png_stream = BytesIO(graph.pipe(format="png"))
    ax.imshow(mpl_image.imread(png_stream))
    ax.axis("off")
    return ax