forked from dmlc/xgboost
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathtest_linear.py
More file actions
108 lines (95 loc) · 3.72 KB
/
test_linear.py
File metadata and controls
108 lines (95 loc) · 3.72 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
from typing import Dict
from hypothesis import given, note, settings, strategies
import xgboost as xgb
from xgboost import testing as tm
pytestmark = tm.timeout(20)
parameter_strategy = strategies.fixed_dictionaries({
'booster': strategies.just('gblinear'),
'eta': strategies.floats(0.01, 0.25),
'tolerance': strategies.floats(1e-5, 1e-2),
'nthread': strategies.integers(1, 4),
})
coord_strategy = strategies.fixed_dictionaries({
'feature_selector': strategies.sampled_from(['cyclic', 'shuffle',
'greedy', 'thrifty']),
'top_k': strategies.integers(1, 10),
})
def train_result(param: dict, dmat: xgb.DMatrix, num_rounds: int) -> Dict[str, Dict]:
result: Dict[str, Dict] = {}
xgb.train(
param,
dmat,
num_rounds,
evals=[(dmat, "train")],
verbose_eval=False,
evals_result=result,
)
return result
class TestLinear:
@given(
parameter_strategy,
strategies.integers(10, 50),
tm.make_dataset_strategy(),
coord_strategy
)
@settings(deadline=None, max_examples=20, print_blob=True)
def test_coordinate(self, param, num_rounds, dataset, coord_param):
param['updater'] = 'coord_descent'
param.update(coord_param)
param = dataset.set_params(param)
result = train_result(param, dataset.get_dmat(), num_rounds)['train'][dataset.metric]
note(result)
assert tm.non_increasing(result, 5e-4)
# Loss is not guaranteed to always decrease because of regularisation parameters
# We test a weaker condition that the loss has not increased between the first and last
# iteration
@given(
parameter_strategy,
strategies.integers(10, 50),
tm.make_dataset_strategy(),
coord_strategy,
strategies.floats(1e-5, 0.8),
strategies.floats(1e-5, 0.8)
)
@settings(deadline=None, max_examples=20, print_blob=True)
def test_coordinate_regularised(self, param, num_rounds, dataset, coord_param, alpha, lambd):
param['updater'] = 'coord_descent'
param['alpha'] = alpha
param['lambda'] = lambd
param.update(coord_param)
param = dataset.set_params(param)
result = train_result(param, dataset.get_dmat(), num_rounds)['train'][dataset.metric]
note(result)
assert tm.non_increasing([result[0], result[-1]])
@given(
parameter_strategy, strategies.integers(10, 50), tm.make_dataset_strategy()
)
@settings(deadline=None, max_examples=20, print_blob=True)
def test_shotgun(self, param, num_rounds, dataset):
param['updater'] = 'shotgun'
param = dataset.set_params(param)
result = train_result(param, dataset.get_dmat(), num_rounds)['train'][dataset.metric]
note(result)
# shotgun is non-deterministic, so we relax the test by only using first and last
# iteration.
if len(result) > 2:
sampled_result = (result[0], result[-1])
else:
sampled_result = result
assert tm.non_increasing(sampled_result)
@given(
parameter_strategy,
strategies.integers(10, 50),
tm.make_dataset_strategy(),
strategies.floats(1e-5, 1.0),
strategies.floats(1e-5, 1.0)
)
@settings(deadline=None, max_examples=20, print_blob=True)
def test_shotgun_regularised(self, param, num_rounds, dataset, alpha, lambd):
param['updater'] = 'shotgun'
param['alpha'] = alpha
param['lambda'] = lambd
param = dataset.set_params(param)
result = train_result(param, dataset.get_dmat(), num_rounds)['train'][dataset.metric]
note(result)
assert tm.non_increasing([result[0], result[-1]])