Skip to content

Commit 3654eca

Browse files
AndreyOrb, jameslamb, StrikerRUS
authored
[c++] Fixed Predictor lifecycle and trees initialization in Contrib mode (#6778)
* 1) Fixed Predictor lifecycle 2) Fixed Boosting trees initialization #5482
* Added tests for LGBM_BoosterPredictForMat in Contrib mode
* #6778 Reverted indentation to 4 spaces
---------
Co-authored-by: James Lamb <jaylamb20@gmail.com>
Co-authored-by: Nikita Titov <nekit94-08@mail.ru>
1 parent 226e7f7 commit 3654eca

File tree

3 files changed

+72
-17
lines changed

3 files changed

+72
-17
lines changed

src/boosting/gbdt.h

+12-1
Original file line numberDiff line numberDiff line change
@@ -433,11 +433,18 @@ class GBDT : public GBDTBase {
433433
num_iteration_for_pred_ = num_iteration_for_pred_ - start_iteration;
434434
}
435435
start_iteration_for_pred_ = start_iteration;
436-
if (is_pred_contrib) {
436+
437+
if (is_pred_contrib && !models_initialized_) {
438+
std::lock_guard<std::mutex> lock(instance_mutex_);
439+
if (models_initialized_)
440+
return;
441+
437442
#pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
438443
for (int i = 0; i < static_cast<int>(models_.size()); ++i) {
439444
models_[i]->RecomputeMaxDepth();
440445
}
446+
447+
models_initialized_ = true;
441448
}
442449
}
443450

@@ -548,6 +555,10 @@ class GBDT : public GBDTBase {
548555
int max_feature_idx_;
549556
/*! \brief Parser config file content */
550557
std::string parser_config_str_ = "";
558+
/*! \brief Are the models initialized (passed RecomputeMaxDepth phase) */
559+
bool models_initialized_ = false;
560+
/*! \brief Mutex for exclusive models initialization */
561+
std::mutex instance_mutex_;
551562

552563
#ifdef USE_CUDA
553564
/*! \brief First order derivative of training data */

src/c_api.cpp

+5-5
Original file line numberDiff line numberDiff line change
@@ -460,7 +460,7 @@ class Booster {
460460
*out_len = single_row_predictor->num_pred_in_one_row;
461461
}
462462

463-
Predictor CreatePredictor(int start_iteration, int num_iteration, int predict_type, int ncol, const Config& config) const {
463+
std::shared_ptr<Predictor> CreatePredictor(int start_iteration, int num_iteration, int predict_type, int ncol, const Config& config) const {
464464
if (!config.predict_disable_shape_check && ncol != boosting_->MaxFeatureIdx() + 1) {
465465
Log::Fatal("The number of features in data (%d) is not the same as it was in training data (%d).\n" \
466466
"You can set ``predict_disable_shape_check=true`` to discard this error, but please be aware what you are doing.", ncol, boosting_->MaxFeatureIdx() + 1);
@@ -478,7 +478,7 @@ class Booster {
478478
is_raw_score = false;
479479
}
480480

481-
return Predictor(boosting_.get(), start_iteration, num_iteration, is_raw_score, is_predict_leaf, predict_contrib,
481+
return std::make_shared<Predictor>(boosting_.get(), start_iteration, num_iteration, is_raw_score, is_predict_leaf, predict_contrib,
482482
config.pred_early_stop, config.pred_early_stop_freq, config.pred_early_stop_margin);
483483
}
484484

@@ -496,7 +496,7 @@ class Booster {
496496
predict_contrib = true;
497497
}
498498
int64_t num_pred_in_one_row = boosting_->NumPredictOneRow(start_iteration, num_iteration, is_predict_leaf, predict_contrib);
499-
auto pred_fun = predictor.GetPredictFunction();
499+
auto pred_fun = predictor->GetPredictFunction();
500500
OMP_INIT_EX();
501501
#pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
502502
for (int i = 0; i < nrow; ++i) {
@@ -517,7 +517,7 @@ class Booster {
517517
int32_t** out_indices, void** out_data, int data_type,
518518
bool* is_data_float32_ptr, int num_matrices) const {
519519
auto predictor = CreatePredictor(start_iteration, num_iteration, predict_type, ncol, config);
520-
auto pred_sparse_fun = predictor.GetPredictSparseFunction();
520+
auto pred_sparse_fun = predictor->GetPredictSparseFunction();
521521
std::vector<std::vector<std::unordered_map<int, double>>>& agg = *agg_ptr;
522522
OMP_INIT_EX();
523523
#pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
@@ -652,7 +652,7 @@ class Booster {
652652
// Get the number of trees per iteration (for multiclass scenario we output multiple sparse matrices)
653653
int num_matrices = boosting_->NumModelPerIteration();
654654
auto predictor = CreatePredictor(start_iteration, num_iteration, predict_type, ncol, config);
655-
auto pred_sparse_fun = predictor.GetPredictSparseFunction();
655+
auto pred_sparse_fun = predictor->GetPredictSparseFunction();
656656
bool is_col_ptr_int32 = false;
657657
bool is_data_float32 = false;
658658
int num_output_cols = ncol + 1;

tests/cpp_tests/test_single_row.cpp

+55-11
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212

1313
using LightGBM::TestUtils;
1414

15-
TEST(SingleRow, JustWorks) {
15+
void test_predict_type(int predict_type, int num_predicts) {
1616
// Load some test data
1717
int result;
1818

@@ -37,17 +37,19 @@ TEST(SingleRow, JustWorks) {
3737
booster_handle,
3838
&n_features);
3939
EXPECT_EQ(0, result) << "LGBM_BoosterGetNumFeature result code: " << result;
40+
EXPECT_EQ(28, n_features) << "LGBM_BoosterGetNumFeature number of features: " << n_features;
4041

4142
// Run a single row prediction and compare with regular Mat prediction:
4243
int64_t output_size;
4344
result = LGBM_BoosterCalcNumPredict(
4445
booster_handle,
4546
1,
46-
C_API_PREDICT_NORMAL, // predict_type
47+
predict_type, // predict_type
4748
0, // start_iteration
4849
-1, // num_iteration
4950
&output_size);
5051
EXPECT_EQ(0, result) << "LGBM_BoosterCalcNumPredict result code: " << result;
52+
EXPECT_EQ(num_predicts, output_size) << "LGBM_BoosterCalcNumPredict output size: " << output_size;
5153

5254
std::ifstream test_file("examples/binary_classification/binary.test");
5355
std::vector<double> test;
@@ -77,21 +79,55 @@ TEST(SingleRow, JustWorks) {
7779
test_set_size, // nrow
7880
n_features, // ncol
7981
1, // is_row_major
80-
C_API_PREDICT_NORMAL, // predict_type
82+
predict_type, // predict_type
8183
0, // start_iteration
8284
-1, // num_iteration
8385
"",
8486
&written,
8587
&mat_output[0]);
8688
EXPECT_EQ(0, result) << "LGBM_BoosterPredictForMat result code: " << result;
8789

88-
// Now let's run with the single row fast prediction API:
90+
// Test LGBM_BoosterPredictForMat in multi-threaded mode
8991
const int kNThreads = 10;
92+
const int numIterations = 5;
93+
std::vector<std::thread> predict_for_mat_threads(kNThreads);
94+
for (int i = 0; i < kNThreads; i++) {
95+
predict_for_mat_threads[i] = std::thread(
96+
[
97+
i, test_set_size, output_size, n_features,
98+
test = &test[0], booster_handle, predict_type, numIterations
99+
]() {
100+
for (int j = 0; j < numIterations; j++) {
101+
int result;
102+
std::vector<double> mat_output(output_size * test_set_size, -1);
103+
int64_t written;
104+
result = LGBM_BoosterPredictForMat(
105+
booster_handle,
106+
&test[0],
107+
C_API_DTYPE_FLOAT64,
108+
test_set_size, // nrow
109+
n_features, // ncol
110+
1, // is_row_major
111+
predict_type, // predict_type
112+
0, // start_iteration
113+
-1, // num_iteration
114+
"",
115+
&written,
116+
&mat_output[0]);
117+
EXPECT_EQ(0, result) << "LGBM_BoosterPredictForMat result code: " << result;
118+
}
119+
});
120+
}
121+
for (std::thread& t : predict_for_mat_threads) {
122+
t.join();
123+
}
124+
125+
// Now let's run with the single row fast prediction API:
90126
FastConfigHandle fast_configs[kNThreads];
91127
for (int i = 0; i < kNThreads; i++) {
92128
result = LGBM_BoosterPredictForMatSingleRowFastInit(
93129
booster_handle,
94-
C_API_PREDICT_NORMAL, // predict_type
130+
predict_type, // predict_type
95131
0, // start_iteration
96132
-1, // num_iteration
97133
C_API_DTYPE_FLOAT64,
@@ -102,14 +138,14 @@ TEST(SingleRow, JustWorks) {
102138
}
103139

104140
std::vector<double> single_row_output(output_size * test_set_size, -1);
105-
std::vector<std::thread> threads(kNThreads);
141+
std::vector<std::thread> single_row_threads(kNThreads);
106142
int batch_size = (test_set_size + kNThreads - 1) / kNThreads; // round up
107143
for (int i = 0; i < kNThreads; i++) {
108-
threads[i] = std::thread(
144+
single_row_threads[i] = std::thread(
109145
[
110146
i, batch_size, test_set_size, output_size, n_features,
111-
test = &test[0], fast_configs = &fast_configs[0], single_row_output = &single_row_output[0]
112-
](){
147+
test = &test[0], fast_configs = &fast_configs[0], single_row_output = &single_row_output[0]
148+
]() {
113149
int result;
114150
int64_t written;
115151
for (int j = i * batch_size; j < std::min((i + 1) * batch_size, test_set_size); j++) {
@@ -122,8 +158,8 @@ TEST(SingleRow, JustWorks) {
122158
EXPECT_EQ(written, output_size) << "LGBM_BoosterPredictForMatSingleRowFast unexpected written output size";
123159
}
124160
});
125-
}
126-
for (std::thread &t : threads) {
161+
}
162+
for (std::thread& t : single_row_threads) {
127163
t.join();
128164
}
129165

@@ -141,3 +177,11 @@ TEST(SingleRow, JustWorks) {
141177
result = LGBM_DatasetFree(train_dataset);
142178
EXPECT_EQ(0, result) << "LGBM_DatasetFree result code: " << result;
143179
}
180+
181+
TEST(SingleRow, Normal) {
182+
test_predict_type(C_API_PREDICT_NORMAL, 1);
183+
}
184+
185+
TEST(SingleRow, Contrib) {
186+
test_predict_type(C_API_PREDICT_CONTRIB, 29);
187+
}

0 commit comments

Comments
 (0)