Merge remote-tracking branch 'remotes/internal/release_1.0.4'
daisyden committed Sep 14, 2017
2 parents 85749df + d9d52b7 commit c6e2503
Showing 34 changed files with 4,877 additions and 278 deletions.
8 changes: 7 additions & 1 deletion Makefile
@@ -80,7 +80,7 @@ ifeq ($(CAFFE_MLSL_SHUFFLE), 1)
COMMON_FLAGS += -DCAFFE_MLSL_SHUFFLE
endif

-ifeq ($(FW_OVERLAP_OPT), 1)
+ifneq ($(FW_OVERLAP_OPT), 0)
COMMON_FLAGS += -DFW_OVERLAP_OPT
endif
endif
@@ -547,6 +547,12 @@ LIBRARY_DIRS += $(LIB_BUILD_DIR)
# Automatic dependency generation (nvcc is handled separately)
CXXFLAGS += -MMD -MP

+##########SGD FUSION#######################
+ifeq ($(ENABLE_SGD_FUSION), 1)
+COMMON_FLAGS += -DENABLE_SGD_FUSION
+endif
+###########################################
#
# Complete build flags.
COMMON_FLAGS += $(foreach includedir,$(INCLUDE_DIRS),-I$(includedir))
CXXFLAGS += -std=c++11 -pthread -fPIC $(COMMON_FLAGS) $(WARNINGS)
3 changes: 3 additions & 0 deletions Makefile.config.example
@@ -170,5 +170,8 @@ DISTRIBUTE_DIR := distribute
# The ID of the GPU that 'make runtest' will use to run unit tests.
TEST_GPUID := 0

+# Uncomment for enabling SGD fusion
+# ENABLE_SGD_FUSION := 1

# enable pretty build (comment to see full commands)
Q ?= @
8 changes: 4 additions & 4 deletions Makefile.mkldnn
@@ -1,5 +1,5 @@
CAFFE_ROOTDIR := $(shell pwd)
-MKLDNN_ROOTDIR := external/mkldnn
+MKLDNN_ROOTDIR := $(CAFFE_ROOTDIR)/external/mkldnn
MKLDNN_TMPDIR := $(MKLDNN_ROOTDIR)/tmp
MKLDNN_SRCDIR := $(MKLDNN_ROOTDIR)/src
MKLDNN_BUILDDIR := $(MKLDNN_ROOTDIR)/build
@@ -22,7 +22,7 @@ ifneq (,$(findstring ccache,$(CC)))
endif

MKLDNN_GITHUB := https://github.com/01org/mkl-dnn.git
-MKLDNN_CMAKE_FLAGS += $(MKLDNN_SRCDIR) -DCMAKE_INSTALL_PREFIX=$(CAFFE_ROOTDIR)/$(MKLDNN_INSTALLDIR) -DMKLROOT=${MKL_ROOTDIR} -B$(CAFFE_ROOTDIR)/$(MKLDNN_BUILDDIR) -DCMAKE_CXX_COMPILER="$(MKLDNN_CXX)" -DCMAKE_C_COMPILER="$(MKLDNN_CC)"
+MKLDNN_CMAKE_FLAGS += $(MKLDNN_SRCDIR) -DCMAKE_INSTALL_PREFIX=$(MKLDNN_INSTALLDIR) -DMKLROOT=${MKL_ROOTDIR} -B$(MKLDNN_BUILDDIR) -DCMAKE_CXX_COMPILER="$(MKLDNN_CXX)" -DCMAKE_C_COMPILER="$(MKLDNN_CC)"

ifeq ("$(wildcard $(MKLDNN_INSTALLDIR)/include/mkldnn.hpp)", "")
mkldnn_download:
@@ -32,8 +32,8 @@ mkldnn_download:

mkldnn_build: mkldnn_download
cmake $(MKLDNN_CMAKE_FLAGS)
-make -C $(CAFFE_ROOTDIR)/$(MKLDNN_BUILDDIR) -j$(shell cat /proc/cpuinfo |grep 'processor'|wc -l)
-make -C $(CAFFE_ROOTDIR)/$(MKLDNN_BUILDDIR) install
+make -C $(MKLDNN_BUILDDIR) -j$(shell cat /proc/cpuinfo |grep 'processor'|wc -l)
+make -C $(MKLDNN_BUILDDIR) install
else
mkldnn_download:
mkldnn_build:
2 changes: 1 addition & 1 deletion cmake/Dependencies.cmake
@@ -122,7 +122,7 @@ if(USE_MLSL)
if(CAFFE_MLSL_SHUFFLE)
add_definitions("-DCAFFE_MLSL_SHUFFLE")
endif()
-if(FW_OVERLAP_OPT)
+if(FW_OVERLAP_OPT OR NOT DEFINED FW_OVERLAP_OPT)
message(STATUS "Forward overlapping optimization is enabled!")
add_definitions("-DFW_OVERLAP_OPT")
endif()
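Note on the two changes above: flipping the Makefile test from ifeq(..., 1) to ifneq(..., 0), together with the CMake condition "FW_OVERLAP_OPT OR NOT DEFINED FW_OVERLAP_OPT", makes the forward-overlap optimization opt-out rather than opt-in: -DFW_OVERLAP_OPT is now defined unless the flag is explicitly set to 0/OFF. A minimal sketch of how such a compile-time switch gates a code path (illustrative only; wait_for_weights_if_needed and run_forward are hypothetical names, not the actual solver API):

// Illustrative sketch, not the actual multinode solver code.
void forward_layer(int layer_id) {
#ifdef FW_OVERLAP_OPT
  // With the flag on by default, a layer's forward pass can start while
  // earlier layers' weight updates are still in flight; each layer waits
  // only for its own weights.
  wait_for_weights_if_needed(layer_id);  // hypothetical helper
#endif
  run_forward(layer_id);                 // hypothetical helper
}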
4 changes: 4 additions & 0 deletions examples/cpp_classification/batch_classification.cpp
@@ -422,6 +422,10 @@ int main(int argc, char** argv) {
cout<<"Use mean file: "<<FLAGS_mean_file<<endl;
}

+#ifdef USE_MLSL
+caffe::mn::init(&argc,&argv);
+#endif

Classifier classifier(FLAGS_model, FLAGS_weights, FLAGS_mean_file,
FLAGS_mean_value, FLAGS_label_file, FLAGS_engine, FLAGS_batch_size);

4 changes: 4 additions & 0 deletions examples/cpp_classification/classification.cpp
@@ -285,6 +285,10 @@ int main(int argc, char** argv) {
engine = argv[6];
}

+#ifdef USE_MLSL
+caffe::mn::init(&argc,&argv);
+#endif

Classifier classifier(model_file, trained_file, mean_file, label_file, engine);


2 changes: 1 addition & 1 deletion examples/pycaffe/tune_model.py
@@ -23,7 +23,7 @@ def tuneModelDefinition(model_path, iteration):
caffe_path = os.path.join(working_dir, "..", "..", "build", "tools", "caffe")
if not os.path.exists(caffe_path):
print "Caffe binary does not exist; please build Caffe binary first."
-sys,exit(1)
+sys.exit(1)

base_model_name = os.path.basename(model_path)
model_dir = os.path.dirname(model_path)
6 changes: 3 additions & 3 deletions include/caffe/blob.hpp
@@ -109,7 +109,7 @@ class Blob {
return shape_[CanonicalAxisIndex(index)];
}
inline int num_axes() const { return shape_.size(); }
-inline int count() const { return count_; }
+inline long count() const { return count_; }

/**
* @brief Compute the volume of a slice; i.e., the product of dimensions
@@ -332,8 +332,8 @@ class Blob {
shared_ptr<SyncedMemory> shape_data_;
#endif
vector<int> shape_;
-int count_;
-int capacity_;
+long count_;
+long capacity_;

DISABLE_COPY_AND_ASSIGN(Blob);
}; // class Blob
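The count()/count_/capacity_ widening from int to long guards against 32-bit overflow: a blob's element count is the product of its dimensions, and for large batches or fused buffers that product can exceed INT_MAX (2,147,483,647). A self-contained illustration of the failure mode (the shape is hypothetical; long is 64-bit on the LP64 Linux targets this Makefile builds for):

#include <climits>
#include <iostream>
#include <vector>

int main() {
  std::vector<int> shape = {256, 512, 129, 129};   // hypothetical large blob
  long count = 1;
  for (int dim : shape) count *= dim;              // accumulate in 64 bits
  std::cout << "elements: " << count << "\n";      // 2181169152
  std::cout << "fits in int? "
            << (count <= INT_MAX ? "yes" : "no") << "\n";  // no
  return 0;
}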
13 changes: 9 additions & 4 deletions include/caffe/layer.hpp
@@ -55,8 +55,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define LOG_BLOB(layer, blob, part, blob_id, description) \
do \
{ \
-int elems_to_log = std::min(MAX_ELEMS_TO_LOG, blob->count()); \
-for (int idx = 0; idx < elems_to_log; idx++) \
+long elems_to_log = std::min(static_cast<long>(MAX_ELEMS_TO_LOG), blob->count()); \
+for (long idx = 0; idx < elems_to_log; idx++) \
{ \
LOG_LAYER(layer) << description \
<< ", blob_id " << blob_id \
@@ -68,8 +68,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define LOG_PARAM_BLOB(blob, part, blob_id, description) \
do \
{ \
-int elems_to_log = std::min(MAX_ELEMS_TO_LOG, blob->count()); \
-for (int idx = 0; idx < elems_to_log; idx++) \
+long elems_to_log = std::min(static_cast<long>(MAX_ELEMS_TO_LOG), blob->count()); \
+for (long idx = 0; idx < elems_to_log; idx++) \
{ \
DLOG(INFO) << description \
<< ", blob_id " << blob_id \
@@ -521,7 +521,12 @@ class Layer {
CHECK_EQ(top.size(), num_loss_weights) << "loss_weight must be "
"unspecified or specified once per top blob.";
for (int top_id = 0; top_id < top.size(); ++top_id) {
+#ifdef USE_MLSL
+const Dtype loss_weight = layer_param_.loss_weight(top_id) /
+GetDistribution().get_data_parts();
+#else
const Dtype loss_weight = layer_param_.loss_weight(top_id);
+#endif
if (loss_weight == Dtype(0)) { continue; }
this->set_loss(top_id, loss_weight);
const int count = top[top_id]->count();
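The USE_MLSL branch above divides each top blob's loss_weight by GetDistribution().get_data_parts(). The reason: under MLSL data parallelism each part computes gradients over its slice of the global batch and the allreduce sums them, so pre-scaling the loss weight by 1/N keeps the effective gradient scale equal to the single-node case. A small numeric sketch (values hypothetical):

#include <iostream>

int main() {
  const double loss_weight = 1.0;   // per-top-blob weight from the prototxt
  const int data_parts = 4;         // what get_data_parts() would return
  const double per_part_grad[4] = {0.9, 1.1, 1.0, 1.2};  // hypothetical dL_i
  double summed = 0.0;
  for (int i = 0; i < data_parts; ++i)
    summed += (loss_weight / data_parts) * per_part_grad[i];  // allreduce SUM
  std::cout << summed << "\n";  // 1.05, the mean of dL_i, matching one node
  return 0;
}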
8 changes: 8 additions & 0 deletions include/caffe/layers/batch_norm_layer.hpp
@@ -117,11 +117,19 @@ class BatchNormLayer : public Layer<Dtype> {
const Dtype* data_to_be_replicated,
FuncTy op_func);

+void ForwardStatsBatch_cpu(const vector<Blob<Dtype>*>& bottom,
+const vector<Blob<Dtype>*>& top, int stats_batch_idx);
+void BackwardStatsBatch_cpu(const vector<Blob<Dtype>*>& top,
+const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom,
+int stats_batch_idx);

Blob<Dtype> mean_, variance_, temp_, x_norm_;
bool use_global_stats_;
Dtype moving_average_fraction_;
int channels_;
Dtype eps_;
+int num_stats_batches_;
+int stats_batch_size_;

// extra temporarary variables is used to carry out sums/broadcasting
// using BLAS
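The new ForwardStatsBatch_cpu/BackwardStatsBatch_cpu hooks and the num_stats_batches_/stats_batch_size_ members let batch-norm statistics be computed over sub-batches of the minibatch rather than the whole thing, so a large per-node batch need not change BN behavior. A minimal sketch of per-sub-batch mean computation, assuming the batch divides evenly (names and data layout are illustrative):

#include <vector>

// data is [num * dim], one dim-sized row per image; one mean per sub-batch.
void per_stats_batch_mean(const std::vector<float>& data, int num, int dim,
                          int stats_batch_size, std::vector<float>* means) {
  int num_stats_batches = num / stats_batch_size;   // assume exact division
  means->assign(num_stats_batches, 0.f);
  for (int b = 0; b < num_stats_batches; ++b) {
    double sum = 0.0;
    for (int n = b * stats_batch_size; n < (b + 1) * stats_batch_size; ++n)
      for (int d = 0; d < dim; ++d)
        sum += data[n * dim + d];
    (*means)[b] = static_cast<float>(sum / (double(stats_batch_size) * dim));
  }
}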
18 changes: 13 additions & 5 deletions include/caffe/layers/mkl_layers.hpp
@@ -481,12 +481,12 @@ class MKLBatchNormLayer : public Layer<Dtype> {
batchNormFwd(static_cast<dnnPrimitive_t>(NULL)),
batchNormFwdInference(static_cast<dnnPrimitive_t>(NULL)),
batchNormBwd(static_cast<dnnPrimitive_t>(NULL)),
-mean_buffer_(static_cast<Dtype*>(NULL)),
-variance_buffer_(static_cast<Dtype*>(NULL)),
scaleShift_buffer_(static_cast<Dtype*>(NULL)),
diffScaleShift_buffer_(static_cast<Dtype*>(NULL)),
layout_usr_(static_cast<dnnLayout_t>(NULL)),
-use_global_stats_(false)
+use_global_stats_(false),
+num_stats_batches_(1),
+stats_batch_size_(0)
{
PERFORMANCE_EVENT_ID_RESET(perf_id_fw_);
PERFORMANCE_EVENT_ID_RESET(perf_id_bw_);
@@ -515,6 +515,12 @@ class MKLBatchNormLayer : public Layer<Dtype> {
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

+void ForwardStatsBatch_cpu(const vector<Blob<Dtype>*>& bottom,
+const vector<Blob<Dtype>*>& top, int stats_batch_idx);
+void BackwardStatsBatch_cpu(const vector<Blob<Dtype>*>& top,
+const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom,
+int stats_batch_idx);

void Init(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);

@@ -534,12 +540,14 @@ class MKLBatchNormLayer : public Layer<Dtype> {
shared_ptr<MKLDiff<Dtype> > bwd_bottom_diff;
Blob<Dtype> temp_;
dnnPrimitive_t batchNormFwd, batchNormFwdInference, batchNormBwd;
-Dtype *mean_buffer_;
-Dtype *variance_buffer_;
+vector<Dtype *> mean_buffers_;
+vector<Dtype *> variance_buffers_;
Dtype *scaleShift_buffer_;
Dtype *diffScaleShift_buffer_;
dnnLayout_t layout_usr_;
bool use_global_stats_;
+int num_stats_batches_;
+int stats_batch_size_;

PERFORMANCE_EVENT_ID_DECL(perf_id_fw_);
PERFORMANCE_EVENT_ID_DECL(perf_id_bw_);
15 changes: 12 additions & 3 deletions include/caffe/layers/mkldnn_layers.hpp
@@ -68,7 +68,6 @@ class MKLDNNBatchNormLayer : public MKLDNNLayer<Dtype>, public Layer<Dtype> {
, fwd_top_data(), fwd_bottom_data()
, bwd_top_diff(), bwd_bottom_diff()
, BatchNormFwd_pd(), BatchNormBwd_pd()
-, mean_memory(), variance_memory()
, scaleshift_memory(), bwd_scaleshift_diff_memory()
, output_memory(), bwd_bottom_diff_memory()
, input_primitive(), bwd_top_diff_primitive()
@@ -96,22 +95,32 @@ class MKLDNNBatchNormLayer : public MKLDNNLayer<Dtype>, public Layer<Dtype> {
void InitBatchNormBwd(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down,
const vector<Blob<Dtype>*>& bottom);
+void InitBatchNormFwdPrimitive(int stats_batch_idx);
+void InitBatchNormBwdPrimitive(int stats_batch_idx);
+template <bool diff> shared_ptr<memory> GetStatsBatchMemory(
+shared_ptr<MKLDNNMemoryDescriptor<Dtype, diff> > mkldnn_data, int idx);
+void InitStatsBatchVars(int batch_size);
shared_ptr<MKLDNNData<Dtype> > fwd_top_data, fwd_bottom_data;
shared_ptr<MKLDNNDiff<Dtype> > bwd_top_diff, bwd_bottom_diff;
shared_ptr<batch_normalization_forward::primitive_desc> BatchNormFwd_pd;
shared_ptr<batch_normalization_backward::primitive_desc> BatchNormBwd_pd;

-MKLDNNPrimitive<Dtype> BatchNormFwd, BatchNormBwd;
-shared_ptr<memory> mean_memory, variance_memory;
+vector<MKLDNNPrimitive<Dtype> > BatchNormFwd, BatchNormBwd;
+vector<shared_ptr<memory> > mean_memory, variance_memory;

shared_ptr<memory> scaleshift_memory, bwd_scaleshift_diff_memory;
shared_ptr<memory> output_memory, bwd_bottom_diff_memory;
+vector<shared_ptr<memory> > input_stats, output_stats, top_diff_stats, bottom_diff_stats;

shared_ptr<primitive> input_primitive, bwd_top_diff_primitive;

int32_t num_, width_, height_, channels_;
Dtype eps_, moving_average_fraction_;
bool use_weight_bias_, bias_term_, use_global_stats_;
+int num_stats_batches_;
+int stats_batch_size_;
+shared_ptr<Blob<Dtype> > scaleshift_blob_;
+shared_ptr<Blob<Dtype> > scaleshift_acc_;

PERFORMANCE_EVENT_ID_DECL(perf_id_fw_);
PERFORMANCE_EVENT_ID_DECL(perf_id_bw_);
3 changes: 3 additions & 0 deletions include/caffe/mkldnn_memory.hpp
@@ -94,6 +94,7 @@ class MKLDNNMemoryDescriptorBase : public PrvMemDescr
if (_prv_memory == NULL) allocate();
return _internal_ptr;
}

shared_ptr<primitive> reorder_usr2prv() { return _reorder_usr2prv.aprimitive; }
shared_ptr<primitive> reorder_prv2usr() { return _reorder_prv2usr.aprimitive; }
shared_ptr<primitive> reorder_extprv2prv() { return _reorder_extprv2prv.aprimitive; }
@@ -201,6 +202,8 @@ class MKLDNNMemoryDescriptor : public MKLDNNMemoryDescriptorBase<Dtype> {
shared_ptr<memory> create_output_memory(Blob<Dtype> * blob, bool inplace = false);
shared_ptr<primitive> create_input(bool set_prv_ptr);
shared_ptr<memory> create_output_memory(bool inplace = false);
+Dtype* get_memory_ptr(long offset = 0);
+shared_ptr<memory::desc> get_memory_desc();

void set_mkldnn_primitive(MKLDNNPrimitive<Dtype>& mprimitive) { CHECK(mprimitive.aprimitive); _mkldnn_primitive = mprimitive; }
MKLDNNPrimitive<Dtype>& mkldnn_primitive() { return _mkldnn_primitive; }
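The new get_memory_ptr(long offset) accessor pairs with the stats-batch changes above: one private buffer can serve several stats sub-batches, each addressed by an element offset. A hypothetical sketch of the addressing (the helper below is illustrative, not the library API):

// Selects the start of sub-batch `idx` inside a shared NCHW buffer,
// where chw = channels * height * width.
template <typename Dtype>
Dtype* stats_batch_ptr(Dtype* base, int idx, long stats_batch_size, long chw) {
  return base + static_cast<long>(idx) * stats_batch_size * chw;
}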
4 changes: 0 additions & 4 deletions include/caffe/multinode/multi_sync.hpp
@@ -215,10 +215,6 @@ namespace caffe {
}

void on_iter_finished(int layer_id) {
-#ifdef FW_OVERLAP_OPT
-solver->set_layer_finished_flag(layer_id, false);
-#endif

boost::shared_ptr<Layer<Dtype>> &layer = layers[layer_id];
if (layer->layerOp == nullptr) {
return;
5 changes: 5 additions & 0 deletions include/caffe/sgd_solvers.hpp
@@ -81,6 +81,11 @@ class SGDSolver : public Solver<Dtype> {
// of gradients/updates and is not needed in snapshots
vector<shared_ptr<Blob<Dtype> > > history_, update_, temp_;

+#ifdef ENABLE_SGD_FUSION
+//Fuse the Normalize, Regularize, ComputeUpdateValue and Update process together
+void SGDFusion(int param_id, Dtype rate);
+#endif /* ENABLE_SGD_FUSION */

// loss history for 'plateau' LR policy (should be stored in snapshots)
Dtype minimum_loss_;
int iter_last_event_;
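SGDFusion, compiled in when ENABLE_SGD_FUSION is set (see the Makefile change above), fuses the four per-parameter passes (Normalize, Regularize, ComputeUpdateValue, Update) into one traversal of the gradient buffer, trading four memory sweeps for one. A single-threaded sketch of the fused loop, assuming L2 weight decay and plain momentum; the actual implementation is presumably vectorized and parallelized:

// Hedged sketch of a fused SGD step; names are illustrative.
void fused_sgd_step(float* weight, float* grad, float* history, long count,
                    float normalize_scale,   // 1 / iter_size
                    float weight_decay, float momentum, float lr) {
  for (long i = 0; i < count; ++i) {
    float g = grad[i] * normalize_scale;          // Normalize
    g += weight_decay * weight[i];                // Regularize (L2)
    history[i] = momentum * history[i] + lr * g;  // ComputeUpdateValue
    weight[i] -= history[i];                      // Update
  }
}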
45 changes: 45 additions & 0 deletions include/caffe/util/apply_bn_stats_batch_size.hpp
@@ -0,0 +1,45 @@
/*
All modification made by Intel Corporation: © 2017 Intel Corporation
All contributions by the University of California:
Copyright (c) 2014, 2015, The Regents of the University of California (Regents)
All rights reserved.
All other contributions:
Copyright (c) 2014, 2015, the respective contributors
All rights reserved.
For the list of contributors go to https://github.com/BVLC/caffe/blob/master/CONTRIBUTORS.md
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef APPLY_BN_STATS_BATCH_SIZE_HPP_
#define APPLY_BN_STATS_BATCH_SIZE_HPP_
#include "caffe/proto/caffe.pb.h"

namespace caffe {
void ApplyBnStatsBatchSize(const NetParameter& param,
NetParameter* param_with_stats_batch_size);
}
#endif // APPLY_BN_STATS_BATCH_SIZE_HPP_
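The header's single entry point takes a NetParameter and fills in a transformed copy; presumably it propagates a stats batch size into each batch-norm layer before the net is constructed. A hedged usage sketch against the declared signature (the surrounding function is illustrative):

#include "caffe/proto/caffe.pb.h"
#include "caffe/util/apply_bn_stats_batch_size.hpp"

void adjust_net(const caffe::NetParameter& param) {
  caffe::NetParameter adjusted;
  caffe::ApplyBnStatsBatchSize(param, &adjusted);
  // `adjusted` can now be used in place of `param` to build the Net.
}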
@@ -0,0 +1,19 @@
net: "models/intel_optimized_models/multinode/resnet_50_64_nodes_8k_batch/train_val.prototxt"
test_iter: 1000
test_interval: 156
test_initialization: false
display: 40
base_lr: 3.2
lr_policy: "multistep"
stepvalue:4680
stepvalue:9360
stepvalue:12480
gamma: 0.1
max_iter: 14075
warmup_iter: 780 # 1281167 / 8192 * 5 epochs
warmup_start_lr: 0.1
momentum: 0.9
weight_decay: 0.0001
snapshot: 156
snapshot_prefix: "models/intel_optimized_models/multinode/resnet_50_64_nodes_8k_batch/resnet_50_64_nodes_8k"
solver_mode: CPU
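The schedule is internally consistent with ImageNet's 1,281,167 training images at a global batch of 8192, about 156 iterations per epoch: warmup_iter 780 is 5 epochs, the stepvalues 4680/9360/12480 land at epochs 30/60/80, max_iter 14075 is roughly 90 epochs, and test_interval/snapshot fire once per epoch. The base LR of 3.2 is consistent with the linear scaling rule, 0.1 x 8192/256. Worked out:

\frac{1281167}{8192} \approx 156.4 \ \text{iter/epoch}, \qquad
156 \times 5 = 780, \qquad
156 \times \{30, 60, 80\} = \{4680, 9360, 12480\}, \qquad
156.4 \times 90 \approx 14075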