Skip to content

Commit

Permalink
新增最大人脸检测测试接口
Browse files Browse the repository at this point in the history
  • Loading branch information
moli232777144 committed May 22, 2018
1 parent 6fb7303 commit e1a34ef
Show file tree
Hide file tree
Showing 13 changed files with 530 additions and 9 deletions.
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
---
2018.5.22更新:

针对部分场景仅需检测最大的单人脸,新增最大人脸检测测试接口,不同场景整体速度波动将会更大,但单人脸场景提升较明显;

---
---
2018.5.16更新:

- 更新win端及android端的ncnn版本;
Expand Down
196 changes: 196 additions & 0 deletions mtcnn_AS/app/src/main/cpp/mtcnn.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,13 @@ bool cmpScore(Bbox lsh, Bbox rsh) {
return false;
}

// Ordering predicate for std::sort that places boxes with a larger area first.
//
// Returns true only when lsh.area is strictly greater than rsh.area. The
// comparison must be strict: a predicate that also returns true for equal
// areas is not a strict weak ordering, and passing such a predicate to
// std::sort is undefined behaviour.
bool cmpArea(Bbox lsh, Bbox rsh) {
    return lsh.area > rsh.area;
}


//MTCNN::MTCNN(){}
MTCNN::MTCNN(const string &model_path) {
Expand Down Expand Up @@ -108,6 +115,50 @@ void MTCNN::generateBbox(ncnn::Mat score, ncnn::Mat location, std::vector<Bbox>&
}
}
}


// Suppresses boxes in boundingBox_ that overlap boxes in previousBox_.
//
// boundingBox_      : candidates of the current pass; sorted with cmpScore and
//                     then filtered in place.
// previousBox_      : boxes to compare against; only read here.
// overlap_threshold : a candidate is only considered for removal when its
//                     overlap measure is greater than this value.
// modelname         : "Union" divides the intersection area by the union area,
//                     "Min" divides it by the smaller of the two areas; any
//                     other value leaves the raw intersection area as the
//                     measure.
//
// A candidate is erased only when the overlap measure exceeds the threshold
// AND its score is higher than the score of the previous box it overlaps.
void MTCNN::nmsTwoBoxs(vector<Bbox>& boundingBox_, vector<Bbox>& previousBox_, const float overlap_threshold, string modelname)
{
    if (boundingBox_.empty()) {
        return;
    }
    sort(boundingBox_.begin(), boundingBox_.end(), cmpScore);

    // The mode string never changes during the scan, so decide it once.
    const bool byUnion = (modelname.compare("Union") == 0);
    const bool byMin = !byUnion && (modelname.compare("Min") == 0);

    for (std::vector<Bbox>::iterator prev = previousBox_.begin(); prev != previousBox_.end(); ++prev) {
        std::vector<Bbox>::iterator cand = boundingBox_.begin();
        while (cand != boundingBox_.end()) {
            // Corners of the intersection rectangle of the two boxes.
            const float left = std::max(cand->x1, prev->x1);
            const float top = std::max(cand->y1, prev->y1);
            const float right = std::min(cand->x2, prev->x2);
            const float bottom = std::min(cand->y2, prev->y2);

            // Intersection extent, clamped to zero when the boxes do not overlap.
            const float overlapW = ((right - left + 1) > 0) ? (right - left + 1) : 0;
            const float overlapH = ((bottom - top + 1) > 0) ? (bottom - top + 1) : 0;

            float measure = overlapW * overlapH;
            if (byUnion) {
                measure = measure / (cand->area + prev->area - measure);
            }
            else if (byMin) {
                const float smallerArea = (cand->area < prev->area) ? cand->area : prev->area;
                measure = measure / smallerArea;
            }

            const bool suppress = (measure > overlap_threshold) && (cand->score > prev->score);
            if (suppress) {
                // erase() returns the iterator to the element that followed.
                cand = boundingBox_.erase(cand);
            }
            else {
                ++cand;
            }
        }
    }
}

void MTCNN::nms(std::vector<Bbox> &boundingBox_, const float overlap_threshold, string modelname){
if(boundingBox_.empty()){
return;
Expand Down Expand Up @@ -201,6 +252,42 @@ void MTCNN::refine(vector<Bbox> &vecBbox, const int &height, const int &width, b
it->area = (it->x2 - it->x1)*(it->y2 - it->y1);
}
}

// Reduces boundingBox_ to the single box with the largest area.
//
// An empty vector is left unchanged. When several boxes share the largest
// area, the first of them in the current order is the one that is kept.
void MTCNN::extractMaxFace(vector<Bbox>& boundingBox_)
{
    if (boundingBox_.empty()) {
        return;
    }

    // A single linear scan finds the largest box; sorting the whole vector is
    // unnecessary when only the maximum is needed. The strict '<' comparison
    // is a valid ordering for std::max_element.
    std::vector<Bbox>::iterator largest = std::max_element(
        boundingBox_.begin(), boundingBox_.end(),
        [](const Bbox& a, const Bbox& b) { return a.area < b.area; });

    if (largest != boundingBox_.begin()) {
        boundingBox_.front() = *largest;
    }

    // Drop everything after the first element in one range erase.
    boundingBox_.erase(boundingBox_.begin() + 1, boundingBox_.end());
}

void MTCNN::PNet(float scale)
{
//first stage
int hs = (int)ceil(img_h*scale);
int ws = (int)ceil(img_w*scale);
ncnn::Mat in;
resize_bilinear(img, in, ws, hs);
ncnn::Extractor ex = Pnet.create_extractor();
ex.set_light_mode(true);
ex.set_num_threads(num_threads);
ex.input("data", in);
ncnn::Mat score_, location_;
ex.extract("prob1", score_);
ex.extract("conv4-2", location_);
std::vector<Bbox> boundingBox_;

generateBbox(score_, location_, boundingBox_, scale);
nms(boundingBox_, nms_threshold[0]);

firstBbox_.insert(firstBbox_.end(), boundingBox_.begin(), boundingBox_.end());
boundingBox_.clear();
}


void MTCNN::PNet(){
firstBbox_.clear();
float minl = img_w < img_h? img_w: img_h;
Expand Down Expand Up @@ -348,6 +435,115 @@ void MTCNN::detect(ncnn::Mat& img_, std::vector<Bbox>& finalBbox_){

}


// Detects only the largest face in img_.
//
// Unlike a full detection pass, the three stages (PNet -> RNet -> ONet) are run
// one pyramid scale at a time, and the search stops at the first scale for
// which the third stage returns at least one box.
//
// img_      : input image. It is assigned to the member `img`, which is then
//             mean/norm-normalized.
//             NOTE(review): if ncnn::Mat assignment shares the pixel buffer,
//             the caller's img_ is normalized in place as well - confirm.
// finalBbox : receives the single surviving box when a face is found. It is
//             NOT cleared or written when no scale yields a face, so callers
//             should pass in an empty vector.
void MTCNN::detectMaxFace(ncnn::Mat& img_, std::vector<Bbox>& finalBbox) {
// Reset the cross-scale history and the per-stage result vectors.
firstPreviousBbox_.clear();
secondPreviousBbox_.clear();
thirdPrevioussBbox_.clear();
firstBbox_.clear();
secondBbox_.clear();
thirdBbox_.clear();

// Store the image and its size in members, then normalize it.
img = img_;
img_w = img.w;
img_h = img.h;
img.substract_mean_normalize(mean_vals, norm_vals);

#if(TIMEOPEN==1)
// Benchmark mode: the detection below is repeated `count` times and the
// per-iteration time is recorded. The vectors cleared above are reset only
// once, before this loop, not per iteration.
double total_time = 0.;
double min_time = DBL_MAX;
double max_time = 0.0;
double temp_time = 0.0;
unsigned long time_0, time_1;

// This loop is closed inside the second TIMEOPEN section further down.
for(int i =0 ;i < count; i++) {
time_0 = get_current_time();
#endif

// Build the list of pyramid scales. m starts at MIN_DET_SIZE / minsize and is
// multiplied by pre_facetor for every further level; levels are added while
// the scaled short side of the image is still larger than MIN_DET_SIZE.
float minl = img_w < img_h ? img_w : img_h;
float m = (float)MIN_DET_SIZE / minsize;
minl *= m;
float factor = pre_facetor;
vector<float> scales_;
while (minl>MIN_DET_SIZE) {
scales_.push_back(m);
minl *= factor;
m = m*factor;
}
// Ascending order: the smallest scale is processed first (presumably so that
// the largest faces are found first - verify against the intended use).
sort(scales_.begin(), scales_.end());
//printf("scales_.size()=%d\n", scales_.size());

// Run the whole cascade per scale instead of running each stage over all scales.
// Note: in benchmark mode this `i` shadows the timing loop's `i`.
for (size_t i = 0; i < scales_.size(); i++)
{
// First stage: PNet(scale) appends its candidates to firstBbox_. They are
// then suppressed among themselves and against first-stage boxes kept from
// earlier scales.
PNet(scales_[i]);
nms(firstBbox_, nms_threshold[0]);
nmsTwoBoxs(firstBbox_, firstPreviousBbox_, nms_threshold[0]);
if (firstBbox_.size() < 1) {
firstBbox_.clear();
continue;
}
// Remember this scale's survivors for comparison at later scales.
firstPreviousBbox_.insert(firstPreviousBbox_.end(), firstBbox_.begin(), firstBbox_.end());
refine(firstBbox_, img_h, img_w, true);
//printf("firstBbox_.size()=%d\n", firstBbox_.size());

// Second stage.
RNet();
nms(secondBbox_, nms_threshold[1]);
// NOTE(review): the cross-scale suppression uses nms_threshold[0] here while
// the in-scale nms above uses nms_threshold[1] - confirm this is intended.
nmsTwoBoxs(secondBbox_, secondPreviousBbox_, nms_threshold[0]);
secondPreviousBbox_.insert(secondPreviousBbox_.end(), secondBbox_.begin(), secondBbox_.end());
if (secondBbox_.size() < 1) {
// Nothing survived this scale: drop its candidates and try the next scale.
firstBbox_.clear();
secondBbox_.clear();
continue;
}
refine(secondBbox_, img_h, img_w, true);
//printf("secondBbox_.size()=%d\n", secondBbox_.size());

// Third stage.
ONet();
//printf("thirdBbox_.size()=%d\n", thirdBbox_.size());
if (thirdBbox_.size() < 1) {
firstBbox_.clear();
secondBbox_.clear();
thirdBbox_.clear();
continue;
}
refine(thirdBbox_, img_h, img_w, true);
nms(thirdBbox_, nms_threshold[2], "Min");

// First scale with a third-stage result wins: keep its largest box and stop.
if (thirdBbox_.size() > 0) {
extractMaxFace(thirdBbox_);
finalBbox = thirdBbox_;// the remaining scales are not evaluated
break;
}
}

//printf("firstPreviousBbox_.size()=%d\n", firstPreviousBbox_.size());
//printf("secondPreviousBbox_.size()=%d\n", secondPreviousBbox_.size());

#if(TIMEOPEN==1)
// Benchmark mode: record this iteration's elapsed time and update min/max/total.
time_1 = get_current_time();
temp_time = ((time_1 - time_0)/1000.0);
if(temp_time < min_time)
{
min_time = temp_time;
}
if(temp_time > max_time)
{
max_time = temp_time;
}
total_time += temp_time;

LOGD("iter %d/%d cost: %.3f ms\n", i+1, count, temp_time);
}
// End of the timing loop opened in the first TIMEOPEN section.
LOGD("Time cost:Max %.2fms,Min %.2fms,Avg %.2fms\n", max_time,min_time,total_time/count);
#endif
}

//void MTCNN::detection(const cv::Mat& img, std::vector<cv::Rect>& rectangles){
// ncnn::Mat ncnn_img = ncnn::Mat::from_pixels(img.data, ncnn::Mat::PIXEL_BGR2RGB, img.cols, img.rows);
// std::vector<Bbox> finalBbox;
Expand Down
12 changes: 10 additions & 2 deletions mtcnn_AS/app/src/main/cpp/mtcnn.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
#include <algorithm>
#include <map>
#include <iostream>
#include <math.h>

using namespace std;
//using namespace cv;
struct Bbox
Expand Down Expand Up @@ -40,12 +42,17 @@ class MTCNN {
void SetTimeCount(int timeCount);

void detect(ncnn::Mat& img_, std::vector<Bbox>& finalBbox);
void detectMaxFace(ncnn::Mat& img_, std::vector<Bbox>& finalBbox);
// void detection(const cv::Mat& img, std::vector<cv::Rect>& rectangles);
private:
void generateBbox(ncnn::Mat score, ncnn::Mat location, vector<Bbox>& boundingBox_, float scale);
void nms(vector<Bbox> &boundingBox_, const float overlap_threshold, string modelname="Union");
void nmsTwoBoxs(vector<Bbox> &boundingBox_, vector<Bbox> &previousBox_, const float overlap_threshold, string modelname = "Union");
void nms(vector<Bbox> &boundingBox_, const float overlap_threshold, string modelname="Union");
void refine(vector<Bbox> &vecBbox, const int &height, const int &width, bool square);
void PNet();
void extractMaxFace(vector<Bbox> &boundingBox_);

void PNet(float scale);
void PNet();
void RNet();
void ONet();
ncnn::Net Pnet, Rnet, Onet;
Expand All @@ -56,6 +63,7 @@ class MTCNN {
const float norm_vals[3] = {0.0078125, 0.0078125, 0.0078125};
const int MIN_DET_SIZE = 12;
std::vector<Bbox> firstBbox_, secondBbox_,thirdBbox_;
std::vector<Bbox> firstPreviousBbox_, secondPreviousBbox_, thirdPrevioussBbox_;
int img_w, img_h;

private://部分可调参数
Expand Down
81 changes: 81 additions & 0 deletions mtcnn_AS/app/src/main/cpp/mtcnn_jni.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,87 @@ Java_com_mtcnn_1as_MTCNN_FaceDetect(JNIEnv *env, jobject instance, jbyteArray im
return tFaceInfo;
}

/**
 * JNI entry point: detects the largest face in a packed pixel buffer.
 *
 * @param imageDate_   pixel bytes; 3-channel data is converted with
 *                     PIXEL_BGR2RGB, 4-channel data with PIXEL_RGBA2RGB
 * @param imageWidth   image width in pixels, must be at least 20
 * @param imageHeight  image height in pixels, must be at least 20
 * @param imageChannel number of channels, must be 3 or 4
 * @return an int array laid out as [faceCount, then 14 ints per face:
 *         x1, y1, x2, y2 and the 10 ppoint values], or NULL when the SDK is
 *         not initialised, an argument is invalid, or the result array cannot
 *         be allocated.
 */
JNIEXPORT jintArray JNICALL
Java_com_mtcnn_1as_MTCNN_MaxFaceDetect(JNIEnv *env, jobject instance, jbyteArray imageDate_,
                                       jint imageWidth, jint imageHeight, jint imageChannel) {
    if(!detection_sdk_init_ok){
        LOGD("人脸检测MTCNN模型SDK未初始化,直接返回空");
        return nullptr;
    }

    if(nullptr == imageDate_){
        LOGD("导入数据为空,直接返回空");
        return nullptr;
    }

    // Validate the dimensions first: they are used as divisors below, so a
    // zero width or height must be rejected before the length check.
    if(imageWidth<20||imageHeight<20){
        LOGD("导入数据的宽和高小于20,直接返回空");
        return nullptr;
    }

    //TODO: channel handling still needs testing
    if(3 != imageChannel && 4 != imageChannel){
        LOGD("图像通道数只能是3或4,直接返回空");
        return nullptr;
    }

    // Division (rather than multiplying width*height*channel) avoids integer
    // overflow for large dimensions.
    const jsize tImageDateLen = env->GetArrayLength(imageDate_);
    if(imageChannel != tImageDateLen / imageWidth / imageHeight){
        LOGD("数据长宽高通道不匹配,直接返回空");
        return nullptr;
    }
    LOGD("数据宽=%d,高=%d,通道=%d",imageWidth,imageHeight,imageChannel);

    // Acquire the pixels only after all arguments are validated, so no failure
    // path above has anything to release.
    jbyte *imageDate = env->GetByteArrayElements(imageDate_, nullptr);
    if (nullptr == imageDate){
        // Nothing was acquired, so there is nothing to release.
        LOGD("导入数据为空,直接返回空");
        return nullptr;
    }

    unsigned char *faceImageCharDate = reinterpret_cast<unsigned char*>(imageDate);
    ncnn::Mat ncnn_img;
    if(imageChannel==3) {
        ncnn_img = ncnn::Mat::from_pixels(faceImageCharDate, ncnn::Mat::PIXEL_BGR2RGB,
                                          imageWidth, imageHeight);
    }else{
        ncnn_img = ncnn::Mat::from_pixels(faceImageCharDate, ncnn::Mat::PIXEL_RGBA2RGB, imageWidth, imageHeight);
    }

    std::vector<Bbox> finalBbox;
    mtcnn->detectMaxFace(ncnn_img, finalBbox);

    // The byte buffer is only read by this function, so release it without
    // copying anything back to the Java array.
    env->ReleaseByteArrayElements(imageDate_, imageDate, JNI_ABORT);

    int32_t num_face = static_cast<int32_t>(finalBbox.size());
    LOGD("检测到的人脸数目:%d\n", num_face);

    // Output layout: [count, 14 values per face].
    const int out_size = 1+num_face*14;
    std::vector<jint> faceInfo(out_size);
    faceInfo[0] = num_face;
    for(int i=0;i<num_face;i++){
        faceInfo[14*i+1] = static_cast<jint>(finalBbox[i].x1);//left
        faceInfo[14*i+2] = static_cast<jint>(finalBbox[i].y1);//top
        faceInfo[14*i+3] = static_cast<jint>(finalBbox[i].x2);//right
        faceInfo[14*i+4] = static_cast<jint>(finalBbox[i].y2);//bottom
        for (int j =0;j<10;j++){
            faceInfo[14*i+5+j]=static_cast<jint>(finalBbox[i].ppoint[j]);
        }
    }

    jintArray tFaceInfo = env->NewIntArray(out_size);
    if(nullptr == tFaceInfo){
        // Allocation failed; a Java exception is pending for the caller.
        return nullptr;
    }
    env->SetIntArrayRegion(tFaceInfo,0,out_size,faceInfo.data());
    return tFaceInfo;
}


JNIEXPORT jboolean JNICALL
Java_com_mtcnn_1as_MTCNN_FaceDetectionModelUnInit(JNIEnv *env, jobject instance) {
if(!detection_sdk_init_ok){
Expand Down
2 changes: 2 additions & 0 deletions mtcnn_AS/app/src/main/java/com/mtcnn_as/MTCNN.java
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ public class MTCNN{
//人脸检测
public native int[] FaceDetect(byte[] imageDate, int imageWidth , int imageHeight, int imageChannel);

/**
 * Detects only the largest face in the image (native implementation).
 *
 * @param imageDate    packed pixel bytes; 3-channel data is read as BGR and
 *                     4-channel data as RGBA by the native side
 * @param imageWidth   image width in pixels; the native side rejects values below 20
 * @param imageHeight  image height in pixels; the native side rejects values below 20
 * @param imageChannel number of channels; must be 3 or 4
 * @return an array whose element 0 is the number of faces, followed by 14
 *         ints per face: left, top, right, bottom and ten values copied from
 *         the native box's ppoint array (presumably landmark coordinates);
 *         null when the model is not initialised or the arguments are rejected
 */
public native int[] MaxFaceDetect(byte[] imageDate, int imageWidth , int imageHeight, int imageChannel);

//人脸检测模型反初始化
public native boolean FaceDetectionModelUnInit();

Expand Down
Loading

0 comments on commit e1a34ef

Please sign in to comment.