From e1a34ef317dd6925f13753f45ebda89acedf67b2 Mon Sep 17 00:00:00 2001 From: moli232777144 <232777144@qq.com> Date: Tue, 22 May 2018 17:41:31 +0800 Subject: [PATCH] =?UTF-8?q?=E6=96=B0=E5=A2=9E=E6=9C=80=E5=A4=A7=E4=BA=BA?= =?UTF-8?q?=E8=84=B8=E6=A3=80=E6=B5=8B=E6=B5=8B=E8=AF=95=E6=8E=A5=E5=8F=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 6 + mtcnn_AS/app/src/main/cpp/mtcnn.cpp | 196 ++++++++++++++++++ mtcnn_AS/app/src/main/cpp/mtcnn.h | 12 +- mtcnn_AS/app/src/main/cpp/mtcnn_jni.cpp | 81 ++++++++ .../app/src/main/java/com/mtcnn_as/MTCNN.java | 2 + .../main/java/com/mtcnn_as/MainActivity.java | 31 ++- .../app/src/main/res/drawable/toggle_btn.xml | 6 + .../main/res/drawable/toggle_btn_checked.png | Bin 0 -> 1272 bytes .../res/drawable/toggle_btn_unchecked.png | Bin 0 -> 1265 bytes mtcnn_AS/app/src/main/res/layout/main.xml | 13 +- src/main.cpp | 10 + src/mtcnn.cpp | 173 +++++++++++++++- src/mtcnn.h | 9 +- 13 files changed, 530 insertions(+), 9 deletions(-) create mode 100644 mtcnn_AS/app/src/main/res/drawable/toggle_btn.xml create mode 100644 mtcnn_AS/app/src/main/res/drawable/toggle_btn_checked.png create mode 100644 mtcnn_AS/app/src/main/res/drawable/toggle_btn_unchecked.png diff --git a/README.md b/README.md index c65a1f3..88a1e29 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,10 @@ --- +2018.5.22更新: + +针对部分场景仅需检测最大的单人脸,新增最大人脸检测测试接口,不同场景整体速度波动将会更大,但单人脸场景提升较明显; + +--- +--- 2018.5.16更新: - 更新win端及android端的ncnn版本; diff --git a/mtcnn_AS/app/src/main/cpp/mtcnn.cpp b/mtcnn_AS/app/src/main/cpp/mtcnn.cpp index c6f8213..35222bd 100644 --- a/mtcnn_AS/app/src/main/cpp/mtcnn.cpp +++ b/mtcnn_AS/app/src/main/cpp/mtcnn.cpp @@ -27,6 +27,13 @@ bool cmpScore(Bbox lsh, Bbox rsh) { return false; } +bool cmpArea(Bbox lsh, Bbox rsh) { + if (lsh.area < rsh.area) + return false; + else + return true; +} + //MTCNN::MTCNN(){} MTCNN::MTCNN(const string &model_path) { @@ -108,6 +115,50 @@ void MTCNN::generateBbox(ncnn::Mat score, ncnn::Mat location, std::vector& } } } + + +void MTCNN::nmsTwoBoxs(vector& boundingBox_, vector& previousBox_, const float overlap_threshold, string modelname) +{ + if (boundingBox_.empty()) { + return; + } + sort(boundingBox_.begin(), boundingBox_.end(), cmpScore); + float IOU = 0; + float maxX = 0; + float maxY = 0; + float minX = 0; + float minY = 0; + //std::cout << boundingBox_.size() << " "; + for (std::vector::iterator ity = previousBox_.begin(); ity != previousBox_.end(); ity++) { + for (std::vector::iterator itx = boundingBox_.begin(); itx != boundingBox_.end();) { + int i = itx - boundingBox_.begin(); + int j = ity - previousBox_.begin(); + maxX = std::max(boundingBox_.at(i).x1, previousBox_.at(j).x1); + maxY = std::max(boundingBox_.at(i).y1, previousBox_.at(j).y1); + minX = std::min(boundingBox_.at(i).x2, previousBox_.at(j).x2); + minY = std::min(boundingBox_.at(i).y2, previousBox_.at(j).y2); + //maxX1 and maxY1 reuse + maxX = ((minX - maxX + 1)>0) ? (minX - maxX + 1) : 0; + maxY = ((minY - maxY + 1)>0) ? (minY - maxY + 1) : 0; + //IOU reuse for the area of two bbox + IOU = maxX * maxY; + if (!modelname.compare("Union")) + IOU = IOU / (boundingBox_.at(i).area + previousBox_.at(j).area - IOU); + else if (!modelname.compare("Min")) { + IOU = IOU / ((boundingBox_.at(i).area < previousBox_.at(j).area) ? boundingBox_.at(i).area : previousBox_.at(j).area); + } + if (IOU > overlap_threshold&&boundingBox_.at(i).score>previousBox_.at(j).score) { + //if (IOU > overlap_threshold) { + itx = boundingBox_.erase(itx); + } + else { + itx++; + } + } + } + //std::cout << boundingBox_.size() << std::endl; +} + void MTCNN::nms(std::vector &boundingBox_, const float overlap_threshold, string modelname){ if(boundingBox_.empty()){ return; @@ -201,6 +252,42 @@ void MTCNN::refine(vector &vecBbox, const int &height, const int &width, b it->area = (it->x2 - it->x1)*(it->y2 - it->y1); } } + +void MTCNN::extractMaxFace(vector& boundingBox_) +{ + if (boundingBox_.empty()) { + return; + } + sort(boundingBox_.begin(), boundingBox_.end(), cmpArea); + for (std::vector::iterator itx = boundingBox_.begin() + 1; itx != boundingBox_.end();) { + itx = boundingBox_.erase(itx); + } +} + +void MTCNN::PNet(float scale) +{ + //first stage + int hs = (int)ceil(img_h*scale); + int ws = (int)ceil(img_w*scale); + ncnn::Mat in; + resize_bilinear(img, in, ws, hs); + ncnn::Extractor ex = Pnet.create_extractor(); + ex.set_light_mode(true); + ex.set_num_threads(num_threads); + ex.input("data", in); + ncnn::Mat score_, location_; + ex.extract("prob1", score_); + ex.extract("conv4-2", location_); + std::vector boundingBox_; + + generateBbox(score_, location_, boundingBox_, scale); + nms(boundingBox_, nms_threshold[0]); + + firstBbox_.insert(firstBbox_.end(), boundingBox_.begin(), boundingBox_.end()); + boundingBox_.clear(); +} + + void MTCNN::PNet(){ firstBbox_.clear(); float minl = img_w < img_h? img_w: img_h; @@ -348,6 +435,115 @@ void MTCNN::detect(ncnn::Mat& img_, std::vector& finalBbox_){ } + +void MTCNN::detectMaxFace(ncnn::Mat& img_, std::vector& finalBbox) { + firstPreviousBbox_.clear(); + secondPreviousBbox_.clear(); + thirdPrevioussBbox_.clear(); + firstBbox_.clear(); + secondBbox_.clear(); + thirdBbox_.clear(); + + //norm + img = img_; + img_w = img.w; + img_h = img.h; + img.substract_mean_normalize(mean_vals, norm_vals); + +#if(TIMEOPEN==1) + double total_time = 0.; + double min_time = DBL_MAX; + double max_time = 0.0; + double temp_time = 0.0; + unsigned long time_0, time_1; + + for(int i =0 ;i < count; i++) { + time_0 = get_current_time(); +#endif + + //pyramid size + float minl = img_w < img_h ? img_w : img_h; + float m = (float)MIN_DET_SIZE / minsize; + minl *= m; + float factor = pre_facetor; + vector scales_; + while (minl>MIN_DET_SIZE) { + scales_.push_back(m); + minl *= factor; + m = m*factor; + } + sort(scales_.begin(), scales_.end()); + //printf("scales_.size()=%d\n", scales_.size()); + + //Change the sampling process. + for (size_t i = 0; i < scales_.size(); i++) + { + //first stage + PNet(scales_[i]); + nms(firstBbox_, nms_threshold[0]); + nmsTwoBoxs(firstBbox_, firstPreviousBbox_, nms_threshold[0]); + if (firstBbox_.size() < 1) { + firstBbox_.clear(); + continue; + } + firstPreviousBbox_.insert(firstPreviousBbox_.end(), firstBbox_.begin(), firstBbox_.end()); + refine(firstBbox_, img_h, img_w, true); + //printf("firstBbox_.size()=%d\n", firstBbox_.size()); + + //second stage + RNet(); + nms(secondBbox_, nms_threshold[1]); + nmsTwoBoxs(secondBbox_, secondPreviousBbox_, nms_threshold[0]); + secondPreviousBbox_.insert(secondPreviousBbox_.end(), secondBbox_.begin(), secondBbox_.end()); + if (secondBbox_.size() < 1) { + firstBbox_.clear(); + secondBbox_.clear(); + continue; + } + refine(secondBbox_, img_h, img_w, true); + //printf("secondBbox_.size()=%d\n", secondBbox_.size()); + + //third stage + ONet(); + //printf("thirdBbox_.size()=%d\n", thirdBbox_.size()); + if (thirdBbox_.size() < 1) { + firstBbox_.clear(); + secondBbox_.clear(); + thirdBbox_.clear(); + continue; + } + refine(thirdBbox_, img_h, img_w, true); + nms(thirdBbox_, nms_threshold[2], "Min"); + + if (thirdBbox_.size() > 0) { + extractMaxFace(thirdBbox_); + finalBbox = thirdBbox_;//if largest face size is similar,. + break; + } + } + + //printf("firstPreviousBbox_.size()=%d\n", firstPreviousBbox_.size()); + //printf("secondPreviousBbox_.size()=%d\n", secondPreviousBbox_.size()); + +#if(TIMEOPEN==1) + time_1 = get_current_time(); + temp_time = ((time_1 - time_0)/1000.0); + if(temp_time < min_time) + { + min_time = temp_time; + } + if(temp_time > max_time) + { + max_time = temp_time; + } + total_time += temp_time; + + LOGD("iter %d/%d cost: %.3f ms\n", i+1, count, temp_time); + } + LOGD("Time cost:Max %.2fms,Min %.2fms,Avg %.2fms\n", max_time,min_time,total_time/count); +#endif +} + //void MTCNN::detection(const cv::Mat& img, std::vector& rectangles){ // ncnn::Mat ncnn_img = ncnn::Mat::from_pixels(img.data, ncnn::Mat::PIXEL_BGR2RGB, img.cols, img.rows); // std::vector finalBbox; diff --git a/mtcnn_AS/app/src/main/cpp/mtcnn.h b/mtcnn_AS/app/src/main/cpp/mtcnn.h index 65bee8d..de23d73 100644 --- a/mtcnn_AS/app/src/main/cpp/mtcnn.h +++ b/mtcnn_AS/app/src/main/cpp/mtcnn.h @@ -13,6 +13,8 @@ #include #include #include +#include + using namespace std; //using namespace cv; struct Bbox @@ -40,12 +42,17 @@ class MTCNN { void SetTimeCount(int timeCount); void detect(ncnn::Mat& img_, std::vector& finalBbox); + void detectMaxFace(ncnn::Mat& img_, std::vector& finalBbox); // void detection(const cv::Mat& img, std::vector& rectangles); private: void generateBbox(ncnn::Mat score, ncnn::Mat location, vector& boundingBox_, float scale); - void nms(vector &boundingBox_, const float overlap_threshold, string modelname="Union"); + void nmsTwoBoxs(vector &boundingBox_, vector &previousBox_, const float overlap_threshold, string modelname = "Union"); + void nms(vector &boundingBox_, const float overlap_threshold, string modelname="Union"); void refine(vector &vecBbox, const int &height, const int &width, bool square); - void PNet(); + void extractMaxFace(vector &boundingBox_); + + void PNet(float scale); + void PNet(); void RNet(); void ONet(); ncnn::Net Pnet, Rnet, Onet; @@ -56,6 +63,7 @@ class MTCNN { const float norm_vals[3] = {0.0078125, 0.0078125, 0.0078125}; const int MIN_DET_SIZE = 12; std::vector firstBbox_, secondBbox_,thirdBbox_; + std::vector firstPreviousBbox_, secondPreviousBbox_, thirdPrevioussBbox_; int img_w, img_h; private://部分可调参数 diff --git a/mtcnn_AS/app/src/main/cpp/mtcnn_jni.cpp b/mtcnn_AS/app/src/main/cpp/mtcnn_jni.cpp index ef04080..b35027b 100644 --- a/mtcnn_AS/app/src/main/cpp/mtcnn_jni.cpp +++ b/mtcnn_AS/app/src/main/cpp/mtcnn_jni.cpp @@ -141,6 +141,87 @@ Java_com_mtcnn_1as_MTCNN_FaceDetect(JNIEnv *env, jobject instance, jbyteArray im return tFaceInfo; } +JNIEXPORT jintArray JNICALL +Java_com_mtcnn_1as_MTCNN_MaxFaceDetect(JNIEnv *env, jobject instance, jbyteArray imageDate_, + jint imageWidth, jint imageHeight, jint imageChannel) { + // LOGD("JNI开始检测人脸"); + if(!detection_sdk_init_ok){ + LOGD("人脸检测MTCNN模型SDK未初始化,直接返回空"); + return NULL; + } + + int tImageDateLen = env->GetArrayLength(imageDate_); + if(imageChannel == tImageDateLen / imageWidth / imageHeight){ + LOGD("数据宽=%d,高=%d,通道=%d",imageWidth,imageHeight,imageChannel); + } + else{ + LOGD("数据长宽高通道不匹配,直接返回空"); + return NULL; + } + + jbyte *imageDate = env->GetByteArrayElements(imageDate_, NULL); + if (NULL == imageDate){ + LOGD("导入数据为空,直接返回空"); + env->ReleaseByteArrayElements(imageDate_, imageDate, 0); + return NULL; + } + + if(imageWidth<20||imageHeight<20){ + LOGD("导入数据的宽和高小于20,直接返回空"); + env->ReleaseByteArrayElements(imageDate_, imageDate, 0); + return NULL; + } + + //TODO 通道需测试 + if(3 == imageChannel || 4 == imageChannel){ + //图像通道数只能是3或4; + }else{ + LOGD("图像通道数只能是3或4,直接返回空"); + env->ReleaseByteArrayElements(imageDate_, imageDate, 0); + return NULL; + } + + //int32_t minFaceSize=40; + //mtcnn->SetMinFace(minFaceSize); + + unsigned char *faceImageCharDate = (unsigned char*)imageDate; + ncnn::Mat ncnn_img; + if(imageChannel==3) { + ncnn_img = ncnn::Mat::from_pixels(faceImageCharDate, ncnn::Mat::PIXEL_BGR2RGB, + imageWidth, imageHeight); + }else{ + ncnn_img = ncnn::Mat::from_pixels(faceImageCharDate, ncnn::Mat::PIXEL_RGBA2RGB, imageWidth, imageHeight); + } + + std::vector finalBbox; + mtcnn->detectMaxFace(ncnn_img, finalBbox); + + int32_t num_face = static_cast(finalBbox.size()); + LOGD("检测到的人脸数目:%d\n", num_face); + + int out_size = 1+num_face*14; + // LOGD("内部人脸检测完成,开始导出数据"); + int *faceInfo = new int[out_size]; + faceInfo[0] = num_face; + for(int i=0;i(finalBbox[i].ppoint[j]); + } + } + + jintArray tFaceInfo = env->NewIntArray(out_size); + env->SetIntArrayRegion(tFaceInfo,0,out_size,faceInfo); + // LOGD("内部人脸检测完成,导出数据成功"); + delete[] faceInfo; + env->ReleaseByteArrayElements(imageDate_, imageDate, 0); + return tFaceInfo; +} + + JNIEXPORT jboolean JNICALL Java_com_mtcnn_1as_MTCNN_FaceDetectionModelUnInit(JNIEnv *env, jobject instance) { if(!detection_sdk_init_ok){ diff --git a/mtcnn_AS/app/src/main/java/com/mtcnn_as/MTCNN.java b/mtcnn_AS/app/src/main/java/com/mtcnn_as/MTCNN.java index 630e959..0a4a371 100644 --- a/mtcnn_AS/app/src/main/java/com/mtcnn_as/MTCNN.java +++ b/mtcnn_AS/app/src/main/java/com/mtcnn_as/MTCNN.java @@ -11,6 +11,8 @@ public class MTCNN{ //人脸检测 public native int[] FaceDetect(byte[] imageDate, int imageWidth , int imageHeight, int imageChannel); + public native int[] MaxFaceDetect(byte[] imageDate, int imageWidth , int imageHeight, int imageChannel); + //人脸检测模型反初始化 public native boolean FaceDetectionModelUnInit(); diff --git a/mtcnn_AS/app/src/main/java/com/mtcnn_as/MainActivity.java b/mtcnn_AS/app/src/main/java/com/mtcnn_as/MainActivity.java index 60edb95..3337beb 100644 --- a/mtcnn_AS/app/src/main/java/com/mtcnn_as/MainActivity.java +++ b/mtcnn_AS/app/src/main/java/com/mtcnn_as/MainActivity.java @@ -17,8 +17,11 @@ import android.util.Log; import android.view.View; import android.widget.Button; +import android.widget.CompoundButton; import android.widget.ImageView; import android.widget.TextView; +import android.widget.Toast; +import android.widget.ToggleButton; import com.google.android.gms.appindexing.AppIndex; import com.google.android.gms.common.api.GoogleApiClient; @@ -44,6 +47,7 @@ public class MainActivity extends Activity { private int testTimeCount = 10; private int threadsNumber = 4; + private boolean maxFaceSetting = false; private MTCNN mtcnn = new MTCNN(); /** @@ -104,7 +108,22 @@ public void onCreate(Bundle savedInstanceState) { etTestTimeCount = (AppCompatEditText) findViewById(R.id.etTestTimeCount); etThreadsNumber = (AppCompatEditText) findViewById(R.id.etThreadsNumber); - + ToggleButton mToggleBt = (ToggleButton) findViewById(R.id.toggle_bt); + mToggleBt.setOnCheckedChangeListener(new CompoundButton.OnCheckedChangeListener() { + @Override + public void onCheckedChanged(CompoundButton buttonView, + boolean isChecked) { + if (isChecked) { + Toast.makeText(getApplication(), "打开只检测最大人脸功能", Toast.LENGTH_SHORT) + .show(); + maxFaceSetting = true; + } else { + Toast.makeText(getApplication(), "关闭只检测最大人脸功能", Toast.LENGTH_SHORT) + .show(); + maxFaceSetting = false; + } + } + }); Button buttonImage = (Button) findViewById(R.id.buttonImage); buttonImage.setOnClickListener(new View.OnClickListener() { @@ -144,7 +163,15 @@ public void onClick(View arg0) { byte[] imageDate = getPixelsRGBA(yourSelectedImage); long timeDetectFace = System.currentTimeMillis(); - int faceInfo[]=mtcnn.FaceDetect(imageDate,width,height,4); + int faceInfo[] = null; + if(!maxFaceSetting) { + faceInfo = mtcnn.FaceDetect(imageDate, width, height, 4); + Log.i(TAG, "检测所有人脸"); + } + else{ + faceInfo = mtcnn.MaxFaceDetect(imageDate, width, height, 4); + Log.i(TAG, "检测最大人脸"); + } timeDetectFace = System.currentTimeMillis() - timeDetectFace; Log.i(TAG, "人脸平均检测时间:"+timeDetectFace/testTimeCount); diff --git a/mtcnn_AS/app/src/main/res/drawable/toggle_btn.xml b/mtcnn_AS/app/src/main/res/drawable/toggle_btn.xml new file mode 100644 index 0000000..f77b24b --- /dev/null +++ b/mtcnn_AS/app/src/main/res/drawable/toggle_btn.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/mtcnn_AS/app/src/main/res/drawable/toggle_btn_checked.png b/mtcnn_AS/app/src/main/res/drawable/toggle_btn_checked.png new file mode 100644 index 0000000000000000000000000000000000000000..f84f02208e9e3a6b750e6fe6710f4b370206cdbf GIT binary patch literal 1272 zcmVK{-ldV)jRKcPY( zX#N?XQ1y<}6R7_};F4ZAa6nZLNTecC3Zj?tLjqJ$kuH&H`ToCxF9Jp2c=v5?x>ecm+HG zeh2OX_oW=1XQ)@~7>C=&8uo5?M2my)pnK6J45tj=9R~$lo@~7je7TcjF}0;WpkA za*d$hd^uZ6<48YrtaLOam)1LYT`;?@vm+jtj$1Y8Eb!Q}!+g-%G>WvSDX zHlfkRNvhLgXaseK&w&?4*kMf8f$#9swY;kGf)750FK}t(pwU->uNehr=>L{7;1eNhjJnj-vTGF@L3P^@ zo!>)+${T$Z^rqBp(4QWk!sQFAz)PWrQg+H1r;h(*ZdPan)lmnNIbcl+!$__F*MNgk zUq`)(OEN}vxP+gBwe7(neeO0{o#wzr>rs`@i{LXx`CpAib-uT`x|a8(ZZqI}) zk9%rVc~&y~`Z-JwHOSo!-hVI8BAX3bcQN3lymxyOC4~0h0(eWl>*~nZ4F-S*l=pxK z0k0N*^s87vz0L4RO&%mI;E|R(ZAXTiN40 zSW#VHX1G_L}+11#X+g?EIxsXNrhLWjpcEnBIPYDz|Iy!?TAj=O!#65cD@{5 z!YzUAa(D@MYzC9L9R2}=*bR~EMxnABBG)TaK6gW;=$6JlTb1%wx4wX^ z^Y3TC^T1MrjPMvZNBpMfIcZa+@~YEw(bei+dRu%AHK&k+HC8?>z7$ z?m|-4BOcOIkxln6qAp489-B=95{m$7L2!Xs;sd}6PH+f(gcSk- z`7%6L+;KR;?pFw0!i56|gg77pA`dkSSlv3q)| z>tFq~02 zV61@7yyyXoz_*llEu15|S(RlqR%=^pdK@?m90Lvl`+)6|o@6b&AiBT;@B#Q2cnI7D z?n^qjmZ7a;)o9{LwSlX^D=DB(@=|PTi>-;+UKkBrlLFWcT*oy%EpP=`!gYrgk#0$% zX1E5@!xv30lD-l9OaY~a0j7T4S?XwEZx8n zGsXcU$0zmDIB;7Oe5W2Y9(IE)E#kZUzAcICic zof?qW0WKu+Q6#iY2jWpq=mcDUB0nt|mntN`jBn!4fb+m{T&%HEWS^v+mpncBCUlw= z&^%;4vE{dVgRPPWVmaHDD2wH_LCN(|p*$lt-&n9lchEuiT=$vYnCI5oflR;BvLH>y`IvkJ}N9gF9Jl6spzliQMCn(no0CO2U^?!)g-bG_Dz>xy>Xx_ttF0I^f zY}5R634Z-t_MV;&a(9FB&&e~-W`^dS4s@yRa1o^M+W=oEzwhgGv_$^~2EZeVw}3~1 zt~S2*)28%laPm9#NyF`b!h)+_+AWS4UPi$Bur0=elg)P7~ zW6NjYwfxUhF`D;{=+G{(P0ARAZwW54%3I)S(ZDs+7BDGkIu+iNXOTfrzg1mr{8qW` zQp)vxf?Ijn#Im499}CLADbGCH>zY@;Rb5AevIyQk2Nw+3;GitAfsoGI8pK?acZ&>P zoQ(>Eo(5&9^aVmwB@<6`LKq5<;RCcRq41o1V|AP!K;Fs})O#`1$)KadDi=dg|K;!u zZV7Cc!!x*JIViRqzJx(tHbkx%LZxkp)NM=bZ~WYyXG?60d^?D|<<>V~>ioC|cpG@C zDkuB{oFx0E=t=oz7 + + android:text="检测" /> + finalBbox; +#if(MAXFACEOPEN==1) + mtcnn.detectMaxFace(ncnn_img, finalBbox); +#else mtcnn.detect(ncnn_img, finalBbox); +#endif const int num_box = finalBbox.size(); std::vector bbox; bbox.resize(num_box); @@ -62,7 +67,12 @@ int test_picture(){ image = cv::imread("../sample.jpg"); ncnn::Mat ncnn_img = ncnn::Mat::from_pixels(image.data, ncnn::Mat::PIXEL_BGR2RGB, image.cols, image.rows); std::vector finalBbox; + +#if(MAXFACEOPEN==1) + mtcnn.detectMaxFace(ncnn_img, finalBbox); +#else mtcnn.detect(ncnn_img, finalBbox); +#endif const int num_box = finalBbox.size(); std::vector bbox; diff --git a/src/mtcnn.cpp b/src/mtcnn.cpp index 5808e15..cf7184a 100644 --- a/src/mtcnn.cpp +++ b/src/mtcnn.cpp @@ -15,6 +15,12 @@ bool cmpScore(Bbox lsh, Bbox rsh) { return false; } +bool cmpArea(Bbox lsh, Bbox rsh) { + if (lsh.area < rsh.area) + return false; + else + return true; +} //MTCNN::MTCNN(){} MTCNN::MTCNN(const string &model_path) { @@ -85,6 +91,49 @@ void MTCNN::generateBbox(ncnn::Mat score, ncnn::Mat location, std::vector& } } } + +void MTCNN::nmsTwoBoxs(vector& boundingBox_, vector& previousBox_, const float overlap_threshold, string modelname) +{ + if (boundingBox_.empty()) { + return; + } + sort(boundingBox_.begin(), boundingBox_.end(), cmpScore); + float IOU = 0; + float maxX = 0; + float maxY = 0; + float minX = 0; + float minY = 0; + //std::cout << boundingBox_.size() << " "; + for (std::vector::iterator ity = previousBox_.begin(); ity != previousBox_.end(); ity++) { + for (std::vector::iterator itx = boundingBox_.begin(); itx != boundingBox_.end();) { + int i = itx - boundingBox_.begin(); + int j = ity - previousBox_.begin(); + maxX = std::max(boundingBox_.at(i).x1, previousBox_.at(j).x1); + maxY = std::max(boundingBox_.at(i).y1, previousBox_.at(j).y1); + minX = std::min(boundingBox_.at(i).x2, previousBox_.at(j).x2); + minY = std::min(boundingBox_.at(i).y2, previousBox_.at(j).y2); + //maxX1 and maxY1 reuse + maxX = ((minX - maxX + 1)>0) ? (minX - maxX + 1) : 0; + maxY = ((minY - maxY + 1)>0) ? (minY - maxY + 1) : 0; + //IOU reuse for the area of two bbox + IOU = maxX * maxY; + if (!modelname.compare("Union")) + IOU = IOU / (boundingBox_.at(i).area + previousBox_.at(j).area - IOU); + else if (!modelname.compare("Min")) { + IOU = IOU / ((boundingBox_.at(i).area < previousBox_.at(j).area) ? boundingBox_.at(i).area : previousBox_.at(j).area); + } + if (IOU > overlap_threshold&&boundingBox_.at(i).score>previousBox_.at(j).score) { + //if (IOU > overlap_threshold) { + itx = boundingBox_.erase(itx); + } + else { + itx++; + } + } + } + //std::cout << boundingBox_.size() << std::endl; +} + void MTCNN::nms(std::vector &boundingBox_, const float overlap_threshold, string modelname){ if(boundingBox_.empty()){ return; @@ -178,6 +227,41 @@ void MTCNN::refine(vector &vecBbox, const int &height, const int &width, b it->area = (it->x2 - it->x1)*(it->y2 - it->y1); } } + +void MTCNN::extractMaxFace(vector& boundingBox_) +{ + if (boundingBox_.empty()) { + return; + } + sort(boundingBox_.begin(), boundingBox_.end(), cmpArea); + for (std::vector::iterator itx = boundingBox_.begin() + 1; itx != boundingBox_.end();) { + itx = boundingBox_.erase(itx); + } +} + +void MTCNN::PNet(float scale) +{ + //first stage + int hs = (int)ceil(img_h*scale); + int ws = (int)ceil(img_w*scale); + ncnn::Mat in; + resize_bilinear(img, in, ws, hs); + ncnn::Extractor ex = Pnet.create_extractor(); + ex.set_light_mode(true); + //sex.set_num_threads(4); + ex.input("data", in); + ncnn::Mat score_, location_; + ex.extract("prob1", score_); + ex.extract("conv4-2", location_); + std::vector boundingBox_; + + generateBbox(score_, location_, boundingBox_, scale); + nms(boundingBox_, nms_threshold[0]); + + firstBbox_.insert(firstBbox_.end(), boundingBox_.begin(), boundingBox_.end()); + boundingBox_.clear(); +} + void MTCNN::PNet(){ firstBbox_.clear(); float minl = img_w < img_h? img_w: img_h; @@ -218,6 +302,7 @@ void MTCNN::RNet(){ ncnn::Mat in; resize_bilinear(tempIm, in, 24, 24); ncnn::Extractor ex = Rnet.create_extractor(); + //ex.set_num_threads(2); ex.set_light_mode(true); ex.input("data", in); ncnn::Mat score, bbox; @@ -241,6 +326,7 @@ void MTCNN::ONet(){ ncnn::Mat in; resize_bilinear(tempIm, in, 48, 48); ncnn::Extractor ex = Onet.create_extractor(); + //ex.set_num_threads(2); ex.set_light_mode(true); ex.input("data", in); ncnn::Mat score, bbox, keyPoint; @@ -273,25 +359,106 @@ void MTCNN::detect(ncnn::Mat& img_, std::vector& finalBbox_){ if(firstBbox_.size() < 1) return; nms(firstBbox_, nms_threshold[0]); refine(firstBbox_, img_h, img_w, true); - printf("firstBbox_.size()=%d\n", firstBbox_.size()); + //printf("firstBbox_.size()=%d\n", firstBbox_.size()); //second stage RNet(); - printf("secondBbox_.size()=%d\n", secondBbox_.size()); + //printf("secondBbox_.size()=%d\n", secondBbox_.size()); if(secondBbox_.size() < 1) return; nms(secondBbox_, nms_threshold[1]); refine(secondBbox_, img_h, img_w, true); //third stage ONet(); - printf("thirdBbox_.size()=%d\n", thirdBbox_.size()); + //printf("thirdBbox_.size()=%d\n", thirdBbox_.size()); if(thirdBbox_.size() < 1) return; refine(thirdBbox_, img_h, img_w, true); nms(thirdBbox_, nms_threshold[2], "Min"); finalBbox_ = thirdBbox_; } + +void MTCNN::detectMaxFace(ncnn::Mat& img_, std::vector& finalBbox) { + firstPreviousBbox_.clear(); + secondPreviousBbox_.clear(); + thirdPrevioussBbox_.clear(); + firstBbox_.clear(); + secondBbox_.clear(); + thirdBbox_.clear(); + + //norm + img = img_; + img_w = img.w; + img_h = img.h; + img.substract_mean_normalize(mean_vals, norm_vals); + + //pyramid size + float minl = img_w < img_h ? img_w : img_h; + float m = (float)MIN_DET_SIZE / minsize; + minl *= m; + float factor = pre_facetor; + vector scales_; + while (minl>MIN_DET_SIZE) { + scales_.push_back(m); + minl *= factor; + m = m*factor; + } + sort(scales_.begin(), scales_.end()); + //printf("scales_.size()=%d\n", scales_.size()); + + //Change the sampling process. + for (size_t i = 0; i < scales_.size(); i++) + { + //first stage + PNet(scales_[i]); + nms(firstBbox_, nms_threshold[0]); + nmsTwoBoxs(firstBbox_, firstPreviousBbox_, nms_threshold[0]); + if (firstBbox_.size() < 1) { + firstBbox_.clear(); + continue; + } + firstPreviousBbox_.insert(firstPreviousBbox_.end(), firstBbox_.begin(), firstBbox_.end()); + refine(firstBbox_, img_h, img_w, true); + //printf("firstBbox_.size()=%d\n", firstBbox_.size()); + + //second stage + RNet(); + nms(secondBbox_, nms_threshold[1]); + nmsTwoBoxs(secondBbox_, secondPreviousBbox_, nms_threshold[0]); + secondPreviousBbox_.insert(secondPreviousBbox_.end(), secondBbox_.begin(), secondBbox_.end()); + if (secondBbox_.size() < 1) { + firstBbox_.clear(); + secondBbox_.clear(); + continue; + } + refine(secondBbox_, img_h, img_w, true); + //printf("secondBbox_.size()=%d\n", secondBbox_.size()); + + //third stage + ONet(); + //printf("thirdBbox_.size()=%d\n", thirdBbox_.size()); + if (thirdBbox_.size() < 1) { + firstBbox_.clear(); + secondBbox_.clear(); + thirdBbox_.clear(); + continue; + } + refine(thirdBbox_, img_h, img_w, true); + nms(thirdBbox_, nms_threshold[2], "Min"); + + if (thirdBbox_.size() > 0) { + extractMaxFace(thirdBbox_); + finalBbox = thirdBbox_;//if largest face size is similar,. + break; + } + } + + //printf("firstPreviousBbox_.size()=%d\n", firstPreviousBbox_.size()); + //printf("secondPreviousBbox_.size()=%d\n", secondPreviousBbox_.size()); +} + + //void MTCNN::detection(const cv::Mat& img, std::vector& rectangles){ // ncnn::Mat ncnn_img = ncnn::Mat::from_pixels(img.data, ncnn::Mat::PIXEL_BGR2RGB, img.cols, img.rows); // std::vector finalBbox; diff --git a/src/mtcnn.h b/src/mtcnn.h index c3e8ece..a2848cd 100644 --- a/src/mtcnn.h +++ b/src/mtcnn.h @@ -37,21 +37,28 @@ class MTCNN { void SetMinFace(int minSize); void detect(ncnn::Mat& img_, std::vector& finalBbox); + void detectMaxFace(ncnn::Mat& img_, std::vector& finalBbox); // void detection(const cv::Mat& img, std::vector& rectangles); private: void generateBbox(ncnn::Mat score, ncnn::Mat location, vector& boundingBox_, float scale); + void nmsTwoBoxs(vector &boundingBox_, vector &previousBox_, const float overlap_threshold, string modelname = "Union"); void nms(vector &boundingBox_, const float overlap_threshold, string modelname="Union"); void refine(vector &vecBbox, const int &height, const int &width, bool square); + void extractMaxFace(vector &boundingBox_); + + void PNet(float scale); void PNet(); void RNet(); void ONet(); + ncnn::Net Pnet, Rnet, Onet; ncnn::Mat img; + const float nms_threshold[3] = {0.5f, 0.7f, 0.7f}; - const float mean_vals[3] = {127.5, 127.5, 127.5}; const float norm_vals[3] = {0.0078125, 0.0078125, 0.0078125}; const int MIN_DET_SIZE = 12; + std::vector firstPreviousBbox_, secondPreviousBbox_, thirdPrevioussBbox_; std::vector firstBbox_, secondBbox_,thirdBbox_; int img_w, img_h;