diff --git a/src/cpu.h b/src/cpu.h new file mode 100644 index 0000000..7dcb7e3 --- /dev/null +++ b/src/cpu.h @@ -0,0 +1,143 @@ +/* cpu.h : Author : Alexander J. Yee */ +#ifndef _cpu_H +#define _cpu_H +#include +#include +#include +#ifdef __ANDROID__ + +#else +#if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86) +# if _WIN32 +#include +#include +# elif defined(__GNUC__) || defined(__clang__) +#include +#define _XCR_XFEATURE_ENABLED_MASK 0 +# else +# error "No cpuid intrinsic defined for compiler." +# endif +#else +# error "No cpuid intrinsic defined for processor architecture." +#endif +#endif //ANDROID +namespace fbow{ +struct cpu{ + bool Vendor_AMD,Vendor_Intel;// Vendor + bool OS_x64,OS_AVX,OS_AVX512;// OS Features + bool HW_MMX,HW_x64,HW_ABM,HW_RDRAND,HW_BMI1,HW_BMI2,HW_ADX,HW_PREFETCHWT1,HW_MPX;// Misc. + bool HW_SSE,HW_SSE2,HW_SSE3,HW_SSSE3,HW_SSE41,HW_SSE42,HW_SSE4a,HW_AES,HW_SHA;// SIMD: 128-bit + bool HW_AVX,HW_XOP,HW_FMA3,HW_FMA4,HW_AVX2;// SIMD: 256-bit + bool HW_AVX512_F,HW_AVX512_PF,HW_AVX512_ER,HW_AVX512_CD,HW_AVX512_VL,HW_AVX512_BW,HW_AVX512_DQ,HW_AVX512_IFMA,HW_AVX512_VBMI;// SIMD: 512-bit +public: + inline cpu(){ memset(this, 0, sizeof(*this)); } + inline void detect_host(); + inline bool isSafeAVX(){return HW_AVX && OS_AVX;} + inline bool isSafeSSE(){return HW_SSE ;} + inline bool isSafeMMX(){return HW_MMX ;} + inline void disableAVX(){HW_AVX =false;} + inline void disableMMX(){HW_MMX =false;} + inline void disableSSE(){HW_SSE=false;} + + static inline void cpuid(int32_t out[4], int32_t x); + static inline std::string get_vendor_string(); + +private: + static bool inline detect_OS_x64(); + static bool inline detect_OS_AVX(); + static bool inline detect_OS_AVX512(); + static inline uint64_t xgetbv(unsigned int x); +}; +#ifdef __ANDROID__ + +void cpu::cpuid(int32_t out[4], int32_t x){} +#else +//// MSCV +#if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86) +# if _WIN32 +void cpu::cpuid(int32_t out[4], int32_t x){ __cpuidex(out, x, 0); } +uint64_t cpu::xgetbv(unsigned int x){ return _xgetbv(x); } +// Detect 64-bit - Note that this snippet of code for detecting 64-bit has been copied from MSDN. +typedef BOOL (WINAPI *LPFN_ISWOW64PROCESS) (HANDLE, PBOOL); +static inline BOOL IsWow64() +{ + BOOL bIsWow64 = FALSE; + LPFN_ISWOW64PROCESS fnIsWow64Process = (LPFN_ISWOW64PROCESS) GetProcAddress( GetModuleHandle(TEXT("kernel32")), "IsWow64Process"); + if (NULL == fnIsWow64Process) return FALSE; + if (!fnIsWow64Process(GetCurrentProcess(), &bIsWow64)) return FALSE; + return bIsWow64 ; +} +bool cpu::detect_OS_x64(){ +#ifdef _M_X64 + return true; +#else + return IsWow64() != 0; +#endif +} +//////////////////////////////////// +/////GCC +//////////////////////////////////// + +# elif defined(__GNUC__) || defined(__clang__) +void cpu::cpuid(int32_t out[4], int32_t x){ __cpuid_count(x, 0, out[0], out[1], out[2], out[3]); } +uint64_t cpu::xgetbv(unsigned int index){ uint32_t eax, edx; __asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index)); return ((uint64_t)edx << 32) | eax;} +//////////////////////////////////////////////////////////////////////////////// +// Detect 64-bit. We only support x64 on Linux. +bool cpu::detect_OS_x64(){ return true;} +# endif +#endif +#endif + +//////////////////////////////////////////////////////////////////////////////// +bool cpu::detect_OS_AVX(){ + // Copied from: http://stackoverflow.com/a/22521619/922184 +#ifndef __ANDROID__ + + bool avxSupported = false; + int cpuInfo[4]; cpuid(cpuInfo, 1); + bool osUsesXSAVE_XRSTORE = (cpuInfo[2] & (1 << 27)) != 0; + bool cpuAVXSuport = (cpuInfo[2] & (1 << 28)) != 0; + if (osUsesXSAVE_XRSTORE && cpuAVXSuport) { uint64_t xcrFeatureMask = xgetbv(_XCR_XFEATURE_ENABLED_MASK); avxSupported = (xcrFeatureMask & 0x6) == 0x6;} + return avxSupported; +#else + return false; +#endif +} +bool cpu::detect_OS_AVX512(){ + +#ifndef __ANDROID__ + if (!detect_OS_AVX()) + return false; + uint64_t xcrFeatureMask = xgetbv(_XCR_XFEATURE_ENABLED_MASK); + return (xcrFeatureMask & 0xe6) == 0xe6; +#else + return false; +#endif + +} +std::string cpu::get_vendor_string(){ int32_t CPUInfo[4]; char name[13];cpuid(CPUInfo, 0); memcpy(name + 0, &CPUInfo[1], 4);memcpy(name + 4, &CPUInfo[3], 4); memcpy(name + 8, &CPUInfo[2], 4); name[12] = '\0'; return name;} +void cpu::detect_host(){ + +#ifndef __ANDROID__ + + OS_x64 = detect_OS_x64(); + OS_AVX = detect_OS_AVX(); + OS_AVX512 = detect_OS_AVX512(); + // Vendor + std::string vendor(get_vendor_string()); + if (vendor == "GenuineIntel"){ Vendor_Intel = true;} + else if (vendor == "AuthenticAMD"){ Vendor_AMD = true; } + int info[4]; + cpuid(info, 0); + int nIds = info[0]; + cpuid(info, 0x80000000); + uint32_t nExIds = info[0]; + // Detect Features + if (nIds >= 0x00000001){ cpuid(info, 0x00000001); HW_MMX = (info[3] & ((int)1 << 23)) != 0; HW_SSE = (info[3] & ((int)1 << 25)) != 0; HW_SSE2 = (info[3] & ((int)1 << 26)) != 0; HW_SSE3 = (info[2] & ((int)1 << 0)) != 0; HW_SSSE3 = (info[2] & ((int)1 << 9)) != 0; HW_SSE41 = (info[2] & ((int)1 << 19)) != 0; HW_SSE42 = (info[2] & ((int)1 << 20)) != 0; HW_AES = (info[2] & ((int)1 << 25)) != 0; HW_AVX = (info[2] & ((int)1 << 28)) != 0; HW_FMA3 = (info[2] & ((int)1 << 12)) != 0; HW_RDRAND = (info[2] & ((int)1 << 30)) != 0; } + if (nIds >= 0x00000007){ cpuid(info, 0x00000007); HW_AVX2 = (info[1] & ((int)1 << 5)) != 0; HW_BMI1 = (info[1] & ((int)1 << 3)) != 0; HW_BMI2 = (info[1] & ((int)1 << 8)) != 0; HW_ADX = (info[1] & ((int)1 << 19)) != 0; HW_MPX = (info[1] & ((int)1 << 14)) != 0; HW_SHA = (info[1] & ((int)1 << 29)) != 0; HW_PREFETCHWT1 = (info[2] & ((int)1 << 0)) != 0; HW_AVX512_F = (info[1] & ((int)1 << 16)) != 0; HW_AVX512_CD = (info[1] & ((int)1 << 28)) != 0; HW_AVX512_PF = (info[1] & ((int)1 << 26)) != 0; HW_AVX512_ER = (info[1] & ((int)1 << 27)) != 0; HW_AVX512_VL = (info[1] & ((int)1 << 31)) != 0; HW_AVX512_BW = (info[1] & ((int)1 << 30)) != 0; HW_AVX512_DQ = (info[1] & ((int)1 << 17)) != 0; HW_AVX512_IFMA = (info[1] & ((int)1 << 21)) != 0; HW_AVX512_VBMI = (info[2] & ((int)1 << 1)) != 0; } + if (nExIds >= 0x80000001){ cpuid(info, 0x80000001); HW_x64 = (info[3] & ((int)1 << 29)) != 0; HW_ABM = (info[2] & ((int)1 << 5)) != 0; HW_SSE4a = (info[2] & ((int)1 << 6)) != 0; HW_FMA4 = (info[2] & ((int)1 << 16)) != 0; HW_XOP = (info[2] & ((int)1 << 11)) != 0; } +#endif +} + +} +#endif diff --git a/src/fbow.cpp b/src/fbow.cpp index c10e067..f474120 100644 --- a/src/fbow.cpp +++ b/src/fbow.cpp @@ -1,18 +1,21 @@ #include "fbow.h" #include #include +#include +#include +#include namespace fbow{ Vocabulary::~Vocabulary(){ - if (_data!=0) free( _data); + if (_data!=0) AlignedFree( _data); } void Vocabulary::setParams(int aligment, int k, int desc_type, int desc_size, int nblocks, std::string desc_name)throw(std::runtime_error){ - //if(k>128)throw std::runtime_error("fbow::Vocabulary::setParams k must be <=128") - desc_name.resize(std::min(desc_name.size(),size_t(49))); + auto ns= desc_name.size()(49)?desc_name.size():128; + desc_name.resize(ns); std::strcpy(_params._desc_name_,desc_name.c_str()); _params._aligment=aligment; @@ -45,7 +48,7 @@ void Vocabulary::setParams(int aligment, int k, int desc_type, int desc_size, in //give memory _params._total_size=_params._block_size_bytes_wp*_params._nblocks; - _data=(char*)aligned_alloc(_params._aligment,_params._total_size); + _data=(char*)AlignedAlloc(_params._aligment,_params._total_size); memset( _data,0,_params._total_size); } @@ -59,29 +62,31 @@ fBow Vocabulary::transform(const cv::Mat &features)throw(std::exception) //get host info to decide the version to execute if (!cpu_info){ - cpu_info=std::make_shared(); + cpu_info=std::make_shared(); cpu_info->detect_host(); } fBow result; //decide the version to employ according to the type of features, aligment and cpu capabilities if (_params._desc_type==CV_8UC1){ - //orb if (cpu_info->HW_x64){ - if (_params._desc_size==32) result=_transform(features); + if (_params._desc_size==32) + result=_transform(features); //full akaze - else if( _params._desc_size==61 && _params._aligment%8==0) result=_transform(features); + else if( _params._desc_size==61 && _params._aligment%8==0) + result=_transform(features); //generic - else result=_transform(features ); + else + result=_transform(features ); } else result= _transform(features ); } else if(features.type()==CV_32FC1){ - if( cpu_info->HW_AVX && _params._aligment%32==0){ //AVX version + if( cpu_info->isSafeAVX() && _params._aligment%32==0){ //AVX version if ( _params._desc_size==256) result= _transform(features);//specific for surf 256 bytes else result= _transform(features);//any other } - if( cpu_info->HW_SSE && _params._aligment%16==0){//SSE version + if( cpu_info->isSafeSSE() && _params._aligment%16==0){//SSE version if ( _params._desc_size==256) result= _transform(features);//specific for surf 256 bytes else result=_transform(features);//any other } @@ -108,7 +113,7 @@ fBow Vocabulary::transform(const cv::Mat &features)throw(std::exception) void Vocabulary::clear() { - if (_data!=0) free(_data); + if (_data!=0) AlignedFree(_data); _data=0; memset(&_params,0,sizeof(_params)); _params._desc_name_[0]='\0'; @@ -117,13 +122,13 @@ void Vocabulary::clear() //loads/saves from a file void Vocabulary::readFromFile(const std::string &filepath)throw(std::exception){ - std::ifstream file(filepath); + std::ifstream file(filepath,std::ios::binary); if (!file) throw std::runtime_error("Vocabulary::readFromFile could not open:"+filepath); fromStream(file); } void Vocabulary::saveToFile(const std::string &filepath)throw(std::exception){ - std::ofstream file(filepath); + std::ofstream file(filepath, std::ios::binary); if (!file) throw std::runtime_error("Vocabulary::saveToFile could not open:"+filepath); toStream(file); @@ -141,18 +146,18 @@ void Vocabulary::toStream(std::ostream &str)const{ void Vocabulary::fromStream(std::istream &str)throw(std::exception) { - if (_data!=0) free (_data); + if (_data!=0) AlignedFree (_data); uint64_t sig; str.read((char*)&sig,sizeof(sig)); if (sig!=55824124) throw std::runtime_error("Vocabulary::fromStream invalid signature"); //read string str.read((char*)&_params,sizeof(params)); - _data=(char*)aligned_alloc(_params._aligment,_params._total_size); + _data=(char*)AlignedAlloc(_params._aligment,_params._total_size); if (_data==0) throw std::runtime_error("Vocabulary::fromStream Could not allocate data"); str.read(_data,_params._total_size); } -double fBow::score(const fBow &v1,const fBow &v2){ +double fBow::score (const fBow &v1,const fBow &v2){ fBow::const_iterator v1_it, v2_it; @@ -180,13 +185,17 @@ double fBow::score(const fBow &v1,const fBow &v2){ else if(v1_it->first < v2_it->first) { // move v1 forward - v1_it = v1.lower_bound(v2_it->first); - // v1_it = (first element >= v2_it.id) +// v1_it = v1.lower_bound(v2_it->first); + while(v1_it!=v1_end&& v1_it->firstfirst) + ++v1_it; } else { // move v2 forward - v2_it = v2.lower_bound(v1_it->first); +// v2_it = v2.lower_bound(v1_it->first); + while(v2_it!=v2_end && v2_it->firstfirst) + ++v2_it; + // v2_it = (first element >= v1_it.id) } } diff --git a/src/fbow.h b/src/fbow.h index 5b816c3..d25f591 100644 --- a/src/fbow.h +++ b/src/fbow.h @@ -1,16 +1,16 @@ #ifndef _FBOW_VOCABULARY_H #define _FBOW_VOCABULARY_H -#include "exports.h" +#include "fbow_exports.h" #include -#include +#include #include #include #include -#include "cpu_x86.h" +#include "cpu.h" namespace fbow{ //float initialized to zero. -struct _float{ +struct FBOW_API _float{ float var=0; inline float operator=(float &f){var=f;return var;} inline operator float&() {return var;} @@ -19,7 +19,7 @@ struct _float{ /**Bag of words */ -struct fBow:std::map{ +struct FBOW_API fBow:std::map{ void toStream(std::ostream &str) const { uint32_t _size=size(); @@ -51,6 +51,31 @@ struct fBow:std::map{ */ class FBOW_API Vocabulary { + + + static inline void * AlignedAlloc(int __alignment,int size){ + assert(__alignment<256); + + unsigned char *ptr= (unsigned char*)malloc(size + __alignment); + + if( !ptr ) return 0; + + // align the pointer + + size_t lptr=(size_t)ptr; + int off =lptr%__alignment; + if (off==0) off=__alignment; + + ptr = ptr+off ; //move to next aligned address + *(ptr-1)=(unsigned char)off; //save in prev, the offset to properly remove it + return ptr; + } + static inline void AlignedFree(void *ptr){ + unsigned char *uptr=(unsigned char *)ptr; + unsigned char off= *(uptr-1); + uptr-=off; + std::free(uptr); + } friend class VocabularyCreator; public: @@ -86,8 +111,8 @@ class FBOW_API Vocabulary private: void setParams( int aligment,int k,int desc_type,int desc_size, int nblocks,std::string desc_name)throw(std::runtime_error); struct params{ - char _desc_name_[50]="";//descriptor name. May be empty - uint32_t _aligment,_nblocks=0 ;//memory aligment and total number of blocks + char _desc_name_[50];//descriptor name. May be empty + uint32_t _aligment=0,_nblocks=0 ;//memory aligment and total number of blocks uint64_t _desc_size_bytes_wp=0;//size of the descriptor(includes padding) uint64_t _block_size_bytes_wp=0;//size of a block (includes padding) uint64_t _feature_off_start=0;//within a block, where the features start @@ -167,7 +192,7 @@ class FBOW_API Vocabulary inline void setBlock(uint32_t b,Block &block){ block._blockstart= _data+ b*_params._block_size_bytes_wp;} //information about the cpu so that mmx,sse or avx extensions can be employed - std::shared_ptr cpu_info; + std::shared_ptr cpu_info; //////////////////////////////////////////////////////////// @@ -183,14 +208,14 @@ class FBOW_API Vocabulary int _block_desc_size_bytes_wp; register_type *feature=0; public: - ~Lx(){if (feature!=0)free(feature);} + ~Lx(){if (feature!=0)AlignedFree(feature);} void setParams(int desc_size, int block_desc_size_bytes_wp){ assert(block_desc_size_bytes_wp%aligment==0); _desc_size=desc_size; _block_desc_size_bytes_wp=block_desc_size_bytes_wp; assert(_block_desc_size_bytes_wp%sizeof(register_type )==0); _nwords=_block_desc_size_bytes_wp/sizeof(register_type );//number of aligned words - feature=(register_type*)aligned_alloc(aligment,_nwords*sizeof(register_type )); + feature=(register_type*)AlignedAlloc(aligment,_nwords*sizeof(register_type )); memset(feature,0,_nwords*sizeof(register_type )); } inline void startwithfeature(const register_type *feat_ptr){memcpy(feature,feat_ptr,_desc_size);} @@ -198,6 +223,7 @@ class FBOW_API Vocabulary }; + struct L2_generic:public Lx{ ~L2_generic(){ } inline float computeDist(float *fptr){ @@ -206,7 +232,17 @@ class FBOW_API Vocabulary return d; } }; +#ifdef __ANDROID__ + //fake elements to allow compilation + struct L2_avx_generic:public Lx{inline float computeDist(uint64_t *ptr){return std::numeric_limits::max();}}; + struct L2_se3_generic:public Lx{inline float computeDist(uint64_t *ptr){return std::numeric_limits::max();}}; + struct L2_sse3_16w:public Lx{inline float computeDist(uint64_t *ptr){return std::numeric_limits::max();}}; + struct L2_avx_8w:public Lx{inline float computeDist(uint64_t *ptr){return std::numeric_limits::max();}}; + + + +#else struct L2_avx_generic:public Lx<__m256,float,32>{ inline float computeDist(__m256 *ptr){ __m256 sum=_mm256_setzero_ps(), sub_mult; @@ -222,7 +258,6 @@ class FBOW_API Vocabulary return sum_ptr[0]+sum_ptr[4]; } }; - struct L2_se3_generic:public Lx<__m128,float,16>{ inline float computeDist(__m128 *ptr){ __m128 sum=_mm_setzero_ps(), sub_mult; @@ -272,11 +307,9 @@ class FBOW_API Vocabulary float *sum_ptr=(float*)∑ return sum_ptr[0]+sum_ptr[4]; } - - }; - + #endif //generic hamming distance calculator struct L1_x64:public Lx{ @@ -350,7 +383,7 @@ class FBOW_API Vocabulary if ( bn_info->isleaf()) result[bn_info->getId()]+=bn_info->weight;//if the node is leaf get word id and weight else setBlock(bn_info->getId(),c_block);//go to its children - }while( !bn_info->isleaf()); + }while( !bn_info->isleaf() && bn_info->getId()!=0); } return result; } diff --git a/src/fbow_exports.h b/src/fbow_exports.h new file mode 100644 index 0000000..870978e --- /dev/null +++ b/src/fbow_exports.h @@ -0,0 +1,51 @@ +/***************************** +Copyright 2014 Rafael Muñoz Salinas. All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are +permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, this list + of conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY Rafael Muñoz Salinas ''AS IS'' AND ANY EXPRESS OR IMPLIED +WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL Rafael Muñoz Salinas OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +The views and conclusions contained in the software and documentation are those of the +authors and should not be interpreted as representing official policies, either expressed +or implied, of Rafael Muñoz Salinas. +********************************/ + + + +#ifndef __FBOW_CORE_TYPES_H__ +#define __FBOW_CORE_TYPES_H__ + +#if !defined _CRT_SECURE_NO_DEPRECATE && _MSC_VER > 1300 +#define _CRT_SECURE_NO_DEPRECATE /* to avoid multiple Visual Studio 2005 warnings */ +#endif + +#if (defined WIN32 || defined _WIN32 || defined WINCE) && defined FBOW_DSO_EXPORTS + #define FBOW_API __declspec(dllexport) + #pragma warning ( disable : 4251 ) //disable warning to templates with dll linkage. + #pragma warning ( disable : 4290 ) //disable warning due to exception specifications. + #pragma warning ( disable : 4996 ) //disable warning regarding unsafe vsprintf. + #pragma warning ( disable : 4244 ) //disable warning convesions with lost of data. + +#else + #define FBOW_API +#endif + + +#define FBOW_VERSION "${PROJECT_VERSION}" +#endif diff --git a/src/vocabulary_creator.cpp b/src/vocabulary_creator.cpp index dd30b54..51ae41b 100644 --- a/src/vocabulary_creator.cpp +++ b/src/vocabulary_creator.cpp @@ -1,5 +1,10 @@ #include "vocabulary_creator.h" +#ifndef __ANDROID__ #include +#else +inline int omp_get_max_threads(){return 1;} +inline int omp_get_thread_num(){return 0;} +#endif #include using namespace std; namespace fbow{ @@ -107,9 +112,9 @@ void VocabularyCreator::createLevel( int parent, int curL,bool recursive){ for(size_t i=0;i #include #include #include #include #include -#include -#include "fbow.h" #include -#include +#include +#include +#include #include +#include +#include "fbow_exports.h" +#include "fbow.h" namespace fbow{ /**This class creates the vocabulary */ @@ -27,6 +29,7 @@ class FBOW_API VocabularyCreator int L=-1; uint32_t nthreads=1; int maxIters=11; + bool verbose=false; }; //create this from a set of features @@ -144,7 +147,7 @@ class FBOW_API VocabularyCreator // struct Node{ Node(){} - Node(uint32_t Id,uint32_t Parent,const cv::Mat &Feature, uint32_t Feat_idx=std::numeric_limits::max()):id(Id),parent(Parent),feature(Feature),feat_idx(Feat_idx){ + Node(uint32_t Id,uint32_t Parent,const cv::Mat &Feature, uint32_t Feat_idx=std::numeric_limits::max() ):id(Id),parent(Parent),feature(Feature),feat_idx(Feat_idx){ }