hog lbp svm 目标检测svm能同时识别两种物体吗

HOG+SVM行人检测识别的两种方法
关于HOG+SVM,CSDN上有很多牛人写了很多非常好的文章,这里给出我觉得写得比较好的几篇,仅供大家参考:
目标检测的图像特征提取之(一)HOG特征
HOG:从理论到OpenCV实践
opencv 学习笔记-入门(21)之三线性插值-hog(二)
这篇博客讲的是三线性插值(用来减少混叠效应),写得很好
OpenCV中的HOG+SVM物体分类
OpenCV HOGDescriptor 参数图解
利用Hog特征和SVM分类器进行行人检测
以上就是个人觉得写得比较好的博客。基本上将上面的博客看懂了,HOG也就比较理解了;如果还想深入了解HOG,建议直接看OpenCV HOG的源码
下面,就说说使用OpenCV 中的HOG+SVM实现行人检测的两种方式
先说第一种方式,直接上代码:
#include "opencv2/opencv.hpp"
#include "opencv2/ml.hpp"
#include &stdio.h&
#include &string.h&
#include &cctype&
#include&iostream&
#include &fstream&
using namespace cv::
//HOG+SVM识别方式2
void Train()
//读入训练样本图片路径和类别
vector&string& imageP
vector&int& imageC
int numberOfLine = 0;
ifstream trainingData("D:/Image/Pedestrians64x128/TrainData.txt");
unsigned long
while (trainingData)
if (getline(trainingData, buffer))
numberOfLine++;
if (numberOfLine % 2 == 0)
imageClass.push_back(atoi(buffer.c_str()));
imagePath.push_back(buffer);
trainingData.close();
//获取样本的HOG特征
int numberOfSample = numberOfLine / 2;
Mat featureVectorOfSample(numberOfSample, 3780, CV_32FC1);
Mat classOfSample(numberOfSample, 1, CV_32SC1);
for (string::size_type i = 0; i &= imagePath.size() - 1; ++i)
Mat src = imread(imagePath[i], -1);
if (src.empty())
cout && "can not load the image:" && imagePath[i] &&
cout && "processing" && imagePath[i] &&
Mat trainI
resize(src, trainImage, Size(64, 128));
HOGDescriptor hog(cvSize(64, 128), cvSize(16, 16), cvSize(8, 8), cvSize(8, 8), 9);
vector&float&
pute(trainImage, descriptors);
cout && "HOG dimensions:" && descriptors.size() &&
for (vector&float&::size_type j = 0; j &= descriptors.size() - 1; ++j)
featureVectorOfSample.at&float&(i, j) = descriptors[j];
classOfSample.at&int&(i, 0) = imageClass[i];
//使用SVM分类器训练
Ptr&SVM& svm = SVM::create();
svm-&setType(SVM::C_SVC);
svm-&setKernel(SVM::LINEAR);
svm-&setTermCriteria(TermCriteria(CV_TERMCRIT_ITER, 1000, FLT_EPSILON));
svm-&train(featureVectorOfSample, ROW_SAMPLE, classOfSample);
svm-&save("D:/Image/Pedestrians64x128/Classifier.txt");
Mat supportVector = svm-&getSupportVectors();
for (int i = 0; i & 3780; ++i)
cout && supportVector.at&float&(0, i)&&
float rho = svm-&getDecisionFunction(0, alpha, svIndex);
Mat alpha2;
alpha.convertTo(alpha2, CV_32FC1);
Mat result(1, 3780, CV_32FC1);
result = alpha2*supportV
for (int i = 0; i & 3780; ++i)
result.at&float&(0, i) *= -1;
FILE* fp = fopen("D:/Image/Pedestrians64x128/HOG_SVM.txt", "wb");
for (int i = 0; i&3780; i++)
fprintf(fp, "%f \n", result.at&float&(0,i));
fprintf(fp, "%f", rho);
fclose(fp);
void Detect()
FILE* f = 0;
char _filename[1024];
f = fopen("D:/Image/Pedestrians64x128/TestData/FileNameList.txt", "rt");
fprintf(stderr, "ERROR: the specified file could not be loaded\n");
vector&float&
ifstream fileIn("D:/Image/Pedestrians64x128/HOG_SVM.txt",ios::in);
float val = 0.0f;
while (!fileIn.eof())
detector.push_back(val);
fileIn.close();
hog.setSVMDetector(detector);
namedWindow("people detector", 1);
char* filename = _
if (!fgets(filename, (int)sizeof(_filename)-2, f))
if (filename[0] == '#')
int l = (int)strlen(filename);
while (l & 0 && isspace(filename[l - 1]))
filename[l] = '\0';
img = imread(filename);
printf("%s:\n", filename);
if (!img.data)
fflush(stdout);
vector&Rect& found, found_
double t = (double)getTickCount();
hog.detectMultiScale(img, found, 0, Size(8, 8), Size(32, 32), 1.05, 2);
t = (double)getTickCount() -
printf("detection time = %gms\n", t*1000. / cv::getTickFrequency());
for (i = 0; i & found.size(); i++)
Rect r = found[i];
for (j = 0; j & found.size(); j++)
if (j != i && (r & found[j]) == r)
if (j == found.size())
found_filtered.push_back(r);
for (i = 0; i & found_filtered.size(); i++)
Rect r = found_filtered[i];
r.x += cvRound(r.width*0.1);
r.width = cvRound(r.width*0.8);
r.y += cvRound(r.height*0.07);
r.height = cvRound(r.height*0.8);
rectangle(img, r.tl(), r.br(), cv::Scalar(0, 255, 0), 3);
imshow("people detector", img);
int c = waitKey(0) & 255;
if (c == 'q' || c == 'Q' || !f)
fclose(f);
void HOG_SVM2()
int main()
HOG_SVM2();
这里我想说明一下TrainData.txt,这个文件放置了所有样本的路径和类别,如下:
至于如何把正负样本的路径读取到txt文件,可以使用批处理文件,批处理文件我上传到了CSDN,大家可以去下载
正负样本至少保证有1000,不能太少,否则效果就不好了,其中HOG_SVM.txt里面包含了判别函数的参数,这个参数可以直接给HOG用
下面就是我的测试效果:
检测效果还可以.
测试图片我也上传到网上了
当然你也可以不用自己训练分类器,直接使用OpenCV自带的分类器,OpenCV自带的分类器使用的是05年CVPR那篇文章中作者训练好的分类器,下面我们就来看看效果:
图中可以看出,OpenCV自带的分类器效果要比自己训练的好,主要原因大概有以下几点
1.训练样本不足,我的正负样本才900多
2.正样本图片不够清晰,导致特征提取有比较大的误差
下面说说第二种方式。第二种方式就是传统的方式了:对于测试样本,先提取特征,然后使用训练好的分类器进行识别,代码如下:
//HOG+SVM识别方式1
void HOG_SVM1()
//读入训练样本图片路径和类别
vector&string& imageP
vector&int& imageC
int numberOfLine = 0;
ifstream trainingData("D:/Image/Vehicle/TrainData.txt");
unsigned long
while (trainingData)
if (getline(trainingData, buffer))
numberOfLine++;
if (numberOfLine % 2 == 0)
imageClass.push_back(atoi(buffer.c_str()));
imagePath.push_back(buffer);
trainingData.close();
//获取样本的HOG特征
int numberOfSample = numberOfLine / 2;
Mat featureVectorOfSample(numberOfSample, 1764, CV_32FC1);
Mat classOfSample(numberOfSample, 1, CV_32SC1);
for (string::size_type i = 0; i &= imagePath.size() - 1; ++i)
Mat src = imread(imagePath[i], -1);
if (src.empty())
cout && "can not load the image:" && imagePath[i] &&
cout && "processing" && imagePath[i] &&
Mat trainI
resize(src, trainImage, Size(64, 64));
HOGDescriptor hog(cvSize(64, 64), cvSize(16, 16), cvSize(8, 8), cvSize(8, 8), 9);
vector&float&
pute(trainImage, descriptors);
cout && "HOG dimensions:" && descriptors.size() &&
for (vector&float&::size_type j = 0; j &= descriptors.size() - 1; ++j)
featureVectorOfSample.at&float&(i, j) = descriptors[j];
classOfSample.at&int&(i, 0) = imageClass[i];
//使用SVM分类器训练
Ptr&SVM& svm = SVM::create();
svm-&setKernel(SVM::RBF);
svm-&setType(SVM::C_SVC);
svm-&setC(10);
svm-&setCoef0(1.0);
svm-&setP(1.0);
svm-&setNu(0.5);
svm-&setTermCriteria(TermCriteria(CV_TERMCRIT_EPS, 1000, FLT_EPSILON));
svm-&train(featureVectorOfSample, ROW_SAMPLE, classOfSample);
svm-&save("Classifier.xml");
//使用训练好的分类器进行识别
vector&string& testImageP
ifstream testData("D:/Image/Vehicle/TestData.txt");
while (testData)
if (getline(testData, buffer))
testImagePath.push_back(buffer);
testData.close();
ofstream fileOfPredictResult("D:/Image/Vehicle/PredictResult.txt");
for (vector&string&::size_type i = 0; i &= testImagePath.size() - 1; ++i)
Mat src = imread(testImagePath[i], -1);
if (src.empty())
cout && "Can not load the image:" && testImagePath[i] &&
resize(src, testImage, Size(64, 64));
HOGDescriptor hog(cvSize(64, 64), cvSize(16, 16), cvSize(8, 8), cvSize(8, 8), 9);
vector&float&
pute(testImage, descriptors);
cout && "HOG dimensions:" && descriptors.size() &&
Mat featureVectorOfTestImage(1, descriptors.size(), CV_32FC1);
for (int j = 0; j &= descriptors.size() - 1; ++j)
featureVectorOfTestImage.at&float&(0, j) = descriptors[j];
int predictResult = svm-&predict(featureVectorOfTestImage);
char line[512];
std::sprintf(line, "%s %d\n", testImagePath[i].c_str(), predictResult);
fileOfPredictResult &&
fileOfPredictResult.close();
int main()
HOG_SVM1();
大家可以分别使用自己的数据集测试一下上面的两种方式,如果有什么疑问,欢迎留言讨论
关于HOG+SVM的手势识别(转载)
大家好,由于我的毕业设计是“基于图像的手势识别”,因而对HOG算法和SVM算法有一定的研究,下面将我的学习心得和论文算法部分和大家分享。计算机视觉是很有发展潜力的,希望大家共同分享,共同进步。
首先关于HOG算法:
#include "_cvaux.h"
namespace cv
size_t HOGDescriptor::getDescriptorSize() const
//检测数据的合理性
CV_Assert(blockSize.width % cellSize.width == 0
blockSize.height % cellSize.height == 0);
CV_Assert((winSize.width - blockSize.width) % blockStride.width ==
(winSize.height - blockSize.height) % blockStride.height == 0
//Descriptor的大小
return (size_t)nbins*
(blockSize.width/cellSize.width)*
(blockSize.height/cellSize.height)*
((winSize.width - blockSize.width)/blockStride.width + 1)*
((winSize.height - blockSize.height)/blockStride.height + 1);
//9*(16/8)*(16/8)*((64-16)/8+1)*((128-16)/8+1)=9*2*2*7*15=3780,实际上的检测算子为3781,多的1表示偏置
double HOGDescriptor::getWinSigma() const
//winSigma默认为-1,此时由下式可知实际取(blockSize.width+blockSize.height)/8,block为16*16时即为4;若winSigma>=0则直接使用自己指定的值
return winSigma &= 0 ? winSigma : (blockSize.width +
blockSize.height)/8.;
bool HOGDescriptor::checkDetectorSize() const
//size_t:unsigned int
size_t detectorSize = svmDetector.size(), descriptorSize =
getDescriptorSize();
//三种情况任意一种为true则表达式为true,实际上是最后一种
return detectorSize == 0 ||
detectorSize == descriptorSize ||
detectorSize == descriptorSize + 1;
void HOGDescriptor::setSVMDetector(const
vector&float&&
_svmDetector)
svmDetector = _svmD
CV_Assert( checkDetectorSize() );
bool HOGDescriptor::load(const String& filename,
const String& objname)
//XML/YML文件存储
FileStorage fs(filename, FileStorage::READ);
//objname为空,!1=0,选择fs.getFirstTopLevelNode();否则为fs[objname]
//注意到FileStorage中[]重载了:FileNode operator[](const string& nodename)(returns the top-level node by name)
FileNode obj = !objname.empty() ? fs[objname] :
fs.getFirstTopLevelNode();
if( !obj.isMap() )
FileNodeIterator it = obj["winSize"].begin();
it && winSize.width
&& winSize.
it = obj["blockSize"].begin();
it && blockSize.width
&& blockSize.
it = obj["blockStride"].begin();
it && blockStride.width
&& blockStride.
it = obj["cellSize"].begin();
it && cellSize.width
&& cellSize.
obj["nbins"] &&
obj["derivAperture"] &&
obj["winSigma"] && winS
obj["histogramNormType"] &&
histogramNormT
obj["L2HysThreshold"] &&
obj["gammaCorrection"] &&
FileNode vecNode = obj["SVMDetector"];
if( vecNode.isSeq() )
vecNode && svmD
CV_Assert(checkDetectorSize());
void HOGDescriptor::save(const String& filename,
const String& objName) const
FileStorage fs(filename, FileStorage::WRITE);
//空的对象名则取默认名,输出有一定格式,对象名后紧接{
fs && (!objName.empty() ? objName :
FileStorage::getDefaultObjectName(filename))
//之后依次为:
fs && "winSize"
&& winSize
&& "blockSize"
&& blockSize
&& "blockStride"
&& blockStride
&& "cellSize"
&& cellSize
&& "nbins"
&& "derivAperture"
&& derivAperture
&& "winSigma"
&& getWinSigma()
&& "histogramNormType"
&& histogramNormType
&& "L2HysThreshold"
&& L2HysThreshold
&& "gammaCorrection"
if( !svmDetector.empty() )
fs && "SVMDetector"
&& svmDetector
//注意还要输出"}"
fs && "}";
//img:原始图像
//grad:记录每个像素所属bin对应的权重的矩阵,为幅值乘以权值
//这个权值是关键,也很复杂:包括高斯权重、三线性插值的权重;在本函数中先只考虑幅值和相邻bin间的插值权重
//qangle:记录每个像素角度所属的bin序号的矩阵,均为2通道,为了线性插值
//paddingTL:Top和Left扩充像素数
//paddingBR:类似同上
//功能:计算img经扩张后的图像中每个像素的梯度和角度
void HOGDescriptor::computeGradient(const Mat& img,
Mat& grad, Mat& qangle,
Size paddingTL, Size paddingBR) const
//先判断是否为单通道的灰度或者3通道的图像
CV_Assert( img.type() == CV_8U || img.type() == CV_8UC3 );
//计算gradient的图的大小,由64*128==》112*160,则会产生5*7=35个窗口(windowstride:8)
//每个窗口105个block,105*36=3780维特征向量
//paddingTL.width=24,paddingTL.height=16(这样才有64+2*24=112、128+2*16=160)
Size gradsize(img.cols + paddingTL.width + paddingBR.width,
img.rows + paddingTL.height + paddingBR.height);
//注意grad和qangle是2通道的矩阵,为3D-trilinear插值中的orientation维度,另两维为坐标x与y
grad.create(gradsize, CV_32FC2); //
&magnitude*(1-alpha),
magnitude*alpha&
qangle.create(gradsize, CV_8UC2); // [0..nbins-1] - quantized
gradient orientation
//wholeSize为parent matrix的大小,不是扩展后gradsize的大小
//roiofs即为img在parent matrix中的偏置
//对于正样本img=parent img;但对于负样本,img是从parent img中抽取的10个随机位置
//至于OpenCV具体是怎么操作,使得img和parent img相联系,不是很了解
//wholeSize与roiofs仅在padding时有用,可以不管,就认为传入的img==parent img,是否是从parent img中取出无所谓
Size wholeS
img.locateROI(wholeSize, roiofs);
int cn = img.channels();
//产生1行256列的向量,lut为列向量头地址
Mat_&float& _lut(1, 256);
const float* lut = &_lut(0,0);
//gamma校正,作者的编程思路很有意思
//初看不知道这怎么会与图像的gamma校正有关系,压根img都没出现,看到后面大家会豁然开朗的
if( gammaCorrection )
for( i = 0; i & 256; i++ )
_lut(0,i) = std::sqrt((float)i);
for( i = 0; i & 256; i++ )
_lut(0,i) = (float)i;
//开辟空间存xmap和ymap,其中各占gradsize.width+2和gradsize.height+2空间
//+2是为了计算dx,dy时用[-1,0,1]算子,即使在扩充图像中,其边缘计算梯度时还是要再额外加一个像素的
//作者很喜欢直接用内存地址及之间的关系,初看是有点头大的
//另外再说说xmap与ymap的作用:其引入是因为img图像需要扩充到gradsize大小
//如果我们计算img中位于(-5,-6)像素时,需要将基于img的(-5,-6)坐标,映射为基于grad和qangle的坐标(xmap,ymap)
AutoBuffer&int& mapbuf(gradsize.width
+ gradsize.height + 4);
int* xmap = (int*)mapbuf + 1;
int* ymap = xmap + gradsize.width + 2;
//BORDER_REFLECT_101:(左插值)gfedcb|abcdefgh(原始像素)|gfedcba(右插值),一种边界插值模式
const int borderType = (int)BORDER_REFLECT_101;
//borderInterpolate函数只做坐标换算:把可能越界的坐标x-paddingTL.width+roiofs.x按borderType反射回图像内部,返回对应的源像素坐标,本身并不扩充img
//例如,ximg=x(取0)-paddingTL.width(取24)+roiofs.x(取0)=-24 ==> 按BORDER_REFLECT_101反射后xmap[0]=24(假设wholeSize.width>24)
//即grad中第0列对应img中x=24处的像素,后面通过imgPtr[xmap[x]]来访问;至于borderInterpolate的具体操作可以不必细究
for( x = -1; x & gradsize.width + 1; x++ )
xmap[x] = borderInterpolate(x - paddingTL.width + roiofs.x,
wholeSize.width, borderType);
for( y = -1; y & gradsize.height + 1; y++ )
ymap[y] = borderInterpolate(y - paddingTL.height + roiofs.y,
wholeSize.height, borderType);
// x- & y- derivatives
for the whole row
// 由于后面的循环是以行为单位,每次循环内存重复使用,所以只要记录一行的信息而不是整个矩阵
int width = gradsize.
AutoBuffer&float&
_dbuf(width*4);
float* dbuf = _
//注意到内存的连续性方便之后的编程
Mat Dx(1, width, CV_32F, dbuf);
Mat Dy(1, width, CV_32F, dbuf + width);
Mat Mag(1, width, CV_32F, dbuf + width*2);
Mat Angle(1, width, CV_32F, dbuf + width*3);
int _nbins =
float angleScale = (float)(_nbins/CV_PI);//9/pi
for( y = 0; y & gradsize. y++ )
//指向每行的第一个元素,img.data为矩阵的第一个元素地址
const uchar* imgPtr = img.data + img.step*ymap[y];
const uchar* prevPtr = img.data + img.step*ymap[y-1];
const uchar* nextPtr = img.data + img.step*ymap[y+1];
float* gradPtr = (float*)grad.ptr(y);
uchar* qanglePtr = (uchar*)qangle.ptr(y);
if( cn == 1 )
for( x = 0; x & x++ )
int x1 = xmap[x];
//imgPtr指向img第y行首元素,imgPtr[x]即表示第(x,y)像素,其亮度值位于0~255,对应lut[0]~lut[255]
//即若像素亮度为120,则对应lut[120],若有gamma校正,lut[120]=sqrt(120)
//由于补充了虚拟像素,即在imgPtr[-1]无法表示gradsize中-1位置元素,而需要有个转换
//imgPtr[-1-paddingTL.width+roiofs.x],即imgPtr[xmap[-1]],即gradsize中-1位置元素为img中xmap[-1]位置的元素
dbuf[x] = (float)(lut[imgPtr[xmap[x+1]]] -
lut[imgPtr[xmap[x-1]]]);
//由于内存的连续性,隔width,即存Dy
dbuf[width + x] = (float)(lut[nextPtr[x1]] -
lut[prevPtr[x1]]);
//3通道,3通道中取最大值
for( x = 0; x & x++ )
int x1 = xmap[x]*3;
const uchar* p2 = imgPtr + xmap[x+1]*3;
const uchar* p0 = imgPtr + xmap[x-1]*3;
float dx0, dy0, dx, dy, mag0,
dx0 = lut[p2[2]] - lut[p0[2]];
dy0 = lut[nextPtr[x1+2]] - lut[prevPtr[x1+2]];
mag0 = dx0*dx0 + dy0*dy0;
dx = lut[p2[1]] - lut[p0[1]];
dy = lut[nextPtr[x1+1]] - lut[prevPtr[x1+1]];
mag = dx*dx + dy*
if( mag0 & mag )
dx = lut[p2[0]] - lut[p0[0]];
dy = lut[nextPtr[x1]] - lut[prevPtr[x1]];
mag = dx*dx + dy*
if( mag0 & mag )
dbuf[x] = dx0;
dbuf[x+width] = dy0;
//函数cartToPolar(对应C接口cvCartToPolar)计算二维向量(x(I),y(I))的长度和角度:
//magnitude(I) = sqrt(x(I)^2 + y(I)^2),angle(I) = atan2(y(I), x(I))
//注意:最后一个参数angleInDegrees=false时返回的角度为弧度,取值范围是0~2*pi,而不是-pi/2~pi/2
cartToPolar( Dx, Dy, Mag, Angle, false );
for( x = 0; x & x++ )
float mag = dbuf[x+width*2];
float angle = dbuf[x+width*3]*angleScale -
0.5f;//-5&=angle&=4
//判断angle属于哪个bin
int hidx = cvFloor(angle);
//hidx=-5~-1===&4~8
if( hidx & 0 )
else if( hidx &= _nbins )
//检测是否&9
assert( (unsigned)hidx & (unsigned)_nbins );
qanglePtr[x*2] = (uchar)
//hidx = hidx & 1111
1111 当hidx&nbins,即hidx=hidx
//hidx = hidx &
当hidx&=nbins,即hidx=0
//注意到nbins=9时,hidx最大值只为8
hidx &= hidx & _nbins ? -1 :
//qangle两通道分别存放相邻的两个bin
qanglePtr[x*2+1] = (uchar)
//幅度,注意此时的0&angle&1,由于hidx
= cvFloor(angle),angle -=
gradPtr[x*2] = mag*(1.f - angle);
gradPtr[x*2+1] = mag*
//HOG存储结构,每个window包含105block,每个block包含36bin
struct HOGCache
struct BlockData
BlockData() : histOfs(0), imgOffset() {}
//以block为单位,譬如block[0]中的36个bin在内存中位于最前面
//而block[1]中的36个bin存储位置在连续内存中则有一个距离起点的偏置,即为histOfs:hist
//imgOffset表示该block在检测窗口window中的位置
Point imgO
//PixData是作者程序中比较晦涩的部分,具体见后面程序分析
//gradOfs:该pixel的grad在Mat
grad中的位置,是一个数:(grad.cols*i+j)*2,2表示2通道
//qangleOfs:pixel的angle在Mat
qangle中的位置,是一个数:(qangle.cols*i+j)*2,2表示2通道
//histOfs[4]:在后面程序中,作者把一个block中的像素分为四个区域,每个区域的像素最多对四个不同Cell中的hist有贡献
//即一个区域中进行直方图统计,则最多包含四个Cell的不同直方图,histOfs[i]表示每个区域中的第i个直方图
//在整个block直方图存储空间中的距离原始位置的偏置
//显然第一个Cell的hist其对应的histOfs[0]=0,依次类推有:histOfs[1]=9,histOfs[2]=18,histOfs[3]=27
//|_1_|_2_|_3_|_4_|一个block四个cell,这里把每个cell又分四分,1,2,5,6中像素统计属于hist[0],3,4,7,8在hist[1]...
//|_5_|_6_|_7_|_8_|作者将一个block分为了四块区域为:A:1,4,13,16/B:2,3,14,15/C:5,9,8,12/D:6,7,10,11
//|_9_|_10|_11|_12|作者认为A区域中的像素只对其所属的Cell中的hist有贡献,即此区域的像素只会产生一个hist
//|_13|_14|_15|_16|而B区域2,3的像素会对Cell0与Cell1中的hist有贡献,相应的会产生hist[0]与hist[1],14,15类似
//C区域与B区域类似,会对上下两个Cell的hist产生影响,而D区域会对相邻四个Cell的hist产生影响
//histWeights:每个像素对不同cell的hist贡献大小,由像素在block中的位置决定
//个人觉得这是论文中trilinear插值中对于position中x和y两个维度的插值
//其中像素的角度对于相邻两个bin的权重在HOGDescriptor::computerGradient中已有体现,至此trilinear完成
//其实作者认为每个像素对于其他cell的hist的影响,其大小与该像素距各个cell中心的距离决定
//譬如处于中心的像素(8,8)可以认为对每个cell的hist贡献一样,后面程序中权重的分配也可以看出
//gradWeight:为幅值与高斯权重的乘积
//其中高斯权重选择exp^(-(dx^2+dy^2)/(2*sigma^2)),sigma在HOGDescriptor中决定,以block中(8,8)为中心
//区别gradWeight和histWeight,gradWeight认为在同一个Cell中不同元素对hist的贡献是不一样的,由二维高斯分布决定
//而histweight说的是一个元素对不同cell中的hist的贡献不同,其贡献由其坐标距离各个cell的距离决定
struct PixData
size_t gradOfs, qangleO
int histOfs[4];
float histWeights[4];
float gradW
HOGCache();
HOGCache(const HOGDescriptor* descriptor,
const Mat& img, Size paddingTL, Size
paddingBR,
bool useCache, Size cacheStride);
virtual ~HOGCache() {};
virtual void init(const HOGDescriptor* descriptor,
const Mat& img, Size paddingTL, Size
paddingBR,
bool useCache, Size cacheStride);
//windowsInImage返回Image中横竖可产生多少个windows
Size windowsInImage(Size imageSize, Size winStride)
//依据img大小,窗口移动步伐,即窗口序号得到窗口在img中的位置
Rect getWindow(Size imageSize, Size winStride, int idx)
//buf为存储blockdata的内存空间,pt为block在parent
img中的位置
const float* getBlock(Point pt, float* buf);
virtual void normalizeBlockHistogram(float* histogram)
vector&PixData& pixD
vector&BlockData& blockD
//以下的参数是为了充分利用重叠的block信息,避免重叠的block信息重复计算采用的一种缓存思想具体见后面代码
bool useC//是否存储已经计算的block信息
vector&int& ymaxC//见后文
Size winSize,
cacheS//cacheStride认为等于blockStride,降低代码的复杂性
Size nblocks,
int blockHistogramS
int count1, count2, count4;
P//img在扩展后图像中img原点关于扩展后原点偏置
Mat_&float& blockCache;//待检测图像中以检测窗口进行横向扫描,所扫描的block信息存储在blockCache中
Mat_&uchar& blockCacheF
//判断当前block的信息blockCache中是否有存储,1:存储,于是直接调用;0:未存储,需要把信息存储到blockCache中
const HOGDescriptor*
HOGCache::HOGCache()
useCache =
blockHistogramSize = count1 = count2 = count4 = 0;
descriptor = 0;
HOGCache::HOGCache(const HOGDescriptor* _descriptor,
const Mat& _img, Size _paddingTL, Size
_paddingBR,
bool _useCache, Size _cacheStride)
init(_descriptor, _img, _paddingTL, _paddingBR, _useCache,
_cacheStride);
//初始化主要包括:1、block中各像素对block四个bin的贡献权重,以及在存储空间中的位置记录
//2、block的初始化,以及每个block在存储空间中的偏置及在检测窗口中的位置 记录
//3、其他参数的赋值
//并没有实际计算HOG
void HOGCache::init(const HOGDescriptor* _descriptor,
const Mat& _img, Size _paddingTL, Size
_paddingBR,
bool _useCache, Size _cacheStride)
descriptor = _
cacheStride = _cacheS
useCache = _useC
descriptor-&computeGradient(_img, grad, qangle,
_paddingTL, _paddingBR);
imgoffset = _paddingTL;//16,24
winSize = descriptor-&winS//64*128
Size blockSize = descriptor-&blockS//16*16
Size blockStride =
descriptor-&blockS//8*8
Size cellSize = descriptor-&cellS//8*8
Size winSize = descriptor-&winS//64*128
int i, j, nbins = descriptor-&//9
int rawBlockSize =
blockSize.width*blockSize.//16*16=256
nblocks = Size((winSize.width - blockSize.width)/blockStride.width
(winSize.height - blockSize.height)/blockStride.height +
1);//7*15=105
ncells = Size(blockSize.width/cellSize.width,
blockSize.height/cellSize.height);//2*2=4
blockHistogramSize =
ncells.width*ncells.height*//9*2*2=36
//对于训练时,该段代码不起作用;对于检测时,该段代码可以提高运行速度。
//在训练时,由于样本大小即等于检测窗口大小,因而不需要额外存储
//但是在检测时由于待检测图像大于检测窗口,因而当检测窗口移动时,检测相邻检测窗口具有大量共同的block信息
//为了节省时间,对于之前计算过的block信息,这里只需要调用,而对于未计算过的block信息,则重新计算并存储
//其具体思路如下:假设待检测图像640*480,检测窗口为144*144
//待检测图像水平方向有79个block,检测窗口垂直方向有17个block
//于是由以下代码知道:blockCache为18*(79*36)=18*2844,blockCacheFlags为17*79,ymaxCached为17
//以左上角代表检测窗口位置,当位于(0,0)时,第一次计算block信息,blockCache中是没有保存任何信息的。
//当位于(0,0)时须计算(也以block左上角代表block位置):
//(0,0)---->(128,0):信息均存储到blockCache中,分别为blockCache[0][0]--->blockCache[0][17*36],相应blockCacheFlags置1
//(0,128)-->(128,128):blockCache[17][0]-->blockCache[17][17*36]
//当检测窗口移动到(8,0)时,可以发现两个窗口中有大量信息是重复的,于是可以直接调用blockCache中相关block信息
//并把(136,0)-->(136,128)新增列的block信息加到blockCache中,同时更新blockCacheFlags
//一直到窗口移到(624,0)进入到下一行(0,8),上述过程持续,于是blockCache中前17行存储了待检测图像中前17*79个block信息
//当检测窗口移动到(624,0)时此时blockCache已经存储满了
//当检测窗口移动到(0,8)时,第18行的信息怎么处理呢?
//此时大家要留意的是第1行的block信息已经没有用啦,于是可以将第18行的信息替代第1行的信息。
//当检测窗口不断横向扫描时,最新一行的信息总是会替代最旧一行的信息,如此反复,达到提高运行速度的目的
//另外需要提到一点的是当block在pt=(x,y)=(0,0)-->(624,0)--->(0,128)---->(624,128)
//可以用x/cacheStride(=blockStride)--->Cache_X,y/blockStride--->Cache_Y
//从而从blockCache中取出对应的blockCache[Cache_Y][Cache_X*36]
//当pt中y>128时,对应的第18行信息存储在blockCache中的第0行
//于是我们可以用取余的办法,(y/blockStride)%blockCache.rows--->Cache_Y,而Cache_X的计算不变
//getblock函数中代码正是按该方法进行操作的
if( useCache )
//HOGCache的grad、qangle由descriptor->computeGradient得到
//grad.cols=img.cols + paddingTL.width + paddingBR.width
Size cacheSize((grad.cols -
blockSize.width)/cacheStride.width+1,
(winSize.height/cacheStride.height)+1);
blockCache.create(cacheSize.height,
cacheSize.width*blockHistogramSize);
blockCacheFlags.create(cacheSize);
size_t i, cacheRows = blockCache.
ymaxCached.resize(cacheRows);
for( i = 0; i & cacheR i++ )
ymaxCached[i] = -1;
Mat_&float& weights(blockSize);
//sigma默认值为4
float sigma =
(float)descriptor-&getWinSigma();
float scale = 1.f/(sigma*sigma*2);
//权重的二维高斯分布
for(i = 0; i & blockSize. i++)
for(j = 0; j & blockSize. j++)
float di = i - blockSize.height*0.5f;
float dj = j - blockSize.width*0.5f;
weights(i,j) = std::exp(-(di*di + dj*dj)*scale);
blockData.resize(nblocks.width*nblocks.height);//105个block
pixData.resize(rawBlockSize*3);//256*3:这里的3不是通道数,而是按像素贡献1个、2个、4个cell分成的三类各预留一段空间,后面再整理成连续存储
// Initialize 2 lookup tables, pixData &
blockData.
// Here is why:
// The detection algorithm runs in 4 nested loops (at each pyramid
// loop over the windows within the input image
// loop over the blocks within each window
// loop over the cells within each block
// loop over the pixels in each cell
// As each of the loops runs over a 2-dimensional array,
// we could get 8(!) nested loops in total, which is very-very
// To speed the things up, we do the following:
// 1. loop over windows is unrolled in the
HOGDescriptor::{compute|detect}
// inside we compute the current search window using getWindow()
// Yes, it involves some overhead (function call + couple of
divisions),
// but it's tiny in fact.
// 2. loop over the blocks is also unrolled. Inside we use
pre-computed blockData[j]
// to set up gradient and histogram pointers.
// 3. loops over cells and pixels in each cell are merged
// (since there is no overlap between cells, each pixel in the
block is processed once)
// and also unrolled. Inside we use PixData[k] to access the
gradient values and
// update the histogram
//作者用查找表的方法来计算。具体实现时是先执行HoGCache的初始化函数Init()
//构造查找表,然后用getWindow()和getBlock()两个函数实现的表的查找
count1 = count2 = count4 = 0;
//blockSize.width=16
for( j = 0; j & blockSize. j++ )
for( i = 0; i & blockSize. i++ )
PixData* data = 0;
//确定cell在block中的位置
float cellX = (j+0.5f)/cellSize.width - 0.5f;
float cellY = (i+0.5f)/cellSize.height - 0.5f;
int icellX0 = cvFloor(cellX);
int icellY0 = cvFloor(cellY);
int icellX1 = icellX0 + 1, icellY1 = icellY0 + 1;
cellX -= icellX0;
cellY -= icellY0;
//注意到unsigned,当icellX0=-1时,(unsigned)icellX0&2
//(0~3,0~3)+(0~3,12~15)+(12~15,0~3)+(12~15,12~15)
//(icellX0,icellY0,icellX1,icellY1)=(-1,-1,0,0),(-1,1,0,2),(1,-1,0,2),(1,1,2,2)===》条件4
//(4~11,4~11)==》(0,0,1,1)==》条件1
//(0~3,4~11)+(12~15,4~11)==》(-1,0,0,1)==》条件3
//(4~11,0~3)+(4~11,12~15)==》(0,-1,1,0)==》条件2
//情况2,3中元素对两个cell中的hist有贡献
//(0~3,4~11):histofs=(0,9,0,0);(12~15,4~11):histofs=(18,27,0,0)
//(4~11,0~3):histofs=(0,18,0,0);(4~11,12~15):hisofs=(9,27,0,0)
//情况1中,元素对4个cell的hist有贡献,则会有4个hist及histofs,并且为(0,9,18,27)
//情况4中,元素属于一个cell,则只有一个hist,对应的只有一个histofs:hist offset
//分别应为:(0,0,0,0),(9,0,0,0),(18,0,0,0),(27,0,0,0)
//对于权重的理解看后面的注释,选择第二种情况,其他可类推
if( (unsigned)icellX0 & (unsigned)ncells.width
(unsigned)icellX1 & (unsigned)ncells.width )
if( (unsigned)icellY0 & (unsigned)ncells.height
(unsigned)icellY1 & (unsigned)ncells.height )
data = &pixData[rawBlockSize*2 + (count4++)];
data-&histOfs[0] = (icellX0*ncells.height +
data-&histWeights[0] = (1.f - cellX)*(1.f -
data-&histOfs[1] = (icellX1*ncells.height +
data-&histWeights[1] = cellX*(1.f - cellY);
data-&histOfs[2] = (icellX0*ncells.height +
data-&histWeights[2] = (1.f - cellX)*cellY;
data-&histOfs[3] = (icellX1*ncells.height +
data-&histWeights[3] = cellX*cellY;
data = &pixData[rawBlockSize + (count2++)];
if( (unsigned)icellY0 & (unsigned)ncells.height
icellY1 = icellY0;
cellY = 1.f - cellY;
//|_1_|_2_|_3_|_4_|第二中情况是位于(2,3),(14,15)。感性上可以认为(2,3)中的像素对cell0与cell1的贡献中
//|_5_|_6_|_7_|_8_|其中y分量的贡献都是相同的,由于距离各cell的中心距离相同,而x分量的影响是不同的
//|_9_|_10|_11|_12|所以权重的分配为(1-cellx)*celly和cellx*celly
//|_13|_14|_15|_16|
//挑了中简单的情况,情况1中可以类似分析
data-&histOfs[0] = (icellX0*ncells.height +
data-&histWeights[0] = (1.f - cellX)*cellY;
data-&histOfs[1] = (icellX1*ncells.height +
data-&histWeights[1] = cellX*cellY;
data-&histOfs[2] = data-&histOfs[3] =
data-&histWeights[2] =
data-&histWeights[3] = 0;
if( (unsigned)icellX0 & (unsigned)ncells.width
icellX1 = icellX0;
cellX = 1.f - cellX;
if( (unsigned)icellY0 & (unsigned)ncells.height
(unsigned)icellY1 & (unsigned)ncells.height )
data = &pixData[rawBlockSize + (count2++)];
data-&histOfs[0] = (icellX1*ncells.height +
data-&histWeights[0] = cellX*(1.f - cellY);
data-&histOfs[1] = (icellX1*ncells.height +
data-&histWeights[1] = cellX*cellY;
data-&histOfs[2] = data-&histOfs[3] =
data-&histWeights[2] =
data-&histWeights[3] = 0;
data = &pixData[count1++];
if( (unsigned)icellY0 & (unsigned)ncells.height
icellY1 = icellY0;
cellY = 1.f - cellY;
data-&histOfs[0] = (icellX1*ncells.height +
data-&histWeights[0] = cellX*cellY;
data-&histOfs[1] = data-&histOfs[2] =
data-&histOfs[3] = 0;
data-&histWeights[1] =
data-&histWeights[2] =
data-&histWeights[3] = 0;
data-&gradOfs = (grad.cols*i + j)*2;
data-&qangleOfs = (qangle.cols*i + j)*2;
data-&gradWeight = weights(i,j);
assert( count1 + count2 + count4 == rawBlockSize
);//rawBlockSize=blockSize.width*blockSize.height=16*16=256,而不是105*36=3780
// defragment
pixData,重新整理数据使其连贯存储
//由图1表示,内存中存储顺序为:1,4,13,16/2,3,5,8,9,12,14,15/6,7,10,11区域像素的信息
for( j = 0; j & count2; j++ )
pixData[j + count1] = pixData[j + rawBlockSize];
for( j = 0; j & count4; j++ )
pixData[j + count1 + count2] = pixData[j + rawBlockSize*2];
count2 += count1;
count4 += count2;
// 初始化blockData
for( j = 0; j & nblocks. j++ )
for( i = 0; i & nblocks. i++ )
BlockData& data = blockData[j*nblocks.height +
//histofs:hist off set,直方图信息在blockData中的偏置
data.histOfs = (j*nblocks.height + i)*blockHistogramS
data.imgOffset =
Point(j*blockStride.width,i*blockStride.height);
//buf:存储空间
//pt:block在parent img中的坐标,或偏置(左上角)
//只获取一个block中的信息:将256个像素的grad和angle信息变为36个bin的信息并保存
const float* HOGCache::getBlock(Point pt, float* buf)
float* blockHist =
assert(descriptor != 0);
Size blockSize = descriptor-&blockS
//imgoffset = _paddingTL;16,24,从parent img==&grad
CV_Assert( (unsigned)pt.x &= (unsigned)(grad.cols -
blockSize.width) &&
(unsigned)pt.y &= (unsigned)(grad.rows -
blockSize.height) );
//相关解释见init函数注释
if( useCache )
CV_Assert( pt.x % cacheStride.width == 0
pt.y % cacheStride.height == 0 );
Point cacheIdx(pt.x/cacheStride.width,
(pt.y/cacheStride.height) % blockCache.rows);
if( pt.y != ymaxCached[cacheIdx.y] )
Mat_&uchar& cacheRow =
blockCacheFlags.row(cacheIdx.y);
cacheRow = (uchar)0;
ymaxCached[cacheIdx.y] = pt.y;
blockHist =
&blockCache[cacheIdx.y][cacheIdx.x*blockHistogramSize];
uchar& computedFlag = blockCacheFlags(cacheIdx.y,
cacheIdx.x);
if( computedFlag != 0 )
return blockH
computedFlag = (uchar)1; // set it at once, before actual
int k, C1 = count1, C2 = count2, C4 = count4;
//pt.x*2由于是2通道,记录block左上角对应在grad.data和qangle.data中的位置
const float* gradPtr = (const float*)(grad.data + grad.step*pt.y) +
const uchar* qanglePtr = qangle.data + qangle.step*pt.y +
CV_Assert( blockHist != 0 );
//blockHistogramSize=36
for( k = 0; k & blockHistogramS k++ )
blockHist[k] = 0.f;
//pixData包含256个元素,blockData包含105个block
const PixData* _pixData = &pixData[0];
//遍历一个block中所有像素256个,以像素为单位取
//一个像素包含:gradofs,qangleofs,gradweight,histofs[4],histweight[4]
for( k = 0; k & C1; k++ )
const PixData& pk = _pixData[k];
const float* a = gradPtr + pk.gradOfs;//gradPtr起始地址,由不同输入Point
pt而变化,pk.gradOfs偏置
float w = pk.gradWeight*pk.histWeights[0];
const uchar* h = qanglePtr + pk.qangleO
int h0 = h[0], h1 = h[1];//h[0]为angle所在bin的位置0~8,hist[h0]表示第h0个bin其中存储的是相应的幅度与权重
float* hist = blockHist + pk.histOfs[0];//blockHist为buff的地址,histOfs即为偏置
float t0 = hist[h0] + a[0]*w;
float t1 = hist[h1] + a[1]*w;
hist[h0] = t0; hist[h1] = t1;
for( ; k & C2; k++ )
const PixData& pk = _pixData[k];
const float* a = gradPtr + pk.gradO
float w, t0, t1, a0 = a[0], a1 = a[1];
const uchar* h = qanglePtr + pk.qangleO
int h0 = h[0], h1 = h[1];
float* hist = blockHist + pk.histOfs[0];
w = pk.gradWeight*pk.histWeights[0];
t0 = hist[h0] + a0*w;
t1 = hist[h1] + a1*w;
hist[h0] = t0; hist[h1] = t1;
hist = blockHist + pk.histOfs[1];
w = pk.gradWeight*pk.histWeights[1];
t0 = hist[h0] + a0*w;
t1 = hist[h1] + a1*w;
hist[h0] = t0; hist[h1] = t1;
for( ; k & C4; k++ )
const PixData& pk = _pixData[k];
const float* a = gradPtr + pk.gradO
float w, t0, t1, a0 = a[0], a1 = a[1];
const uchar* h = qanglePtr + pk.qangleO
int h0 = h[0], h1 = h[1];
float* hist = blockHist + pk.histOfs[0];
w = pk.gradWeight*pk.histWeights[0];
t0 = hist[h0] + a0*w;
t1 = hist[h1] + a1*w;
hist[h0] = t0; hist[h1] = t1;
hist = blockHist + pk.histOfs[1];
w = pk.gradWeight*pk.histWeights[1];
t0 = hist[h0] + a0*w;
t1 = hist[h1] + a1*w;
hist[h0] = t0; hist[h1] = t1;
hist = blockHist + pk.histOfs[2];
w = pk.gradWeight*pk.histWeights[2];
t0 = hist[h0] + a0*w;
t1 = hist[h1] + a1*w;
hist[h0] = t0; hist[h1] = t1;
hist = blockHist + pk.histOfs[3];
w = pk.gradWeight*pk.histWeights[3];
t0 = hist[h0] + a0*w;
t1 = hist[h1] + a1*w;
hist[h0] = t0; hist[h1] = t1;
normalizeBlockHistogram(blockHist);
return blockH
void HOGCache::normalizeBlockHistogram(float* _hist) const
float* hist = &_hist[0];
size_t i, sz = blockHistogramS
float sum = 0;
for( i = 0; i & i++ )
sum += hist[i]*hist[i];
//分母为什么要加上sz*0.1f?这里sz=blockHistogramSize(默认36),即加3.6而不是25.6;作用是防止分母过小,具体取值应该是经验值
float scale = 1.f/(std::sqrt(sum)+sz*0.1f);
float thresh =
(float)descriptor-&L2HysT//缺省值0.2
for( i = 0, sum = 0; i & i++ )
hist[i] = std::min(hist[i]*scale, thresh);//限制最大值为0.2
sum += hist[i]*hist[i];
//再归一化一遍,使得各项平方和约为1,即单位化
scale = 1.f/(std::sqrt(sum)+1e-3f);
for( i = 0; i & i++ )
hist[i] *=
Size HOGCache::windowsInImage(Size imageSize, Size winStride)
return Size((imageSize.width - winSize.width)/winStride.width +
(imageSize.height - winSize.height)/winStride.height + 1);
//依据img大小,窗口移动步伐,即窗口序号得到窗口在img中的位置
Rect HOGCache::getWindow(Size imageSize, Size winStride, int idx)
int nwindowsX = (imageSize.width - winSize.width)/winStride.width +
int y = idx / nwindowsX;//会自动取整
int x = idx - nwindowsX*y;
return Rect( x*winStride.width, y*winStride.height, winSize.width,
winSize.height );
//img:待检测或计算的图像
//descriptors:Hog描述结构
//winStride:窗口移动步伐
//padding:扩充图像相关尺寸
//locations:对于正样本可以直接取(0,0),负样本为随机产生合理坐标范围内的点坐标
void HOGDescriptor::compute(const Mat& img,
vector&float&&
descriptors,
Size winStride, Size padding,
const vector&Point&&
locations) const
//若winStride.width=0,winStride.height=0,取(8,8)
if( winStride == Size() )
winStride = cellS
//gcd(a,b)为a、b的最大公约数;只有当其中一个能整除另一个时,结果才等于较小者
//默认的winStride=blockStride,暂时忽视
Size cacheStride(gcd(winStride.width, blockStride.width),
gcd(winStride.height, blockStride.height));
//正样本只有一个窗口,如果未扩充
//负样本按论文中所说会随机产生10副图,若未扩充则会有10个窗口
size_t nwindows = locations.size();
//alignSize(size_t sz, int n)返回n的倍数中不小于sz的最小数,这里用来对padding.width进行修正
//由默认参数有cacheStride=blockStride=(8,8),padding.width=24,padding.height=16,所以也不需要修正,可忽视
padding.width = (int)alignSize(std::max(padding.width, 0),
cacheStride.width);
padding.height = (int)alignSize(std::max(padding.height, 0),
cacheStride.height);
Size paddedImgSize(img.cols + padding.width*2, img.rows +
padding.height*2);
//HOGCache(const HOGDescriptor* _descriptor,const Mat& _img, Size _paddingTL, Size _paddingBR,bool _useCache, Size _cacheStride)
//nwindows==0表示useCache=1
HOGCache cache(this, img, padding, padding, nwindows == 0,
cacheStride);
//当nwindows=0时按扩充后的图像计算共有多少窗口:area()=size.width*size.height,windowsInImage返回的是横向、纵向的窗口数(nwidth和nheight)
//在检测时会有用,由于检测时是不知道要计算哪块区域的,所以需要对整副图像需要多少窗口
//训练时由于样本大小均为窗口大小,所以不需要额外存储block信息,则useCache=0,nwindows=1;
//检测时由于待检测图像大于检测窗口大小,所以需要额外存储重复的block信息,则useCache=1,需要重新计算nwindows
//detect函数中的useCache默认值为1,即检测时是需要额外存储block信息的
//compute函数中的useCache默认值为0,detect会调用compute,会改变useCache的值
if( !nwindows )
nwindows = cache.windowsInImage(paddedImgSize,
winStride).area();
const HOGCache::BlockData* blockData =
&cache.blockData[0];
int nblocks = cache.nblocks.area();
int blockHistogramSize = cache.blockHistogramS
size_t dsize =
getDescriptorSize();//一个窗口中特征向量大小:2*2*9*15*7=3780
descriptors.resize(dsize*nwindows);//注意到算法中样本大小为64*128,但实际上是有扩充的,实际特征向量还要乘上nwindows
//descriptor存储分nwindows段,每段又分nblocks=105段,每段又有36个bin
for( size_t i = 0; i & i++ )
float* descriptor = &descriptors[i*dsize];
Point pt0;
//locations.empty()为空返回1
//不为空时
if( !locations.empty() )
pt0 = locations[i];
if( pt0.x & -padding.width || pt0.x &
img.cols + padding.width - winSize.width ||
pt0.y & -padding.height || pt0.y &
img.rows + padding.height - winSize.height )
//为空时:
pt0 = cache.getWindow(paddedImgSize, winStride, (int)i).tl() -
Point(padding);
CV_Assert(pt0.x % cacheStride.width == 0
&& pt0.y % cacheStride.height ==
for( int j = 0; j & j++
)//nblocks=105
const HOGCache::BlockData& bj = blockData[j];
//imgOffset =
Point(j*blockStride.width,i*blockStride.height),block在window中的位置
//pt0:为img在parent img中的位置,注意到getBlock(pt,dst)中pt就是指的在parent
img中的位置
Point pt = pt0 + bj.imgO
//histOfs=(j*nblocks.height +
i)*blockHistogramSize,nblocks.height=15
float* dst = descriptor + bj.histO
//dst只是该block的存储空间,pt表示该block在图中的位置,src才是计算后的直方图,将其赋值给dst
const float* src = cache.getBlock(pt, dst);
if( src != dst )
for( int k = 0; k & blockHistogramS k++
)//blockHistogramSize=36
dst[k] = src[k];
//hits:检测图像中存在目标的区域的坐标
//hitThreshold:为目标的阈值
//img:不要求为64*128
//处理固定尺度上目标的检测,detectMultiScale中Scale循环,每个循环中调用detect
void HOGDescriptor::detect(const Mat& img,
vector&Point&& hits,
double hitThreshold,
Size winStride, Size padding, const
vector&Point&&
locations) const
hits.clear();
if( svmDetector.empty() )
if( winStride == Size() )
winStride = cellS
Size cacheStride(gcd(winStride.width, blockStride.width),
gcd(winStride.height, blockStride.height));
size_t nwindows = locations.size();
padding.width = (int)alignSize(std::max(padding.width, 0),
cacheStride.width);
padding.height = (int)alignSize(std::max(padding.height, 0),
cacheStride.height);
Size paddedImgSize(img.cols + padding.width*2, img.rows +
padding.height*2);
HOGCache cache(this, img, padding, padding, nwindows == 0,
cacheStride);
if( !nwindows )
nwindows = cache.windowsInImage(paddedImgSize,
winStride).area();
const HOGCache::BlockData* blockData =
&cache.blockData[0];
int nblocks = cache.nblocks.area();
int blockHistogramSize = cache.blockHistogramS
size_t dsize = getDescriptorSize();
double rho = svmDetector.size() & dsize ?
svmDetector[dsize] : 0;
vector&float&
blockHist(blockHistogramSize);
for( size_t i = 0; i & i++ )
Point pt0;
if( !locations.empty() )
pt0 = locations[i];
if( pt0.x & -padding.width || pt0.x &
img.cols + padding.width - winSize.width ||
pt0.y & -padding.height || pt0.y &
img.rows + padding.height - winSize.height )
pt0 = cache.getWindow(paddedImgSize, winStride, (int)i).tl() -
Point(padding);
CV_Assert(pt0.x % cacheStride.width == 0
&& pt0.y % cacheStride.height ==
double s =
const float* svmVec = &svmDetector[0];
for( j = 0; j & j++, svmVec +=
blockHistogramSize )
const HOGCache::BlockData& bj = blockData[j];
Point pt = pt0 + bj.imgO
const float* vec = cache.getBlock(pt,
&blockHist[0]);
//分两步,考虑到检测算子中的偏置
for( k = 0; k &= blockHistogramSize - 4; k += 4
s += vec[k]*svmVec[k] + vec[k+1]*svmVec[k+1] +
vec[k+2]*svmVec[k+2] + vec[k+3]*svmVec[k+3];
for( ; k & blockHistogramS k++ )
s += vec[k]*svmVec[k];
if( s &= hitThreshold )
hits.push_back(pt0);
struct HOGThreadData
vector&Rect&
vector&Point&
Mat smallerImgB
void HOGDescriptor::detectMultiScale(
const Mat& img,
vector&Rect&&
foundLocations,
double hitThreshold, Size winStride, Size padding,
double scale0, int groupThreshold) const
double scale = 1.;
foundLocations.clear();
int i, levels = 0;
const int maxLevels = 64;
//getNumThreads得到线程最大数目
int t, nthreads = getNumThreads();
vector&HOGThreadData&
threadData(nthreads);
for( t = 0; t & t++ )
threadData[t].smallerImgBuf.create(img.size(), img.type());
vector&double&
levelScale(maxLevels);
//计算出最大层数,基本是将图像缩小,即认为样本尺度已经很小了,实际的行人只会大于样本尺寸,小于样本尺寸的行人无法检测
for( levels = 0; levels & maxL levels++
levelScale[levels] =
if( cvRound(img.cols/scale) & winSize.width ||
cvRound(img.rows/scale) & winSize.height ||
scale0 &= 1 )
scale *= scale0;
levels = std::max(levels, 1);
levelScale.resize(levels);
#ifdef _OPENMP
#pragma omp parallel for num_threads(nthreads)
schedule(dynamic)
#endif // _OPENMP
//外循环为尺度金字塔循环
for( i = 0; i & i++ )
//getThreadNum:得到OpenCV正在用的线程序号
HOGThreadData& tdata =
threadData[getThreadNum()];
double scale = levelScale[i];
Size sz(cvRound(img.cols/scale), cvRound(img.rows/scale));
Mat smallerImg(sz, img.type(), tdata.smallerImgBuf.data);
//缩小图像
if( sz == img.size() )
smallerImg = Mat(sz, img.type(), img.data, img.step);
resize(img, smallerImg, sz);
//每层的检测
detect(smallerImg, tdata.locations, hitThreshold, winStride,
Size scaledWinSize = Size(cvRound(winSize.width*scale),
cvRound(winSize.height*scale));
for( size_t j = 0; j & tdata.locations.size(); j++
tdata.rectangles.push_back(Rect(
cvRound(tdata.locations[j].x*scale),
cvRound(tdata.locations[j].y*scale),
scaledWinSize.width, scaledWinSize.height));
for( t = 0; t & t++ )
HOGThreadData& tdata = threadData[t];
//将tdata.rectagnles中的数据拷贝到foundLocation中
std::copy(tdata.rectangles.begin(), tdata.rectangles.end(),
std::back_inserter(foundLocations));
//从一群找到的矩形区域提取出一个,这里直接调用了函数,我们可以不细究
groupRectangles(foundLocations, groupThreshold, 0.2);
// Returns the built-in detector coefficients as a vector<float>.
// NOTE(review): the table below holds only two zeros; presumably the full
// trained coefficient table was elided from this listing. Confirm against
// the upstream library source before relying on it.
vector<float> HOGDescriptor::getDefaultPeopleDetector()
{
    static const float detector[] = {0,0};
    return vector<float>(detector,
                         detector + sizeof(detector)/sizeof(detector[0]));
}
以上为HOG代码的注释与理解~不清楚的欢迎提问~有不对的地方,欢迎指出~
编辑太累啦~svm算法下次再说,其实可以看我的论文...
顺便上传一下我的毕业论文,若有人转载和引用请标明出处:
PS:论文里只针对张开的手进行了学习,接下来是采用取巧的办法,识别不同手势。其中张开的手势识别率挺高,应该有85%以上。其检测时间是在Debug下进行的,Release下应该快很多,当时忘记测试啦~
PPS:在写完论文后,对5个手势都进行了学习,每个手势都学习了一个检测算子,鲁棒性更强,对光照和背景的依赖性减小,但正确率也相应下降啦~达不到预期效果。
还想说的是:想利用HOG+SVM识别简单的物体是切实可行的,建议大家首先对HOG代码有一定了解。我在实际编写识别手势的代码时,将OpenCV的代码改写了许多的地方。
戴丹_浙江大学_毕业论文
(2.38 MiB) 被下载 3326 次
已投稿到:
以上网友发言只代表其个人观点,不代表新浪网的观点或立场。

我要回帖

更多关于 hog svm matlab 的文章

 

随机推荐