Skip to content

Commit 65fc9be

Browse files
committedJul 16, 2019
Add speed_sobel_edgedetection_sse.cpp
1 parent b2befd2 commit 65fc9be

File tree

2 files changed

+253
-0
lines changed

2 files changed

+253
-0
lines changed
 

‎README.md

+7
Original file line numberDiff line numberDiff line change
@@ -39,3 +39,10 @@
3939
|浮点数改成整形运算+单线程|4032*3024|36.30ms|
4040
|SSE优化+单线程|4032*3024|8.72ms|
4141

42+
- speed_sobel_edgedetection_sse.cpp 使用SSE加速Sobel边缘检测算法,加速幅度巨大,算法原理请看:https://www.cnblogs.com/Imageshop/p/7285564.html 。速度测试结果如下:
43+
44+
|优化方式|图像分辨率 |速度|
45+
|---------|----------|-------|
46+
|C语言实现+单线程|4032*3024||
47+
|浮点数改成整形运算+单线程|4032*3024||
48+
|SSE优化+单线程|4032*3024||

‎speed_sobel_edgedetection_sse.cpp

+246
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,246 @@
1+
#include <stdio.h>
2+
#include <opencv2/opencv.hpp>
3+
4+
using namespace std;
5+
using namespace cv;
6+
7+
inline unsigned char IM_ClampToByte(int Value)
8+
{
9+
if (Value < 0)
10+
return 0;
11+
else if (Value > 255)
12+
return 255;
13+
else
14+
return (unsigned char)Value;
15+
//return ((Value | ((signed int)(255 - Value) >> 31)) & ~((signed int)Value >> 31));
16+
}
17+
18+
void Sobel_FLOAT(unsigned char *Src, unsigned char *Dest, int Width, int Height, int Stride) {
19+
int Channel = Stride / Width;
20+
unsigned char *RowCopy = (unsigned char*)malloc((Width + 2) * 3 * Channel);
21+
unsigned char *First = RowCopy;
22+
unsigned char *Second = RowCopy + (Width + 2) * Channel;
23+
unsigned char *Third = RowCopy + (Width + 2) * 2 * Channel;
24+
//拷贝第二行数据,边界值填充
25+
memcpy(Second, Src, Channel);
26+
memcpy(Second + Channel, Src, Width*Channel);
27+
memcpy(Second + (Width + 1)*Channel, Src + (Width - 1)*Channel, Channel);
28+
//第一行和第二行一样
29+
memcpy(First, Second, (Width + 2) * Channel);
30+
//拷贝第三行数据,边界值填充
31+
memcpy(Third, Src + Stride, Channel);
32+
memcpy(Third + Channel, Src + Stride, Width * Channel);
33+
memcpy(Third + (Width + 1) * Channel, Src + Stride + (Width - 1) * Channel, Channel);
34+
35+
for (int Y = 0; Y < Height; Y++) {
36+
unsigned char *LinePS = Src + Y * Stride;
37+
unsigned char *LinePD = Dest + Y * Stride;
38+
if (Y != 0) {
39+
unsigned char *Temp = First;
40+
First = Second;
41+
Second = Third;
42+
Third = Temp;
43+
}
44+
if (Y == Height - 1) {
45+
memcpy(Third, Second, (Width + 2) * Channel);
46+
}
47+
else {
48+
memcpy(Third, Src + (Y + 1) * Stride, Channel);
49+
memcpy(Third + Channel, Src + (Y + 1) * Stride, Width * Channel); // 由于备份了前面一行的数据,这里即使Src和Dest相同也是没有问题的
50+
memcpy(Third + (Width + 1) * Channel, Src + (Y + 1) * Stride + (Width - 1) * Channel, Channel);
51+
}
52+
if (Channel == 1) {
53+
for (int X = 0; X < Width; X++)
54+
{
55+
int GX = First[X] - First[X + 2] + (Second[X] - Second[X + 2]) * 2 + Third[X] - Third[X + 2];
56+
int GY = First[X] + First[X + 2] + (First[X + 1] - Third[X + 1]) * 2 - Third[X] - Third[X + 2];
57+
LinePD[X] = IM_ClampToByte(sqrtf(GX * GX + GY * GY + 0.0F));
58+
}
59+
}
60+
else
61+
{
62+
for (int X = 0; X < Width * 3; X++)
63+
{
64+
int GX = First[X] - First[X + 6] + (Second[X] - Second[X + 6]) * 2 + Third[X] - Third[X + 6];
65+
int GY = First[X] + First[X + 6] + (First[X + 3] - Third[X + 3]) * 2 - Third[X] - Third[X + 6];
66+
LinePD[X] = IM_ClampToByte(sqrtf(GX * GX + GY * GY + 0.0F));
67+
}
68+
}
69+
}
70+
free(RowCopy);
71+
}
72+
73+
void Sobel_INT(unsigned char *Src, unsigned char *Dest, int Width, int Height, int Stride) {
74+
int Channel = Stride / Width;
75+
unsigned char *RowCopy = (unsigned char*)malloc((Width + 2) * 3 * Channel);
76+
unsigned char *First = RowCopy;
77+
unsigned char *Second = RowCopy + (Width + 2) * Channel;
78+
unsigned char *Third = RowCopy + (Width + 2) * 2 * Channel;
79+
//拷贝第二行数据,边界值填充
80+
memcpy(Second, Src, Channel);
81+
memcpy(Second + Channel, Src, Width*Channel);
82+
memcpy(Second + (Width + 1)*Channel, Src + (Width - 1)*Channel, Channel);
83+
//第一行和第二行一样
84+
memcpy(First, Second, (Width + 2) * Channel);
85+
//拷贝第三行数据,边界值填充
86+
memcpy(Third, Src + Stride, Channel);
87+
memcpy(Third + Channel, Src + Stride, Width * Channel);
88+
memcpy(Third + (Width + 1) * Channel, Src + Stride + (Width - 1) * Channel, Channel);
89+
90+
unsigned char Table[65026];
91+
for (int Y = 0; Y < 65026; Y++) Table[Y] = (sqrtf(Y + 0.0f) + 0.5f);
92+
for (int Y = 0; Y < Height; Y++) {
93+
unsigned char *LinePS = Src + Y * Stride;
94+
unsigned char *LinePD = Dest + Y * Stride;
95+
if (Y != 0) {
96+
unsigned char *Temp = First;
97+
First = Second;
98+
Second = Third;
99+
Third = Temp;
100+
}
101+
if (Y == Height - 1) {
102+
memcpy(Third, Second, (Width + 2) * Channel);
103+
}
104+
else {
105+
memcpy(Third, Src + (Y + 1) * Stride, Channel);
106+
memcpy(Third + Channel, Src + (Y + 1) * Stride, Width * Channel); // 由于备份了前面一行的数据,这里即使Src和Dest相同也是没有问题的
107+
memcpy(Third + (Width + 1) * Channel, Src + (Y + 1) * Stride + (Width - 1) * Channel, Channel);
108+
}
109+
if (Channel == 1) {
110+
for (int X = 0; X < Width; X++)
111+
{
112+
int GX = First[X] - First[X + 2] + (Second[X] - Second[X + 2]) * 2 + Third[X] - Third[X + 2];
113+
int GY = First[X] + First[X + 2] + (First[X + 1] - Third[X + 1]) * 2 - Third[X] - Third[X + 2];
114+
LinePD[X] = Table[min(GX * GX + GY * GY, 65025)];
115+
}
116+
}
117+
else
118+
{
119+
for (int X = 0; X < Width * 3; X++)
120+
{
121+
int GX = First[X] - First[X + 6] + (Second[X] - Second[X + 6]) * 2 + Third[X] - Third[X + 6];
122+
int GY = First[X] + First[X + 6] + (First[X + 3] - Third[X + 3]) * 2 - Third[X] - Third[X + 6];
123+
LinePD[X] = Table[min(GX * GX + GY * GY, 65025)];
124+
}
125+
}
126+
}
127+
free(RowCopy);
128+
}
129+
130+
void Sobel_SSE(unsigned char *Src, unsigned char *Dest, int Width, int Height, int Stride) {
131+
int Channel = Stride / Width;
132+
unsigned char *RowCopy = (unsigned char*)malloc((Width + 2) * 3 * Channel);
133+
unsigned char *First = RowCopy;
134+
unsigned char *Second = RowCopy + (Width + 2) * Channel;
135+
unsigned char *Third = RowCopy + (Width + 2) * 2 * Channel;
136+
//拷贝第二行数据,边界值填充
137+
memcpy(Second, Src, Channel);
138+
memcpy(Second + Channel, Src, Width*Channel);
139+
memcpy(Second + (Width + 1)*Channel, Src + (Width - 1)*Channel, Channel);
140+
//第一行和第二行一样
141+
memcpy(First, Second, (Width + 2) * Channel);
142+
//拷贝第三行数据,边界值填充
143+
memcpy(Third, Src + Stride, Channel);
144+
memcpy(Third + Channel, Src + Stride, Width * Channel);
145+
memcpy(Third + (Width + 1) * Channel, Src + Stride + (Width - 1) * Channel, Channel);
146+
147+
int BlockSize = 8, Block = (Width * Channel) / BlockSize;
148+
149+
unsigned char Table[65026];
150+
for (int Y = 0; Y < 65026; Y++) Table[Y] = (sqrtf(Y + 0.0f) + 0.5f);
151+
for (int Y = 0; Y < Height; Y++) {
152+
unsigned char *LinePS = Src + Y * Stride;
153+
unsigned char *LinePD = Dest + Y * Stride;
154+
if (Y != 0) {
155+
unsigned char *Temp = First;
156+
First = Second;
157+
Second = Third;
158+
Third = Temp;
159+
}
160+
if (Y == Height - 1) {
161+
memcpy(Third, Second, (Width + 2) * Channel);
162+
}
163+
else {
164+
memcpy(Third, Src + (Y + 1) * Stride, Channel);
165+
memcpy(Third + Channel, Src + (Y + 1) * Stride, Width * Channel); // 由于备份了前面一行的数据,这里即使Src和Dest相同也是没有问题的
166+
memcpy(Third + (Width + 1) * Channel, Src + (Y + 1) * Stride + (Width - 1) * Channel, Channel);
167+
}
168+
if (Channel == 1) {
169+
for (int X = 0; X < Width; X++)
170+
{
171+
int GX = First[X] - First[X + 2] + (Second[X] - Second[X + 2]) * 2 + Third[X] - Third[X + 2];
172+
int GY = First[X] + First[X + 2] + (First[X + 1] - Third[X + 1]) * 2 - Third[X] - Third[X + 2];
173+
//LinePD[X] = Table[min(GX * GX + GY * GY, 65025)];
174+
}
175+
}
176+
else
177+
{
178+
__m128i Zero = _mm_setzero_si128();
179+
for (int X = 0; X < Block * BlockSize; X += BlockSize)
180+
{
181+
__m128i FirstP0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(First + X)), Zero);
182+
__m128i FirstP1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(First + X + 3)), Zero);
183+
__m128i FirstP2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(First + X + 6)), Zero);
184+
185+
__m128i SecondP0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(Second + X)), Zero);
186+
__m128i SecondP2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(Second + X + 6)), Zero);
187+
188+
__m128i ThirdP0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(Third + X)), Zero);
189+
__m128i ThirdP1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(Third + X + 3)), Zero);
190+
__m128i ThirdP2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(Third + X + 6)), Zero);
191+
192+
__m128i GX16 = _mm_abs_epi16(_mm_add_epi16(_mm_add_epi16(_mm_sub_epi16(FirstP0, FirstP2), _mm_slli_epi16(_mm_sub_epi16(SecondP0, SecondP2), 1)), _mm_sub_epi16(ThirdP0, ThirdP2)));
193+
__m128i GY16 = _mm_abs_epi16(_mm_sub_epi16(_mm_add_epi16(_mm_add_epi16(FirstP0, FirstP2), _mm_slli_epi16(_mm_sub_epi16(FirstP1, ThirdP1), 1)), _mm_add_epi16(ThirdP0, ThirdP2)));
194+
195+
__m128i GXYL = _mm_unpacklo_epi16(GX16, GY16);
196+
__m128i GXYH = _mm_unpackhi_epi16(GX16, GY16);
197+
198+
__m128i ResultL = _mm_cvtps_epi32(_mm_sqrt_ps(_mm_cvtepi32_ps(_mm_madd_epi16(GXYL, GXYL))));
199+
__m128i ResultH = _mm_cvtps_epi32(_mm_sqrt_ps(_mm_cvtepi32_ps(_mm_madd_epi16(GXYH, GXYH))));
200+
201+
/*__m128i GX32L = _mm_unpacklo_epi16(GX16, Zero);
202+
__m128i GX32H = _mm_unpackhi_epi16(GX16, Zero);
203+
__m128i GY32L = _mm_unpacklo_epi16(GY16, Zero);
204+
__m128i GY32H = _mm_unpackhi_epi16(GY16, Zero);
205+
206+
__m128i ResultL = _mm_cvtps_epi32(_mm_sqrt_ps(_mm_cvtepi32_ps(_mm_add_epi32(_mm_mullo_epi32(GX32L, GX32L), _mm_mullo_epi32(GY32L, GY32L)))));
207+
__m128i ResultH = _mm_cvtps_epi32(_mm_sqrt_ps(_mm_cvtepi32_ps(_mm_add_epi32(_mm_mullo_epi32(GX32H, GX32H), _mm_mullo_epi32(GY32H, GY32H)))));
208+
209+
_mm_storel_epi64((__m128i *)(LinePD + X), _mm_packus_epi16(_mm_packus_epi32(ResultL, ResultH), Zero));*/
210+
_mm_storel_epi64((__m128i *)(LinePD + X), _mm_packus_epi16(_mm_packus_epi32(ResultL, ResultH), Zero));
211+
}
212+
213+
for (int X = Block * BlockSize; X < Width * 3; X++)
214+
{
215+
int GX = First[X] - First[X + 6] + (Second[X] - Second[X + 6]) * 2 + Third[X] - Third[X + 6];
216+
int GY = First[X] + First[X + 6] + (First[X + 3] - Third[X + 3]) * 2 - Third[X] - Third[X + 6];
217+
LinePD[X] = IM_ClampToByte(sqrtf(GX * GX + GY * GY + 0.0F));
218+
}
219+
}
220+
}
221+
free(RowCopy);
222+
}
223+
224+
int main() {
225+
Mat src = imread("F:\\car.jpg");
226+
int Height = src.rows;
227+
int Width = src.cols;
228+
unsigned char *Src = src.data;
229+
unsigned char *Dest = new unsigned char[Height * Width * 3];
230+
int Stride = Width * 3;
231+
int Radius = 11;
232+
int Adjustment = 50;
233+
int64 st = cvGetTickCount();
234+
for (int i = 0; i <50; i++) {
235+
Sobel_SSE(Src, Dest, Width, Height, Stride);
236+
}
237+
double duration = (cv::getTickCount() - st) / cv::getTickFrequency() * 20;
238+
printf("%.5f\n", duration);
239+
Sobel_SSE(Src, Dest, Width, Height, Stride);
240+
Mat dst(Height, Width, CV_8UC3, Dest);
241+
imshow("origin", src);
242+
imshow("result", dst);
243+
imwrite("F:\\res.jpg", dst);
244+
waitKey(0);
245+
waitKey(0);
246+
}

0 commit comments

Comments
 (0)
Please sign in to comment.