Skip to content

Commit cf4ca8f

Browse files
authored
Merge pull request #135 from yuxianzhi/patch-29
delete unused file
2 parents 12f107f + 0bc7832 commit cf4ca8f

File tree

4 files changed

+4
-261
lines changed

4 files changed

+4
-261
lines changed

compute/image/src/gpu/mali/cl/lut_yuvnv21_preprocess.cl

Lines changed: 0 additions & 48 deletions
This file was deleted.

compute/image/src/gpu/mali/cl/lut_yuvnv21_trilinear.cl

Lines changed: 0 additions & 138 deletions
This file was deleted.

compute/tensor/src/gpu/mali/cl/bilateral_slice_apply_c12.cl

Lines changed: 3 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -16,80 +16,9 @@
1616
/*these parameters belong to the matrix mult/add and conv steps*/
1717
/*they are extracted from the HDR model*/
1818
/*they may change for different models*/
19-
inline T guide_cal0(T3 v)
19+
inline T guide_cal(T3 v)
2020
{
21-
T3 tmp;
22-
tmp.x = v.x * (T)0.900616 - v.y * (T)0.1006 - v.z * (T)0.058384 + (T)0.072721;
23-
tmp.y = -v.x * (T)0.079311 + v.y * (T)0.91976 - v.z * (T)0.037624 + (T)0.124359;
24-
tmp.z = -v.x * (T)0.068347 - v.y * (T)0.069032 + v.z * (T)0.975032 + (T)0.129721;
25-
tmp = max(tmp, (T)0);
26-
tmp.x = tmp.x * (T)0.003211 * 16;
27-
tmp.y = tmp.y * (T)0.007948 * 16;
28-
tmp.z = tmp.z * (T)0.046259 * 16;
29-
T g = tmp.x * (T)0.249512 + tmp.y * (T)0.274577 + tmp.z * (T)0.324276 + (T)0.078941;
30-
return g;
31-
}
32-
33-
inline T guide_cal1(T3 v)
34-
{
35-
T4 a = {v.x, v.y, v.z, 1};
36-
37-
T4 wx = {0.9266905188560486, -0.07651382684707642, -0.11796596646308899, 0.03732128441333771};
38-
T4 wy = {0.016965966671705246, 1.0332931280136108, 0.09558156877756119, 0.049296945333480835};
39-
T4 wz = {-0.060142070055007935, -0.0184615608304739, 0.9641872048377991, 0.03588166460394859};
40-
41-
T x = dot(a, wx);
42-
T y = dot(a, wy);
43-
T z = dot(a, wz);
44-
45-
T16 sx = {-0.04031608998775482, 0.203898087143898, 0.21509018540382385, 0.2156994342803955,
46-
0.22189579904079437, 0.2710961699485779, 0.33060845732688904, 0.3510134816169739,
47-
0.3799624741077423, 0.4165642559528351, 0.5429311394691467, 0.6519719958305359,
48-
0.7579551339149475, 0.8117461800575256, 0.8115477561950684, 0.811525821685791};
49-
50-
T16 sy = {-0.04493796080350876, 0.2501078248023987, 0.24961410462856293, 0.24829524755477905,
51-
0.25029096007347107, 0.25275537371635437, 0.2535839378833771, 0.25915712118148804,
52-
0.992545485496521, 0.869307279586792, 0.8143411874771118, 0.8268355131149292,
53-
0.849763810634613, 0.8641695380210876, 0.8749480843544006, 0.9124495387077332};
54-
55-
T16 sz = {-0.0450710691511631, 0.17914339900016785, 0.20727036893367767, 0.21128158271312714,
56-
0.785589873790741, 0.40014126896858215, 0.39716723561286926, 0.4003089666366577,
57-
0.5749346613883972, 0.6277766227722168, 0.7884474992752075, 0.788446307182312,
58-
0.789533257484436, 0.7905913591384888, 0.7964500188827515, 0.7964839339256287};
59-
60-
T16 rx = max(x - sx, (T)0);
61-
T16 ry = max(x - sy, (T)0);
62-
T16 rz = max(z - sz, (T)0);
63-
64-
T16 mx = {0.9483454823493958, -0.02504969760775566, -0.0731356292963028, -0.08960649371147156,
65-
-0.0989985391497612, -0.0911787822842598, -0.07849951088428497, -0.07431424409151077,
66-
-0.05982533469796181, -0.027073463425040245, 0.09377846121788025, 0.07562971860170364,
67-
-0.05076618492603302, 0.2615104913711548, 0.42631882429122925, 0.6887183785438538};
68-
69-
T16 my = {0.9732255339622498, -0.03841959312558174, -0.07476486265659332, -0.08849595487117767,
70-
-0.10008298605680466, -0.10915014147758484, -0.1108635663986206, -0.09364574402570724,
71-
-0.04355158284306526, -0.015994733199477196, -0.025348246097564697, -0.051913388073444366,
72-
-0.07183714956045151, -0.0823502317070961, -0.09460879862308502, -0.13453315198421478};
73-
74-
T16 mz = {0.951180636882782, -0.014929438941180706, -0.022745108231902122, -0.042111292481422424,
75-
0.061638616025447845, -0.04308458790183067, -0.050973013043403625, -0.045611534267663956,
76-
0.037990815937519073, 0.04962018504738808, 0.15617141127586365, 0.13662904500961304,
77-
0.16109246015548706, 0.160025492310524, 0.12079561501741409, 0.15001150965690613};
78-
79-
x = dot(rx.s0123, mx.s0123) + dot(rx.s4567, mx.s4567) + dot(rx.s89ab, mx.s89ab) +
80-
dot(rx.scdef, mx.scdef);
81-
y = dot(ry.s0123, my.s0123) + dot(ry.s4567, my.s4567) + dot(ry.s89ab, my.s89ab) +
82-
dot(ry.scdef, my.scdef);
83-
z = dot(rz.s0123, mz.s0123) + dot(rz.s4567, mz.s4567) + dot(rz.s89ab, mz.s89ab) +
84-
dot(rz.scdef, mz.scdef);
85-
86-
T4 t1 = {x, y, z, 1};
87-
T4 w = {0.28540247678756714, 0.31782254576683044, 0.28381019830703735, 0.06326253712177277};
88-
T g = dot(t1, w);
89-
90-
g = min(max((T)0., g), (T)1.);
91-
92-
return g;
21+
return v.x;
9322
}
9423

9524
__kernel void KERNEL_NAME
@@ -135,7 +64,7 @@ __kernel void KERNEL_NAME
13564

13665
T gx = (x + (T)0.5) * (T)scale_x;
13766
#if defined(CONV)
138-
T gz = guide_cal1(in_val);
67+
T gz = guide_cal(in_val);
13968
#else
14069
T gz = guide[in_off];
14170
#endif

docs/CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ and this project adheres to [Semantic Versioning](<https://semver.org/spec/v2.0.
1616
- Support Intel Desktop GPU (float16 and float32)
1717
- Support Windows on arm platform
1818
- Support more operators : Random, Sin, Cos, Einsum, Elu, UnPooling, Flatten, ConvertColor, BilateralSliceApply, Lut
19-
- Support more networks : ViTAE, CMT, EfficientFormer, hdrnet, 3dLut, ConvTT, Wenet, NFM, AFM, ONN, wide&deep, DeepFM, MMOE, etc
19+
- Support more networks : ViTAE, CMT, EfficientFormer, ConvTT, Wenet, NFM, AFM, ONN, wide&deep, DeepFM, MMOE, etc
2020
- Improve multi-threads parallel inference performance on CPU
2121
- Add simple Chinese deployment guide
2222
- Support model file compatibility

0 commit comments

Comments
 (0)