Skip to content

Commit a30297f

Browse files
Speeding up no X86 processors.
1 parent ecb8300 commit a30297f

File tree

1 file changed

+142
-65
lines changed

1 file changed

+142
-65
lines changed

neural/neuralvolume.pas

+142-65
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,8 @@ TVolume = class(TObject)
111111
procedure SetTag(I: integer); {$IFDEF Release} inline; {$ENDIF}
112112
function GetTags(x: integer): integer; {$IFDEF Release} inline; {$ENDIF}
113113
procedure SetTags(x: integer; AValue: integer); {$IFDEF Release} inline; {$ENDIF}
114+
class procedure MulAddPPVS(PtrA, PtrB: TNeuralFloatArrPtr; Value: T;
115+
pSize: integer); {$IFDEF Release} inline; {$ENDIF}
114116
public
115117
// FData was made public to allow other fast operations
116118
FData: array of T;
@@ -163,6 +165,7 @@ TVolume = class(TObject)
163165
procedure VSqrt(); {$IFDEF Release} inline; {$ENDIF}
164166
procedure MulAdd(Value: T; Original: TVolume); overload; {$IFDEF Release} inline; {$ENDIF}
165167
procedure MulMulAdd(Value1, Value2: T; Original: TVolume); overload; {$IFDEF Release} inline; {$ENDIF}
168+
class procedure MulMulAdd(PtrA, PtrB: TNeuralFloatArrPtr; Value1, Value2: T; pSize: integer); overload; {$IFDEF Release} inline; {$ENDIF}
166169
procedure MulAdd(Value: T; PtrB: TNeuralFloatArrPtr); overload; {$IFDEF Release} inline; {$ENDIF}
167170
procedure MulAdd(Original1, Original2: TVolume); overload; {$IFDEF Release} inline; {$ENDIF}
168171
class procedure MulAdd(PtrA, PtrB: TNeuralFloatArrPtr; Value: T; pSize: integer); overload; {$IFDEF Release} inline; {$ENDIF}
@@ -2603,6 +2606,73 @@ procedure TVolume.InterleaveWithDepthFrom(Original: TVolume; NewDepth: integer);
26032606
end;
26042607
end;
26052608
end;
2609+
(*
2610+
// this is a new version to be validated.
2611+
var
2612+
NewX: integer;
2613+
I: integer;
2614+
vHigh: integer;
2615+
posX, posD, maxPosX: integer;
2616+
NewDepth2, NewDepth3, NewDepth4, vHighM4: integer;
2617+
SourcePtr, DestPtr: TNeuralFloatPtr;
2618+
begin
2619+
NewX := Original.FSize div NewDepth;
2620+
Resize(NewX,1,NewDepth);
2621+
NewDepth2 := NewDepth + NewDepth;
2622+
NewDepth3 := NewDepth2 + NewDepth;
2623+
NewDepth4 := NewDepth3 + NewDepth;
2624+
2625+
vHigh := High(FData);
2626+
vHighM4 := vHigh - 4;
2627+
2628+
posX := 0;
2629+
posD := 0;
2630+
2631+
maxPosX := NewX * NewDepth;
2632+
2633+
SourcePtr := Addr(Original.FData[0]);
2634+
DestPtr := Addr(FData[posX + posD]);
2635+
2636+
//for I := 0 to vHigh do
2637+
I := 0;
2638+
while I <= vHigh do
2639+
begin
2640+
//posX := I mod NewX;
2641+
//posD := I div NewX;
2642+
//Self.Data[posX, 0, posD] := Original.FData[I];
2643+
while ( (I<vHighM4) and (posX + NewDepth4 < maxPosX) ) do
2644+
begin
2645+
(DestPtr )^ := (SourcePtr)^;
2646+
(DestPtr + NewDepth )^ := (SourcePtr+1)^;
2647+
(DestPtr + NewDepth2)^ := (SourcePtr+2)^;
2648+
(DestPtr + NewDepth3)^ := (SourcePtr+3)^;
2649+
Inc(I, 4);
2650+
Inc(posX, NewDepth4);
2651+
Inc(SourcePtr,4);
2652+
Inc(DestPtr, NewDepth4);
2653+
end;
2654+
2655+
(DestPtr)^ := (SourcePtr)^;
2656+
Inc(SourcePtr, 1);
2657+
Inc(posX, NewDepth);
2658+
Inc(I);
2659+
2660+
if I <= vHigh then
2661+
begin
2662+
if posX >= maxPosX then
2663+
begin
2664+
posX := 0;
2665+
posD := posD + 1;
2666+
DestPtr := Addr(FData[posX + posD]);
2667+
end
2668+
else
2669+
begin
2670+
Inc(DestPtr, NewDepth);
2671+
end;
2672+
end;
2673+
end;
2674+
end;
2675+
*)
26062676

26072677
procedure TVolume.InterleaveWithXFrom(Original: TVolume; NewX: integer);
26082678
begin
@@ -2754,40 +2824,29 @@ procedure TVolume.MulAdd(Value: T; Original: TVolume);
27542824
end;
27552825

27562826
procedure TVolume.MulMulAdd(Value1, Value2: T; Original: TVolume);
2757-
var
2758-
I: integer;
2759-
vHigh: integer;
27602827
begin
2761-
vHigh := High(FData);
2762-
for I := 0 to vHigh do
2763-
FData[I] := FData[I]*Value1 + Original.FData[I]*Value2;
2828+
MulMulAdd(Addr(Self.FData[0]), Addr(Original.FData[0]), Value1, Value2, Self.Size);
27642829
end;
27652830

27662831
procedure TVolume.MulAdd(Value: T; PtrB: TNeuralFloatArrPtr);
2767-
var
2768-
I: integer;
2769-
vHigh: integer;
27702832
begin
2771-
vHigh := High(FData);
2772-
for I := 0 to vHigh do
2773-
{$IFDEF FPC}
2774-
FData[I] += PtrB^[I]*Value;
2775-
{$ELSE}
2776-
FData[I] := FData[I] + PtrB^[I]*Value;
2777-
{$ENDIF}
2833+
MulAddPPVS(TNeuralFloatArrPtr(Addr(Self.FData[0])), PtrB, Value, Self.Size);
27782834
end;
27792835

27802836
procedure TVolume.MulAdd(Original1, Original2: TVolume);
2781-
var
2782-
I: integer;
2783-
vHigh: integer;
27842837
begin
2785-
vHigh := High(FData);
2786-
for I := 0 to vHigh do
2787-
FData[I] := FData[I] + Original1.FData[I] * Original2.FData[I];
2838+
{$IFDEF Debug}
2839+
if Original1.Size <> Self.Size then
2840+
raise Exception.Create('Sizes don''t match at MulAdd parameter 1: ' +
2841+
IntToStr(Self.Size) + ' and ' + IntToStr(Original1.Size) + ' .');
2842+
if Original2.Size <> Self.Size then
2843+
raise Exception.Create('Sizes don''t match at MulAdd parameter 2: ' +
2844+
IntToStr(Self.Size) + ' and ' + IntToStr(Original2.Size) + ' .');
2845+
{$ENDIF}
2846+
MulAdd(Addr(Self.FData[0]), Addr(Original1.FData[0]), Addr(Original2.FData[0]), Self.Size);
27882847
end;
27892848

2790-
class procedure TVolume.MulAdd(PtrA, PtrB: TNeuralFloatArrPtr; Value: T;
2849+
class procedure TVolume.MulAddPPVS(PtrA, PtrB: TNeuralFloatArrPtr; Value: T;
27912850
pSize: integer);
27922851
var
27932852
I: integer;
@@ -2796,10 +2855,11 @@ class procedure TVolume.MulAdd(PtrA, PtrB: TNeuralFloatArrPtr; Value: T;
27962855
AddrA, AddrB: TNeuralFloatPtr;
27972856
begin
27982857
BasePos := 0;
2799-
AddrA := pointer(PtrA);
2800-
AddrB := pointer(PtrB);
28012858
vHigh := pSize - 1;
28022859

2860+
{$IFDEF FPC}
2861+
AddrA := pointer(PtrA);
2862+
AddrB := pointer(PtrB);
28032863
while BasePos <= vHigh - 7 do
28042864
begin
28052865
(AddrA)^ := (AddrA)^ + (AddrB)^ * Value;
@@ -2825,6 +2885,7 @@ class procedure TVolume.MulAdd(PtrA, PtrB: TNeuralFloatArrPtr; Value: T;
28252885
AddrA := AddrA + 4;
28262886
AddrB := AddrB + 4;
28272887
end;
2888+
{$ENDIF}
28282889

28292890
if BasePos <= vHigh then for I := BasePos to vHigh do
28302891
{$IFDEF FPC}
@@ -2834,6 +2895,56 @@ class procedure TVolume.MulAdd(PtrA, PtrB: TNeuralFloatArrPtr; Value: T;
28342895
{$ENDIF}
28352896
end;
28362897

2898+
class procedure TVolume.MulMulAdd(PtrA, PtrB: TNeuralFloatArrPtr; Value1,
2899+
Value2: T; pSize: integer);
2900+
var
2901+
I: integer;
2902+
vHigh: integer;
2903+
BasePos: integer;
2904+
AddrA, AddrB: TNeuralFloatPtr;
2905+
begin
2906+
BasePos := 0;
2907+
vHigh := pSize - 1;
2908+
{$IFDEF FPC}
2909+
AddrA := pointer(PtrA);
2910+
AddrB := pointer(PtrB);
2911+
while BasePos <= vHigh - 7 do
2912+
begin
2913+
(AddrA)^ := (AddrA)^ * Value1 + (AddrB)^ * Value2;
2914+
(AddrA+1)^ := (AddrA+1)^ * Value1 + (AddrB+1)^ * Value2;
2915+
(AddrA+2)^ := (AddrA+2)^ * Value1 + (AddrB+2)^ * Value2;
2916+
(AddrA+3)^ := (AddrA+3)^ * Value1 + (AddrB+3)^ * Value2;
2917+
(AddrA+4)^ := (AddrA+4)^ * Value1 + (AddrB+4)^ * Value2;
2918+
(AddrA+5)^ := (AddrA+5)^ * Value1 + (AddrB+5)^ * Value2;
2919+
(AddrA+6)^ := (AddrA+6)^ * Value1 + (AddrB+6)^ * Value2;
2920+
(AddrA+7)^ := (AddrA+7)^ * Value1 + (AddrB+7)^ * Value2;
2921+
BasePos := BasePos + 8;
2922+
AddrA := AddrA + 8;
2923+
AddrB := AddrB + 8;
2924+
end;
2925+
2926+
while BasePos <= vHigh - 3 do
2927+
begin
2928+
(AddrA)^ := (AddrA)^ * Value1 + (AddrB)^ * Value2;
2929+
(AddrA+1)^ := (AddrA+1)^ * Value1 + (AddrB+1)^ * Value2;
2930+
(AddrA+2)^ := (AddrA+2)^ * Value1 + (AddrB+2)^ * Value2;
2931+
(AddrA+3)^ := (AddrA+3)^ * Value1 + (AddrB+3)^ * Value2;
2932+
BasePos := BasePos + 4;
2933+
AddrA := AddrA + 4;
2934+
AddrB := AddrB + 4;
2935+
end;
2936+
{$ENDIF}
2937+
if BasePos <= vHigh then for I := BasePos to vHigh do
2938+
PtrA^[I] := PtrA^[I] * Value1 + PtrB^[I] * Value2;
2939+
end;
2940+
2941+
2942+
class procedure TVolume.MulAdd(PtrA, PtrB: TNeuralFloatArrPtr; Value: T;
2943+
pSize: integer);
2944+
begin
2945+
Self.MulAddPPVS(PtrA, PtrB, Value, pSize);
2946+
end;
2947+
28372948
class procedure TVolume.MulAdd(PtrA, PtrB, PtrC: TNeuralFloatArrPtr;
28382949
pSize: integer);
28392950
var
@@ -2847,7 +2958,7 @@ class procedure TVolume.MulAdd(PtrA, PtrB, PtrC: TNeuralFloatArrPtr;
28472958
AddrB := pointer(PtrB);
28482959
AddrC := pointer(PtrC);
28492960
vHigh := pSize - 1;
2850-
2961+
{$IFDEF FPC}
28512962
while BasePos <= vHigh - 7 do
28522963
begin
28532964
(AddrA)^ := (AddrA)^ + (AddrB)^ * (AddrC)^;
@@ -2875,7 +2986,7 @@ class procedure TVolume.MulAdd(PtrA, PtrB, PtrC: TNeuralFloatArrPtr;
28752986
AddrB := AddrB + 4;
28762987
AddrC := AddrC + 4;
28772988
end;
2878-
2989+
{$ENDIF}
28792990
if BasePos <= vHigh then for I := BasePos to vHigh do
28802991
{$IFDEF FPC}
28812992
PtrA^[I] += PtrB^[I]*PtrC^[I];
@@ -3392,46 +3503,13 @@ procedure TVolume.CopyResizing(Original: TVolume; NewSizeX, NewSizeY: integer);
33923503
end;
33933504

33943505
function TVolume.DotProduct(Original: TVolume): T;
3395-
var
3396-
I: integer;
3397-
vHigh: integer;
3398-
BasePos: integer;
33993506
begin
34003507
{$IFDEF Debug}
34013508
if Original.Size <> Self.Size then
34023509
raise Exception.Create('Sizes don''t match at DotProduct: ' +
34033510
IntToStr(Self.Size) + ' and ' + IntToStr(Original.Size) + ' .');
34043511
{$ENDIF}
3405-
Result := 0;
3406-
vHigh := High(FData);
3407-
BasePos := 0;
3408-
3409-
while BasePos <= vHigh - 7 do
3410-
begin
3411-
Result := Result +
3412-
FData[BasePos] * Original.FData[BasePos] +
3413-
FData[BasePos+1] * Original.FData[BasePos+1] +
3414-
FData[BasePos+2] * Original.FData[BasePos+2] +
3415-
FData[BasePos+3] * Original.FData[BasePos+3] +
3416-
FData[BasePos+4] * Original.FData[BasePos+4] +
3417-
FData[BasePos+5] * Original.FData[BasePos+5] +
3418-
FData[BasePos+6] * Original.FData[BasePos+6] +
3419-
FData[BasePos+7] * Original.FData[BasePos+7];
3420-
BasePos := BasePos + 8;
3421-
end;
3422-
3423-
while BasePos <= vHigh - 3 do
3424-
begin
3425-
Result := Result +
3426-
FData[BasePos] * Original.FData[BasePos] +
3427-
FData[BasePos+1] * Original.FData[BasePos+1] +
3428-
FData[BasePos+2] * Original.FData[BasePos+2] +
3429-
FData[BasePos+3] * Original.FData[BasePos+3];
3430-
BasePos := BasePos + 4;
3431-
end;
3432-
3433-
if BasePos <= vHigh then for I := BasePos to vHigh do
3434-
Result := Result + FData[I] * Original.FData[I];
3512+
Result := Self.DotProduct(Addr(Self.FData[0]), Addr(Original.FData[0]), Self.Size);
34353513
end;
34363514

34373515
function TVolume.SumDiff(Original: TVolume): T;
@@ -3460,11 +3538,9 @@ procedure TVolume.DebugDiff(Original: TVolume; Limit: Single);
34603538
vHigh: integer;
34613539
AuxDiff: Single;
34623540
begin
3463-
{$IFDEF Debug}
34643541
if Original.Size <> Self.Size then
3465-
raise Exception.Create('Sizes don''t match at SumDiff: ' +
3542+
raise Exception.Create('Sizes don''t match at DebugDiff: ' +
34663543
IntToStr(Self.Size) + ' and ' + IntToStr(Original.Size) + ' .');
3467-
{$ENDIF}
34683544
vHigh := High(FData);
34693545
for I := 0 to vHigh do
34703546
begin
@@ -8607,9 +8683,9 @@ class function TVolume.DotProduct(PtrA, PtrB: TNeuralFloatArrPtr; NumElements: i
86078683
Result := 0;
86088684
BasePos := 0;
86098685
vHigh := NumElements - 1;
8686+
{$IFDEF FPC}
86108687
AddrA := pointer(PtrA);
86118688
AddrB := pointer(PtrB);
8612-
86138689
while BasePos <= vHigh - 7 do
86148690
begin
86158691
Result := Result +
@@ -8637,6 +8713,7 @@ class function TVolume.DotProduct(PtrA, PtrB: TNeuralFloatArrPtr; NumElements: i
86378713
AddrA := AddrA + 4;
86388714
AddrB := AddrB + 4;
86398715
end;
8716+
{$ENDIF}
86408717

86418718
if BasePos <= vHigh then for I := BasePos to vHigh do
86428719
//Uncomment for debugging only: WriteLn(PtrA^[I]:8:6,' # ', PtrB^[I]:8:6,' # ', Result:8:6);

0 commit comments

Comments
 (0)