@@ -111,6 +111,8 @@ TVolume = class(TObject)
111
111
procedure SetTag (I: integer); { $IFDEF Release} inline; { $ENDIF}
112
112
function GetTags (x: integer): integer; { $IFDEF Release} inline; { $ENDIF}
113
113
procedure SetTags (x: integer; AValue: integer); { $IFDEF Release} inline; { $ENDIF}
114
+ class procedure MulAddPPVS (PtrA, PtrB: TNeuralFloatArrPtr; Value : T;
115
+ pSize: integer); { $IFDEF Release} inline; { $ENDIF}
114
116
public
115
117
// FData was made public to allow other fast operations
116
118
FData: array of T;
@@ -163,6 +165,7 @@ TVolume = class(TObject)
163
165
procedure VSqrt (); { $IFDEF Release} inline; { $ENDIF}
164
166
procedure MulAdd (Value : T; Original: TVolume); overload; { $IFDEF Release} inline; { $ENDIF}
165
167
procedure MulMulAdd (Value1, Value2: T; Original: TVolume); overload; { $IFDEF Release} inline; { $ENDIF}
168
+ class procedure MulMulAdd (PtrA, PtrB: TNeuralFloatArrPtr; Value1, Value2: T; pSize: integer); overload; { $IFDEF Release} inline; { $ENDIF}
166
169
procedure MulAdd (Value : T; PtrB: TNeuralFloatArrPtr); overload; { $IFDEF Release} inline; { $ENDIF}
167
170
procedure MulAdd (Original1, Original2: TVolume); overload; { $IFDEF Release} inline; { $ENDIF}
168
171
class procedure MulAdd (PtrA, PtrB: TNeuralFloatArrPtr; Value : T; pSize: integer); overload; { $IFDEF Release} inline; { $ENDIF}
@@ -2603,6 +2606,73 @@ procedure TVolume.InterleaveWithDepthFrom(Original: TVolume; NewDepth: integer);
2603
2606
end ;
2604
2607
end ;
2605
2608
end ;
2609
+ (*
2610
+ // this is a new version to be validated.
2611
+ var
2612
+ NewX: integer;
2613
+ I: integer;
2614
+ vHigh: integer;
2615
+ posX, posD, maxPosX: integer;
2616
+ NewDepth2, NewDepth3, NewDepth4, vHighM4: integer;
2617
+ SourcePtr, DestPtr: TNeuralFloatPtr;
2618
+ begin
2619
+ NewX := Original.FSize div NewDepth;
2620
+ Resize(NewX,1,NewDepth);
2621
+ NewDepth2 := NewDepth + NewDepth;
2622
+ NewDepth3 := NewDepth2 + NewDepth;
2623
+ NewDepth4 := NewDepth3 + NewDepth;
2624
+
2625
+ vHigh := High(FData);
2626
+ vHighM4 := vHigh - 4;
2627
+
2628
+ posX := 0;
2629
+ posD := 0;
2630
+
2631
+ maxPosX := NewX * NewDepth;
2632
+
2633
+ SourcePtr := Addr(Original.FData[0]);
2634
+ DestPtr := Addr(FData[posX + posD]);
2635
+
2636
+ //for I := 0 to vHigh do
2637
+ I := 0;
2638
+ while I <= vHigh do
2639
+ begin
2640
+ //posX := I mod NewX;
2641
+ //posD := I div NewX;
2642
+ //Self.Data[posX, 0, posD] := Original.FData[I];
2643
+ while ( (I<vHighM4) and (posX + NewDepth4 < maxPosX) ) do
2644
+ begin
2645
+ (DestPtr )^ := (SourcePtr)^;
2646
+ (DestPtr + NewDepth )^ := (SourcePtr+1)^;
2647
+ (DestPtr + NewDepth2)^ := (SourcePtr+2)^;
2648
+ (DestPtr + NewDepth3)^ := (SourcePtr+3)^;
2649
+ Inc(I, 4);
2650
+ Inc(posX, NewDepth4);
2651
+ Inc(SourcePtr,4);
2652
+ Inc(DestPtr, NewDepth4);
2653
+ end;
2654
+
2655
+ (DestPtr)^ := (SourcePtr)^;
2656
+ Inc(SourcePtr, 1);
2657
+ Inc(posX, NewDepth);
2658
+ Inc(I);
2659
+
2660
+ if I <= vHigh then
2661
+ begin
2662
+ if posX >= maxPosX then
2663
+ begin
2664
+ posX := 0;
2665
+ posD := posD + 1;
2666
+ DestPtr := Addr(FData[posX + posD]);
2667
+ end
2668
+ else
2669
+ begin
2670
+ Inc(DestPtr, NewDepth);
2671
+ end;
2672
+ end;
2673
+ end;
2674
+ end;
2675
+ *)
2606
2676
2607
2677
procedure TVolume.InterleaveWithXFrom (Original: TVolume; NewX: integer);
2608
2678
begin
@@ -2754,40 +2824,29 @@ procedure TVolume.MulAdd(Value: T; Original: TVolume);
2754
2824
end ;
2755
2825
2756
2826
procedure TVolume.MulMulAdd (Value1, Value2: T; Original: TVolume);
2757
- var
2758
- I: integer;
2759
- vHigh: integer;
2760
2827
begin
2761
- vHigh := High(FData);
2762
- for I := 0 to vHigh do
2763
- FData[I] := FData[I]*Value1 + Original.FData[I]*Value2;
2828
+ MulMulAdd(Addr(Self.FData[0 ]), Addr(Original.FData[0 ]), Value1, Value2, Self.Size);
2764
2829
end ;
2765
2830
2766
2831
procedure TVolume.MulAdd (Value : T; PtrB: TNeuralFloatArrPtr);
2767
- var
2768
- I: integer;
2769
- vHigh: integer;
2770
2832
begin
2771
- vHigh := High(FData);
2772
- for I := 0 to vHigh do
2773
- { $IFDEF FPC}
2774
- FData[I] += PtrB^[I]*Value ;
2775
- { $ELSE}
2776
- FData[I] := FData[I] + PtrB^[I]*Value ;
2777
- { $ENDIF}
2833
+ MulAddPPVS(TNeuralFloatArrPtr(Addr(Self.FData[0 ])), PtrB, Value , Self.Size);
2778
2834
end ;
2779
2835
2780
2836
procedure TVolume.MulAdd (Original1, Original2: TVolume);
2781
- var
2782
- I: integer;
2783
- vHigh: integer;
2784
2837
begin
2785
- vHigh := High(FData);
2786
- for I := 0 to vHigh do
2787
- FData[I] := FData[I] + Original1.FData[I] * Original2.FData[I];
2838
+ { $IFDEF Debug}
2839
+ if Original1.Size <> Self.Size then
2840
+ raise Exception.Create(' Sizes don'' t match at MulAdd parameter 1: ' +
2841
+ IntToStr(Self.Size) + ' and ' + IntToStr(Original1.Size) + ' .' );
2842
+ if Original2.Size <> Self.Size then
2843
+ raise Exception.Create(' Sizes don'' t match at MulAdd parameter 2: ' +
2844
+ IntToStr(Self.Size) + ' and ' + IntToStr(Original2.Size) + ' .' );
2845
+ { $ENDIF}
2846
+ MulAdd(Addr(Self.FData[0 ]), Addr(Original1.FData[0 ]), Addr(Original2.FData[0 ]), Self.Size);
2788
2847
end ;
2789
2848
2790
- class procedure TVolume.MulAdd (PtrA, PtrB: TNeuralFloatArrPtr; Value : T;
2849
+ class procedure TVolume.MulAddPPVS (PtrA, PtrB: TNeuralFloatArrPtr; Value : T;
2791
2850
pSize: integer);
2792
2851
var
2793
2852
I: integer;
@@ -2796,10 +2855,11 @@ class procedure TVolume.MulAdd(PtrA, PtrB: TNeuralFloatArrPtr; Value: T;
2796
2855
AddrA, AddrB: TNeuralFloatPtr;
2797
2856
begin
2798
2857
BasePos := 0 ;
2799
- AddrA := pointer(PtrA);
2800
- AddrB := pointer(PtrB);
2801
2858
vHigh := pSize - 1 ;
2802
2859
2860
+ { $IFDEF FPC}
2861
+ AddrA := pointer(PtrA);
2862
+ AddrB := pointer(PtrB);
2803
2863
while BasePos <= vHigh - 7 do
2804
2864
begin
2805
2865
(AddrA)^ := (AddrA)^ + (AddrB)^ * Value ;
@@ -2825,6 +2885,7 @@ class procedure TVolume.MulAdd(PtrA, PtrB: TNeuralFloatArrPtr; Value: T;
2825
2885
AddrA := AddrA + 4 ;
2826
2886
AddrB := AddrB + 4 ;
2827
2887
end ;
2888
+ { $ENDIF}
2828
2889
2829
2890
if BasePos <= vHigh then for I := BasePos to vHigh do
2830
2891
{ $IFDEF FPC}
@@ -2834,6 +2895,56 @@ class procedure TVolume.MulAdd(PtrA, PtrB: TNeuralFloatArrPtr; Value: T;
2834
2895
{ $ENDIF}
2835
2896
end ;
2836
2897
2898
+ class procedure TVolume.MulMulAdd (PtrA, PtrB: TNeuralFloatArrPtr; Value1,
2899
+ Value2: T; pSize: integer);
2900
+ var
2901
+ I: integer;
2902
+ vHigh: integer;
2903
+ BasePos: integer;
2904
+ AddrA, AddrB: TNeuralFloatPtr;
2905
+ begin
2906
+ BasePos := 0 ;
2907
+ vHigh := pSize - 1 ;
2908
+ { $IFDEF FPC}
2909
+ AddrA := pointer(PtrA);
2910
+ AddrB := pointer(PtrB);
2911
+ while BasePos <= vHigh - 7 do
2912
+ begin
2913
+ (AddrA)^ := (AddrA)^ * Value1 + (AddrB)^ * Value2;
2914
+ (AddrA+1 )^ := (AddrA+1 )^ * Value1 + (AddrB+1 )^ * Value2;
2915
+ (AddrA+2 )^ := (AddrA+2 )^ * Value1 + (AddrB+2 )^ * Value2;
2916
+ (AddrA+3 )^ := (AddrA+3 )^ * Value1 + (AddrB+3 )^ * Value2;
2917
+ (AddrA+4 )^ := (AddrA+4 )^ * Value1 + (AddrB+4 )^ * Value2;
2918
+ (AddrA+5 )^ := (AddrA+5 )^ * Value1 + (AddrB+5 )^ * Value2;
2919
+ (AddrA+6 )^ := (AddrA+6 )^ * Value1 + (AddrB+6 )^ * Value2;
2920
+ (AddrA+7 )^ := (AddrA+7 )^ * Value1 + (AddrB+7 )^ * Value2;
2921
+ BasePos := BasePos + 8 ;
2922
+ AddrA := AddrA + 8 ;
2923
+ AddrB := AddrB + 8 ;
2924
+ end ;
2925
+
2926
+ while BasePos <= vHigh - 3 do
2927
+ begin
2928
+ (AddrA)^ := (AddrA)^ * Value1 + (AddrB)^ * Value2;
2929
+ (AddrA+1 )^ := (AddrA+1 )^ * Value1 + (AddrB+1 )^ * Value2;
2930
+ (AddrA+2 )^ := (AddrA+2 )^ * Value1 + (AddrB+2 )^ * Value2;
2931
+ (AddrA+3 )^ := (AddrA+3 )^ * Value1 + (AddrB+3 )^ * Value2;
2932
+ BasePos := BasePos + 4 ;
2933
+ AddrA := AddrA + 4 ;
2934
+ AddrB := AddrB + 4 ;
2935
+ end ;
2936
+ { $ENDIF}
2937
+ if BasePos <= vHigh then for I := BasePos to vHigh do
2938
+ PtrA^[I] := PtrA^[I] * Value1 + PtrB^[I] * Value2;
2939
+ end ;
2940
+
2941
+
2942
+ class procedure TVolume.MulAdd (PtrA, PtrB: TNeuralFloatArrPtr; Value : T;
2943
+ pSize: integer);
2944
+ begin
2945
+ Self.MulAddPPVS(PtrA, PtrB, Value , pSize);
2946
+ end ;
2947
+
2837
2948
class procedure TVolume.MulAdd (PtrA, PtrB, PtrC: TNeuralFloatArrPtr;
2838
2949
pSize: integer);
2839
2950
var
@@ -2847,7 +2958,7 @@ class procedure TVolume.MulAdd(PtrA, PtrB, PtrC: TNeuralFloatArrPtr;
2847
2958
AddrB := pointer(PtrB);
2848
2959
AddrC := pointer(PtrC);
2849
2960
vHigh := pSize - 1 ;
2850
-
2961
+ { $IFDEF FPC }
2851
2962
while BasePos <= vHigh - 7 do
2852
2963
begin
2853
2964
(AddrA)^ := (AddrA)^ + (AddrB)^ * (AddrC)^;
@@ -2875,7 +2986,7 @@ class procedure TVolume.MulAdd(PtrA, PtrB, PtrC: TNeuralFloatArrPtr;
2875
2986
AddrB := AddrB + 4 ;
2876
2987
AddrC := AddrC + 4 ;
2877
2988
end ;
2878
-
2989
+ { $ENDIF }
2879
2990
if BasePos <= vHigh then for I := BasePos to vHigh do
2880
2991
{ $IFDEF FPC}
2881
2992
PtrA^[I] += PtrB^[I]*PtrC^[I];
@@ -3392,46 +3503,13 @@ procedure TVolume.CopyResizing(Original: TVolume; NewSizeX, NewSizeY: integer);
3392
3503
end ;
3393
3504
3394
3505
function TVolume.DotProduct (Original: TVolume): T;
3395
- var
3396
- I: integer;
3397
- vHigh: integer;
3398
- BasePos: integer;
3399
3506
begin
3400
3507
{ $IFDEF Debug}
3401
3508
if Original.Size <> Self.Size then
3402
3509
raise Exception.Create(' Sizes don'' t match at DotProduct: ' +
3403
3510
IntToStr(Self.Size) + ' and ' + IntToStr(Original.Size) + ' .' );
3404
3511
{ $ENDIF}
3405
- Result := 0 ;
3406
- vHigh := High(FData);
3407
- BasePos := 0 ;
3408
-
3409
- while BasePos <= vHigh - 7 do
3410
- begin
3411
- Result := Result +
3412
- FData[BasePos] * Original.FData[BasePos] +
3413
- FData[BasePos+1 ] * Original.FData[BasePos+1 ] +
3414
- FData[BasePos+2 ] * Original.FData[BasePos+2 ] +
3415
- FData[BasePos+3 ] * Original.FData[BasePos+3 ] +
3416
- FData[BasePos+4 ] * Original.FData[BasePos+4 ] +
3417
- FData[BasePos+5 ] * Original.FData[BasePos+5 ] +
3418
- FData[BasePos+6 ] * Original.FData[BasePos+6 ] +
3419
- FData[BasePos+7 ] * Original.FData[BasePos+7 ];
3420
- BasePos := BasePos + 8 ;
3421
- end ;
3422
-
3423
- while BasePos <= vHigh - 3 do
3424
- begin
3425
- Result := Result +
3426
- FData[BasePos] * Original.FData[BasePos] +
3427
- FData[BasePos+1 ] * Original.FData[BasePos+1 ] +
3428
- FData[BasePos+2 ] * Original.FData[BasePos+2 ] +
3429
- FData[BasePos+3 ] * Original.FData[BasePos+3 ];
3430
- BasePos := BasePos + 4 ;
3431
- end ;
3432
-
3433
- if BasePos <= vHigh then for I := BasePos to vHigh do
3434
- Result := Result + FData[I] * Original.FData[I];
3512
+ Result := Self.DotProduct(Addr(Self.FData[0 ]), Addr(Original.FData[0 ]), Self.Size);
3435
3513
end ;
3436
3514
3437
3515
function TVolume.SumDiff (Original: TVolume): T;
@@ -3460,11 +3538,9 @@ procedure TVolume.DebugDiff(Original: TVolume; Limit: Single);
3460
3538
vHigh: integer;
3461
3539
AuxDiff: Single;
3462
3540
begin
3463
- { $IFDEF Debug}
3464
3541
if Original.Size <> Self.Size then
3465
- raise Exception.Create(' Sizes don'' t match at SumDiff : ' +
3542
+ raise Exception.Create(' Sizes don'' t match at DebugDiff : ' +
3466
3543
IntToStr(Self.Size) + ' and ' + IntToStr(Original.Size) + ' .' );
3467
- { $ENDIF}
3468
3544
vHigh := High(FData);
3469
3545
for I := 0 to vHigh do
3470
3546
begin
@@ -8607,9 +8683,9 @@ class function TVolume.DotProduct(PtrA, PtrB: TNeuralFloatArrPtr; NumElements: i
8607
8683
Result := 0 ;
8608
8684
BasePos := 0 ;
8609
8685
vHigh := NumElements - 1 ;
8686
+ { $IFDEF FPC}
8610
8687
AddrA := pointer(PtrA);
8611
8688
AddrB := pointer(PtrB);
8612
-
8613
8689
while BasePos <= vHigh - 7 do
8614
8690
begin
8615
8691
Result := Result +
@@ -8637,6 +8713,7 @@ class function TVolume.DotProduct(PtrA, PtrB: TNeuralFloatArrPtr; NumElements: i
8637
8713
AddrA := AddrA + 4 ;
8638
8714
AddrB := AddrB + 4 ;
8639
8715
end ;
8716
+ { $ENDIF}
8640
8717
8641
8718
if BasePos <= vHigh then for I := BasePos to vHigh do
8642
8719
// Uncomment for debugging only: WriteLn(PtrA^[I]:8:6,' # ', PtrB^[I]:8:6,' # ', Result:8:6);
0 commit comments