@@ -111,6 +111,8 @@ TVolume = class(TObject)
111111 procedure SetTag (I: integer); { $IFDEF Release} inline; { $ENDIF}
112112 function GetTags (x: integer): integer; { $IFDEF Release} inline; { $ENDIF}
113113 procedure SetTags (x: integer; AValue: integer); { $IFDEF Release} inline; { $ENDIF}
114+ class procedure MulAddPPVS (PtrA, PtrB: TNeuralFloatArrPtr; Value : T;
115+ pSize: integer); { $IFDEF Release} inline; { $ENDIF}
114116 public
115117 // FData was made public to allow other fast operations
116118 FData: array of T;
@@ -163,6 +165,7 @@ TVolume = class(TObject)
163165 procedure VSqrt (); { $IFDEF Release} inline; { $ENDIF}
164166 procedure MulAdd (Value : T; Original: TVolume); overload; { $IFDEF Release} inline; { $ENDIF}
165167 procedure MulMulAdd (Value1, Value2: T; Original: TVolume); overload; { $IFDEF Release} inline; { $ENDIF}
168+ class procedure MulMulAdd (PtrA, PtrB: TNeuralFloatArrPtr; Value1, Value2: T; pSize: integer); overload; { $IFDEF Release} inline; { $ENDIF}
166169 procedure MulAdd (Value : T; PtrB: TNeuralFloatArrPtr); overload; { $IFDEF Release} inline; { $ENDIF}
167170 procedure MulAdd (Original1, Original2: TVolume); overload; { $IFDEF Release} inline; { $ENDIF}
168171 class procedure MulAdd (PtrA, PtrB: TNeuralFloatArrPtr; Value : T; pSize: integer); overload; { $IFDEF Release} inline; { $ENDIF}
@@ -2603,6 +2606,73 @@ procedure TVolume.InterleaveWithDepthFrom(Original: TVolume; NewDepth: integer);
26032606 end ;
26042607 end ;
26052608end ;
2609+ (*
2610+ // this is a new version to be validated.
2611+ var
2612+ NewX: integer;
2613+ I: integer;
2614+ vHigh: integer;
2615+ posX, posD, maxPosX: integer;
2616+ NewDepth2, NewDepth3, NewDepth4, vHighM4: integer;
2617+ SourcePtr, DestPtr: TNeuralFloatPtr;
2618+ begin
2619+ NewX := Original.FSize div NewDepth;
2620+ Resize(NewX,1,NewDepth);
2621+ NewDepth2 := NewDepth + NewDepth;
2622+ NewDepth3 := NewDepth2 + NewDepth;
2623+ NewDepth4 := NewDepth3 + NewDepth;
2624+
2625+ vHigh := High(FData);
2626+ vHighM4 := vHigh - 4;
2627+
2628+ posX := 0;
2629+ posD := 0;
2630+
2631+ maxPosX := NewX * NewDepth;
2632+
2633+ SourcePtr := Addr(Original.FData[0]);
2634+ DestPtr := Addr(FData[posX + posD]);
2635+
2636+ //for I := 0 to vHigh do
2637+ I := 0;
2638+ while I <= vHigh do
2639+ begin
2640+ //posX := I mod NewX;
2641+ //posD := I div NewX;
2642+ //Self.Data[posX, 0, posD] := Original.FData[I];
2643+ while ( (I<vHighM4) and (posX + NewDepth4 < maxPosX) ) do
2644+ begin
2645+ (DestPtr )^ := (SourcePtr)^;
2646+ (DestPtr + NewDepth )^ := (SourcePtr+1)^;
2647+ (DestPtr + NewDepth2)^ := (SourcePtr+2)^;
2648+ (DestPtr + NewDepth3)^ := (SourcePtr+3)^;
2649+ Inc(I, 4);
2650+ Inc(posX, NewDepth4);
2651+ Inc(SourcePtr,4);
2652+ Inc(DestPtr, NewDepth4);
2653+ end;
2654+
2655+ (DestPtr)^ := (SourcePtr)^;
2656+ Inc(SourcePtr, 1);
2657+ Inc(posX, NewDepth);
2658+ Inc(I);
2659+
2660+ if I <= vHigh then
2661+ begin
2662+ if posX >= maxPosX then
2663+ begin
2664+ posX := 0;
2665+ posD := posD + 1;
2666+ DestPtr := Addr(FData[posX + posD]);
2667+ end
2668+ else
2669+ begin
2670+ Inc(DestPtr, NewDepth);
2671+ end;
2672+ end;
2673+ end;
2674+ end;
2675+ *)
26062676
26072677procedure TVolume.InterleaveWithXFrom (Original: TVolume; NewX: integer);
26082678begin
@@ -2754,40 +2824,29 @@ procedure TVolume.MulAdd(Value: T; Original: TVolume);
27542824end ;
27552825
27562826procedure TVolume.MulMulAdd (Value1, Value2: T; Original: TVolume);
2757- var
2758- I: integer;
2759- vHigh: integer;
27602827begin
2761- vHigh := High(FData);
2762- for I := 0 to vHigh do
2763- FData[I] := FData[I]*Value1 + Original.FData[I]*Value2;
2828+ MulMulAdd(Addr(Self.FData[0 ]), Addr(Original.FData[0 ]), Value1, Value2, Self.Size);
27642829end ;
27652830
27662831procedure TVolume.MulAdd (Value : T; PtrB: TNeuralFloatArrPtr);
2767- var
2768- I: integer;
2769- vHigh: integer;
27702832begin
2771- vHigh := High(FData);
2772- for I := 0 to vHigh do
2773- { $IFDEF FPC}
2774- FData[I] += PtrB^[I]*Value ;
2775- { $ELSE}
2776- FData[I] := FData[I] + PtrB^[I]*Value ;
2777- { $ENDIF}
2833+ MulAddPPVS(TNeuralFloatArrPtr(Addr(Self.FData[0 ])), PtrB, Value , Self.Size);
27782834end ;
27792835
27802836procedure TVolume.MulAdd (Original1, Original2: TVolume);
2781- var
2782- I: integer;
2783- vHigh: integer;
27842837begin
2785- vHigh := High(FData);
2786- for I := 0 to vHigh do
2787- FData[I] := FData[I] + Original1.FData[I] * Original2.FData[I];
2838+ { $IFDEF Debug}
2839+ if Original1.Size <> Self.Size then
2840+ raise Exception.Create(' Sizes don'' t match at MulAdd parameter 1: ' +
2841+ IntToStr(Self.Size) + ' and ' + IntToStr(Original1.Size) + ' .' );
2842+ if Original2.Size <> Self.Size then
2843+ raise Exception.Create(' Sizes don'' t match at MulAdd parameter 2: ' +
2844+ IntToStr(Self.Size) + ' and ' + IntToStr(Original2.Size) + ' .' );
2845+ { $ENDIF}
2846+ MulAdd(Addr(Self.FData[0 ]), Addr(Original1.FData[0 ]), Addr(Original2.FData[0 ]), Self.Size);
27882847end ;
27892848
2790- class procedure TVolume.MulAdd (PtrA, PtrB: TNeuralFloatArrPtr; Value : T;
2849+ class procedure TVolume.MulAddPPVS (PtrA, PtrB: TNeuralFloatArrPtr; Value : T;
27912850 pSize: integer);
27922851var
27932852 I: integer;
@@ -2796,10 +2855,11 @@ class procedure TVolume.MulAdd(PtrA, PtrB: TNeuralFloatArrPtr; Value: T;
27962855 AddrA, AddrB: TNeuralFloatPtr;
27972856begin
27982857 BasePos := 0 ;
2799- AddrA := pointer(PtrA);
2800- AddrB := pointer(PtrB);
28012858 vHigh := pSize - 1 ;
28022859
2860+ { $IFDEF FPC}
2861+ AddrA := pointer(PtrA);
2862+ AddrB := pointer(PtrB);
28032863 while BasePos <= vHigh - 7 do
28042864 begin
28052865 (AddrA)^ := (AddrA)^ + (AddrB)^ * Value ;
@@ -2825,6 +2885,7 @@ class procedure TVolume.MulAdd(PtrA, PtrB: TNeuralFloatArrPtr; Value: T;
28252885 AddrA := AddrA + 4 ;
28262886 AddrB := AddrB + 4 ;
28272887 end ;
2888+ { $ENDIF}
28282889
28292890 if BasePos <= vHigh then for I := BasePos to vHigh do
28302891 { $IFDEF FPC}
@@ -2834,6 +2895,56 @@ class procedure TVolume.MulAdd(PtrA, PtrB: TNeuralFloatArrPtr; Value: T;
28342895 { $ENDIF}
28352896end ;
28362897
2898+ class procedure TVolume.MulMulAdd (PtrA, PtrB: TNeuralFloatArrPtr; Value1,
2899+ Value2: T; pSize: integer);
2900+ var
2901+ I: integer;
2902+ vHigh: integer;
2903+ BasePos: integer;
2904+ AddrA, AddrB: TNeuralFloatPtr;
2905+ begin
2906+ BasePos := 0 ;
2907+ vHigh := pSize - 1 ;
2908+ { $IFDEF FPC}
2909+ AddrA := pointer(PtrA);
2910+ AddrB := pointer(PtrB);
2911+ while BasePos <= vHigh - 7 do
2912+ begin
2913+ (AddrA)^ := (AddrA)^ * Value1 + (AddrB)^ * Value2;
2914+ (AddrA+1 )^ := (AddrA+1 )^ * Value1 + (AddrB+1 )^ * Value2;
2915+ (AddrA+2 )^ := (AddrA+2 )^ * Value1 + (AddrB+2 )^ * Value2;
2916+ (AddrA+3 )^ := (AddrA+3 )^ * Value1 + (AddrB+3 )^ * Value2;
2917+ (AddrA+4 )^ := (AddrA+4 )^ * Value1 + (AddrB+4 )^ * Value2;
2918+ (AddrA+5 )^ := (AddrA+5 )^ * Value1 + (AddrB+5 )^ * Value2;
2919+ (AddrA+6 )^ := (AddrA+6 )^ * Value1 + (AddrB+6 )^ * Value2;
2920+ (AddrA+7 )^ := (AddrA+7 )^ * Value1 + (AddrB+7 )^ * Value2;
2921+ BasePos := BasePos + 8 ;
2922+ AddrA := AddrA + 8 ;
2923+ AddrB := AddrB + 8 ;
2924+ end ;
2925+
2926+ while BasePos <= vHigh - 3 do
2927+ begin
2928+ (AddrA)^ := (AddrA)^ * Value1 + (AddrB)^ * Value2;
2929+ (AddrA+1 )^ := (AddrA+1 )^ * Value1 + (AddrB+1 )^ * Value2;
2930+ (AddrA+2 )^ := (AddrA+2 )^ * Value1 + (AddrB+2 )^ * Value2;
2931+ (AddrA+3 )^ := (AddrA+3 )^ * Value1 + (AddrB+3 )^ * Value2;
2932+ BasePos := BasePos + 4 ;
2933+ AddrA := AddrA + 4 ;
2934+ AddrB := AddrB + 4 ;
2935+ end ;
2936+ { $ENDIF}
2937+ if BasePos <= vHigh then for I := BasePos to vHigh do
2938+ PtrA^[I] := PtrA^[I] * Value1 + PtrB^[I] * Value2;
2939+ end ;
2940+
2941+
2942+ class procedure TVolume.MulAdd (PtrA, PtrB: TNeuralFloatArrPtr; Value : T;
2943+ pSize: integer);
2944+ begin
2945+ Self.MulAddPPVS(PtrA, PtrB, Value , pSize);
2946+ end ;
2947+
28372948class procedure TVolume.MulAdd (PtrA, PtrB, PtrC: TNeuralFloatArrPtr;
28382949 pSize: integer);
28392950var
@@ -2847,7 +2958,7 @@ class procedure TVolume.MulAdd(PtrA, PtrB, PtrC: TNeuralFloatArrPtr;
28472958 AddrB := pointer(PtrB);
28482959 AddrC := pointer(PtrC);
28492960 vHigh := pSize - 1 ;
2850-
2961+ { $IFDEF FPC }
28512962 while BasePos <= vHigh - 7 do
28522963 begin
28532964 (AddrA)^ := (AddrA)^ + (AddrB)^ * (AddrC)^;
@@ -2875,7 +2986,7 @@ class procedure TVolume.MulAdd(PtrA, PtrB, PtrC: TNeuralFloatArrPtr;
28752986 AddrB := AddrB + 4 ;
28762987 AddrC := AddrC + 4 ;
28772988 end ;
2878-
2989+ { $ENDIF }
28792990 if BasePos <= vHigh then for I := BasePos to vHigh do
28802991 { $IFDEF FPC}
28812992 PtrA^[I] += PtrB^[I]*PtrC^[I];
@@ -3392,46 +3503,13 @@ procedure TVolume.CopyResizing(Original: TVolume; NewSizeX, NewSizeY: integer);
33923503end ;
33933504
33943505function TVolume.DotProduct (Original: TVolume): T;
3395- var
3396- I: integer;
3397- vHigh: integer;
3398- BasePos: integer;
33993506begin
34003507 { $IFDEF Debug}
34013508 if Original.Size <> Self.Size then
34023509 raise Exception.Create(' Sizes don'' t match at DotProduct: ' +
34033510 IntToStr(Self.Size) + ' and ' + IntToStr(Original.Size) + ' .' );
34043511 { $ENDIF}
3405- Result := 0 ;
3406- vHigh := High(FData);
3407- BasePos := 0 ;
3408-
3409- while BasePos <= vHigh - 7 do
3410- begin
3411- Result := Result +
3412- FData[BasePos] * Original.FData[BasePos] +
3413- FData[BasePos+1 ] * Original.FData[BasePos+1 ] +
3414- FData[BasePos+2 ] * Original.FData[BasePos+2 ] +
3415- FData[BasePos+3 ] * Original.FData[BasePos+3 ] +
3416- FData[BasePos+4 ] * Original.FData[BasePos+4 ] +
3417- FData[BasePos+5 ] * Original.FData[BasePos+5 ] +
3418- FData[BasePos+6 ] * Original.FData[BasePos+6 ] +
3419- FData[BasePos+7 ] * Original.FData[BasePos+7 ];
3420- BasePos := BasePos + 8 ;
3421- end ;
3422-
3423- while BasePos <= vHigh - 3 do
3424- begin
3425- Result := Result +
3426- FData[BasePos] * Original.FData[BasePos] +
3427- FData[BasePos+1 ] * Original.FData[BasePos+1 ] +
3428- FData[BasePos+2 ] * Original.FData[BasePos+2 ] +
3429- FData[BasePos+3 ] * Original.FData[BasePos+3 ];
3430- BasePos := BasePos + 4 ;
3431- end ;
3432-
3433- if BasePos <= vHigh then for I := BasePos to vHigh do
3434- Result := Result + FData[I] * Original.FData[I];
3512+ Result := Self.DotProduct(Addr(Self.FData[0 ]), Addr(Original.FData[0 ]), Self.Size);
34353513end ;
34363514
34373515function TVolume.SumDiff (Original: TVolume): T;
@@ -3460,11 +3538,9 @@ procedure TVolume.DebugDiff(Original: TVolume; Limit: Single);
34603538 vHigh: integer;
34613539 AuxDiff: Single;
34623540begin
3463- { $IFDEF Debug}
34643541 if Original.Size <> Self.Size then
3465- raise Exception.Create(' Sizes don'' t match at SumDiff : ' +
3542+ raise Exception.Create(' Sizes don'' t match at DebugDiff : ' +
34663543 IntToStr(Self.Size) + ' and ' + IntToStr(Original.Size) + ' .' );
3467- { $ENDIF}
34683544 vHigh := High(FData);
34693545 for I := 0 to vHigh do
34703546 begin
@@ -8607,9 +8683,9 @@ class function TVolume.DotProduct(PtrA, PtrB: TNeuralFloatArrPtr; NumElements: i
86078683 Result := 0 ;
86088684 BasePos := 0 ;
86098685 vHigh := NumElements - 1 ;
8686+ { $IFDEF FPC}
86108687 AddrA := pointer(PtrA);
86118688 AddrB := pointer(PtrB);
8612-
86138689 while BasePos <= vHigh - 7 do
86148690 begin
86158691 Result := Result +
@@ -8637,6 +8713,7 @@ class function TVolume.DotProduct(PtrA, PtrB: TNeuralFloatArrPtr; NumElements: i
86378713 AddrA := AddrA + 4 ;
86388714 AddrB := AddrB + 4 ;
86398715 end ;
8716+ { $ENDIF}
86408717
86418718 if BasePos <= vHigh then for I := BasePos to vHigh do
86428719 // Uncomment for debugging only: WriteLn(PtrA^[I]:8:6,' # ', PtrB^[I]:8:6,' # ', Result:8:6);
0 commit comments