1
+ % harmonic-percussive-residual iterative
2
+ function [h , p , r ] = HPRI_SS(x , fs )
3
+ % HPSS first pass with hop 4096
4
+ hopLen = 4096 ;
5
+ winLen = hopLen * 2 ;
6
+ fftLen = winLen * 2 ;
7
+ win = sqrt(hann(winLen ," periodic" ));
8
+
9
+ % STFT of original signal
10
+ S = stft(x , " Window" , win , " OverlapLength" , hopLen , " FFTLength" , fftLen , " Centered" , true );
11
+ halfIdx = 1 : ceil(size(S ,1 )/2 ); % only half the STFT matters
12
+ Shalf = S(halfIdx , : );
13
+ Smag = abs(Shalf ); % use the magnitude STFT for creating masks
14
+
15
+ % median filters
16
+ lHarm = 0.2 /((fftLen - hopLen )/fs ); % 200ms in samples
17
+ H = movmedian(Smag , lHarm , 2 );
18
+ lPerc = 500 /(fs / fftLen ); % 500Hz in samples
19
+ P = movmedian(Smag , lPerc , 1 );
20
+
21
+ % binary masks with separation factor, Driedger et al. 2014
22
+ beta = 2 ;
23
+ Mh = (H ./(P + eps )) > beta ;
24
+ Mp = (P ./(H + eps )) >= beta ;
25
+ Mr = 1 - (H + P );
26
+
27
+ % recover the complex STFT H and P from S using the masks
28
+ H = Mh .* Shalf ;
29
+ P = Mp .* Shalf ;
30
+ R = Mr .* Shalf ;
31
+
32
+ % we previously dropped the redundant second half of the fft - recreate it
33
+ H = cat(1 , H , flipud(conj(H )));
34
+ P = cat(1 , P , flipud(conj(P )));
35
+ R = cat(1 , R , flipud(conj(R )));
36
+
37
+ % finally istft to convert back to audio
38
+ % final harmonic xh1
39
+ h = istft(H , " Window" , win , " OverlapLength" , hopLen , " FFTLength" , fftLen , " ConjugateSymmetric" , true );
40
+ % intermediate xp1, xr1
41
+ p_ = istft(P , " Window" , win , " OverlapLength" , hopLen , " FFTLength" , fftLen , " ConjugateSymmetric" , true );
42
+ r_ = istft(R , " Window" , win , " OverlapLength" , hopLen , " FFTLength" , fftLen , " ConjugateSymmetric" , true );
43
+
44
+ % now we repeat HPSS on xr1+xp1
45
+ x2 = p_ + r_ ;
46
+
47
+ hopLen = 256 ;
48
+ winLen = hopLen * 2 ;
49
+ fftLen = winLen * 2 ;
50
+ win = sqrt(hann(winLen ," periodic" ));
51
+
52
+ % STFT of original signal
53
+ S = stft(x2 , " Window" , win , " OverlapLength" , hopLen , " FFTLength" , fftLen , " Centered" , true );
54
+ halfIdx = 1 : ceil(size(S ,1 )/2 ); % only half the STFT matters
55
+ Shalf = S(halfIdx , : );
56
+ Smag = abs(Shalf ); % use the magnitude STFT for creating masks
57
+
58
+ % median filters
59
+ lHarm = 0.2 /((fftLen - hopLen )/fs ); % 200ms in samples
60
+ H = movmedian(Smag , lHarm , 2 );
61
+ lPerc = 500 /(fs / fftLen ); % 500Hz in samples
62
+ P = movmedian(Smag , lPerc , 1 );
63
+
64
+ % binary masks with separation factor, Driedger et al. 2014
65
+ beta = 2 ;
66
+ Mp = (P ./(H + eps )) >= beta ;
67
+ Mr = 1 - (H + P );
68
+
69
+ % recover the complex STFT H and P from S using the masks
70
+ P = Mp .* Shalf ;
71
+ R = Mr .* Shalf ;
72
+
73
+ % we previously dropped the redundant second half of the fft - recreate it
74
+ P = cat(1 , P , flipud(conj(P )));
75
+ R = cat(1 , R , flipud(conj(R )));
76
+
77
+ % finally istft to convert back to audio
78
+ % h_ = istft(H, "Window", win, "OverlapLength", hopLen, "FFTLength", fftLen, "ConjugateSymmetric", true);
79
+ p = istft(P , " Window" , win , " OverlapLength" , hopLen , " FFTLength" , fftLen , " ConjugateSymmetric" , true );
80
+ r = istft(R , " Window" , win , " OverlapLength" , hopLen , " FFTLength" , fftLen , " ConjugateSymmetric" , true );
81
+ end
0 commit comments