//prototype of SourceSeparation on the SuperCollider server
//The SourceSeparation UGen implements the multiplicative-update algorithm from:
//Daniel D. Lee and H. Sebastian Seung (2001) Algorithms for Non-negative Matrix Factorization.
//Advances in Neural Information Processing Systems 13: Proceedings of the 2000 Conference. MIT Press. pp. 556–562.
//see also http://en.wikipedia.org/wiki/Non-negative_matrix_factorization
//PV_SourceSeparationMask is an implementation of spectral masking from the NMF data, following, e.g.
//B Wang, MD Plumbley (2005) Musical audio stream separation by non-negative matrix factorization. Proc. DMRN summer conf, 2005
//spectrum V = W*H as matrix multiplication of non-negative matrices; V is power spectrum, columns of W source spectral templates, rows of H mixing coefficients over time for a given source
//using FFT size 2048, so hop 1024, i.e. about 43 frames per second (at a 44.1 kHz sample rate). Other FFT sizes should be supported, but have not been extensively tested yet
(
// Dimensions of the factorisation V = W*H, kept in interpreter variables so the
// synth examples further down can read them.

// n: number of useful bins for the FFT of a real signal = half the FFT size + 1
n = 1025;

// m: number of spectral frames captured during training.
// 120 frames is about three seconds of sound; 430 frames would be about ten seconds.
m = 120;

// r: number of sources. 4 here; push up to 10 to get more separation, but beware,
// perfect musical source extraction is difficult!
r = 4;

// W: n*r values, the spectral templates (one per source)
w = Buffer.alloc(s, numFrames: n * r, numChannels: 1);

// H: r*m values, the mixing coefficients of each source over time
h = Buffer.alloc(s, numFrames: r * m, numChannels: 1);

// Manual initialisation is not needed: when analysis is triggered, the W and H
// buffers are reset inside the UGen itself. The earlier manual version is kept
// here, disabled, for reference:
//
// ~warrayprep = 1.0!(n*r); //Array.fill(n*r,{1.0});
// ////w = Buffer.alloc(s,n*r,1)
// w = Buffer.loadCollection(s, ~warrayprep);
//
// ~harrayprep = {gauss(1.0,0.1)}!(r*m);
// h = Buffer.loadCollection(s, ~harrayprep);
)
//when triggered, will collect samples for m frames (120 as set above; 430 frames would be about ten seconds), then run the source separation in a background thread. The W and H matrices will then contain the data needed for source separation via spectral masking
(
// Training on live input: analyse the sound input and hand the FFT chain to
// SourceSeparation, which fills the w and h buffers.
a = {
	var fftSize = 2048;
	var input = SoundIn.ar;
	var chain = FFT(LocalBuf(fftSize), input);

	// r sources, m training frames; the literal 1 is presumably the analysis
	// trigger (TODO confirm against the UGen help); w and h receive the result
	SourceSeparation(chain, r, m, 1, w, h);
}.play
)
//once the factorisation has been calculated, you can free the analysis synth (wait for the NRT process to complete first)
a.free; //or you could actually retrigger it as a live UGen for multiple source separations over time
//there can be large scale factors in some cases; W*H may be reasonable but W could be big... normalisation may be required
//evaluate the two plot lines below one at a time (they have no statement terminators)
w.plot(maxval:10) //numsources fft masks
h.plot(maxval:10) //mixing matrix, amplitude of each source for each frame
(
// Resynthesis of live input through a spectral mask built from the trained
// w and h buffers; the horizontal mouse position picks the source to listen to.
b = {
	var fftSize = 2048;
	var input = SoundIn.ar;
	var chain = FFT(LocalBuf(fftSize), input);

	// source index sweeps 0 .. just below r, so it never reaches r itself
	var whichSource = MouseX.kr(minval: 0, maxval: r - 0.0001);

	// the literal 0 is a mode argument; the sound-file example uses 1 here and
	// describes that as using H over time (TODO confirm exact meaning)
	var masked = PV_SourceSeparationMask(chain, r, whichSource, 0, w, h);

	IFFT(masked);
}.play
)
b.free;
//working on a sound file
//evaluate ONE of the two Buffer.read lines below: each assigns ~soundfile, so running both leaves the first buffer allocated on the server with no variable referring to it
//standard SuperCollider test sound, located via the platform resource directory so the path does not depend on the current working directory
~soundfile = Buffer.read(s, Platform.resourceDir +/+ "sounds/a11wlk01.wav");
//"/data/sussex/talks/ismir2012/quiz/extract1.wav"
//alternative: a machine-specific absolute path; substitute a file of your own
~soundfile = Buffer.read(s,"/data/audio/bigaudio/toanalyse/gospastic4bar.wav");
~soundfile.numChannels //1 expected below; a multichannel file must be mixed down (Mix the PlayBuf output) before passing to the SourceSeparation UGen, which is mono only
//when triggered, will collect samples for m frames (120 as set above; 430 frames would be about ten seconds), then run the source separation in a background thread. The W and H matrices will then contain the data needed for source separation via spectral masking
(
// Training on the loaded sound file: play it once (mono) and hand the FFT
// chain to SourceSeparation, which fills the w and h buffers.
a = {
	var fftSize = 2048;
	// no looping and no doneAction: output goes to silence once played through
	var input = PlayBuf.ar(numChannels: 1, bufnum: ~soundfile);
	var chain = FFT(LocalBuf(fftSize), input);

	// r sources, m training frames; the literal 1 is presumably the analysis
	// trigger (TODO confirm against the UGen help); w and h receive the result
	SourceSeparation(chain, r, m, 1, w, h);
}.play
)
// once the NRT NMF calculation is done, the analysis synth can be freed
a.free;
// Resynthesis of the sound file, using H to control playback over time.
// To hear it from the beginning again, b must be stopped and restarted; a
// loop/retriggering scheme via arguments could be set up instead.
(
b = {
	var fftSize = 2048;
	// doneAction 2 frees the synth when the file has played through
	var input = PlayBuf.ar(numChannels: 1, bufnum: ~soundfile, doneAction: 2);
	var chain = FFT(LocalBuf(fftSize), input);

	// mouse chooses which source to listen to (0 .. just below r)
	var whichSource = MouseX.kr(minval: 0, maxval: r - 0.0001);

	// the literal 1 is a mode argument, presumably selecting the H-driven
	// masking described above (TODO confirm against the UGen help)
	var masked = PV_SourceSeparationMask(chain, r, whichSource, 1, w, h);

	IFFT(masked);
}.play
)
b.free