Import fmit upstream version 0.97.6
[fmit.git] / libs / Music / FreqAnalysis.cpp
1 // Copyright 2004-07 "Gilles Degottex"
2
3 // This file is part of "Music"
4
5 // "Music" is free software; you can redistribute it and/or modify
6 // it under the terms of the GNU Lesser General Public License as published by
7 // the Free Software Foundation; either version 2.1 of the License, or
8 // (at your option) any later version.
9 //
10 // "Music" is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU Lesser General Public License for more details.
14 //
15 // You should have received a copy of the GNU Lesser General Public License
16 // along with this program; if not, write to the Free Software
17 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18
19
20 #include "FreqAnalysis.h"
21
22 #include <assert.h>
23 #include <iostream>
24 using namespace std;
25 using namespace Math;
26 #include "Music.h"
27 #include <CppAddons/Fit.h>
28
29 namespace Music
30 {
31         double PeakRefinementLogParabola(const vector<std::complex<double> > spectrum, int peak_index)
32         {
33                 assert(peak_index>0 && peak_index<int(spectrum.size()-1));
34
35                 if(!(is_peak(spectrum, peak_index)))
36                 {
37                         if(peak_index+1<int(spectrum.size()-1) && is_peak(spectrum, peak_index+1))
38                                 peak_index++;
39                         else if(peak_index-1>0 && is_peak(spectrum, peak_index-1))
40                                 peak_index--;
41                 }
42
43                 double a,b,c,xapex,yapex;
44                 FitParabola(peak_index-1, log(mod(spectrum[peak_index-1])+numeric_limits<double>::min()),
45                                         peak_index, log(mod(spectrum[peak_index])+numeric_limits<double>::min()),
46                                         peak_index+1, log(mod(spectrum[peak_index+1])+numeric_limits<double>::min()),
47                                         a, b, c, xapex, yapex);
48
49                 return xapex;
50         }
51
52         /*!
53          * M. Abe and J. O. Smith III
54          * “Design Criteria for Simple Sinusoidal Parameter Estimation based on Quadratic
55          * Interpolation of FFT Magnitude Peaks AM/FM Rate Estimation and Bias Correction
56          * for Time-Varying Sinusoidal Modeling,”
57          * Convention Paper Audio Engineering Society,
58          * Dept. of Music, Stanford University, August, (2004).
59          */
60         double PeakRefinementLogParabolaUnbiased(const vector<std::complex<double> > spectrum, int peak_index, double zp)
61         {
62                 double f = PeakRefinementLogParabola(spectrum, peak_index);
63
64                 double df = f - peak_index;
65
66                 double c0 = 0.247560;           // hann coefs
67                 double c1 = 0.084372;
68
69                 double xi_zp = c0/(zp*zp) + c1/(zp*zp*zp*zp);
70                 double df2 = xi_zp*(df-0.5)*(df+0.5)*df;
71
72 //              double c2 = −0.090608;
73 //              double c3 = −0.055781;
74 //              double nu_zp = c2/(zp*zp*zp*zp) + c3/(zp*zp*zp*zp*zp*zp);
75 //              double da = nu_zp*df*df;
76
77                 return f+df2;
78         }
79
80         vector<Harmonic> GetHarmonicStruct(const vector<complex<double> >& spectrum, double approx_f0, int nb_harm, double used_zp, double offset_tresh)
81         {
82                 double approx_f0_rel = approx_f0*spectrum.size()/double(GetSamplingRate());
83                 assert(approx_f0_rel>1 && approx_f0_rel<=spectrum.size()/2-1);
84
85                 vector<Harmonic> harms;
86
87                 for(int h=1; h<=nb_harm && int(h*approx_f0_rel)<int(spectrum.size()/2)-1; h++)
88                 {
89                         int c = int(h*approx_f0_rel);
90                         int llimit = int((h-offset_tresh)*approx_f0_rel);
91                         llimit = max(llimit, 1);
92                         int rlimit = int((h+offset_tresh)*approx_f0_rel);
93                         rlimit = min(rlimit, int(spectrum.size()/2)-1);
94                         int peak_index = -1;
95
96                         if(is_peak(spectrum, c))
97                                 peak_index = c;
98                         else
99                         {
100                                 int cl = c;
101                                 bool cl_is_peak = false;
102                                 while(!cl_is_peak && cl-1>llimit)
103                                         cl_is_peak = is_peak(spectrum, --cl);
104
105                                 int cr = c;
106                                 bool cr_is_peak = false;
107                                 while(!cr_is_peak && cr+1<rlimit)
108                                         cr_is_peak = is_peak(spectrum, ++cr);
109
110                                 if(cl_is_peak)
111                                 {
112                                         if(cr_is_peak)
113                                                 peak_index = (c-cl<cr-c)?cl:cr;
114                                         else
115                                                 peak_index = cl;
116                                 }
117                                 else if(cr_is_peak)
118                                         peak_index = cr;
119                         }
120
121                         if(peak_index>0)
122                         {
123                                 Harmonic harm;
124                                 harm.mod = mod(spectrum[peak_index]);
125                                 harm.freq = PeakRefinementLogParabolaUnbiased(spectrum, peak_index, used_zp)*double(GetSamplingRate())/spectrum.size();
126                                 harm.harm_number = h;
127                                 harms.push_back(harm);
128                         }
129                 }
130
131                 return harms;
132         }
133
134         double FundFreqRefinementOfHarmonicStruct(const vector<complex<double> >& spectrum, double approx_f0, int nb_harm, double used_zp)
135         {
136                 double approx_f0_rel = approx_f0*spectrum.size()/double(GetSamplingRate());
137                 assert(approx_f0_rel>1 && approx_f0_rel<=spectrum.size()/2-1);
138
139                 vector<Harmonic> harms = GetHarmonicStruct(spectrum, approx_f0, nb_harm, used_zp, 0.2);
140
141                 if(harms.empty())
142                         return 0.0;
143
144                 double sum_f0 = 0.0;
145
146                 for(size_t i=0; i<harms.size(); i++)
147                         sum_f0 += harms[i].freq/harms[i].harm_number;                           // TODO mod weigthed ???
148
149                 return sum_f0/harms.size();
150         }
151
152 #if 0
153         void SingleResConvolutionTransform::init()
154         {
155                 if(GetSamplingRate()<=0)        return;
156
157                 for(size_t h=0; h<size(); h++)
158                         if(m_convolutions[h]!=NULL)
159                                 delete m_convolutions[h];
160
161                 m_components.resize(GetNbSemitones());
162                 m_convolutions.resize(GetNbSemitones());
163                 m_harmonics.resize(GetNbSemitones());
164                 for(size_t h=0; h<size(); h++)
165                         m_convolutions[h] = new Convolution(m_latency_factor, m_gauss_factor, int(h)+GetSemitoneMin());
166         }
167
168         SingleResConvolutionTransform::SingleResConvolutionTransform(double latency_factor, double gauss_factor)
169         : Transform(0.0, 0.0)
170         , m_latency_factor(latency_factor)
171         , m_gauss_factor(gauss_factor)
172         {
173                 m_convolutions.resize(size());
174                 m_harmonics.resize(size());
175                 for(size_t h=0; h<size(); h++)
176                         m_convolutions[h] = NULL;
177                 init();
178         }
179         void SingleResConvolutionTransform::apply(const deque<double>& buff)
180         {
181                 for(size_t h=0; h<size(); h++)
182                 {
183                         m_is_fondamental[h] = false;
184                         m_convolutions[h]->apply(buff);
185                         m_harmonics[h] = m_convolutions[h]->m_value;
186                         m_components[h] = mod(m_harmonics[h]);
187                 }
188         }
189         SingleResConvolutionTransform::~SingleResConvolutionTransform()
190         {
191                 for(size_t i=0; i<m_convolutions.size(); i++)
192                         delete m_convolutions[i];
193         }
194
195 // NeuralNetGaussAlgo
196         void NeuralNetGaussAlgo::init()
197         {
198                 cerr << "NeuralNetGaussAlgo::init" << endl;
199
200                 SingleResConvolutionTransform::init();
201
202 //              m_fwd_plan = rfftw_create_plan(m_size, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_OUT_OF_PLACE | FFTW_USE_WISDOM);
203         }
204         NeuralNetGaussAlgo::NeuralNetGaussAlgo(double latency_factor, double gauss_factor)
205         : SingleResConvolutionTransform(latency_factor, gauss_factor)
206         {
207                 init();
208         }
209
210         void NeuralNetGaussAlgo::apply(const deque<double>& buff)
211         {
212 //              cerr << "NeuralNetGaussAlgo::apply " << m_components_treshold << endl;
213
214                 m_components_max = 0.0;
215                 for(size_t h=0; h<size(); h++)
216                 {
217                         m_convolutions[h]->apply(buff);
218                         m_harmonics[h] = m_convolutions[h]->m_value;
219                         m_components[h] = mod(m_harmonics[h]);
220                         m_components_max = max(m_components_max, m_components[h]);
221                 }
222
223                 m_first_fond = UNDEFINED_SEMITONE;
224                 for(size_t h=0; h<size(); h++)
225                 {
226                         m_is_fondamental[h] = false;
227
228                         if(m_components[h]/m_components_max>m_components_treshold && m_first_fond==UNDEFINED_SEMITONE)
229                         {
230                                 m_first_fond = h;
231                                 m_is_fondamental[h] = true;
232                         }
233                 }
234         }
235
236         NeuralNetGaussAlgo::~NeuralNetGaussAlgo()
237         {
238         }
239
240 // MonophonicAlgo
241         MonophonicAlgo::MonophonicAlgo(double latency_factor, double gauss_factor)
242         : SingleResConvolutionTransform(latency_factor, gauss_factor)
243         {
244                 init();
245         }
246         int MonophonicAlgo::getSampleAlgoLatency() const
247         {
248                 return m_convolutions[0]->size();
249         }
250         void MonophonicAlgo::apply(const deque<double>& buff)
251         {
252                 for(size_t h=0; h<m_is_fondamental.size(); h++)
253                         m_is_fondamental[h] = false;
254
255                 m_volume_max = 0.0;
256                 m_components_max = 0.0;
257                 m_first_fond = -1;
258
259 //              cout << "buff size=" << buff.size() << " size=" << m_convolutions[m_convolutions.size()-1]->size() << endl;
260
261                 int h;
262                 for(h=size()-1; h>=0 && buff.size()>=m_convolutions[h]->size(); h--)
263                 {
264                         size_t i=0;
265                         if(h!=int(size())-1) i=m_convolutions[h+1]->size();
266                         for(; i<m_convolutions[h]->size(); i++)
267                                 m_volume_max = max(m_volume_max, abs(buff[i]));
268
269                         if(m_volume_max > getVolumeTreshold())
270                         {
271                                 m_convolutions[h]->apply(buff);
272
273                                 double formant_mod = mod(m_convolutions[h]->m_value);
274
275 //                              cerr<<formant_mod<<" "<<getComponentsTreshold()<<endl;
276
277 //                              if(formant_mod > getComponentsTreshold())
278                                 {
279 //                                      m_components[h] = min(formant_mod, max_value);
280                                         m_components[h] = formant_mod;
281                                         m_components_max = max(m_components_max, m_components[h]);
282                                         m_first_fond = h;
283                                 }
284 //                              else m_components[h] = 0.0;
285                         }
286                         else m_components[h] = 0.0;
287                 }
288                 for(;h>=0; h--)
289                         m_components[h] = 0.0;
290
291                 // smash all components below a treshold of the max component
292 //              for(size_t h=0; h<size(); h++)
293 //                      if(m_components[h] < m_dominant_treshold*m_components_max)
294 //                              m_components[h] = 0.0;
295 //
296                 // the note is the first resulting component
297 //              for(size_t h=0; m_first_fond==-1 && h<size(); h++)
298 //                      if(m_components[h] > 0.0)
299 //                              m_first_fond = h;
300
301                 // correction: the note is the nearest maximum of m_note
302 //              if(m_first_fond!=-1)
303 //                      while(m_first_fond+1<int(size()) && m_components[m_first_fond+1] > m_components[m_first_fond])
304 //                              m_first_fond++;
305
306 //              cerr << "m_first_fond=" << m_first_fond << endl;
307         }
308
309         TwoVoiceMHT::TwoVoiceMHT(double AFreq, int dataBySecond, double rep, double win_factor, int minHT, int maxHT)
310         : m_mht(new MHT(AFreq, dataBySecond, rep, win_factor, minHT, maxHT))
311         , m_last_sol(m_mht->m_components.size())
312         {
313                 int nbHT = maxHT - minHT + 1;
314                 m_components.resize(nbHT);
315
316                 for(size_t i=0; i<m_last_sol.size(); i++)
317                         m_last_sol[i] = complex<double>(0.0,0.0);
318         }
319         void TwoVoiceMHT::apply(deque<double>& buff)
320         {
321                 //      cout << "TwoVoiceMHT::apply" << endl;
322
323                 m_mht->apply(buff);
324
325                 //      double earingTreshold = 0.05;
326                 //      double modArgTreshold = 0.2;
327                 //      double modModTreshold = 0.2;
328                 //      ComputeDiffs(m_mht->m_components, fp, argpfp, modfp);
329
330                 //      int count = 0;
331                 for(size_t h=0;h<m_components.size(); h++)
332                 {
333                         //              if(m_mht->m_components[h]!=complex<double>(0.0,0.0) && count<2)
334                         {
335                                 m_components[h] = m_mht->m_components[h];
336
337                                 //                      count++;
338                                 //                      if(fabs(argpfp[0][h])>modArgTreshold)   count++;
339                         }
340                         //              else    m_components[h] = complex<double>(0.0,0.0);
341                 }
342
343                 //      m_last_sol = m_mht->m_components;
344         }
345         TwoVoiceMHT::~TwoVoiceMHT()
346         {
347                 delete m_mht;
348         }
349
350         RemoveSyncMHT::RemoveSyncMHT(double AFreq, int dataBySecond, double rep, double win_factor, int minHT, int maxHT)
351         : m_mht(new MHT(AFreq, dataBySecond, rep, win_factor, minHT, maxHT))
352         , m_last_sol(m_mht->m_components.size())
353         {
354                 int nbHT = maxHT - minHT + 1;
355                 m_components.resize(nbHT);
356
357                 for(size_t i=0; i<m_last_sol.size(); i++)
358                         m_last_sol[i] = complex<double>(0.0,0.0);
359         }
360         void RemoveSyncMHT::apply(deque<double>& buff)
361         {
362                 m_mht->apply(buff);
363
364                 double earingTreshold = 0.05;
365                 double syncArgTreshold = 0.3;   // 0.02 0.25 0.2
366                 //      double syncModTreshold = 0.2;   // 0.05 0.1 0.3
367
368                 double fourier_amplitude = 0.0;
369                 for(size_t h=0; h<m_mht->m_components.size(); h++)
370                         fourier_amplitude = max(fourier_amplitude, normm(m_mht->m_components[h]));
371                 vector<int> notes;
372
373                 for(size_t h=0; h<m_mht->m_components.size(); h++)                       // for each half tone
374                 {
375                         bool is_fond = false;
376
377                         if(normm(m_mht->m_components[h])>earingTreshold*fourier_amplitude) // if we can ear it
378                         {
379                                 is_fond = true;
380
381                                 // search for syncronisation with each discovered fondamentals
382                                 for(size_t i=0; i<notes.size() && is_fond; i++)
383                                 {
384                                         double rk = m_mht->m_convolutions[h]->m_freq/m_mht->m_convolutions[notes[i]]->m_freq;
385                                         int k = int(rk+0.5);
386                                         if(abs(k-rk)<0.05) // TODO              // if k is nearly an integer, a potential harmonic
387                                         {
388                                                 complex<double> ft = m_mht->m_components[notes[i]] / normm(m_mht->m_components[notes[i]]);
389                                                 complex<double> ftm1 = m_last_sol[notes[i]] / normm(m_last_sol[notes[i]]);
390                                                 complex<double> rpt = m_mht->m_components[h]/pow(ft, k);
391                                                 complex<double> rptm1 = m_last_sol[h]/pow(ftm1, k);
392                                                 //                                      if(h==25 && k==4)
393                                                 //                                              cout << abs(log(normm(rpt))-log(normm(rptm1))) << " ";
394                                                 //                                              cout << k << "=(arg=" << abs(arg(rpt)-arg(rptm1)) << " mod=" << abs(log(normm(rpt))-log(normm(rptm1))) << ") ";
395                                                 is_fond = abs(arg(rpt)-arg(rptm1)) > syncArgTreshold;
396                                                 //                                      is_fond = is_fond || abs(log(normm(rpt))-log(normm(rptm1))) > syncModTreshold;
397                                         }
398
399                                         //                              is_fond = false;
400                                 }
401
402                                 if(is_fond) notes.push_back(h);       // it's a fondamentals
403                         }
404
405                         //              cout << endl;
406
407                         if(is_fond)             m_components[h] = m_mht->m_components[h];
408                         else                    m_components[h] = complex<double>(0.0,0.0);
409                 }
410
411                 m_last_sol = m_mht->m_sol;
412         }
413         RemoveSyncMHT::~RemoveSyncMHT()
414         {
415                 delete m_mht;
416         }
417
418 #if 0
419         NeuralNetMHT::NeuralNetMHT(double AFreq, int dataBySecond, double rep, double win_factor, int minHT, int maxHT, const string& file_name)
420         : m_mht(new MHT(AFreq, dataBySecond, rep, win_factor, minHT, maxHT))
421         , m_nn(new LayeredNeuralNet<TypeNeuron>())
422         {
423                 m_nbHT = maxHT - minHT + 1;
424                 m_components.resize(m_nbHT);
425
426                 m_nn->load(file_name.c_str());
427
428                 assert(m_nbHT==m_nn->getInputLayer()->getNeurons().size());
429                 assert(m_nbHT==m_nn->getOutputLayer()->getNeurons().size());
430
431                 cout << "topology: " << m_nn->getInputLayer()->getNeurons().size();
432                 for(LayeredNeuralNet<TypeNeuron>::LayerIterator it = ++(m_nn->m_layerList.begin()); it !=m_nn->m_layerList.end(); it++)
433                         cout << " => " << (*it)->getNeurons().size();
434                 cout << "   [" << m_nn->getNbWeights() << " weights]" << endl;
435         }
436         void NeuralNetMHT::apply(deque<double>& buff)
437         {
438                 //      cout << "NeuralNetMHT::apply" << endl;
439
440                 m_mht->apply(buff);
441
442                 vector<double> inputs(m_nbHT);
443
444                 for(int h=0; h<m_nbHT; h++)
445                         inputs[h] = normm(m_mht->m_components[h]);
446
447                 m_nn->computeOutputs(inputs);
448
449                 for(int h=0; h<m_nbHT; h++)
450                         m_components[h] = complex<double>(m_nn->getOutputLayer()->getNeurons()[h].o);
451         }
452         NeuralNetMHT::~NeuralNetMHT()
453         {
454                 delete m_nn;
455                 delete m_mht;
456         }
457 #endif
458
459 #endif
460 }