Import upstream version 0.99.2
[fmit.git] / libs / Music / FreqAnalysis.cpp
1 // Copyright 2004-07 "Gilles Degottex"
2
3 // This file is part of "Music"
4
5 // "Music" is free software; you can redistribute it and/or modify
6 // it under the terms of the GNU Lesser General Public License as published by
7 // the Free Software Foundation; either version 2.1 of the License, or
8 // (at your option) any later version.
9 //
10 // "Music" is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU Lesser General Public License for more details.
14 //
15 // You should have received a copy of the GNU Lesser General Public License
16 // along with this program; if not, write to the Free Software
17 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18
19
20 #include "FreqAnalysis.h"
21
22 #include <assert.h>
23 #include <iostream>
24 using namespace std;
25 using namespace Math;
26 #include "Music.h"
27 #include <CppAddons/Fit.h>
28
29 namespace Music
30 {
31         double PeakRefinementLogParabola(const vector<std::complex<double> > spectrum, int peak_index)
32         {
33                 assert(peak_index>0 && peak_index<int(spectrum.size()-1));
34
35                 if(!(is_peak(spectrum, peak_index)))
36                 {
37                         if(peak_index+1<int(spectrum.size()-1) && is_peak(spectrum, peak_index+1))
38                                 peak_index++;
39                         else if(peak_index-1>0 && is_peak(spectrum, peak_index-1))
40                                 peak_index--;
41                 }
42
43                 double a,b,c,xapex,yapex;
44                 FitParabola(peak_index-1, log(mod(spectrum[peak_index-1])+numeric_limits<double>::min()),
45                                         peak_index, log(mod(spectrum[peak_index])+numeric_limits<double>::min()),
46                                         peak_index+1, log(mod(spectrum[peak_index+1])+numeric_limits<double>::min()),
47                                         a, b, c, xapex, yapex);
48
49                 return xapex;
50         }
51
52         /*!
53          * M. Abe and J. O. Smith III
54          * “Design Criteria for Simple Sinusoidal Parameter Estimation based on Quadratic
55          * Interpolation of FFT Magnitude Peaks AM/FM Rate Estimation and Bias Correction
56          * for Time-Varying Sinusoidal Modeling,”
57          * Convention Paper Audio Engineering Society,
58          * Dept. of Music, Stanford University, August, (2004).
59          */
60         double PeakRefinementLogParabolaUnbiased(const vector<std::complex<double> > spectrum, int peak_index, double zp)
61         {
62                 double f = PeakRefinementLogParabola(spectrum, peak_index);
63
64                 double df = f - peak_index;
65
66                 double c0 = 0.247560;           // hann coefs
67                 double c1 = 0.084372;
68
69                 double xi_zp = c0/(zp*zp) + c1/(zp*zp*zp*zp);
70                 double df2 = xi_zp*(df-0.5)*(df+0.5)*df;
71
72 //              double c2 = −0.090608;
73 //              double c3 = −0.055781;
74 //              double nu_zp = c2/(zp*zp*zp*zp) + c3/(zp*zp*zp*zp*zp*zp);
75 //              double da = nu_zp*df*df;
76
77                 return f+df2;
78         }
79
80         vector<Harmonic> GetHarmonicStruct(const vector<complex<double> >& spectrum, double approx_f0, int nb_harm, double used_zp, double offset_tresh)
81         {
82                 double approx_f0_rel = approx_f0*spectrum.size()/double(GetSamplingRate());
83                 assert(approx_f0_rel>1 && approx_f0_rel<=spectrum.size()/2-1);
84
85                 vector<Harmonic> harms;
86
87                 for(int h=1; h<=nb_harm && int(h*approx_f0_rel)<int(spectrum.size()/2)-1; h++)
88                 {
89                         int c = int(h*approx_f0_rel);
90                         int llimit = int((h-offset_tresh)*approx_f0_rel);
91                         llimit = max(llimit, 1);
92                         int rlimit = int((h+offset_tresh)*approx_f0_rel);
93                         rlimit = min(rlimit, int(spectrum.size()/2)-1);
94                         int peak_index = -1;
95
96                         if(is_peak(spectrum, c))
97                                 peak_index = c;
98                         else
99                         {
100                                 int cl = c;
101                                 bool cl_is_peak = false;
102                                 while(!cl_is_peak && cl-1>llimit)
103                                         cl_is_peak = is_peak(spectrum, --cl);
104
105                                 int cr = c;
106                                 bool cr_is_peak = false;
107                                 while(!cr_is_peak && cr+1<rlimit)
108                                         cr_is_peak = is_peak(spectrum, ++cr);
109
110                                 if(cl_is_peak)
111                                 {
112                                         if(cr_is_peak)
113                                                 peak_index = (c-cl<cr-c)?cl:cr;
114                                         else
115                                                 peak_index = cl;
116                                 }
117                                 else if(cr_is_peak)
118                                         peak_index = cr;
119                         }
120
121                         if(peak_index>0)
122                         {
123                                 Harmonic harm;
124                                 harm.mod = mod(spectrum[peak_index]);
125                                 harm.freq = PeakRefinementLogParabolaUnbiased(spectrum, peak_index, used_zp)*double(GetSamplingRate())/spectrum.size();
126                                 harm.harm_number = h;
127                                 harms.push_back(harm);
128                         }
129                 }
130
131                 return harms;
132         }
133
134         double FundFreqRefinementOfHarmonicStruct(const vector<complex<double> >& spectrum, double approx_f0, int nb_harm, double used_zp)
135         {
136                 vector<Harmonic> harms = GetHarmonicStruct(spectrum, approx_f0, nb_harm, used_zp, 0.2);
137
138                 if(harms.empty())
139                         return 0.0;
140
141                 double sum_f0 = 0.0;
142
143                 for(size_t i=0; i<harms.size(); i++)
144                         sum_f0 += harms[i].freq/harms[i].harm_number; // TODO mod weigthed ???
145
146                 return sum_f0/harms.size();
147         }
148
149 #if 0
150         void SingleResConvolutionTransform::init()
151         {
152                 if(GetSamplingRate()<=0)        return;
153
154                 for(size_t h=0; h<size(); h++)
155                         if(m_convolutions[h]!=NULL)
156                                 delete m_convolutions[h];
157
158                 m_components.resize(GetNbSemitones());
159                 m_convolutions.resize(GetNbSemitones());
160                 m_harmonics.resize(GetNbSemitones());
161                 for(size_t h=0; h<size(); h++)
162                         m_convolutions[h] = new Convolution(m_latency_factor, m_gauss_factor, int(h)+GetSemitoneMin());
163         }
164
165         SingleResConvolutionTransform::SingleResConvolutionTransform(double latency_factor, double gauss_factor)
166         : Transform(0.0, 0.0)
167         , m_latency_factor(latency_factor)
168         , m_gauss_factor(gauss_factor)
169         {
170                 m_convolutions.resize(size());
171                 m_harmonics.resize(size());
172                 for(size_t h=0; h<size(); h++)
173                         m_convolutions[h] = NULL;
174                 init();
175         }
176         void SingleResConvolutionTransform::apply(const deque<double>& buff)
177         {
178                 for(size_t h=0; h<size(); h++)
179                 {
180                         m_is_fondamental[h] = false;
181                         m_convolutions[h]->apply(buff);
182                         m_harmonics[h] = m_convolutions[h]->m_value;
183                         m_components[h] = mod(m_harmonics[h]);
184                 }
185         }
186         SingleResConvolutionTransform::~SingleResConvolutionTransform()
187         {
188                 for(size_t i=0; i<m_convolutions.size(); i++)
189                         delete m_convolutions[i];
190         }
191
192 // NeuralNetGaussAlgo
193         void NeuralNetGaussAlgo::init()
194         {
195                 cerr << "NeuralNetGaussAlgo::init" << endl;
196
197                 SingleResConvolutionTransform::init();
198
199 //              m_fwd_plan = rfftw_create_plan(m_size, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_OUT_OF_PLACE | FFTW_USE_WISDOM);
200         }
201         NeuralNetGaussAlgo::NeuralNetGaussAlgo(double latency_factor, double gauss_factor)
202         : SingleResConvolutionTransform(latency_factor, gauss_factor)
203         {
204                 init();
205         }
206
207         void NeuralNetGaussAlgo::apply(const deque<double>& buff)
208         {
209 //              cerr << "NeuralNetGaussAlgo::apply " << m_components_treshold << endl;
210
211                 m_components_max = 0.0;
212                 for(size_t h=0; h<size(); h++)
213                 {
214                         m_convolutions[h]->apply(buff);
215                         m_harmonics[h] = m_convolutions[h]->m_value;
216                         m_components[h] = mod(m_harmonics[h]);
217                         m_components_max = max(m_components_max, m_components[h]);
218                 }
219
220                 m_first_fond = UNDEFINED_SEMITONE;
221                 for(size_t h=0; h<size(); h++)
222                 {
223                         m_is_fondamental[h] = false;
224
225                         if(m_components[h]/m_components_max>m_components_treshold && m_first_fond==UNDEFINED_SEMITONE)
226                         {
227                                 m_first_fond = h;
228                                 m_is_fondamental[h] = true;
229                         }
230                 }
231         }
232
233         NeuralNetGaussAlgo::~NeuralNetGaussAlgo()
234         {
235         }
236
237 // MonophonicAlgo
238         MonophonicAlgo::MonophonicAlgo(double latency_factor, double gauss_factor)
239         : SingleResConvolutionTransform(latency_factor, gauss_factor)
240         {
241                 init();
242         }
243         int MonophonicAlgo::getSampleAlgoLatency() const
244         {
245                 return m_convolutions[0]->size();
246         }
247         void MonophonicAlgo::apply(const deque<double>& buff)
248         {
249                 for(size_t h=0; h<m_is_fondamental.size(); h++)
250                         m_is_fondamental[h] = false;
251
252                 m_volume_max = 0.0;
253                 m_components_max = 0.0;
254                 m_first_fond = -1;
255
256 //              cout << "buff size=" << buff.size() << " size=" << m_convolutions[m_convolutions.size()-1]->size() << endl;
257
258                 int h;
259                 for(h=size()-1; h>=0 && buff.size()>=m_convolutions[h]->size(); h--)
260                 {
261                         size_t i=0;
262                         if(h!=int(size())-1) i=m_convolutions[h+1]->size();
263                         for(; i<m_convolutions[h]->size(); i++)
264                                 m_volume_max = max(m_volume_max, abs(buff[i]));
265
266                         if(m_volume_max > getVolumeTreshold())
267                         {
268                                 m_convolutions[h]->apply(buff);
269
270                                 double formant_mod = mod(m_convolutions[h]->m_value);
271
272 //                              cerr<<formant_mod<<" "<<getComponentsTreshold()<<endl;
273
274 //                              if(formant_mod > getComponentsTreshold())
275                                 {
276 //                                      m_components[h] = min(formant_mod, max_value);
277                                         m_components[h] = formant_mod;
278                                         m_components_max = max(m_components_max, m_components[h]);
279                                         m_first_fond = h;
280                                 }
281 //                              else m_components[h] = 0.0;
282                         }
283                         else m_components[h] = 0.0;
284                 }
285                 for(;h>=0; h--)
286                         m_components[h] = 0.0;
287
288                 // smash all components below a treshold of the max component
289 //              for(size_t h=0; h<size(); h++)
290 //                      if(m_components[h] < m_dominant_treshold*m_components_max)
291 //                              m_components[h] = 0.0;
292 //
293                 // the note is the first resulting component
294 //              for(size_t h=0; m_first_fond==-1 && h<size(); h++)
295 //                      if(m_components[h] > 0.0)
296 //                              m_first_fond = h;
297
298                 // correction: the note is the nearest maximum of m_note
299 //              if(m_first_fond!=-1)
300 //                      while(m_first_fond+1<int(size()) && m_components[m_first_fond+1] > m_components[m_first_fond])
301 //                              m_first_fond++;
302
303 //              cerr << "m_first_fond=" << m_first_fond << endl;
304         }
305
306         TwoVoiceMHT::TwoVoiceMHT(double AFreq, int dataBySecond, double rep, double win_factor, int minHT, int maxHT)
307         : m_mht(new MHT(AFreq, dataBySecond, rep, win_factor, minHT, maxHT))
308         , m_last_sol(m_mht->m_components.size())
309         {
310                 int nbHT = maxHT - minHT + 1;
311                 m_components.resize(nbHT);
312
313                 for(size_t i=0; i<m_last_sol.size(); i++)
314                         m_last_sol[i] = complex<double>(0.0,0.0);
315         }
316         void TwoVoiceMHT::apply(deque<double>& buff)
317         {
318                 //      cout << "TwoVoiceMHT::apply" << endl;
319
320                 m_mht->apply(buff);
321
322                 //      double earingTreshold = 0.05;
323                 //      double modArgTreshold = 0.2;
324                 //      double modModTreshold = 0.2;
325                 //      ComputeDiffs(m_mht->m_components, fp, argpfp, modfp);
326
327                 //      int count = 0;
328                 for(size_t h=0;h<m_components.size(); h++)
329                 {
330                         //              if(m_mht->m_components[h]!=complex<double>(0.0,0.0) && count<2)
331                         {
332                                 m_components[h] = m_mht->m_components[h];
333
334                                 //                      count++;
335                                 //                      if(fabs(argpfp[0][h])>modArgTreshold)   count++;
336                         }
337                         //              else    m_components[h] = complex<double>(0.0,0.0);
338                 }
339
340                 //      m_last_sol = m_mht->m_components;
341         }
342         TwoVoiceMHT::~TwoVoiceMHT()
343         {
344                 delete m_mht;
345         }
346
347         RemoveSyncMHT::RemoveSyncMHT(double AFreq, int dataBySecond, double rep, double win_factor, int minHT, int maxHT)
348         : m_mht(new MHT(AFreq, dataBySecond, rep, win_factor, minHT, maxHT))
349         , m_last_sol(m_mht->m_components.size())
350         {
351                 int nbHT = maxHT - minHT + 1;
352                 m_components.resize(nbHT);
353
354                 for(size_t i=0; i<m_last_sol.size(); i++)
355                         m_last_sol[i] = complex<double>(0.0,0.0);
356         }
357         void RemoveSyncMHT::apply(deque<double>& buff)
358         {
359                 m_mht->apply(buff);
360
361                 double earingTreshold = 0.05;
362                 double syncArgTreshold = 0.3;   // 0.02 0.25 0.2
363                 //      double syncModTreshold = 0.2;   // 0.05 0.1 0.3
364
365                 double fourier_amplitude = 0.0;
366                 for(size_t h=0; h<m_mht->m_components.size(); h++)
367                         fourier_amplitude = max(fourier_amplitude, normm(m_mht->m_components[h]));
368                 vector<int> notes;
369
370                 for(size_t h=0; h<m_mht->m_components.size(); h++)                       // for each half tone
371                 {
372                         bool is_fond = false;
373
374                         if(normm(m_mht->m_components[h])>earingTreshold*fourier_amplitude) // if we can ear it
375                         {
376                                 is_fond = true;
377
378                                 // search for syncronisation with each discovered fondamentals
379                                 for(size_t i=0; i<notes.size() && is_fond; i++)
380                                 {
381                                         double rk = m_mht->m_convolutions[h]->m_freq/m_mht->m_convolutions[notes[i]]->m_freq;
382                                         int k = int(rk+0.5);
383                                         if(abs(k-rk)<0.05) // TODO              // if k is nearly an integer, a potential harmonic
384                                         {
385                                                 complex<double> ft = m_mht->m_components[notes[i]] / normm(m_mht->m_components[notes[i]]);
386                                                 complex<double> ftm1 = m_last_sol[notes[i]] / normm(m_last_sol[notes[i]]);
387                                                 complex<double> rpt = m_mht->m_components[h]/pow(ft, k);
388                                                 complex<double> rptm1 = m_last_sol[h]/pow(ftm1, k);
389                                                 //                                      if(h==25 && k==4)
390                                                 //                                              cout << abs(log(normm(rpt))-log(normm(rptm1))) << " ";
391                                                 //                                              cout << k << "=(arg=" << abs(arg(rpt)-arg(rptm1)) << " mod=" << abs(log(normm(rpt))-log(normm(rptm1))) << ") ";
392                                                 is_fond = abs(arg(rpt)-arg(rptm1)) > syncArgTreshold;
393                                                 //                                      is_fond = is_fond || abs(log(normm(rpt))-log(normm(rptm1))) > syncModTreshold;
394                                         }
395
396                                         //                              is_fond = false;
397                                 }
398
399                                 if(is_fond) notes.push_back(h);       // it's a fondamentals
400                         }
401
402                         //              cout << endl;
403
404                         if(is_fond)             m_components[h] = m_mht->m_components[h];
405                         else                    m_components[h] = complex<double>(0.0,0.0);
406                 }
407
408                 m_last_sol = m_mht->m_sol;
409         }
410         RemoveSyncMHT::~RemoveSyncMHT()
411         {
412                 delete m_mht;
413         }
414
415 #if 0
416         NeuralNetMHT::NeuralNetMHT(double AFreq, int dataBySecond, double rep, double win_factor, int minHT, int maxHT, const string& file_name)
417         : m_mht(new MHT(AFreq, dataBySecond, rep, win_factor, minHT, maxHT))
418         , m_nn(new LayeredNeuralNet<TypeNeuron>())
419         {
420                 m_nbHT = maxHT - minHT + 1;
421                 m_components.resize(m_nbHT);
422
423                 m_nn->load(file_name.c_str());
424
425                 assert(m_nbHT==m_nn->getInputLayer()->getNeurons().size());
426                 assert(m_nbHT==m_nn->getOutputLayer()->getNeurons().size());
427
428                 cout << "topology: " << m_nn->getInputLayer()->getNeurons().size();
429                 for(LayeredNeuralNet<TypeNeuron>::LayerIterator it = ++(m_nn->m_layerList.begin()); it !=m_nn->m_layerList.end(); it++)
430                         cout << " => " << (*it)->getNeurons().size();
431                 cout << "   [" << m_nn->getNbWeights() << " weights]" << endl;
432         }
433         void NeuralNetMHT::apply(deque<double>& buff)
434         {
435                 //      cout << "NeuralNetMHT::apply" << endl;
436
437                 m_mht->apply(buff);
438
439                 vector<double> inputs(m_nbHT);
440
441                 for(int h=0; h<m_nbHT; h++)
442                         inputs[h] = normm(m_mht->m_components[h]);
443
444                 m_nn->computeOutputs(inputs);
445
446                 for(int h=0; h<m_nbHT; h++)
447                         m_components[h] = complex<double>(m_nn->getOutputLayer()->getNeurons()[h].o);
448         }
449         NeuralNetMHT::~NeuralNetMHT()
450         {
451                 delete m_nn;
452                 delete m_mht;
453         }
454 #endif
455
456 #endif
457 }