libs/Music/FreqAnalysis.cpp

   1 // Copyright 2004-07 "Gilles Degottex"
   2
   3 // This file is part of "Music"
   4
   5 // "Music" is free software; you can redistribute it and/or modify
   6 // it under the terms of the GNU Lesser General Public License as published by
   7 // the Free Software Foundation; either version 2.1 of the License, or
   8 // (at your option) any later version.
   9 //
  10 // "Music" is distributed in the hope that it will be useful,
  11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13 // GNU Lesser General Public License for more details.
  14 //
  15 // You should have received a copy of the GNU Lesser General Public License
  16 // along with this program; if not, write to the Free Software
  17 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  18
  19
  20 #include "FreqAnalysis.h"
  21
  22 #include <assert.h>
  23 #include <iostream>
  24 using namespace std;
  25 using namespace Math;
  26 #include "Music.h"
  27 #include <CppAddons/Fit.h>
  28
  29 namespace Music
  30 {
  31         double PeakRefinementLogParabola(const vector<std::complex<double> > spectrum, int peak_index)
  32         {
  33                 assert(peak_index>0 && peak_index<int(spectrum.size()-1));
  34
  35                 if(!(is_peak(spectrum, peak_index)))
  36                 {
  37                         if(peak_index+1<int(spectrum.size()-1) && is_peak(spectrum, peak_index+1))
  38                                 peak_index++;
  39                         else if(peak_index-1>0 && is_peak(spectrum, peak_index-1))
  40                                 peak_index--;
  41                 }
  42
  43                 double a,b,c,xapex,yapex;
  44                 FitParabola(peak_index-1, log(mod(spectrum[peak_index-1])+numeric_limits<double>::min()),
  45                                         peak_index, log(mod(spectrum[peak_index])+numeric_limits<double>::min()),
  46                                         peak_index+1, log(mod(spectrum[peak_index+1])+numeric_limits<double>::min()),
  47                                         a, b, c, xapex, yapex);
  48
  49                 return xapex;
  50         }
  51
  52         /*!
  53          * M. Abe and J. O. Smith III
  54          * “Design Criteria for Simple Sinusoidal Parameter Estimation based on Quadratic
  55          * Interpolation of FFT Magnitude Peaks AM/FM Rate Estimation and Bias Correction
  56          * for Time-Varying Sinusoidal Modeling,”
  57          * Convention Paper Audio Engineering Society,
  58          * Dept. of Music, Stanford University, August, (2004).
  59          */
  60         double PeakRefinementLogParabolaUnbiased(const vector<std::complex<double> > spectrum, int peak_index, double zp)
  61         {
  62                 double f = PeakRefinementLogParabola(spectrum, peak_index);
  63
  64                 double df = f - peak_index;
  65
  66                 double c0 = 0.247560;           // hann coefs
  67                 double c1 = 0.084372;
  68
  69                 double xi_zp = c0/(zp*zp) + c1/(zp*zp*zp*zp);
  70                 double df2 = xi_zp*(df-0.5)*(df+0.5)*df;
  71
  72 //              double c2 = −0.090608;
  73 //              double c3 = −0.055781;
  74 //              double nu_zp = c2/(zp*zp*zp*zp) + c3/(zp*zp*zp*zp*zp*zp);
  75 //              double da = nu_zp*df*df;
  76
  77                 return f+df2;
  78         }
  79
  80         vector<Harmonic> GetHarmonicStruct(const vector<complex<double> >& spectrum, double approx_f0, int nb_harm, double used_zp, double offset_tresh)
  81         {
  82                 double approx_f0_rel = approx_f0*spectrum.size()/double(GetSamplingRate());
  83                 assert(approx_f0_rel>1 && approx_f0_rel<=spectrum.size()/2-1);
  84
  85                 vector<Harmonic> harms;
  86
  87                 for(int h=1; h<=nb_harm && int(h*approx_f0_rel)<int(spectrum.size()/2)-1; h++)
  88                 {
  89                         int c = int(h*approx_f0_rel);
  90                         int llimit = int((h-offset_tresh)*approx_f0_rel);
  91                         llimit = max(llimit, 1);
  92                         int rlimit = int((h+offset_tresh)*approx_f0_rel);
  93                         rlimit = min(rlimit, int(spectrum.size()/2)-1);
  94                         int peak_index = -1;
  95
  96                         if(is_peak(spectrum, c))
  97                                 peak_index = c;
  98                         else
  99                         {
 100                                 int cl = c;
 101                                 bool cl_is_peak = false;
 102                                 while(!cl_is_peak && cl-1>llimit)
 103                                         cl_is_peak = is_peak(spectrum, --cl);
 104
 105                                 int cr = c;
 106                                 bool cr_is_peak = false;
 107                                 while(!cr_is_peak && cr+1<rlimit)
 108                                         cr_is_peak = is_peak(spectrum, ++cr);
 109
 110                                 if(cl_is_peak)
 111                                 {
 112                                         if(cr_is_peak)
 113                                                 peak_index = (c-cl<cr-c)?cl:cr;
 114                                         else
 115                                                 peak_index = cl;
 116                                 }
 117                                 else if(cr_is_peak)
 118                                         peak_index = cr;
 119                         }
 120
 121                         if(peak_index>0)
 122                         {
 123                                 Harmonic harm;
 124                                 harm.mod = mod(spectrum[peak_index]);
 125                                 harm.freq = PeakRefinementLogParabolaUnbiased(spectrum, peak_index, used_zp)*double(GetSamplingRate())/spectrum.size();
 126                                 harm.harm_number = h;
 127                                 harms.push_back(harm);
 128                         }
 129                 }
 130
 131                 return harms;
 132         }
 133
 134         double FundFreqRefinementOfHarmonicStruct(const vector<complex<double> >& spectrum, double approx_f0, int nb_harm, double used_zp)
 135         {
 136                 vector<Harmonic> harms = GetHarmonicStruct(spectrum, approx_f0, nb_harm, used_zp, 0.2);
 137
 138                 if(harms.empty())
 139                         return 0.0;
 140
 141                 double sum_f0 = 0.0;
 142
 143                 for(size_t i=0; i<harms.size(); i++)
 144                         sum_f0 += harms[i].freq/harms[i].harm_number; // TODO mod weigthed ???
 145
 146                 return sum_f0/harms.size();
 147         }
 148
 149 #if 0
 150         void SingleResConvolutionTransform::init()
 151         {
 152                 if(GetSamplingRate()<=0)        return;
 153
 154                 for(size_t h=0; h<size(); h++)
 155                         if(m_convolutions[h]!=NULL)
 156                                 delete m_convolutions[h];
 157
 158                 m_components.resize(GetNbSemitones());
 159                 m_convolutions.resize(GetNbSemitones());
 160                 m_harmonics.resize(GetNbSemitones());
 161                 for(size_t h=0; h<size(); h++)
 162                         m_convolutions[h] = new Convolution(m_latency_factor, m_gauss_factor, int(h)+GetSemitoneMin());
 163         }
 164
 165         SingleResConvolutionTransform::SingleResConvolutionTransform(double latency_factor, double gauss_factor)
 166         : Transform(0.0, 0.0)
 167         , m_latency_factor(latency_factor)
 168         , m_gauss_factor(gauss_factor)
 169         {
 170                 m_convolutions.resize(size());
 171                 m_harmonics.resize(size());
 172                 for(size_t h=0; h<size(); h++)
 173                         m_convolutions[h] = NULL;
 174                 init();
 175         }
 176         void SingleResConvolutionTransform::apply(const deque<double>& buff)
 177         {
 178                 for(size_t h=0; h<size(); h++)
 179                 {
 180                         m_is_fondamental[h] = false;
 181                         m_convolutions[h]->apply(buff);
 182                         m_harmonics[h] = m_convolutions[h]->m_value;
 183                         m_components[h] = mod(m_harmonics[h]);
 184                 }
 185         }
 186         SingleResConvolutionTransform::~SingleResConvolutionTransform()
 187         {
 188                 for(size_t i=0; i<m_convolutions.size(); i++)
 189                         delete m_convolutions[i];
 190         }
 191
 192 // NeuralNetGaussAlgo
 193         void NeuralNetGaussAlgo::init()
 194         {
 195                 cerr << "NeuralNetGaussAlgo::init" << endl;
 196
 197                 SingleResConvolutionTransform::init();
 198
 199 //              m_fwd_plan = rfftw_create_plan(m_size, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_OUT_OF_PLACE | FFTW_USE_WISDOM);
 200         }
 201         NeuralNetGaussAlgo::NeuralNetGaussAlgo(double latency_factor, double gauss_factor)
 202         : SingleResConvolutionTransform(latency_factor, gauss_factor)
 203         {
 204                 init();
 205         }
 206
 207         void NeuralNetGaussAlgo::apply(const deque<double>& buff)
 208         {
 209 //              cerr << "NeuralNetGaussAlgo::apply " << m_components_treshold << endl;
 210
 211                 m_components_max = 0.0;
 212                 for(size_t h=0; h<size(); h++)
 213                 {
 214                         m_convolutions[h]->apply(buff);
 215                         m_harmonics[h] = m_convolutions[h]->m_value;
 216                         m_components[h] = mod(m_harmonics[h]);
 217                         m_components_max = max(m_components_max, m_components[h]);
 218                 }
 219
 220                 m_first_fond = UNDEFINED_SEMITONE;
 221                 for(size_t h=0; h<size(); h++)
 222                 {
 223                         m_is_fondamental[h] = false;
 224
 225                         if(m_components[h]/m_components_max>m_components_treshold && m_first_fond==UNDEFINED_SEMITONE)
 226                         {
 227                                 m_first_fond = h;
 228                                 m_is_fondamental[h] = true;
 229                         }
 230                 }
 231         }
 232
 233         NeuralNetGaussAlgo::~NeuralNetGaussAlgo()
 234         {
 235         }
 236
 237 // MonophonicAlgo
 238         MonophonicAlgo::MonophonicAlgo(double latency_factor, double gauss_factor)
 239         : SingleResConvolutionTransform(latency_factor, gauss_factor)
 240         {
 241                 init();
 242         }
 243         int MonophonicAlgo::getSampleAlgoLatency() const
 244         {
 245                 return m_convolutions[0]->size();
 246         }
 247         void MonophonicAlgo::apply(const deque<double>& buff)
 248         {
 249                 for(size_t h=0; h<m_is_fondamental.size(); h++)
 250                         m_is_fondamental[h] = false;
 251
 252                 m_volume_max = 0.0;
 253                 m_components_max = 0.0;
 254                 m_first_fond = -1;
 255
 256 //              cout << "buff size=" << buff.size() << " size=" << m_convolutions[m_convolutions.size()-1]->size() << endl;
 257
 258                 int h;
 259                 for(h=size()-1; h>=0 && buff.size()>=m_convolutions[h]->size(); h--)
 260                 {
 261                         size_t i=0;
 262                         if(h!=int(size())-1) i=m_convolutions[h+1]->size();
 263                         for(; i<m_convolutions[h]->size(); i++)
 264                                 m_volume_max = max(m_volume_max, abs(buff[i]));
 265
 266                         if(m_volume_max > getVolumeTreshold())
 267                         {
 268                                 m_convolutions[h]->apply(buff);
 269
 270                                 double formant_mod = mod(m_convolutions[h]->m_value);
 271
 272 //                              cerr<<formant_mod<<" "<<getComponentsTreshold()<<endl;
 273
 274 //                              if(formant_mod > getComponentsTreshold())
 275                                 {
 276 //                                      m_components[h] = min(formant_mod, max_value);
 277                                         m_components[h] = formant_mod;
 278                                         m_components_max = max(m_components_max, m_components[h]);
 279                                         m_first_fond = h;
 280                                 }
 281 //                              else m_components[h] = 0.0;
 282                         }
 283                         else m_components[h] = 0.0;
 284                 }
 285                 for(;h>=0; h--)
 286                         m_components[h] = 0.0;
 287
 288                 // smash all components below a treshold of the max component
 289 //              for(size_t h=0; h<size(); h++)
 290 //                      if(m_components[h] < m_dominant_treshold*m_components_max)
 291 //                              m_components[h] = 0.0;
 292 //
 293                 // the note is the first resulting component
 294 //              for(size_t h=0; m_first_fond==-1 && h<size(); h++)
 295 //                      if(m_components[h] > 0.0)
 296 //                              m_first_fond = h;
 297
 298                 // correction: the note is the nearest maximum of m_note
 299 //              if(m_first_fond!=-1)
 300 //                      while(m_first_fond+1<int(size()) && m_components[m_first_fond+1] > m_components[m_first_fond])
 301 //                              m_first_fond++;
 302
 303 //              cerr << "m_first_fond=" << m_first_fond << endl;
 304         }
 305
 306         TwoVoiceMHT::TwoVoiceMHT(double AFreq, int dataBySecond, double rep, double win_factor, int minHT, int maxHT)
 307         : m_mht(new MHT(AFreq, dataBySecond, rep, win_factor, minHT, maxHT))
 308         , m_last_sol(m_mht->m_components.size())
 309         {
 310                 int nbHT = maxHT - minHT + 1;
 311                 m_components.resize(nbHT);
 312
 313                 for(size_t i=0; i<m_last_sol.size(); i++)
 314                         m_last_sol[i] = complex<double>(0.0,0.0);
 315         }
 316         void TwoVoiceMHT::apply(deque<double>& buff)
 317         {
 318                 //      cout << "TwoVoiceMHT::apply" << endl;
 319
 320                 m_mht->apply(buff);
 321
 322                 //      double earingTreshold = 0.05;
 323                 //      double modArgTreshold = 0.2;
 324                 //      double modModTreshold = 0.2;
 325                 //      ComputeDiffs(m_mht->m_components, fp, argpfp, modfp);
 326
 327                 //      int count = 0;
 328                 for(size_t h=0;h<m_components.size(); h++)
 329                 {
 330                         //              if(m_mht->m_components[h]!=complex<double>(0.0,0.0) && count<2)
 331                         {
 332                                 m_components[h] = m_mht->m_components[h];
 333
 334                                 //                      count++;
 335                                 //                      if(fabs(argpfp[0][h])>modArgTreshold)   count++;
 336                         }
 337                         //              else    m_components[h] = complex<double>(0.0,0.0);
 338                 }
 339
 340                 //      m_last_sol = m_mht->m_components;
 341         }
 342         TwoVoiceMHT::~TwoVoiceMHT()
 343         {
 344                 delete m_mht;
 345         }
 346
 347         RemoveSyncMHT::RemoveSyncMHT(double AFreq, int dataBySecond, double rep, double win_factor, int minHT, int maxHT)
 348         : m_mht(new MHT(AFreq, dataBySecond, rep, win_factor, minHT, maxHT))
 349         , m_last_sol(m_mht->m_components.size())
 350         {
 351                 int nbHT = maxHT - minHT + 1;
 352                 m_components.resize(nbHT);
 353
 354                 for(size_t i=0; i<m_last_sol.size(); i++)
 355                         m_last_sol[i] = complex<double>(0.0,0.0);
 356         }
 357         void RemoveSyncMHT::apply(deque<double>& buff)
 358         {
 359                 m_mht->apply(buff);
 360
 361                 double earingTreshold = 0.05;
 362                 double syncArgTreshold = 0.3;   // 0.02 0.25 0.2
 363                 //      double syncModTreshold = 0.2;   // 0.05 0.1 0.3
 364
 365                 double fourier_amplitude = 0.0;
 366                 for(size_t h=0; h<m_mht->m_components.size(); h++)
 367                         fourier_amplitude = max(fourier_amplitude, normm(m_mht->m_components[h]));
 368                 vector<int> notes;
 369
 370                 for(size_t h=0; h<m_mht->m_components.size(); h++)                       // for each half tone
 371                 {
 372                         bool is_fond = false;
 373
 374                         if(normm(m_mht->m_components[h])>earingTreshold*fourier_amplitude) // if we can ear it
 375                         {
 376                                 is_fond = true;
 377
 378                                 // search for syncronisation with each discovered fondamentals
 379                                 for(size_t i=0; i<notes.size() && is_fond; i++)
 380                                 {
 381                                         double rk = m_mht->m_convolutions[h]->m_freq/m_mht->m_convolutions[notes[i]]->m_freq;
 382                                         int k = int(rk+0.5);
 383                                         if(abs(k-rk)<0.05) // TODO              // if k is nearly an integer, a potential harmonic
 384                                         {
 385                                                 complex<double> ft = m_mht->m_components[notes[i]] / normm(m_mht->m_components[notes[i]]);
 386                                                 complex<double> ftm1 = m_last_sol[notes[i]] / normm(m_last_sol[notes[i]]);
 387                                                 complex<double> rpt = m_mht->m_components[h]/pow(ft, k);
 388                                                 complex<double> rptm1 = m_last_sol[h]/pow(ftm1, k);
 389                                                 //                                      if(h==25 && k==4)
 390                                                 //                                              cout << abs(log(normm(rpt))-log(normm(rptm1))) << " ";
 391                                                 //                                              cout << k << "=(arg=" << abs(arg(rpt)-arg(rptm1)) << " mod=" << abs(log(normm(rpt))-log(normm(rptm1))) << ") ";
 392                                                 is_fond = abs(arg(rpt)-arg(rptm1)) > syncArgTreshold;
 393                                                 //                                      is_fond = is_fond || abs(log(normm(rpt))-log(normm(rptm1))) > syncModTreshold;
 394                                         }
 395
 396                                         //                              is_fond = false;
 397                                 }
 398
 399                                 if(is_fond) notes.push_back(h);       // it's a fondamentals
 400                         }
 401
 402                         //              cout << endl;
 403
 404                         if(is_fond)             m_components[h] = m_mht->m_components[h];
 405                         else                    m_components[h] = complex<double>(0.0,0.0);
 406                 }
 407
 408                 m_last_sol = m_mht->m_sol;
 409         }
 410         RemoveSyncMHT::~RemoveSyncMHT()
 411         {
 412                 delete m_mht;
 413         }
 414
 415 #if 0
 416         NeuralNetMHT::NeuralNetMHT(double AFreq, int dataBySecond, double rep, double win_factor, int minHT, int maxHT, const string& file_name)
 417         : m_mht(new MHT(AFreq, dataBySecond, rep, win_factor, minHT, maxHT))
 418         , m_nn(new LayeredNeuralNet<TypeNeuron>())
 419         {
 420                 m_nbHT = maxHT - minHT + 1;
 421                 m_components.resize(m_nbHT);
 422
 423                 m_nn->load(file_name.c_str());
 424
 425                 assert(m_nbHT==m_nn->getInputLayer()->getNeurons().size());
 426                 assert(m_nbHT==m_nn->getOutputLayer()->getNeurons().size());
 427
 428                 cout << "topology: " << m_nn->getInputLayer()->getNeurons().size();
 429                 for(LayeredNeuralNet<TypeNeuron>::LayerIterator it = ++(m_nn->m_layerList.begin()); it !=m_nn->m_layerList.end(); it++)
 430                         cout << " => " << (*it)->getNeurons().size();
 431                 cout << "   [" << m_nn->getNbWeights() << " weights]" << endl;
 432         }
 433         void NeuralNetMHT::apply(deque<double>& buff)
 434         {
 435                 //      cout << "NeuralNetMHT::apply" << endl;
 436
 437                 m_mht->apply(buff);
 438
 439                 vector<double> inputs(m_nbHT);
 440
 441                 for(int h=0; h<m_nbHT; h++)
 442                         inputs[h] = normm(m_mht->m_components[h]);
 443
 444                 m_nn->computeOutputs(inputs);
 445
 446                 for(int h=0; h<m_nbHT; h++)
 447                         m_components[h] = complex<double>(m_nn->getOutputLayer()->getNeurons()[h].o);
 448         }
 449         NeuralNetMHT::~NeuralNetMHT()
 450         {
 451                 delete m_nn;
 452                 delete m_mht;
 453         }
 454 #endif
 455
 456 #endif
 457 }