libs/Music/FreqAnalysis.cpp

   1 // Copyright 2004-07 "Gilles Degottex"
   2
   3 // This file is part of "Music"
   4
   5 // "Music" is free software; you can redistribute it and/or modify
   6 // it under the terms of the GNU Lesser General Public License as published by
   7 // the Free Software Foundation; either version 2.1 of the License, or
   8 // (at your option) any later version.
   9 //
  10 // "Music" is distributed in the hope that it will be useful,
  11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13 // GNU Lesser General Public License for more details.
  14 //
  15 // You should have received a copy of the GNU Lesser General Public License
  16 // along with this program; if not, write to the Free Software
  17 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  18
  19
  20 #include "FreqAnalysis.h"
  21
  22 #include <assert.h>
  23 #include <iostream>
  24 using namespace std;
  25 using namespace Math;
  26 #include "Music.h"
  27 #include <CppAddons/Fit.h>
  28
  29 namespace Music
  30 {
  31         double PeakRefinementLogParabola(const vector<std::complex<double> > spectrum, int peak_index)
  32         {
  33                 assert(peak_index>0 && peak_index<int(spectrum.size()-1));
  34
  35                 if(!(is_peak(spectrum, peak_index)))
  36                 {
  37                         if(peak_index+1<int(spectrum.size()-1) && is_peak(spectrum, peak_index+1))
  38                                 peak_index++;
  39                         else if(peak_index-1>0 && is_peak(spectrum, peak_index-1))
  40                                 peak_index--;
  41                 }
  42
  43                 double a,b,c,xapex,yapex;
  44                 FitParabola(peak_index-1, log(mod(spectrum[peak_index-1])+numeric_limits<double>::min()),
  45                                         peak_index, log(mod(spectrum[peak_index])+numeric_limits<double>::min()),
  46                                         peak_index+1, log(mod(spectrum[peak_index+1])+numeric_limits<double>::min()),
  47                                         a, b, c, xapex, yapex);
  48
  49                 return xapex;
  50         }
  51
  52         /*!
  53          * M. Abe and J. O. Smith III
  54          * “Design Criteria for Simple Sinusoidal Parameter Estimation based on Quadratic
  55          * Interpolation of FFT Magnitude Peaks AM/FM Rate Estimation and Bias Correction
  56          * for Time-Varying Sinusoidal Modeling,”
  57          * Convention Paper Audio Engineering Society,
  58          * Dept. of Music, Stanford University, August, (2004).
  59          */
  60         double PeakRefinementLogParabolaUnbiased(const vector<std::complex<double> > spectrum, int peak_index, double zp)
  61         {
  62                 double f = PeakRefinementLogParabola(spectrum, peak_index);
  63
  64                 double df = f - peak_index;
  65
  66                 double c0 = 0.247560;           // hann coefs
  67                 double c1 = 0.084372;
  68
  69                 double xi_zp = c0/(zp*zp) + c1/(zp*zp*zp*zp);
  70                 double df2 = xi_zp*(df-0.5)*(df+0.5)*df;
  71
  72 //              double c2 = −0.090608;
  73 //              double c3 = −0.055781;
  74 //              double nu_zp = c2/(zp*zp*zp*zp) + c3/(zp*zp*zp*zp*zp*zp);
  75 //              double da = nu_zp*df*df;
  76
  77                 return f+df2;
  78         }
  79
  80         vector<Harmonic> GetHarmonicStruct(const vector<complex<double> >& spectrum, double approx_f0, int nb_harm, double used_zp, double offset_tresh)
  81         {
  82                 double approx_f0_rel = approx_f0*spectrum.size()/double(GetSamplingRate());
  83                 assert(approx_f0_rel>1 && approx_f0_rel<=spectrum.size()/2-1);
  84
  85                 vector<Harmonic> harms;
  86
  87                 for(int h=1; h<=nb_harm && int(h*approx_f0_rel)<int(spectrum.size()/2)-1; h++)
  88                 {
  89                         int c = int(h*approx_f0_rel);
  90                         int llimit = int((h-offset_tresh)*approx_f0_rel);
  91                         llimit = max(llimit, 1);
  92                         int rlimit = int((h+offset_tresh)*approx_f0_rel);
  93                         rlimit = min(rlimit, int(spectrum.size()/2)-1);
  94                         int peak_index = -1;
  95
  96                         if(is_peak(spectrum, c))
  97                                 peak_index = c;
  98                         else
  99                         {
 100                                 int cl = c;
 101                                 bool cl_is_peak = false;
 102                                 while(!cl_is_peak && cl-1>llimit)
 103                                         cl_is_peak = is_peak(spectrum, --cl);
 104
 105                                 int cr = c;
 106                                 bool cr_is_peak = false;
 107                                 while(!cr_is_peak && cr+1<rlimit)
 108                                         cr_is_peak = is_peak(spectrum, ++cr);
 109
 110                                 if(cl_is_peak)
 111                                 {
 112                                         if(cr_is_peak)
 113                                                 peak_index = (c-cl<cr-c)?cl:cr;
 114                                         else
 115                                                 peak_index = cl;
 116                                 }
 117                                 else if(cr_is_peak)
 118                                         peak_index = cr;
 119                         }
 120
 121                         if(peak_index>0)
 122                         {
 123                                 Harmonic harm;
 124                                 harm.mod = mod(spectrum[peak_index]);
 125                                 harm.freq = PeakRefinementLogParabolaUnbiased(spectrum, peak_index, used_zp)*double(GetSamplingRate())/spectrum.size();
 126                                 harm.harm_number = h;
 127                                 harms.push_back(harm);
 128                         }
 129                 }
 130
 131                 return harms;
 132         }
 133
 134         double FundFreqRefinementOfHarmonicStruct(const vector<complex<double> >& spectrum, double approx_f0, int nb_harm, double used_zp)
 135         {
 136                 double approx_f0_rel = approx_f0*spectrum.size()/double(GetSamplingRate());
 137                 assert(approx_f0_rel>1 && approx_f0_rel<=spectrum.size()/2-1);
 138
 139                 vector<Harmonic> harms = GetHarmonicStruct(spectrum, approx_f0, nb_harm, used_zp, 0.2);
 140
 141                 if(harms.empty())
 142                         return 0.0;
 143
 144                 double sum_f0 = 0.0;
 145
 146                 for(size_t i=0; i<harms.size(); i++)
 147                         sum_f0 += harms[i].freq/harms[i].harm_number;                           // TODO mod weigthed ???
 148
 149                 return sum_f0/harms.size();
 150         }
 151
 152 #if 0
 153         void SingleResConvolutionTransform::init()
 154         {
 155                 if(GetSamplingRate()<=0)        return;
 156
 157                 for(size_t h=0; h<size(); h++)
 158                         if(m_convolutions[h]!=NULL)
 159                                 delete m_convolutions[h];
 160
 161                 m_components.resize(GetNbSemitones());
 162                 m_convolutions.resize(GetNbSemitones());
 163                 m_harmonics.resize(GetNbSemitones());
 164                 for(size_t h=0; h<size(); h++)
 165                         m_convolutions[h] = new Convolution(m_latency_factor, m_gauss_factor, int(h)+GetSemitoneMin());
 166         }
 167
 168         SingleResConvolutionTransform::SingleResConvolutionTransform(double latency_factor, double gauss_factor)
 169         : Transform(0.0, 0.0)
 170         , m_latency_factor(latency_factor)
 171         , m_gauss_factor(gauss_factor)
 172         {
 173                 m_convolutions.resize(size());
 174                 m_harmonics.resize(size());
 175                 for(size_t h=0; h<size(); h++)
 176                         m_convolutions[h] = NULL;
 177                 init();
 178         }
 179         void SingleResConvolutionTransform::apply(const deque<double>& buff)
 180         {
 181                 for(size_t h=0; h<size(); h++)
 182                 {
 183                         m_is_fondamental[h] = false;
 184                         m_convolutions[h]->apply(buff);
 185                         m_harmonics[h] = m_convolutions[h]->m_value;
 186                         m_components[h] = mod(m_harmonics[h]);
 187                 }
 188         }
 189         SingleResConvolutionTransform::~SingleResConvolutionTransform()
 190         {
 191                 for(size_t i=0; i<m_convolutions.size(); i++)
 192                         delete m_convolutions[i];
 193         }
 194
 195 // NeuralNetGaussAlgo
 196         void NeuralNetGaussAlgo::init()
 197         {
 198                 cerr << "NeuralNetGaussAlgo::init" << endl;
 199
 200                 SingleResConvolutionTransform::init();
 201
 202 //              m_fwd_plan = rfftw_create_plan(m_size, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_OUT_OF_PLACE | FFTW_USE_WISDOM);
 203         }
 204         NeuralNetGaussAlgo::NeuralNetGaussAlgo(double latency_factor, double gauss_factor)
 205         : SingleResConvolutionTransform(latency_factor, gauss_factor)
 206         {
 207                 init();
 208         }
 209
 210         void NeuralNetGaussAlgo::apply(const deque<double>& buff)
 211         {
 212 //              cerr << "NeuralNetGaussAlgo::apply " << m_components_treshold << endl;
 213
 214                 m_components_max = 0.0;
 215                 for(size_t h=0; h<size(); h++)
 216                 {
 217                         m_convolutions[h]->apply(buff);
 218                         m_harmonics[h] = m_convolutions[h]->m_value;
 219                         m_components[h] = mod(m_harmonics[h]);
 220                         m_components_max = max(m_components_max, m_components[h]);
 221                 }
 222
 223                 m_first_fond = UNDEFINED_SEMITONE;
 224                 for(size_t h=0; h<size(); h++)
 225                 {
 226                         m_is_fondamental[h] = false;
 227
 228                         if(m_components[h]/m_components_max>m_components_treshold && m_first_fond==UNDEFINED_SEMITONE)
 229                         {
 230                                 m_first_fond = h;
 231                                 m_is_fondamental[h] = true;
 232                         }
 233                 }
 234         }
 235
 236         NeuralNetGaussAlgo::~NeuralNetGaussAlgo()
 237         {
 238         }
 239
 240 // MonophonicAlgo
 241         MonophonicAlgo::MonophonicAlgo(double latency_factor, double gauss_factor)
 242         : SingleResConvolutionTransform(latency_factor, gauss_factor)
 243         {
 244                 init();
 245         }
 246         int MonophonicAlgo::getSampleAlgoLatency() const
 247         {
 248                 return m_convolutions[0]->size();
 249         }
 250         void MonophonicAlgo::apply(const deque<double>& buff)
 251         {
 252                 for(size_t h=0; h<m_is_fondamental.size(); h++)
 253                         m_is_fondamental[h] = false;
 254
 255                 m_volume_max = 0.0;
 256                 m_components_max = 0.0;
 257                 m_first_fond = -1;
 258
 259 //              cout << "buff size=" << buff.size() << " size=" << m_convolutions[m_convolutions.size()-1]->size() << endl;
 260
 261                 int h;
 262                 for(h=size()-1; h>=0 && buff.size()>=m_convolutions[h]->size(); h--)
 263                 {
 264                         size_t i=0;
 265                         if(h!=int(size())-1) i=m_convolutions[h+1]->size();
 266                         for(; i<m_convolutions[h]->size(); i++)
 267                                 m_volume_max = max(m_volume_max, abs(buff[i]));
 268
 269                         if(m_volume_max > getVolumeTreshold())
 270                         {
 271                                 m_convolutions[h]->apply(buff);
 272
 273                                 double formant_mod = mod(m_convolutions[h]->m_value);
 274
 275 //                              cerr<<formant_mod<<" "<<getComponentsTreshold()<<endl;
 276
 277 //                              if(formant_mod > getComponentsTreshold())
 278                                 {
 279 //                                      m_components[h] = min(formant_mod, max_value);
 280                                         m_components[h] = formant_mod;
 281                                         m_components_max = max(m_components_max, m_components[h]);
 282                                         m_first_fond = h;
 283                                 }
 284 //                              else m_components[h] = 0.0;
 285                         }
 286                         else m_components[h] = 0.0;
 287                 }
 288                 for(;h>=0; h--)
 289                         m_components[h] = 0.0;
 290
 291                 // smash all components below a treshold of the max component
 292 //              for(size_t h=0; h<size(); h++)
 293 //                      if(m_components[h] < m_dominant_treshold*m_components_max)
 294 //                              m_components[h] = 0.0;
 295 //
 296                 // the note is the first resulting component
 297 //              for(size_t h=0; m_first_fond==-1 && h<size(); h++)
 298 //                      if(m_components[h] > 0.0)
 299 //                              m_first_fond = h;
 300
 301                 // correction: the note is the nearest maximum of m_note
 302 //              if(m_first_fond!=-1)
 303 //                      while(m_first_fond+1<int(size()) && m_components[m_first_fond+1] > m_components[m_first_fond])
 304 //                              m_first_fond++;
 305
 306 //              cerr << "m_first_fond=" << m_first_fond << endl;
 307         }
 308
 309         TwoVoiceMHT::TwoVoiceMHT(double AFreq, int dataBySecond, double rep, double win_factor, int minHT, int maxHT)
 310         : m_mht(new MHT(AFreq, dataBySecond, rep, win_factor, minHT, maxHT))
 311         , m_last_sol(m_mht->m_components.size())
 312         {
 313                 int nbHT = maxHT - minHT + 1;
 314                 m_components.resize(nbHT);
 315
 316                 for(size_t i=0; i<m_last_sol.size(); i++)
 317                         m_last_sol[i] = complex<double>(0.0,0.0);
 318         }
 319         void TwoVoiceMHT::apply(deque<double>& buff)
 320         {
 321                 //      cout << "TwoVoiceMHT::apply" << endl;
 322
 323                 m_mht->apply(buff);
 324
 325                 //      double earingTreshold = 0.05;
 326                 //      double modArgTreshold = 0.2;
 327                 //      double modModTreshold = 0.2;
 328                 //      ComputeDiffs(m_mht->m_components, fp, argpfp, modfp);
 329
 330                 //      int count = 0;
 331                 for(size_t h=0;h<m_components.size(); h++)
 332                 {
 333                         //              if(m_mht->m_components[h]!=complex<double>(0.0,0.0) && count<2)
 334                         {
 335                                 m_components[h] = m_mht->m_components[h];
 336
 337                                 //                      count++;
 338                                 //                      if(fabs(argpfp[0][h])>modArgTreshold)   count++;
 339                         }
 340                         //              else    m_components[h] = complex<double>(0.0,0.0);
 341                 }
 342
 343                 //      m_last_sol = m_mht->m_components;
 344         }
 345         TwoVoiceMHT::~TwoVoiceMHT()
 346         {
 347                 delete m_mht;
 348         }
 349
 350         RemoveSyncMHT::RemoveSyncMHT(double AFreq, int dataBySecond, double rep, double win_factor, int minHT, int maxHT)
 351         : m_mht(new MHT(AFreq, dataBySecond, rep, win_factor, minHT, maxHT))
 352         , m_last_sol(m_mht->m_components.size())
 353         {
 354                 int nbHT = maxHT - minHT + 1;
 355                 m_components.resize(nbHT);
 356
 357                 for(size_t i=0; i<m_last_sol.size(); i++)
 358                         m_last_sol[i] = complex<double>(0.0,0.0);
 359         }
 360         void RemoveSyncMHT::apply(deque<double>& buff)
 361         {
 362                 m_mht->apply(buff);
 363
 364                 double earingTreshold = 0.05;
 365                 double syncArgTreshold = 0.3;   // 0.02 0.25 0.2
 366                 //      double syncModTreshold = 0.2;   // 0.05 0.1 0.3
 367
 368                 double fourier_amplitude = 0.0;
 369                 for(size_t h=0; h<m_mht->m_components.size(); h++)
 370                         fourier_amplitude = max(fourier_amplitude, normm(m_mht->m_components[h]));
 371                 vector<int> notes;
 372
 373                 for(size_t h=0; h<m_mht->m_components.size(); h++)                       // for each half tone
 374                 {
 375                         bool is_fond = false;
 376
 377                         if(normm(m_mht->m_components[h])>earingTreshold*fourier_amplitude) // if we can ear it
 378                         {
 379                                 is_fond = true;
 380
 381                                 // search for syncronisation with each discovered fondamentals
 382                                 for(size_t i=0; i<notes.size() && is_fond; i++)
 383                                 {
 384                                         double rk = m_mht->m_convolutions[h]->m_freq/m_mht->m_convolutions[notes[i]]->m_freq;
 385                                         int k = int(rk+0.5);
 386                                         if(abs(k-rk)<0.05) // TODO              // if k is nearly an integer, a potential harmonic
 387                                         {
 388                                                 complex<double> ft = m_mht->m_components[notes[i]] / normm(m_mht->m_components[notes[i]]);
 389                                                 complex<double> ftm1 = m_last_sol[notes[i]] / normm(m_last_sol[notes[i]]);
 390                                                 complex<double> rpt = m_mht->m_components[h]/pow(ft, k);
 391                                                 complex<double> rptm1 = m_last_sol[h]/pow(ftm1, k);
 392                                                 //                                      if(h==25 && k==4)
 393                                                 //                                              cout << abs(log(normm(rpt))-log(normm(rptm1))) << " ";
 394                                                 //                                              cout << k << "=(arg=" << abs(arg(rpt)-arg(rptm1)) << " mod=" << abs(log(normm(rpt))-log(normm(rptm1))) << ") ";
 395                                                 is_fond = abs(arg(rpt)-arg(rptm1)) > syncArgTreshold;
 396                                                 //                                      is_fond = is_fond || abs(log(normm(rpt))-log(normm(rptm1))) > syncModTreshold;
 397                                         }
 398
 399                                         //                              is_fond = false;
 400                                 }
 401
 402                                 if(is_fond) notes.push_back(h);       // it's a fondamentals
 403                         }
 404
 405                         //              cout << endl;
 406
 407                         if(is_fond)             m_components[h] = m_mht->m_components[h];
 408                         else                    m_components[h] = complex<double>(0.0,0.0);
 409                 }
 410
 411                 m_last_sol = m_mht->m_sol;
 412         }
 413         RemoveSyncMHT::~RemoveSyncMHT()
 414         {
 415                 delete m_mht;
 416         }
 417
 418 #if 0
 419         NeuralNetMHT::NeuralNetMHT(double AFreq, int dataBySecond, double rep, double win_factor, int minHT, int maxHT, const string& file_name)
 420         : m_mht(new MHT(AFreq, dataBySecond, rep, win_factor, minHT, maxHT))
 421         , m_nn(new LayeredNeuralNet<TypeNeuron>())
 422         {
 423                 m_nbHT = maxHT - minHT + 1;
 424                 m_components.resize(m_nbHT);
 425
 426                 m_nn->load(file_name.c_str());
 427
 428                 assert(m_nbHT==m_nn->getInputLayer()->getNeurons().size());
 429                 assert(m_nbHT==m_nn->getOutputLayer()->getNeurons().size());
 430
 431                 cout << "topology: " << m_nn->getInputLayer()->getNeurons().size();
 432                 for(LayeredNeuralNet<TypeNeuron>::LayerIterator it = ++(m_nn->m_layerList.begin()); it !=m_nn->m_layerList.end(); it++)
 433                         cout << " => " << (*it)->getNeurons().size();
 434                 cout << "   [" << m_nn->getNbWeights() << " weights]" << endl;
 435         }
 436         void NeuralNetMHT::apply(deque<double>& buff)
 437         {
 438                 //      cout << "NeuralNetMHT::apply" << endl;
 439
 440                 m_mht->apply(buff);
 441
 442                 vector<double> inputs(m_nbHT);
 443
 444                 for(int h=0; h<m_nbHT; h++)
 445                         inputs[h] = normm(m_mht->m_components[h]);
 446
 447                 m_nn->computeOutputs(inputs);
 448
 449                 for(int h=0; h<m_nbHT; h++)
 450                         m_components[h] = complex<double>(m_nn->getOutputLayer()->getNeurons()[h].o);
 451         }
 452         NeuralNetMHT::~NeuralNetMHT()
 453         {
 454                 delete m_nn;
 455                 delete m_mht;
 456         }
 457 #endif
 458
 459 #endif
 460 }