1 // Copyright 2004-07 "Gilles Degottex"
3 // This file is part of "Music"
5 // "Music" is free software; you can redistribute it and/or modify
6 // it under the terms of the GNU Lesser General Public License as published by
7 // the Free Software Foundation; either version 2.1 of the License, or
8 // (at your option) any later version.
10 // "Music" is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU Lesser General Public License for more details.
15 // You should have received a copy of the GNU Lesser General Public License
16 // along with this program; if not, write to the Free Software
17 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 #include "FreqAnalysis.h"
27 #include <CppAddons/Fit.h>
31 double PeakRefinementLogParabola(const vector<std::complex<double> > spectrum, int peak_index)
33 assert(peak_index>0 && peak_index<int(spectrum.size()-1));
35 if(!(is_peak(spectrum, peak_index)))
37 if(peak_index+1<int(spectrum.size()-1) && is_peak(spectrum, peak_index+1))
39 else if(peak_index-1>0 && is_peak(spectrum, peak_index-1))
43 double a,b,c,xapex,yapex;
44 FitParabola(peak_index-1, log(mod(spectrum[peak_index-1])+numeric_limits<double>::min()),
45 peak_index, log(mod(spectrum[peak_index])+numeric_limits<double>::min()),
46 peak_index+1, log(mod(spectrum[peak_index+1])+numeric_limits<double>::min()),
47 a, b, c, xapex, yapex);
53 * M. Abe and J. O. Smith III
54 * “Design Criteria for Simple Sinusoidal Parameter Estimation based on Quadratic
55 * Interpolation of FFT Magnitude Peaks AM/FM Rate Estimation and Bias Correction
56 * for Time-Varying Sinusoidal Modeling,”
57 * Convention Paper Audio Engineering Society,
58 * Dept. of Music, Stanford University, August, (2004).
60 double PeakRefinementLogParabolaUnbiased(const vector<std::complex<double> > spectrum, int peak_index, double zp)
62 double f = PeakRefinementLogParabola(spectrum, peak_index);
64 double df = f - peak_index;
66 double c0 = 0.247560; // hann coefs
69 double xi_zp = c0/(zp*zp) + c1/(zp*zp*zp*zp);
70 double df2 = xi_zp*(df-0.5)*(df+0.5)*df;
72 // double c2 = −0.090608;
73 // double c3 = −0.055781;
74 // double nu_zp = c2/(zp*zp*zp*zp) + c3/(zp*zp*zp*zp*zp*zp);
75 // double da = nu_zp*df*df;
80 vector<Harmonic> GetHarmonicStruct(const vector<complex<double> >& spectrum, double approx_f0, int nb_harm, double used_zp, double offset_tresh)
82 double approx_f0_rel = approx_f0*spectrum.size()/double(GetSamplingRate());
83 assert(approx_f0_rel>1 && approx_f0_rel<=spectrum.size()/2-1);
85 vector<Harmonic> harms;
87 for(int h=1; h<=nb_harm && int(h*approx_f0_rel)<int(spectrum.size()/2)-1; h++)
89 int c = int(h*approx_f0_rel);
90 int llimit = int((h-offset_tresh)*approx_f0_rel);
91 llimit = max(llimit, 1);
92 int rlimit = int((h+offset_tresh)*approx_f0_rel);
93 rlimit = min(rlimit, int(spectrum.size()/2)-1);
96 if(is_peak(spectrum, c))
101 bool cl_is_peak = false;
102 while(!cl_is_peak && cl-1>llimit)
103 cl_is_peak = is_peak(spectrum, --cl);
106 bool cr_is_peak = false;
107 while(!cr_is_peak && cr+1<rlimit)
108 cr_is_peak = is_peak(spectrum, ++cr);
113 peak_index = (c-cl<cr-c)?cl:cr;
124 harm.mod = mod(spectrum[peak_index]);
125 harm.freq = PeakRefinementLogParabolaUnbiased(spectrum, peak_index, used_zp)*double(GetSamplingRate())/spectrum.size();
126 harm.harm_number = h;
127 harms.push_back(harm);
134 double FundFreqRefinementOfHarmonicStruct(const vector<complex<double> >& spectrum, double approx_f0, int nb_harm, double used_zp)
136 vector<Harmonic> harms = GetHarmonicStruct(spectrum, approx_f0, nb_harm, used_zp, 0.2);
143 for(size_t i=0; i<harms.size(); i++)
144 sum_f0 += harms[i].freq/harms[i].harm_number; // TODO mod weigthed ???
146 return sum_f0/harms.size();
150 void SingleResConvolutionTransform::init()
152 if(GetSamplingRate()<=0) return;
154 for(size_t h=0; h<size(); h++)
155 if(m_convolutions[h]!=NULL)
156 delete m_convolutions[h];
158 m_components.resize(GetNbSemitones());
159 m_convolutions.resize(GetNbSemitones());
160 m_harmonics.resize(GetNbSemitones());
161 for(size_t h=0; h<size(); h++)
162 m_convolutions[h] = new Convolution(m_latency_factor, m_gauss_factor, int(h)+GetSemitoneMin());
165 SingleResConvolutionTransform::SingleResConvolutionTransform(double latency_factor, double gauss_factor)
166 : Transform(0.0, 0.0)
167 , m_latency_factor(latency_factor)
168 , m_gauss_factor(gauss_factor)
170 m_convolutions.resize(size());
171 m_harmonics.resize(size());
172 for(size_t h=0; h<size(); h++)
173 m_convolutions[h] = NULL;
176 void SingleResConvolutionTransform::apply(const deque<double>& buff)
178 for(size_t h=0; h<size(); h++)
180 m_is_fondamental[h] = false;
181 m_convolutions[h]->apply(buff);
182 m_harmonics[h] = m_convolutions[h]->m_value;
183 m_components[h] = mod(m_harmonics[h]);
186 SingleResConvolutionTransform::~SingleResConvolutionTransform()
188 for(size_t i=0; i<m_convolutions.size(); i++)
189 delete m_convolutions[i];
192 // NeuralNetGaussAlgo
193 void NeuralNetGaussAlgo::init()
195 cerr << "NeuralNetGaussAlgo::init" << endl;
197 SingleResConvolutionTransform::init();
199 // m_fwd_plan = rfftw_create_plan(m_size, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_OUT_OF_PLACE | FFTW_USE_WISDOM);
201 NeuralNetGaussAlgo::NeuralNetGaussAlgo(double latency_factor, double gauss_factor)
202 : SingleResConvolutionTransform(latency_factor, gauss_factor)
207 void NeuralNetGaussAlgo::apply(const deque<double>& buff)
209 // cerr << "NeuralNetGaussAlgo::apply " << m_components_treshold << endl;
211 m_components_max = 0.0;
212 for(size_t h=0; h<size(); h++)
214 m_convolutions[h]->apply(buff);
215 m_harmonics[h] = m_convolutions[h]->m_value;
216 m_components[h] = mod(m_harmonics[h]);
217 m_components_max = max(m_components_max, m_components[h]);
220 m_first_fond = UNDEFINED_SEMITONE;
221 for(size_t h=0; h<size(); h++)
223 m_is_fondamental[h] = false;
225 if(m_components[h]/m_components_max>m_components_treshold && m_first_fond==UNDEFINED_SEMITONE)
228 m_is_fondamental[h] = true;
233 NeuralNetGaussAlgo::~NeuralNetGaussAlgo()
238 MonophonicAlgo::MonophonicAlgo(double latency_factor, double gauss_factor)
239 : SingleResConvolutionTransform(latency_factor, gauss_factor)
243 int MonophonicAlgo::getSampleAlgoLatency() const
245 return m_convolutions[0]->size();
247 void MonophonicAlgo::apply(const deque<double>& buff)
249 for(size_t h=0; h<m_is_fondamental.size(); h++)
250 m_is_fondamental[h] = false;
253 m_components_max = 0.0;
256 // cout << "buff size=" << buff.size() << " size=" << m_convolutions[m_convolutions.size()-1]->size() << endl;
259 for(h=size()-1; h>=0 && buff.size()>=m_convolutions[h]->size(); h--)
262 if(h!=int(size())-1) i=m_convolutions[h+1]->size();
263 for(; i<m_convolutions[h]->size(); i++)
264 m_volume_max = max(m_volume_max, abs(buff[i]));
266 if(m_volume_max > getVolumeTreshold())
268 m_convolutions[h]->apply(buff);
270 double formant_mod = mod(m_convolutions[h]->m_value);
272 // cerr<<formant_mod<<" "<<getComponentsTreshold()<<endl;
274 // if(formant_mod > getComponentsTreshold())
276 // m_components[h] = min(formant_mod, max_value);
277 m_components[h] = formant_mod;
278 m_components_max = max(m_components_max, m_components[h]);
281 // else m_components[h] = 0.0;
283 else m_components[h] = 0.0;
286 m_components[h] = 0.0;
288 // smash all components below a treshold of the max component
289 // for(size_t h=0; h<size(); h++)
290 // if(m_components[h] < m_dominant_treshold*m_components_max)
291 // m_components[h] = 0.0;
293 // the note is the first resulting component
294 // for(size_t h=0; m_first_fond==-1 && h<size(); h++)
295 // if(m_components[h] > 0.0)
298 // correction: the note is the nearest maximum of m_note
299 // if(m_first_fond!=-1)
300 // while(m_first_fond+1<int(size()) && m_components[m_first_fond+1] > m_components[m_first_fond])
303 // cerr << "m_first_fond=" << m_first_fond << endl;
306 TwoVoiceMHT::TwoVoiceMHT(double AFreq, int dataBySecond, double rep, double win_factor, int minHT, int maxHT)
307 : m_mht(new MHT(AFreq, dataBySecond, rep, win_factor, minHT, maxHT))
308 , m_last_sol(m_mht->m_components.size())
310 int nbHT = maxHT - minHT + 1;
311 m_components.resize(nbHT);
313 for(size_t i=0; i<m_last_sol.size(); i++)
314 m_last_sol[i] = complex<double>(0.0,0.0);
316 void TwoVoiceMHT::apply(deque<double>& buff)
318 // cout << "TwoVoiceMHT::apply" << endl;
322 // double earingTreshold = 0.05;
323 // double modArgTreshold = 0.2;
324 // double modModTreshold = 0.2;
325 // ComputeDiffs(m_mht->m_components, fp, argpfp, modfp);
328 for(size_t h=0;h<m_components.size(); h++)
330 // if(m_mht->m_components[h]!=complex<double>(0.0,0.0) && count<2)
332 m_components[h] = m_mht->m_components[h];
335 // if(fabs(argpfp[0][h])>modArgTreshold) count++;
337 // else m_components[h] = complex<double>(0.0,0.0);
340 // m_last_sol = m_mht->m_components;
342 TwoVoiceMHT::~TwoVoiceMHT()
347 RemoveSyncMHT::RemoveSyncMHT(double AFreq, int dataBySecond, double rep, double win_factor, int minHT, int maxHT)
348 : m_mht(new MHT(AFreq, dataBySecond, rep, win_factor, minHT, maxHT))
349 , m_last_sol(m_mht->m_components.size())
351 int nbHT = maxHT - minHT + 1;
352 m_components.resize(nbHT);
354 for(size_t i=0; i<m_last_sol.size(); i++)
355 m_last_sol[i] = complex<double>(0.0,0.0);
357 void RemoveSyncMHT::apply(deque<double>& buff)
361 double earingTreshold = 0.05;
362 double syncArgTreshold = 0.3; // 0.02 0.25 0.2
363 // double syncModTreshold = 0.2; // 0.05 0.1 0.3
365 double fourier_amplitude = 0.0;
366 for(size_t h=0; h<m_mht->m_components.size(); h++)
367 fourier_amplitude = max(fourier_amplitude, normm(m_mht->m_components[h]));
370 for(size_t h=0; h<m_mht->m_components.size(); h++) // for each half tone
372 bool is_fond = false;
374 if(normm(m_mht->m_components[h])>earingTreshold*fourier_amplitude) // if we can ear it
378 // search for syncronisation with each discovered fondamentals
379 for(size_t i=0; i<notes.size() && is_fond; i++)
381 double rk = m_mht->m_convolutions[h]->m_freq/m_mht->m_convolutions[notes[i]]->m_freq;
383 if(abs(k-rk)<0.05) // TODO // if k is nearly an integer, a potential harmonic
385 complex<double> ft = m_mht->m_components[notes[i]] / normm(m_mht->m_components[notes[i]]);
386 complex<double> ftm1 = m_last_sol[notes[i]] / normm(m_last_sol[notes[i]]);
387 complex<double> rpt = m_mht->m_components[h]/pow(ft, k);
388 complex<double> rptm1 = m_last_sol[h]/pow(ftm1, k);
390 // cout << abs(log(normm(rpt))-log(normm(rptm1))) << " ";
391 // cout << k << "=(arg=" << abs(arg(rpt)-arg(rptm1)) << " mod=" << abs(log(normm(rpt))-log(normm(rptm1))) << ") ";
392 is_fond = abs(arg(rpt)-arg(rptm1)) > syncArgTreshold;
393 // is_fond = is_fond || abs(log(normm(rpt))-log(normm(rptm1))) > syncModTreshold;
399 if(is_fond) notes.push_back(h); // it's a fondamentals
404 if(is_fond) m_components[h] = m_mht->m_components[h];
405 else m_components[h] = complex<double>(0.0,0.0);
408 m_last_sol = m_mht->m_sol;
410 RemoveSyncMHT::~RemoveSyncMHT()
416 NeuralNetMHT::NeuralNetMHT(double AFreq, int dataBySecond, double rep, double win_factor, int minHT, int maxHT, const string& file_name)
417 : m_mht(new MHT(AFreq, dataBySecond, rep, win_factor, minHT, maxHT))
418 , m_nn(new LayeredNeuralNet<TypeNeuron>())
420 m_nbHT = maxHT - minHT + 1;
421 m_components.resize(m_nbHT);
423 m_nn->load(file_name.c_str());
425 assert(m_nbHT==m_nn->getInputLayer()->getNeurons().size());
426 assert(m_nbHT==m_nn->getOutputLayer()->getNeurons().size());
428 cout << "topology: " << m_nn->getInputLayer()->getNeurons().size();
429 for(LayeredNeuralNet<TypeNeuron>::LayerIterator it = ++(m_nn->m_layerList.begin()); it !=m_nn->m_layerList.end(); it++)
430 cout << " => " << (*it)->getNeurons().size();
431 cout << " [" << m_nn->getNbWeights() << " weights]" << endl;
433 void NeuralNetMHT::apply(deque<double>& buff)
435 // cout << "NeuralNetMHT::apply" << endl;
439 vector<double> inputs(m_nbHT);
441 for(int h=0; h<m_nbHT; h++)
442 inputs[h] = normm(m_mht->m_components[h]);
444 m_nn->computeOutputs(inputs);
446 for(int h=0; h<m_nbHT; h++)
447 m_components[h] = complex<double>(m_nn->getOutputLayer()->getNeurons()[h].o);
449 NeuralNetMHT::~NeuralNetMHT()