合成混合语音(纯净语音+噪声)和IBM

有纯净语音,有噪声,怎样合成不同信噪比下的混合语音呢?!

function mixwav_IBM(S,db,nois)numChan = 64; %通道数FS = 8000;    %数据的采样频率gamma = gen_gammaton(FS,numChan);ft_env = fir1(1024,[70,400]/FS*2,'bandpass');[v,fs] = wavread(['babble.wav']);v = resample(v,FS,fs);                noi.wav = repmat(v,fix(FS*10/length(v))+1,1);noi.wav = noi.wav(1:FS*10);[~,gf] = fgammaton(noi.wav',gamma,FS,numChan);noi.cch = cochleagram(gf');[w,fsw] = wavread(['F50A1.WAV']); w = resample(w,FS,fsw); [~,gf] = fgammaton(w',gamma,FS,numChan);scch = cochleagram(gf');v = noi.wav;nLen = min(length(w),length(v));w = w(1:nLen); %w是纯净的语音v = v(1:nLen); %v是噪声db = 0;%混合语音的信噪比是0db[m,v] = wavmix(w,v,db);%m是混合语音的波形winsize = 256;  shiftsize = 80;  window = hamming(winsize);  winc_w = getframe(w,winsize,shiftsize,window);%纯净语音分帧  winc_v = getframe(v,winsize,shiftsize,window); %噪声分帧winc_m = getframe(m,winsize,shiftsize,window); %混合语音分帧w_fft = abs(fft(winc_w));%纯净语音的fftv_fft = abs(fft(winc_v));%噪声语音的fftm_fft = abs(fft(winc_m));%混合语音的fftamp = zeros(size(w_fft));%掩码的size和原始频谱图的size一样大amp(w_fft>v_fft) = 1;wavwrite(m,FS,'F50A1_babble.WAV');figure;imagesc(w_fft);title('纯净语音')figure;imagesc(v_fft);title('噪声')figure;imagesc(m_fft);title('混合语音')figure;imagesc(amp);title('IBM')figure;imagesc(m_fft.*amp);title('IBM盖在混合语音语谱图上得到降噪之后的语音语谱')close(figure);%如果正确,那么将IBM盖在混合语音的语谱图上 则会得到和纯净语音一样的语谱图%纯净语音的语谱图与IBM之后的语谱还是不太一样
end      
function [m,v] = wavmix(w,v,db)amp = sqrt(sum(w.^2)/sum(v.^2)/10^(db/10));m = w + v*amp;v = v*amp;
end
  function winc = getframe(wav,winsize,shiftsize,window)  slen = length(wav);  numframe = ceil(slen/shiftsize);  winc = zeros(winsize,numframe);  for n = 1:numframe  st = (n-1)*shiftsize;  ed = min(st+winsize,slen);  winc(1:ed-st,n) = wav(st+1:ed);  winc(:,n) = winc(:,n).*window;          end  
end
function gt = gen_gammaton(sampFreq, numChannel)% Create gammatone filterbank% Written by Yang Shao, and adapted by Xiaojia Zhao in Oct'11if ~exist('sampFreq', 'var')sampFreq = 8000; % sampling frequency, default is 8000endif  ~exist('numChannel', 'var')numChannel = 64; % number of channels, default is 64endstepBound=200000; filterOrder=4;     % filter orderphase(1:numChannel)=zeros(numChannel,1);        % original phaseerb_b=hz2erb([50,sampFreq/2]);       % upper and lower bound of ERBerb=[erb_b(1):diff(erb_b)/(numChannel-1):erb_b(2)];     % ERB segmentcf=erb2hz(erb);       % center frequencyb=1.019*24.7*(4.37*cf/1000+1);       % rate of decaygt=zeros(numChannel,stepBound);tmp_t=[1:stepBound]/sampFreq;for i=1:numChannel%gain=10^((loudness(cf(i))-60)/20)/3*(2*pi*b(i)/sampFreq).^4;gain = (2*pi*b(i)/sampFreq).^4/3;gt(i,:)=gain*sampFreq^3*tmp_t.^(filterOrder-1).*exp(-2*pi*b(i)*tmp_t).*cos(2*pi*cf(i)*tmp_t+phase(i));end
end
function erb=hz2erb(hz)% Convert normal frequency scale in hz to ERB-rate scale.% Units are number of Hz and number of ERBs.% ERB stands for Equivalent Rectangular Bandwidth.% Written by ZZ Jin, and adapted by DLW in Jan'07erb=21.4*log10(4.37e-3*hz+1);end
function hz=erb2hz(erb)% Convert ERB-rate scale to normal frequency scale.% Units are number of ERBs and number of Hz.% ERB stands for Equivalent Rectangular Bandwidth.% Written by ZZ Jin, and adapted by DLW in Jan'07hz=(10.^(erb/21.4)-1)/4.37e-3;
end
function [f, tmp]=fgammaton(sig, gamma, sampFreq, numChannel)% Filter input signal with a gammatone filterbank and generate GF features% sig: input signal% gamma: gammtone filter impulse responses% sampFreq: sampling frequency% numChannel: number of channels% Written by Yang Shao, and adapted by Xiaojia Zhao in Oct'11[ignore,stepBound]=size(sig);d = 2^ceil(log2(length(sig)));tmp =ifft((fft((ones(numChannel,1)*sig)',d).*fft(gamma(:,1:stepBound)',d)));tmp = tmp(1:stepBound,:);f=(abs(resample(abs(tmp),100,sampFreq)')).^(1/3);
end
function a = cochleagram(r, winLength,winShift)% Generate a cochleagram from responses of a Gammatone filterbank.% It gives the log energy of T-F units% The first variable is required.% winLength: window (frame) length in samples% Written by ZZ Jin, and adapted by DLW in Jan'07if nargin < 2winLength = 320;      % default window length in sample points which is 20 ms for 16 KHz sampling frequencyend[numChan,sigLength] = size(r);     % number of channels and input signal lengthif nargin < 3 winShift = winLength/2;        % frame shift (default is half frame)end   increment = winLength/winShift;    % special treatment for first increment-1 framesM = floor(sigLength/winShift);     % number of time framescoswin = (1 + cos(2*pi*(0:winLength-1)/winLength - pi))/2;  % calculate a raised cosine window% calculate energy for each frame in each channela = zeros(numChan,M);for m = 1:M      for i = 1:numChanif m < increment        % shorter frame lengths for beginning framesa(i,m) = r(i,1:m*winShift)*r(i,1:m*winShift)';elsestartpoint = (m-increment)*winShift;a(i,m) = (r(i,startpoint+1:startpoint+winLength)).*coswin*((r(i,startpoint+1:startpoint+winLength)).*coswin)';%             a(i,m) = sum(abs((r(i,startpoint+1:startpoint+winLength))).*coswin);%*((r(i,startpoint+1:startpoint+winLength)))';endendend
end

这里写图片描述

经过掩码掩盖之后,那么怎样从频谱图合成降噪之后的语音呢?!

function wav = syn_mask(n,tt)
% n 噪声谱,n = stft(wav)
% tt 为估计的mask,语音部分为1,其余为0,大小与n相同
if nargin < 2tt = ones(size(n));
end
if size(tt,1)~=129tt = tt';
end
if size(n,1)~=129n = n';
end
wav = istft(n.*tt);
% ang = angle(n);
% wav = istft(exp(tt').*(cos(ang)+1i*sin(ang)))';
function wav = syn_spec(n,tt)
% n 噪声谱,n = stft(wav)
% tt 为估计的幅度谱,大小与n相同
if size(tt,1)==129tt = tt';
end
%wav = istft(n.*tt);
ang = angle(n);
wav = istft((tt').*(cos(ang')+1i*sin(ang')))';`这里写代码片`
function d = stft(x)
s = length(x);
f = 256;win=[0.0800
0.0801
0.0806
0.0813
0.0822
0.0835
0.0850
0.0868
0.0889
0.0913
0.0939
0.0968
0.1000
0.1034
0.1071
0.1111
0.1153
0.1198
0.1245
0.1295
0.1347
0.1402
0.1459
0.1519
0.1581
0.1645
0.1712
0.1781
0.1852
0.1925
0.2001
0.2078
0.2157
0.2239
0.2322
0.2407
0.2494
0.2583
0.2673
0.2765
0.2859
0.2954
0.3051
0.3149
0.3249
0.3350
0.3452
0.3555
0.3659
0.3765
0.3871
0.3979
0.4087
0.4196
0.4305
0.4416
0.4527
0.4638
0.4750
0.4863
0.4976
0.5089
0.5202
0.5315
0.5428
0.5542
0.5655
0.5768
0.5881
0.5993
0.6106
0.6217
0.6329
0.6439
0.6549
0.6659
0.6767
0.6875
0.6982
0.7088
0.7193
0.7297
0.7400
0.7501
0.7601
0.7700
0.7797
0.7893
0.7988
0.8081
0.8172
0.8262
0.8350
0.8436
0.8520
0.8602
0.8683
0.8761
0.8837
0.8912
0.8984
0.9054
0.9121
0.9187
0.9250
0.9311
0.9369
0.9426
0.9479
0.9530
0.9579
0.9625
0.9669
0.9710
0.9748
0.9784
0.9817
0.9847
0.9875
0.9899
0.9922
0.9941
0.9958
0.9972
0.9983
0.9991
0.9997
1.0000
1.0000
0.9997
0.9991
0.9983
0.9972
0.9958
0.9941
0.9922
0.9899
0.9875
0.9847
0.9817
0.9784
0.9748
0.9710
0.9669
0.9625
0.9579
0.9530
0.9479
0.9426
0.9369
0.9311
0.9250
0.9187
0.9121
0.9054
0.8984
0.8912
0.8837
0.8761
0.8683
0.8602
0.8520
0.8436
0.8350
0.8262
0.8172
0.8081
0.7988
0.7893
0.7797
0.7700
0.7601
0.7501
0.7400
0.7297
0.7193
0.7088
0.6982
0.6875
0.6767
0.6659
0.6549
0.6439
0.6329
0.6217
0.6106
0.5993
0.5881
0.5768
0.5655
0.5542
0.5428
0.5315
0.5202
0.5089
0.4976
0.4863
0.4750
0.4638
0.4527
0.4416
0.4305
0.4196
0.4087
0.3979
0.3871
0.3765
0.3659
0.3555
0.3452
0.3350
0.3249
0.3149
0.3051
0.2954
0.2859
0.2765
0.2673
0.2583
0.2494
0.2407
0.2322
0.2239
0.2157
0.2078
0.2001
0.1925
0.1852
0.1781
0.1712
0.1645
0.1581
0.1519
0.1459
0.1402
0.1347
0.1295
0.1245
0.1198
0.1153
0.1111
0.1071
0.1034
0.1000
0.0968
0.0939
0.0913
0.0889
0.0868
0.0850
0.0835
0.0822
0.0813
0.0806
0.0801
0.0800];c = 1;
h = 80;% pre-allocate output array
d = complex(zeros((1+f/2),1+fix((s-f)/h)));for b = 0:h:(s-f)u = win.*x((b+1):(b+f));t = fft(u);d(:,c) = t(1:(1+f/2))';c = c+1;
end;
function x = istft(d)
% X = istft(D, F, W, H)                   Inverse short-time Fourier transform.
%   Performs overlap-add resynthesis from the short-time Fourier transform 
%   data in D.  Each column of D is taken as the result of an F-point 
%   fft; each successive frame was offset by H points. Data is 
%   hamm-windowed at W pts.  
%       W = 0 gives a rectangular window; W as a vector uses that as window.
% dpwe 1994may24.  Uses built-in 'ifft' etc.
% $Header: /homes/dpwe/public_html/resources/matlab/pvoc/RCS/istft.m,v 1.4 2009/01/07 04:20:00 dpwe Exp $ftsize = 256;
h = 80;
s = size(d);cols = s(2);
xlen = ftsize + (cols-1)*h;
x = zeros(1,xlen);win=[0.0800
0.0801
0.0806
0.0813
0.0822
0.0835
0.0850
0.0868
0.0889
0.0913
0.0939
0.0968
0.1000
0.1034
0.1071
0.1111
0.1153
0.1198
0.1245
0.1295
0.1347
0.1402
0.1459
0.1519
0.1581
0.1645
0.1712
0.1781
0.1852
0.1925
0.2001
0.2078
0.2157
0.2239
0.2322
0.2407
0.2494
0.2583
0.2673
0.2765
0.2859
0.2954
0.3051
0.3149
0.3249
0.3350
0.3452
0.3555
0.3659
0.3765
0.3871
0.3979
0.4087
0.4196
0.4305
0.4416
0.4527
0.4638
0.4750
0.4863
0.4976
0.5089
0.5202
0.5315
0.5428
0.5542
0.5655
0.5768
0.5881
0.5993
0.6106
0.6217
0.6329
0.6439
0.6549
0.6659
0.6767
0.6875
0.6982
0.7088
0.7193
0.7297
0.7400
0.7501
0.7601
0.7700
0.7797
0.7893
0.7988
0.8081
0.8172
0.8262
0.8350
0.8436
0.8520
0.8602
0.8683
0.8761
0.8837
0.8912
0.8984
0.9054
0.9121
0.9187
0.9250
0.9311
0.9369
0.9426
0.9479
0.9530
0.9579
0.9625
0.9669
0.9710
0.9748
0.9784
0.9817
0.9847
0.9875
0.9899
0.9922
0.9941
0.9958
0.9972
0.9983
0.9991
0.9997
1.0000
1.0000
0.9997
0.9991
0.9983
0.9972
0.9958
0.9941
0.9922
0.9899
0.9875
0.9847
0.9817
0.9784
0.9748
0.9710
0.9669
0.9625
0.9579
0.9530
0.9479
0.9426
0.9369
0.9311
0.9250
0.9187
0.9121
0.9054
0.8984
0.8912
0.8837
0.8761
0.8683
0.8602
0.8520
0.8436
0.8350
0.8262
0.8172
0.8081
0.7988
0.7893
0.7797
0.7700
0.7601
0.7501
0.7400
0.7297
0.7193
0.7088
0.6982
0.6875
0.6767
0.6659
0.6549
0.6439
0.6329
0.6217
0.6106
0.5993
0.5881
0.5768
0.5655
0.5542
0.5428
0.5315
0.5202
0.5089
0.4976
0.4863
0.4750
0.4638
0.4527
0.4416
0.4305
0.4196
0.4087
0.3979
0.3871
0.3765
0.3659
0.3555
0.3452
0.3350
0.3249
0.3149
0.3051
0.2954
0.2859
0.2765
0.2673
0.2583
0.2494
0.2407
0.2322
0.2239
0.2157
0.2078
0.2001
0.1925
0.1852
0.1781
0.1712
0.1645
0.1581
0.1519
0.1459
0.1402
0.1347
0.1295
0.1245
0.1198
0.1153
0.1111
0.1071
0.1034
0.1000
0.0968
0.0939
0.0913
0.0889
0.0868
0.0850
0.0835
0.0822
0.0813
0.0806
0.0801
0.0800]';for b = 0:h:(h*(cols-1))ft = d(:,1+b/h)';ft = [ft, conj(ft([((ftsize/2)):-1:2]))];px = real(ifft(ft));x((b+1):(b+ftsize)) = x((b+1):(b+ftsize))+px.*win;
end;


本文来自互联网用户投稿,文章观点仅代表作者本人,不代表本站立场,不承担相关法律责任。如若转载,请注明出处。 如若内容造成侵权/违法违规/事实不符,请点击【内容举报】进行投诉反馈!

相关文章

立即
投稿

微信公众账号

微信扫一扫加关注

返回
顶部