关于scikits.talkbox.feature.mfcc的学习解释

import numpy as np
from scipy.io import loadmat
#使用模块scipy.io的loadmat可以实现python对mat数据的读写。
from scipy.signal import lfilter
from scipy.signal.windows import hamming
#lfilter数字滤波器  hamming汉明窗
from scipy.fftpack import fft
#快速傅里叶变换之后生成复数
from scipy.fftpack.realtransforms import dct
#离散余弦变换(DCT),大多数自然信号(声音、图像)的能量都集中在离散余弦变换后的低频部分。
from scikits.talkbox import segment_axis
from mel import hz2mel
# hz2mel (imported just above) converts a frequency in Hertz to the mel scale.


def trfbank(fs, nfft, lowfreq, linsc, logsc, nlinfilt, nlogfilt):
    """Compute the triangular filter bank used for MFCC.

    Parameters
    ----------
    fs : number
        Sampling frequency, in Hz.
    nfft : int
        Number of FFT bins; each filter is expressed over ``nfft`` bins.
    lowfreq : float
        Frequency of the first filter edge, in Hz.
    linsc : float
        Spacing between consecutive linearly spaced edges, in Hz.
    logsc : float
        Multiplicative ratio between consecutive log-spaced edges.
    nlinfilt : int
        Number of linearly spaced filters.
    nlogfilt : int
        Number of log-spaced filters.

    Returns
    -------
    fbank : ndarray, shape (nlinfilt + nlogfilt, nfft)
        Filter coefficients in the FFT domain, one row per filter.
    freqs : ndarray, shape (nlinfilt + nlogfilt + 2,)
        Edge frequencies in Hz; filter ``i`` starts at ``freqs[i]``, peaks
        near ``freqs[i + 1]`` and ends at ``freqs[i + 2]``.
    """
    # Total number of filters.
    nfilt = nlinfilt + nlogfilt

    # ------------------------
    # Compute the filter bank
    # ------------------------
    # Start, centre and end points of the triangular filters in the
    # spectrum: nfilt filters need nfilt + 2 edge frequencies.
    freqs = np.zeros(nfilt + 2)
    freqs[:nlinfilt] = lowfreq + np.arange(nlinfilt) * linsc
    freqs[nlinfilt:] = freqs[nlinfilt - 1] * logsc ** np.arange(1, nlogfilt + 3)
    # Peak height of each triangle: 2 / (width of its base).
    heights = 2. / (freqs[2:] - freqs[0:-2])

    # Filter bank coefficients (in the FFT domain, in bins).
    fbank = np.zeros((nfilt, nfft))
    # Centre frequency of every FFT bin, in Hz.
    nfreqs = np.arange(nfft) / (1. * nfft) * fs
    for i in range(nfilt):
        low = freqs[i]
        cen = freqs[i + 1]
        hi = freqs[i + 2]

        # Bins on the rising edge (low, cen] and the falling edge (cen, hi].
        # The builtin ``int`` replaces ``np.int``, which NumPy 1.24 removed.
        lid = np.arange(np.floor(low * nfft / fs) + 1,
                        np.floor(cen * nfft / fs) + 1, dtype=int)
        lslope = heights[i] / (cen - low)
        rid = np.arange(np.floor(cen * nfft / fs) + 1,
                        np.floor(hi * nfft / fs) + 1, dtype=int)
        rslope = heights[i] / (hi - cen)

        fbank[i][lid] = lslope * (nfreqs[lid] - low)
        fbank[i][rid] = rslope * (hi - nfreqs[rid])

    # Return both the filters and their edge frequencies.
    return fbank, freqs


def mfcc(input, nwin=256, nfft=512, fs=16000, nceps=13):
    """Compute Mel-frequency cepstral coefficients.

    Parameters
    ----------
    input : ndarray
        Input signal from which the coefficients are computed.
    nwin : int
        Analysis window (frame) length, in samples.
    nfft : int
        FFT size used for the magnitude spectrum.
    fs : number
        Sampling frequency, in Hz.
    nceps : int
        Number of cepstral coefficients kept per frame.

    Returns
    -------
    ceps : ndarray
        Mel cepstral coefficients (first ``nceps`` DCT coefficients).
    mspec : ndarray
        Base-10 log of the filter-bank outputs.
    spec : ndarray
        Magnitude spectrum of the windowed frames.

    Notes
    -----
    MFCC are computed as follows:
        * time-domain pre-processing (pre-emphasis);
        * framing, Hamming windowing and magnitude spectrum;
        * filtering of the spectrum with a bank of triangular filters
          (13 linearly spaced followed by 27 log-spaced);
        * DCT of the log spectrum.

    A filter-bank output of exactly zero yields ``-inf`` in ``mspec``.

    References
    ----------
    .. [1] S.B. Davis and P. Mermelstein, "Comparison of parametric
           representations for monosyllabic word recognition in continuously
           spoken sentences", IEEE Trans. Acoustics. Speech, Signal Proc.
           ASSP-28 (4): 357-366, August 1980.
    """
    # MFCC parameters: taken from the Auditory Toolbox.
    # Third argument passed to segment_axis below; presumably the frame
    # overlap (i.e. a 160-sample hop) - confirm against the talkbox docs.
    over = nwin - 160
    # Pre-emphasis factor (accounts for the -6 dB/octave roll-off of the
    # radiation at the lips).
    prefac = 0.97

    lowfreq = 133.33  # approximately 400 / 3 Hz
    linsc = 200 / 3.
    logsc = 1.0711703

    nlinfil = 13
    nlogfil = 27

    # sym=False selects the periodic form of the window.
    w = hamming(nwin, sym=False)

    fbank = trfbank(fs, nfft, lowfreq, linsc, logsc, nlinfil, nlogfil)[0]

    # ------------------
    # Compute the MFCC
    # ------------------
    extract = preemp(input, prefac)
    framed = segment_axis(extract, nwin, over) * w

    # Magnitude spectrum of every frame.
    spec = np.abs(fft(framed, nfft, axis=-1))
    # Filter the spectrum through the triangular filter bank.
    mspec = np.log10(np.dot(spec, fbank.T))
    # Use the DCT to "compress" the coefficients (spectrum -> cepstrum).
    ceps = dct(mspec, type=2, norm='ortho', axis=-1)[:, :nceps]

    return ceps, mspec, spec


def preemp(input, p):
    """Pre-emphasis filter: ``y[n] = x[n] - p * x[n - 1]``."""
    return lfilter([1., -p], 1, input)


if __name__ == '__main__':
    extract = loadmat('extract.mat')['extract']
    # mfcc returns three arrays; unpack them instead of binding the tuple.
    ceps, mspec, spec = mfcc(extract)


本文来自互联网用户投稿,文章观点仅代表作者本人,不代表本站立场,不承担相关法律责任。如若转载,请注明出处。 如若内容造成侵权/违法违规/事实不符,请点击【内容举报】进行投诉反馈!

相关文章

立即
投稿

微信公众账号

微信扫一扫加关注

返回
顶部