Source code for mlprimitives.candidates.audio_featurization

# -*- coding: utf-8 -*-

import numpy as np
from scipy.fftpack import fft

# Audio featurization functions.

# Small guard value added to denominators and to logarithm arguments in the
# feature functions of this module, so that zero-energy input does not lead
# to a division by zero or to log2(0).
EPSILON = 1e-8


def energy(data):
    """Return the mean squared amplitude of a segment.

    The input is first reduced by averaging over axis 1 (presumably the
    channel axis -- TODO confirm against callers), so ``data`` must have at
    least two dimensions.

    Args:
        data: array-like segment with at least two dimensions.

    Returns:
        The sum of the squared, axis-1-averaged samples divided by the
        number of samples along axis 0.
    """
    mono = np.mean(data, axis=1)
    n_samples = np.float64(len(mono))
    squared_total = np.sum(mono ** 2)
    return squared_total / n_samples
def FFT(data, nFFT):
    """Return the first ``nFFT`` FFT magnitudes of ``data``, normalized.

    Args:
        data: sequence of samples handed to ``scipy.fftpack.fft``.
        nFFT: number of leading frequency bins to keep.

    Returns:
        numpy.ndarray holding the magnitudes of the kept bins, each divided
        by the number of kept bins.
    """
    magnitude = abs(fft(data))[0:nFFT]
    # Normalize with true division. Floor division (``//``) would round every
    # magnitude down to a whole number and wipe out most of the spectrum.
    return magnitude / len(magnitude)
def spectral_entropy(data, numOfShortBlocks=10):
    """Compute the entropy of the spectral energy across sub-bands.

    The segment is averaged over axis 1, its magnitude spectrum is taken
    with ``FFT`` (first half of the bins), and the spectrum is cut into
    ``numOfShortBlocks`` consecutive blocks of equal length. The entropy is
    computed over each block's share of the total spectral energy.

    Args:
        data: array-like segment with at least two dimensions.
        numOfShortBlocks: number of spectral blocks to split into.

    Returns:
        The entropy (base 2) of the normalized block energies.
    """
    mono = np.mean(data, axis=1)
    spectrum = FFT(mono, len(mono) // 2)

    # Total energy is measured on the full spectrum, before any trimming.
    total_energy = np.sum(spectrum ** 2)

    n_bins = len(spectrum)
    block_len = int(np.floor(n_bins / numOfShortBlocks))
    usable = block_len * numOfShortBlocks
    if n_bins != usable:
        # Drop trailing bins that do not fill a whole block.
        spectrum = spectrum[0:usable]

    # Column-major reshape: each column is one consecutive block of bins.
    blocks = spectrum.reshape(block_len, numOfShortBlocks, order='F').copy()

    shares = np.sum(blocks ** 2, axis=0) / (total_energy + EPSILON)
    return -np.sum(shares * np.log2(shares + EPSILON))
def rand_attr1(data):
    """Return the standard deviation of the axis-1-averaged segment.

    Args:
        data: array-like segment with at least two dimensions.

    Returns:
        ``numpy.std`` of the values obtained by averaging ``data`` over
        axis 1.
    """
    averaged = np.array(data).mean(axis=1)
    return averaged.std()
def zcr(data):
    """Compute the zero crossing rate of a segment.

    The segment is averaged over axis 1 and the number of sign changes
    between consecutive samples is divided by the number of sample
    transitions (``len - 1``). A step to or from an exactly-zero sample
    counts as half a crossing.

    Args:
        data: array-like segment with at least two dimensions.

    Returns:
        The zero crossing rate as a ``numpy.float64``.
    """
    mono = np.mean(data, axis=1)
    # Each full sign flip changes sign() by 2, hence the division by 2.
    sign_steps = np.abs(np.diff(np.sign(mono)))
    crossings = np.sum(sign_steps) / 2
    transitions = np.float64(len(mono) - 1.0)
    return np.float64(crossings) / transitions
def spectral_flux(d0, d1):
    """Compute the spectral flux between two segments.

    Both segments are averaged over axis 1 and transformed with ``FFT``
    using the same number of bins (half the length of the shorter one).
    Each spectrum is scaled by its own sum, and the flux is the sum of the
    squared differences between the two scaled spectra.

    Args:
        d0: array-like segment with at least two dimensions.
        d1: array-like segment with at least two dimensions.

    Returns:
        The sum of squared differences of the two normalized spectra.
    """
    frame_a = np.mean(d0, axis=1)
    frame_b = np.mean(d1, axis=1)

    # Use a common bin count so the spectra can be compared element-wise.
    n_bins = min(len(frame_a) // 2, len(frame_b) // 2)
    spec_a = FFT(frame_a, n_bins)
    spec_b = FFT(frame_b, n_bins)

    scaled_a = spec_a / np.sum(spec_a + EPSILON)
    scaled_b = spec_b / np.sum(spec_b + EPSILON)
    return np.sum((scaled_a - scaled_b) ** 2)
def energy_entropy(data, fs, numOfShortBlocks=10):
    """Compute the entropy of the energy across time sub-blocks.

    The segment is averaged over axis 1 and cut into ``numOfShortBlocks``
    consecutive blocks of equal length. The entropy is computed over each
    block's share of the total segment energy.

    Args:
        data: array-like segment with at least two dimensions.
        fs: sampling frequency; accepted for interface compatibility but
            not used by the computation.
        numOfShortBlocks: number of blocks to split the segment into.

    Returns:
        The entropy (base 2) of the normalized block energies.
    """
    mono = np.mean(data, axis=1)

    # Total energy is measured on the full segment, before any trimming.
    total_energy = np.sum(mono ** 2)

    n_samples = len(mono)
    block_len = int(np.floor(n_samples / numOfShortBlocks))
    usable = block_len * numOfShortBlocks
    if n_samples != usable:
        # Drop trailing samples that do not fill a whole block.
        mono = mono[0:usable]

    # Shape is [block_len x numOfShortBlocks]; with column-major order each
    # column holds one consecutive block of samples.
    blocks = mono.reshape(block_len, numOfShortBlocks, order='F').copy()

    shares = np.sum(blocks ** 2, axis=0) / (total_energy + EPSILON)
    return -np.sum(shares * np.log2(shares + EPSILON))
def spectral_centroid_and_spread(data, fs):
    """Compute the normalized spectral centroid and spread of a segment.

    The segment is averaged over axis 1 and its magnitude spectrum is taken
    with ``FFT`` (first half of the bins). The centroid is the
    magnitude-weighted mean frequency and the spread is the
    magnitude-weighted standard deviation around it. Both are divided by
    ``fs / 2``.

    Args:
        data: array-like segment with at least two dimensions.
        fs: sampling frequency used to map bins to frequencies.

    Returns:
        Tuple ``(centroid, spread)``, both normalized by ``fs / 2``. An
        all-zero spectrum (e.g. a silent segment) yields ``(0.0, 0.0)``.
    """
    mono = np.mean(data, axis=1)
    spectrum = FFT(mono, len(mono) // 2)

    n_bins = len(spectrum)
    freqs = np.arange(1, n_bins + 1) * (fs / (2.0 * n_bins))

    # Scale by the peak magnitude. For an all-zero spectrum the peak is 0 and
    # dividing by it would give 0/0 = NaN for every bin, so fall back to
    # EPSILON; the weights then stay at zero and both features come out as 0.
    peak = spectrum.max()
    if peak == 0:
        weights = spectrum / EPSILON
    else:
        weights = spectrum / peak

    weighted_sum = np.sum(freqs * weights)
    weight_total = np.sum(weights) + EPSILON

    centroid = weighted_sum / weight_total
    spread = np.sqrt(np.sum(((freqs - centroid) ** 2) * weights) / weight_total)

    # Normalize both values by fs / 2.
    nyquist = fs / 2.0
    return (centroid / nyquist, spread / nyquist)
def spectral_rolloff(data, coeff):
    """Compute the spectral roll-off position of a segment.

    The roll-off is the first spectral bin at which the cumulative spectral
    energy exceeds ``coeff`` times the total spectral energy, expressed as
    a fraction of the number of bins.

    Args:
        data: array-like segment with at least two dimensions.
        coeff: fraction of the total spectral energy used as threshold.

    Returns:
        The index of the roll-off bin divided by the number of bins, or
        ``0.0`` when no bin exceeds the threshold.
    """
    mono = np.mean(data, axis=1)
    spectrum = FFT(mono, len(mono) // 2)

    power = spectrum ** 2
    threshold = coeff * np.sum(power)
    cumulative = np.cumsum(power) + EPSILON

    above = np.nonzero(cumulative > threshold)[0]
    if len(above) == 0:
        return 0.0
    return np.float64(above[0]) / float(len(spectrum))
def featurize_segments(segments, sample_frequency):
    """Compute a flat list of features for consecutive segments.

    Every segment except the last one contributes nine values, in this
    order: energy, energy entropy, spectral entropy, spectral flux (against
    the following segment), spectral centroid, spectral spread, spectral
    roll-off at 0.90, ``rand_attr1`` and zero crossing rate. The last
    segment is only used as the flux partner of the one before it.

    Args:
        segments: indexable sequence of segments.
        sample_frequency: sampling frequency passed to the feature functions
            that take one.

    Returns:
        A single flat list with the features of all processed segments.
    """
    features = []
    n_pairs = len(segments) - 1
    for idx in range(n_pairs):
        current = segments[idx]
        following = segments[idx + 1]
        centroid, spread = spectral_centroid_and_spread(current, sample_frequency)
        features += [
            energy(current),
            energy_entropy(current, sample_frequency),
            spectral_entropy(current),
            spectral_flux(current, following),
            centroid,
            spread,
            spectral_rolloff(current, .90),
            rand_attr1(current),
            zcr(current),
        ]
    return features
def featurize_audio(X, sample_frequencies):
    """Featurize each audio entry with its matching sample frequency.

    Args:
        X: iterable where each item is the segment sequence of one audio.
        sample_frequencies: iterable of sampling frequencies, paired
            position by position with the items of ``X``.

    Returns:
        A list with one feature list per paired item, as produced by
        ``featurize_segments``.
    """
    return [
        featurize_segments(segments, sample_frequency)
        for segments, sample_frequency in zip(X, sample_frequencies)
    ]