Source code for brian2hears.sounds

from builtins import range
import array as pyarray
import time

import numpy as np
from numpy.fft import fft, ifft, fftfreq
from numpy.random import randn
import matplotlib.pyplot as plt
from scipy.signal import fftconvolve, lfilter
from scipy.special import factorial

try:
    from samplerate import resample
    have_resample = True
except ImportError:
    have_resample = False

from brian2 import (Hz, ms, second, check_units,
                    have_same_dimensions, get_unit, DimensionMismatchError)

from .bufferable import Bufferable
from .prefs import get_samplerate
from .db import dB, dB_type, dB_error

pygame_loaded = False


__all__ = ['BaseSound', 'Sound',
           'pinknoise','brownnoise','powerlawnoise',
           'whitenoise', 'irns', 'irno', 
           'tone', 'click', 'clicks', 'silence', 'sequence', 'harmoniccomplex',
           'loadsound', 'savesound', 'play', 'vowel'
           ]


_mixer_status = [-1,-1]

class BaseSound(Bufferable):
    '''
    Base class for Sound and OnlineSound
    '''
    pass
class Sound(BaseSound, np.ndarray):
    '''
    Class for working with sounds, including loading/saving, manipulating
    and playing.

    For an overview, see :ref:`sounds_overview`.

    **Initialisation**

    The following arguments are used to initialise a sound object

    ``data``
        Can be a filename, an array, a function or a sequence (list or
        tuple). If it's a filename, the sound file (WAV or AIFF) will be
        loaded. If it's an array, it should have shape
        ``(nsamples, nchannels)``. If it's a function, it should be a
        function f(t). If it's a sequence, the items in the sequence can be
        filenames, functions, arrays or Sound objects. The output will be a
        multi-channel sound with channels the corresponding sound for each
        element of the sequence.
    ``samplerate=None``
        The samplerate, if necessary, will use the default (for an array or
        function) or the samplerate of the data (for a filename).
    ``duration=None``
        The duration of the sound, if initialising with a function.

    **Loading, saving and playing**

    .. automethod:: load
    .. automethod:: save
    .. automethod:: play

    **Properties**

    .. autoattribute:: duration
    .. autoattribute:: nsamples
    .. autoattribute:: nchannels
    .. autoattribute:: times
    .. autoattribute:: left
    .. autoattribute:: right
    .. automethod:: channel

    **Generating sounds**

    All sound generating methods can be used with duration arguments in
    samples (int) or units (e.g. 500*ms). One can also set the number of
    channels by setting the keyword argument nchannels to the desired value.
    Notice that for noise the channels will be generated independently.

    .. automethod:: tone
    .. automethod:: whitenoise
    .. automethod:: powerlawnoise
    .. automethod:: brownnoise
    .. automethod:: pinknoise
    .. automethod:: silence
    .. automethod:: click
    .. automethod:: clicks
    .. automethod:: harmoniccomplex
    .. automethod:: vowel

    **Timing and sequencing**

    .. automethod:: sequence(*sounds, samplerate=None)
    .. automethod:: repeat
    .. automethod:: extended
    .. automethod:: shifted
    .. automethod:: resized

    **Slicing**

    One can slice sound objects in various ways, for example
    ``sound[100*ms:200*ms]`` returns the part of the sound between 100 ms
    and 200 ms (not including the right hand end point). If the sound is
    less than 200 ms long it will be zero padded. You can also set values
    using slicing, e.g. ``sound[:50*ms] = 0`` will silence the first 50 ms
    of the sound. The syntax is the same as usual for Python slicing. In
    addition, you can select a subset of the channels: for example,
    ``sound[:, -5:]`` would be the last 5 channels. For time indices,
    either times or samples can be given, e.g. ``sound[:100]`` gives the
    first 100 samples. In addition, steps can be used, for example to
    reverse a sound as ``sound[::-1]``.

    Note that slicing with units of time rather than samples will only work
    in Python 3. In Python 2, you can get the same effect by writing, for
    example, ``sound[slice(0*ms, 10*ms)]``. This is a change from the
    original version of ``brian.hears``.

    **Arithmetic operations**

    Standard arithmetical operations and numpy functions work as you would
    expect with sounds, e.g. ``sound1+sound2``, ``3*sound`` or
    ``abs(sound)``.

    **Level**

    .. autoattribute:: level
    .. automethod:: atlevel
    .. autoattribute:: maxlevel
    .. automethod:: atmaxlevel

    **Ramping**

    .. automethod:: ramp
    .. automethod:: ramped

    **Plotting**

    .. automethod:: spectrogram
    .. automethod:: spectrum
    '''
    duration = property(fget=lambda self: len(self) / self.samplerate,
                        doc='The length of the sound in seconds.')
    nsamples = property(fget=lambda self: len(self),
                        doc='The number of samples in the sound.')
    times = property(fget=lambda self: np.arange(len(self), dtype=float) / self.samplerate,
                     doc='An array of times (in seconds) corresponding to each sample.')
    nchannels = property(fget=lambda self: self.shape[1],
                         doc='The number of channels in the sound.')
    left = property(fget=lambda self: self.channel(0),
                    doc='The left channel for a stereo sound.')
    right = property(fget=lambda self: self.channel(1),
                     doc='The right channel for a stereo sound.')

    @check_units(samplerate=Hz, duration=second)
    def __new__(cls, data, samplerate=None, duration=None):
        if isinstance(data, np.ndarray):
            samplerate = get_samplerate(samplerate)
            if duration is not None:
                raise ValueError('Cannot specify duration when initialising Sound with array.')
            x = np.array(data, dtype=float)
        elif isinstance(data, str):
            if duration is not None:
                raise ValueError('Cannot specify duration when initialising Sound from file.')
            if samplerate is not None:
                raise ValueError('Cannot specify samplerate when initialising Sound from a file.')
            x = Sound.load(data)
            samplerate = x.samplerate
        elif callable(data):
            samplerate = get_samplerate(samplerate)
            if duration is None:
                raise ValueError('Must specify duration to initialise Sound with function.')
            L = int(np.rint(duration * samplerate))
            t = np.arange(L, dtype=float) / samplerate
            x = data(t)
        elif isinstance(data, (list, tuple)):
            kwds = {}
            if samplerate is not None:
                kwds['samplerate'] = samplerate
            if duration is not None:
                kwds['duration'] = duration
            channels = tuple(Sound(c, **kwds) for c in data)
            x = np.hstack(channels)
            samplerate = channels[0].samplerate
        else:
            raise TypeError('Cannot initialise Sound with data of class ' + str(data.__class__))
        if len(x.shape) == 1:
            x.shape = (len(x), 1)
        x = x.view(cls)
        x.samplerate = samplerate
        x.buffer_init()
        return x

    def __array_wrap__(self, obj, context=None):
        x = np.ndarray.__array_wrap__(self, obj, context)
        if not hasattr(x, 'samplerate') and hasattr(self, 'samplerate'):
            x.samplerate = self.samplerate
        return x

    def __array_finalize__(self, obj):
        if obj is None:
            return
        self.samplerate = getattr(obj, 'samplerate', None)

    def buffer_init(self):
        pass

    def buffer_fetch(self, start, end):
        if start < 0:
            raise IndexError('Can only use positive indices in buffer.')
        samples = end - start
        X = np.asarray(self)[start:end, :]
        if X.shape[0] < samples:
            X = np.vstack((X, np.zeros((samples - X.shape[0], X.shape[1]))))
        return X
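A minimal sketch of the construction routes described in the docstring above (the samplerate, frequency and duration values are arbitrary examples, not defaults)::

    from brian2 import Hz, ms
    from brian2hears import Sound
    import numpy as np

    # from an array of shape (nsamples, nchannels)
    snd = Sound(np.zeros((4410, 2)), samplerate=44100*Hz)
    # from a function f(t); t carries units of seconds, so multiply by Hz,
    # and an explicit duration is required
    snd = Sound(lambda t: np.sin(2*np.pi*440*Hz*t),
                samplerate=44100*Hz, duration=100*ms)
    # from a sequence: one channel per element
    snd = Sound([lambda t: np.sin(2*np.pi*440*Hz*t),
                 lambda t: np.sin(2*np.pi*660*Hz*t)],
                samplerate=44100*Hz, duration=100*ms)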
    def channel(self, n):
        '''
        Returns the nth channel of the sound.
        '''
        return Sound(self[:, n], self.samplerate)
    def __add__(self, other):
        if isinstance(other, Sound):
            if int(other.samplerate) > int(self.samplerate):
                self = self.resample(other.samplerate)
            elif int(other.samplerate) < int(self.samplerate):
                other = other.resample(self.samplerate)
            if len(self) > len(other):
                other = other.resized(len(self))
            elif len(self) < len(other):
                self = self.resized(len(other))
            return Sound(np.ndarray.__add__(self, other), samplerate=self.samplerate)
        else:
            x = np.ndarray.__add__(self, other)
            return Sound(x, self.samplerate)
    __radd__ = __add__

    def __getitem__(self, key):
        channel = slice(None)
        if isinstance(key, tuple):
            channel = key[1]
            key = key[0]
        if isinstance(key, int):
            return np.ndarray.__getitem__(self, key)
        if isinstance(key, float):
            return np.ndarray.__getitem__(self, round(key * self.samplerate))
        sliceattr = [v for v in [key.start, key.stop] if v is not None]
        slicedims = np.array([have_same_dimensions(flag, second) for flag in sliceattr])
        attrisint = np.array([isinstance(v, int) for v in sliceattr])
        s = sum(attrisint)
        if s != 0 and s != len(sliceattr):
            raise ValueError('Slice attributes must be all ints or all times')
        if s == len(sliceattr):  # all ints
            start = key.start or 0
            stop = key.stop or self.shape[0]
            step = key.step or 1
            if start >= 0 and stop <= self.shape[0]:
                return Sound(np.ndarray.__getitem__(self, (key, channel)),
                             self.samplerate)
            else:
                startpad = max(-start, 0)
                endpad = max(stop - self.shape[0], 0)
                startmid = max(start, 0)
                endmid = min(stop, self.shape[0])
                atstart = np.zeros((startpad, self.shape[1]))
                atend = np.zeros((endpad, self.shape[1]))
                return Sound(np.vstack((atstart,
                                        np.asarray(self)[startmid:endmid:step],
                                        atend)), self.samplerate)
        if not slicedims.all():
            raise DimensionMismatchError('Slicing', *[get_unit(d) for d in sliceattr])
        start = key.start or 0*ms
        stop = key.stop or self.duration
        step = key.step or 1
        if int(step) != step:  # resampling
            raise NotImplementedError
        start = int(np.rint(start * self.samplerate))
        stop = int(np.rint(stop * self.samplerate))
        return self.__getitem__((slice(start, stop, step), channel))

    def __setitem__(self, key, value):
        channel = slice(None)
        if isinstance(key, tuple):
            channel = key[1]
            key = key[0]
        if isinstance(key, int):
            return np.ndarray.__setitem__(self, (key, channel), value)
        if isinstance(key, float):
            return np.ndarray.__setitem__(self, (int(np.rint(key * self.samplerate)), channel), value)
        sliceattr = [v for v in [key.start, key.step, key.stop] if v is not None]
        slicedims = np.array([have_same_dimensions(flag, second) for flag in sliceattr])
        attrisint = np.array([isinstance(v, int) for v in sliceattr])
        s = sum(attrisint)
        if s != 0 and s != len(sliceattr):
            raise ValueError('Slice attributes must be all ints or all times')
        if s == len(sliceattr):  # all ints
            # If value is a mono sound its shape will be (N, 1) but the numpy
            # setitem will have shape (N,), a shape mismatch, so we squeeze
            # the array to make sure this doesn't happen.
            if isinstance(value, Sound) and channel != slice(None):
                value = value.squeeze()
            return np.asarray(self).__setitem__((key, channel), value)  # returns None
        if not slicedims.all():
            raise DimensionMismatchError('Slicing', *[get_unit(d) for d in sliceattr])
        if key.step is not None:  # resampling?
            raise NotImplementedError
        start = key.start
        stop = key.stop or self.duration
        if (start is not None and start < 0*ms) or stop > self.duration:
            raise IndexError('Slice bigger than Sound object')
        if start is not None:
            start = int(np.rint(start * self.samplerate))
        stop = int(np.rint(stop * self.samplerate))
        return self.__setitem__((slice(start, stop), channel), value)
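The slicing rules implemented above, in a short sketch (Python 3 syntax; durations are arbitrary)::

    from brian2 import ms
    from brian2hears import whitenoise

    sound = whitenoise(500*ms)      # uses the default samplerate
    first = sound[:100*ms]          # first 100 ms, zero-padded if the sound were shorter
    sound[:50*ms] = 0               # silence the first 50 ms in place
    backwards = sound[::-1]         # reverse the sound in time
    ch0 = sound[:, 0]               # channel selection works as for arrays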
    def extended(self, duration):
        '''
        Returns the Sound with length extended by the given duration, which
        can be the number of samples or a length of time in seconds.
        '''
        duration = get_duration(duration, self.samplerate)
        return self[:self.nsamples + duration]
    def resized(self, L):
        '''
        Returns the Sound with length extended (or contracted) to have L
        samples.
        '''
        if L == len(self):
            return self
        elif L < len(self):
            return Sound(self[:L, :], samplerate=self.samplerate)
        else:
            padding = np.zeros((L - len(self), self.nchannels))
            return Sound(np.concatenate((self, padding)), samplerate=self.samplerate)
    def shifted(self, duration, fractional=False, filter_length=2048):
        '''
        Returns the sound delayed by duration, which can be the number of
        samples or a length of time in seconds. Normally, only integer
        numbers of samples will be used, but if ``fractional=True`` then the
        filtering method from
        `http://www.labbookpages.co.uk/audio/beamforming/fractionalDelay.html
        <http://www.labbookpages.co.uk/audio/beamforming/fractionalDelay.html>`__
        will be used (introducing some small numerical errors). With this
        method, you can specify the ``filter_length``: larger values are
        slower but more accurate, especially at higher frequencies. The
        large default value of 2048 samples provides good accuracy for
        sounds with frequencies above 20 Hz, but not for lower frequency
        sounds. If you are restricted to high frequency sounds, a smaller
        value will be more efficient. Note that if ``fractional=True`` then
        ``duration`` is assumed to be a time, not a number of samples.
        '''
        if not fractional:
            if not isinstance(duration, int):
                duration = int(np.rint(duration * self.samplerate))
            if duration >= 0:
                y = np.vstack((np.zeros((duration, self.nchannels)), self))
                return Sound(y, samplerate=self.samplerate)
            else:
                return self[-duration:, :]
        else:
            if self.nchannels > 1:
                sounds = [self.channel(i).shifted(duration, fractional=True,
                                                  filter_length=filter_length)
                          for i in range(self.nchannels)]
                return Sound(np.hstack(sounds), samplerate=self.samplerate)
            # Adapted from
            # http://www.labbookpages.co.uk/audio/beamforming/fractionalDelay.html
            delay = duration * self.samplerate
            if delay >= 0:
                idelay = int(delay)
            elif delay < 0:
                idelay = -int(-delay)
            delay -= idelay
            centre_tap = filter_length // 2
            t = np.arange(filter_length)
            x = t - delay
            if abs(round(delay) - delay) < 1e-10:
                tap_weight = np.array(x == centre_tap, dtype=float)
            else:
                sinc = np.sin(np.pi * (x - centre_tap)) / (np.pi * (x - centre_tap))
                # Hamming window
                window = 0.54 - 0.46 * np.cos(2.0 * np.pi * (x + 0.5) / filter_length)
                tap_weight = window * sinc
            if filter_length < 256:
                y = np.convolve(tap_weight, self.flatten())
            else:
                y = fftconvolve(tap_weight, self.flatten())
            # Integer division: float slice indices are an error in Python 3
            y = y[filter_length // 2:-(filter_length // 2)]
            sound = Sound(y, self.samplerate)
            sound = sound.shifted(idelay)
            return sound
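A short sketch of integer versus fractional shifting (the delay values are arbitrary)::

    from brian2 import Hz, ms
    from brian2hears import tone

    sound = tone(500*Hz, 100*ms)
    d1 = sound.shifted(5)                         # delay by 5 whole samples
    d2 = sound.shifted(0.3*ms, fractional=True)   # sub-sample delay via sinc filtering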
    def repeat(self, n):
        '''
        Repeats the sound n times.
        '''
        x = np.vstack((self,) * n)
        return Sound(x, samplerate=self.samplerate)
    @check_units(samplerate=Hz)
    def resample(self, samplerate, resample_type='sinc_best'):
        '''
        Returns a resampled version of the sound.
        '''
        if not have_resample:
            raise ImportError('Need samplerate package for resampling')
        y = np.array(resample(self, float(samplerate / self.samplerate), resample_type),
                     dtype=np.float64)
        return Sound(y, samplerate=samplerate)

    def _init_mixer(self):
        global _mixer_status
        if (_mixer_status == [-1, -1] or _mixer_status[0] != self.nchannels
                or _mixer_status[1] != self.samplerate):
            pygame.mixer.quit()
            pygame.mixer.init(int(self.samplerate), -16, self.nchannels)
            _mixer_status = [self.nchannels, self.samplerate]
    def play(self, normalise=False, sleep=False):
        '''
        Plays the sound (normalised to avoid clipping if required). If
        ``sleep=True`` then the function will wait until the sound has
        finished playing before returning.
        '''
        global pygame, pygame_loaded
        if not pygame_loaded:
            try:
                import pygame
            except ImportError:
                raise ImportError("Playing sounds requires the pygame module to be installed")
            pygame_loaded = True
        if self.nchannels > 2:
            raise ValueError("Can only play sounds with 1 or 2 channels.")
        self._init_mixer()
        if normalise:
            a = np.amax(np.abs(self))
        else:
            a = 1
        x = np.array((2 ** 15 - 1) * np.clip(self / a, -1, 1), dtype=np.int16)
        if self.nchannels == 1:
            x.shape = x.size
        # Make sure pygame receives an array in C-order
        x = pygame.sndarray.make_sound(np.ascontiguousarray(x))
        x.play()
        if sleep:
            time.sleep(self.duration)
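Sample usage (requires pygame to be installed; the tone parameters are arbitrary)::

    from brian2 import Hz, ms
    from brian2hears import tone

    tone(500*Hz, 200*ms).play(normalise=True, sleep=True)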
    def spectrogram(self, low=None, high=None, log_power=True, other=None, **kwds):
        '''
        Plots a spectrogram of the sound.

        Arguments:

        ``low=None``, ``high=None``
            If these are left unspecified, it shows the full spectrogram,
            otherwise it shows only between ``low`` and ``high`` in Hz.
        ``log_power=True``
            If True the colour represents the log of the power.
        ``**kwds``
            Are passed to matplotlib's ``specgram`` command.

        Returns the values returned by ``specgram``, namely
        ``(pxx, freqs, bins, im)`` where ``pxx`` is a 2D array of powers,
        ``freqs`` is the corresponding frequencies, ``bins`` are the time
        bins, and ``im`` is the image axis.
        '''
        if self.nchannels > 1:
            raise ValueError('Can only plot spectrograms for mono sounds.')
        if other is not None:
            x = self.flatten() - other.flatten()
        else:
            x = self.flatten()
        pxx, freqs, bins, im = plt.specgram(x, Fs=float(self.samplerate), **kwds)
        if low is not None or high is not None:
            restricted = True
            if low is None:
                low = 0*Hz
            if high is None:
                high = np.amax(freqs)*Hz
            I = np.logical_and(low <= freqs, freqs <= high)
            I2 = np.where(I)[0]
            I2 = [max(min(I2) - 1, 0), min(max(I2) + 1, len(freqs) - 1)]
            Z = pxx[I2[0]:I2[-1], :]
        else:
            restricted = False
            Z = pxx
        if log_power:
            Z[Z < 1e-20] = 1e-20  # no zeros because we take logs
            Z = 10 * np.log10(Z)
        Z = np.flipud(Z)
        if restricted:
            plt.imshow(Z, extent=(0, np.amax(bins), freqs[I2[0]], freqs[I2[-1]]),
                       origin='upper', aspect='auto')
        else:
            plt.imshow(Z, extent=(0, np.amax(bins), freqs[0], freqs[-1]),
                       origin='upper', aspect='auto')
        plt.xlabel('Time (s)')
        plt.ylabel('Frequency (Hz)')
        return (pxx, freqs*Hz, bins*second, im)
    @check_units(low=Hz, high=Hz)
    def spectrum(self, low=None, high=None, log_power=True, display=False):
        '''
        Returns the spectrum of the sound and optionally plots it.

        Arguments:

        ``low``, ``high``
            If these are left unspecified, it shows the full spectrum,
            otherwise it shows only between ``low`` and ``high`` in Hz.
        ``log_power=True``
            If True it returns the log of the power.
        ``display=False``
            Whether to plot the output.

        Returns ``(Z, freqs, phase)`` where ``Z`` is a 1D array of powers,
        ``freqs`` is the corresponding frequencies, and ``phase`` is the
        unwrapped phase of the spectrum.
        '''
        if self.nchannels > 1:
            raise ValueError('Can only plot spectrum for mono sounds.')
        # Flatten array, fft operates on the last axis by default
        sp = fft(np.array(self).flatten())
        freqs = np.array(range(len(sp)), dtype=np.float64) / len(sp) * np.float64(self.samplerate)
        pxx = abs(sp) ** 2
        phase = np.unwrap(np.mod(np.angle(sp), 2 * np.pi))
        if low is not None or high is not None:
            if low is None:
                low = 0*Hz
            if high is None:
                high = np.amax(freqs)*Hz
            I = np.logical_and(low <= freqs, freqs <= high)
            I2 = np.where(I)[0]
            Z = pxx[I2]
            freqs = freqs[I2]
            phase = phase[I2]
        else:
            Z = pxx
        if log_power:
            Z[Z < 1e-20] = 1e-20  # no zeros because we take logs
            Z = 10 * np.log10(Z)
        if display:
            Zp = Z[freqs > 0]
            phasep = phase[freqs > 0]
            freqsp = freqs[freqs > 0]
            plt.subplot(211)
            plt.semilogx(freqsp, Zp)
            plt.grid()
            plt.xlim((freqsp[0], freqsp[-1]))
            plt.xlabel('Frequency (Hz)')
            plt.ylabel('Power (dB/Hz)' if log_power else 'Power')
            plt.subplot(212)
            plt.semilogx(freqsp, phasep)
            plt.grid()
            plt.xlim((freqsp[0], freqsp[-1]))
            plt.xlabel('Frequency (Hz)')
            plt.ylabel('Phase (rad)')
            plt.tight_layout()
        return (Z, freqs*Hz, phase)
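A sketch of restricting and plotting the spectrum (the frequency limits are arbitrary)::

    from brian2 import Hz, ms
    from brian2hears import whitenoise

    sound = whitenoise(500*ms)
    Z, freqs, phase = sound.spectrum(low=20*Hz, high=8000*Hz, display=True)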
    def get_level(self):
        '''
        Returns level in dB SPL (RMS) assuming array is in Pascals. In the
        case of multi-channel sounds, returns an array of levels for each
        channel, otherwise returns a float.
        '''
        if self.nchannels == 1:
            rms_value = np.sqrt(np.mean((np.asarray(self) - np.mean(np.asarray(self))) ** 2))
            rms_dB = 20.0 * np.log10(rms_value / 2e-5)
            return rms_dB * dB
        else:
            return np.array(tuple(self.channel(i).get_level()
                                  for i in range(self.nchannels)))

    def set_level(self, level):
        '''
        Sets level in dB SPL (RMS) assuming array is in Pascals. ``level``
        should be a value in dB, or a tuple of levels, one for each channel.
        '''
        rms_dB = self.get_level()
        if self.nchannels > 1:
            level = np.array(level)
            if level.size == 1:
                level = level.repeat(self.nchannels)
            level = np.reshape(level, (1, self.nchannels))
            rms_dB = np.reshape(rms_dB, (1, self.nchannels))
        else:
            if not isinstance(level, dB_type):
                raise dB_error('Must specify level in dB')
            rms_dB = float(rms_dB)
            level = float(level)
        gain = 10 ** ((level - rms_dB) / 20.)
        self *= gain

    level = property(fget=get_level, fset=set_level, doc='''
        Can be used to get or set the level of a sound, which should be in
        dB. For single channel sounds a value in dB is used, for multiple
        channel sounds a value in dB can be used for setting the level (all
        channels will be set to the same level), or a list/tuple/array of
        levels. It is assumed that the unit of the sound is Pascals.
        ''')
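A sketch of getting and setting levels (the dB value is arbitrary, and this assumes ``dB`` is exported at package level, as it was in ``brian.hears``)::

    from brian2 import ms
    from brian2hears import whitenoise, dB

    sound = whitenoise(500*ms)
    sound.level = 60*dB     # rescale in place to 60 dB SPL (RMS)
    print(sound.level)      # reads back approximately 60 dB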
    def atlevel(self, level):
        '''
        Returns the sound at the given level in dB SPL (RMS) assuming array
        is in Pascals. ``level`` should be a value in dB, or a tuple of
        levels, one for each channel.
        '''
        newsound = self.copy()
        newsound.level = level
        return newsound
    def get_maxlevel(self):
        return np.amax(self.level) * dB

    def set_maxlevel(self, level):
        self.level += level - self.maxlevel

    maxlevel = property(fget=get_maxlevel, fset=set_maxlevel, doc='''
        Can be used to set or get the maximum level of a sound. For mono
        sounds, this is the same as the level, but for multichannel sounds
        it is the maximum level across the channels. Relative level
        differences will be preserved. The specified level should be a value
        in dB, and it is assumed that the unit of the sound is Pascals.
        ''')
    def atmaxlevel(self, level):
        '''
        Returns the sound with the maximum level across channels set to the
        given level. Relative level differences will be preserved. The
        specified level should be a value in dB and it is assumed that the
        unit of the sound is Pascals.
        '''
        newsound = self.copy()
        newsound.maxlevel = level
        return newsound
    def ramp(self, when='onset', duration=10*ms, envelope=None, inplace=True):
        '''
        Adds a ramp on/off to the sound

        ``when='onset'``
            Can take values 'onset', 'offset' or 'both'
        ``duration=10*ms``
            The time over which the ramping happens
        ``envelope``
            A ramping function, if not specified uses ``sin(pi*t/2)**2``.
            The function should be a function of one variable ``t`` ranging
            from 0 to 1, and should increase from ``f(0)=0`` to ``f(1)=1``.
            The reverse is applied for the offset ramp.
        ``inplace``
            Whether to apply ramping to current sound or return a new array.
        '''
        when = when.lower().strip()
        if envelope is None:
            envelope = lambda t: np.sin(np.pi * t / 2) ** 2
        if not isinstance(duration, int):
            sz = int(np.rint(duration * self.samplerate))
        else:
            sz = duration
        multiplier = envelope(np.reshape(np.linspace(0.0, 1.0, sz), (sz, 1)))
        if inplace:
            target = self
        else:
            target = Sound(np.copy(self), self.samplerate)
        if when == 'onset' or when == 'both':
            target[:sz, :] *= multiplier
        if when == 'offset' or when == 'both':
            target[target.nsamples - sz:, :] *= multiplier[::-1]
        return target
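Sample usage, including a custom linear envelope in place of the default raised cosine (durations are arbitrary)::

    from brian2 import ms
    from brian2hears import whitenoise

    sound = whitenoise(500*ms)
    r1 = sound.ramp(when='both', duration=20*ms, inplace=False)
    r2 = sound.ramp(when='onset', envelope=lambda t: t,
                    inplace=False)   # linear onset ramp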
    @check_units(duration=second)
    def ramped(self, when='onset', duration=10*ms, envelope=None):
        '''
        Returns a ramped version of the sound (see :meth:`Sound.ramp`).
        '''
        return self.ramp(when=when, duration=duration, envelope=envelope, inplace=False)
    def fft(self, n=None):
        '''
        Performs an n-point FFT on the sound object, returning an array of
        shape ``(n, nchannels)`` containing the DFT of each channel. ``n``
        defaults to the number of samples of the sound, but can be changed
        manually by setting the ``n`` keyword argument.
        '''
        if n is None:
            n = self.shape[0]
        # The result is complex, with one column per channel
        res = np.zeros((n, self.nchannels), dtype=complex)
        for i in range(self.nchannels):
            res[:, i] = fft(np.asarray(self)[:, i].flatten(), n=n)
        return res
    @staticmethod
    @check_units(frequency=Hz, duration=second, samplerate=Hz)
    def tone(frequency, duration, phase=0, samplerate=None, nchannels=1):
        '''
        Returns a pure tone at frequency for duration, using the default
        samplerate or the given one. The ``frequency`` and ``phase``
        parameters can be single values, in which case multiple channels can
        be specified with the ``nchannels`` argument, or they can be
        sequences (lists/tuples/arrays) in which case there is one frequency
        or phase for each channel.
        '''
        samplerate = get_samplerate(samplerate)
        duration = get_duration(duration, samplerate)
        frequency = np.asarray(frequency)*Hz
        phase = np.array(phase)
        if frequency.size > nchannels and nchannels == 1:
            nchannels = frequency.size
        if phase.size > nchannels and nchannels == 1:
            nchannels = phase.size
        if frequency.size == nchannels:
            frequency.shape = (1, nchannels)
        if phase.size == nchannels:
            # one row, so that per-channel phases broadcast across samples
            phase.shape = (1, nchannels)
        t = np.arange(0, duration, 1) / samplerate
        t.shape = (t.size, 1)  # ensures C-order (in contrast to tile(...).T)
        x = np.sin(phase + 2.0 * np.pi * frequency * np.tile(t, (1, nchannels)))
        return Sound(x, samplerate)
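Sample usage for single- and multi-channel tones (the frequencies are arbitrary)::

    from brian2 import Hz, ms
    from brian2hears import tone

    t1 = tone(1000*Hz, 500*ms)             # mono tone
    t2 = tone([400, 600, 800]*Hz, 500*ms)  # one frequency per channel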
    @staticmethod
    @check_units(f0=Hz, duration=second, samplerate=Hz)
    def harmoniccomplex(f0, duration, amplitude=1, phase=0, samplerate=None, nchannels=1):
        '''
        Returns a harmonic complex composed of pure tones at integer
        multiples of the fundamental frequency ``f0``. The ``amplitude`` and
        ``phase`` keywords can be set to either a single value or an array
        of values. In the former case the value is set for all harmonics,
        and harmonics up to the sampling frequency are generated. In the
        latter each harmonic parameter is set separately, and the number of
        harmonics generated corresponds to the length of the array.
        '''
        samplerate = get_samplerate(samplerate)
        phases = np.array(phase).flatten()
        amplitudes = np.array(amplitude).flatten()
        if len(phases) > 1 or len(amplitudes) > 1:
            if (len(phases) > 1 and len(amplitudes) > 1) and (len(phases) != len(amplitudes)):
                raise ValueError('Please specify the same number of phases and amplitudes')
            Nharmonics = max(len(phases), len(amplitudes))
        else:
            Nharmonics = int(np.floor(samplerate / (2 * f0)))
        if len(phases) == 1:
            phases = np.tile(phase, Nharmonics)
        if len(amplitudes) == 1:
            amplitudes = np.tile(amplitude, Nharmonics)
        x = amplitudes[0] * tone(f0, duration, phase=phases[0],
                                 samplerate=samplerate, nchannels=nchannels)
        for i in range(1, Nharmonics):
            x += amplitudes[i] * tone((i + 1) * f0, duration, phase=phases[i],
                                      samplerate=samplerate, nchannels=nchannels)
        return Sound(x, samplerate)
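Sample usage: four harmonics of a 100 Hz fundamental with halving amplitudes (values arbitrary)::

    from brian2 import Hz, ms
    from brian2hears import harmoniccomplex

    hc = harmoniccomplex(100*Hz, 500*ms, amplitude=[1.0, 0.5, 0.25, 0.125])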
    @staticmethod
    @check_units(duration=second, samplerate=Hz)
    def whitenoise(duration, samplerate=None, nchannels=1):
        '''
        Returns a white noise. If the samplerate is not specified, the
        global default value will be used.
        '''
        samplerate = get_samplerate(samplerate)
        duration = get_duration(duration, samplerate)
        x = randn(duration, nchannels)
        return Sound(x, samplerate)
    @staticmethod
    @check_units(duration=second, samplerate=Hz)
    def powerlawnoise(duration, alpha, samplerate=None, nchannels=1, normalise=False):
        '''
        Returns a power-law noise for the given duration. Spectral density
        per unit of bandwidth scales as 1/(f**alpha).

        Sample usage::

            noise = powerlawnoise(200*ms, 1, samplerate=44100*Hz)

        Arguments:

        ``duration``
            Duration of the desired output.
        ``alpha``
            Power law exponent.
        ``samplerate``
            Desired output samplerate.
        '''
        samplerate = get_samplerate(samplerate)
        duration = get_duration(duration, samplerate)
        # Adapted from http://www.eng.ox.ac.uk/samp/software/powernoise/powernoise.m
        # Little MA et al. (2007), "Exploiting nonlinear recurrence and fractal
        # scaling properties for voice disorder detection", Biomed Eng Online, 6:23
        n = duration
        n2 = int(n / 2)
        f = np.array(fftfreq(n, d=1.0 / samplerate), dtype=complex)
        f.shape = (len(f), 1)
        f = np.tile(f, (1, nchannels))
        if n % 2 == 1:
            z = (randn(n2, nchannels) + 1j * randn(n2, nchannels))
            a2 = 1.0 / (f[1:(n2 + 1), :] ** (alpha / 2.0))
        else:
            z = (randn(n2 - 1, nchannels) + 1j * randn(n2 - 1, nchannels))
            a2 = 1.0 / (f[1:n2, :] ** (alpha / 2.0))
        a2 *= z
        if n % 2 == 1:
            d = np.vstack((np.ones((1, nchannels)), a2,
                           np.flipud(np.conj(a2))))
        else:
            d = np.vstack((np.ones((1, nchannels)), a2,
                           1.0 / (abs(f[n2]) ** (alpha / 2.0)) * randn(1, nchannels),
                           np.flipud(np.conj(a2))))
        x = np.real(ifft(d.flatten()))
        x.shape = (n, nchannels)
        if normalise:
            for i in range(nchannels):
                x[:, i] = ((x[:, i] - np.amin(x[:, i])) /
                           (np.amax(x[:, i]) - np.amin(x[:, i])) - 0.5) * 2
        return Sound(x, samplerate)
    @staticmethod
    @check_units(duration=second, samplerate=Hz)
    def pinknoise(duration, samplerate=None, nchannels=1, normalise=False):
        '''
        Returns pink noise, i.e. :func:`powerlawnoise` with alpha=1.
        '''
        return Sound.powerlawnoise(duration, 1.0, samplerate=samplerate,
                                   nchannels=nchannels, normalise=normalise)
    @staticmethod
    @check_units(duration=second, samplerate=Hz)
    def brownnoise(duration, samplerate=None, nchannels=1, normalise=False):
        '''
        Returns brown noise, i.e. :func:`powerlawnoise` with alpha=2.
        '''
        return Sound.powerlawnoise(duration, 2.0, samplerate=samplerate,
                                   nchannels=nchannels, normalise=normalise)
    @staticmethod
    @check_units(duration=second, samplerate=Hz)
    def irns(delay, gain, niter, duration, samplerate=None, nchannels=1):
        '''
        Returns an IRN_S noise. The iterated ripple noise is obtained
        through a cascade of gain and delay filtering. For more details, see
        Yost 1996 or chapter 15 of Hartmann's "Signals, Sound, and
        Sensation".
        '''
        if nchannels != 1:
            raise ValueError("nchannels != 1 not supported.")
        samplerate = get_samplerate(samplerate)
        noise = Sound.whitenoise(duration)
        splrate = noise.samplerate
        x = np.array(noise.T)[0]
        IRNfft = fft(x)
        Nspl, spl_dur = len(IRNfft), float(1.0 / splrate)
        w = 2 * np.pi * fftfreq(Nspl, spl_dur)
        d = float(delay)
        for k in range(1, niter + 1):
            nchoosek = factorial(niter) / (factorial(niter - k) * factorial(k))
            IRNfft += nchoosek * (gain ** k) * IRNfft * np.exp(-1j * w * k * d)
        IRNadd = ifft(IRNfft)
        x = np.real(IRNadd)
        return Sound(x, samplerate)

    @staticmethod
    @check_units(duration=second, samplerate=Hz)
    def irno(delay, gain, niter, duration, samplerate=None, nchannels=1):
        '''
        Returns an IRN_O noise. The iterated ripple noise is obtained by
        adding many attenuated and delayed versions of the original
        broadband noise. For more details, see Yost 1996 or chapter 15 of
        Hartmann's "Signals, Sound, and Sensation".
        '''
        samplerate = get_samplerate(samplerate)
        noise = Sound.whitenoise(duration)
        splrate = noise.samplerate
        x = np.array(noise.T)[0]
        IRNadd = fft(x)
        Nspl, spl_dur = len(IRNadd), float(1.0 / splrate)
        w = 2 * np.pi * fftfreq(Nspl, spl_dur)
        d = float(delay)
        for k in range(1, niter + 1):
            IRNadd += (gain ** k) * IRNadd * np.exp(-1j * w * k * d)
        IRNadd = ifft(IRNadd)
        x = np.real(IRNadd)
        return Sound(x, samplerate)
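Sample usage for both ripple noises (the delay, gain and iteration count are arbitrary)::

    from brian2 import ms
    from brian2hears import irns, irno

    s_noise = irns(4*ms, 1.0, 8, 500*ms)   # cascaded (IRN_S) variant
    o_noise = irno(4*ms, 1.0, 8, 500*ms)   # add-original (IRN_O) variant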
    @staticmethod
    @check_units(samplerate=Hz)
    def click(duration=1, peak=None, samplerate=None, nchannels=1):
        '''
        Returns a click of the given duration (in time or samples).

        If ``peak`` is not specified, the amplitude will be 1, otherwise
        ``peak`` refers to the peak dB SPL of the click, according to the
        formula ``28e-6*10**(peak/20.)``.
        '''
        samplerate = get_samplerate(samplerate)
        duration = get_duration(duration, samplerate)
        if peak is not None:
            if not isinstance(peak, dB_type):
                raise dB_error('Peak must be given in dB')
            amplitude = 28e-6 * 10 ** (float(peak) / 20.)
        else:
            amplitude = 1
        x = amplitude * np.ones((duration, nchannels))
        return Sound(x, samplerate)
    @staticmethod
    @check_units(duration=second, samplerate=Hz)
    def clicks(duration, n, interval, peak=None, samplerate=None, nchannels=1):
        '''
        Returns a series of n clicks (see :func:`click`) separated by
        interval.
        '''
        oneclick = Sound.click(duration, peak=peak, samplerate=samplerate)
        return oneclick[slice(None, interval)].repeat(n)
    @staticmethod
    @check_units(duration=second, samplerate=Hz)
    def silence(duration, samplerate=None, nchannels=1):
        '''
        Returns a silent, zero sound for the given duration. Set nchannels
        to set the number of channels.
        '''
        samplerate = get_samplerate(samplerate)
        duration = get_duration(duration, samplerate)
        x = np.zeros((duration, nchannels))
        return Sound(x, samplerate)
    @staticmethod
    @check_units(pitch=Hz, duration=second, samplerate=Hz)
    def vowel(vowel=None, formants=None, pitch=100*Hz, duration=1*second,
              samplerate=None, nchannels=1):
        '''
        Returns an artificially created spoken vowel sound (following the
        source-filter model of speech production) with a given ``pitch``.

        The vowel can be specified by either providing ``vowel`` as a string
        ('a', 'i' or 'u') or by setting ``formants`` to a sequence of
        formant frequencies. The returned sound is normalised to a maximum
        amplitude of 1.

        The implementation is based on the MakeVowel function written by
        Richard O. Duda, part of the Auditory Toolbox for Matlab by Malcolm
        Slaney: https://engineering.purdue.edu/~malcolm/interval/1998-010/
        '''
        samplerate = get_samplerate(samplerate)
        duration = get_duration(duration, samplerate)
        if not (vowel or formants):
            raise ValueError('Need either a vowel or a list of formants')
        elif vowel and formants:
            raise ValueError('Cannot use both vowel and formants')
        if vowel:
            if vowel == 'a' or vowel == '/a/':
                formants = (730.0*Hz, 1090.0*Hz, 2440.0*Hz)
            elif vowel == 'i' or vowel == '/i/':
                formants = (270.0*Hz, 2290.0*Hz, 3010.0*Hz)
            elif vowel == 'u' or vowel == '/u/':
                formants = (300.0*Hz, 870.0*Hz, 2240.0*Hz)
            else:
                raise ValueError('Unknown vowel: "%s"' % (vowel))
        points = np.arange(0, duration - 1, samplerate / pitch)
        indices = np.floor(points).astype(int)
        y = np.zeros(duration)
        y[indices] = (indices + 1) - points
        y[indices + 1] = points - indices
        # model the sound source (periodic glottal excitation)
        a = np.exp(-250.*Hz * 2 * np.pi / samplerate)
        y = lfilter([1], [1, 0, -a * a], y.copy())
        # model the filtering by the vocal tract
        bandwidth = 50.*Hz
        for f in formants:
            cft = f / samplerate
            q = f / bandwidth
            rho = np.exp(-np.pi * cft / q)
            theta = 2 * np.pi * cft * np.sqrt(1 - 1 / (4.0 * q * q))
            a2 = -2 * rho * np.cos(theta)
            a3 = rho * rho
            y = lfilter([1 + a2 + a3], [1, a2, a3], y.copy())
        # normalise sound
        data = y / np.max(np.abs(y), axis=0)
        data.shape = (data.size, 1)
        # duplicate across channels (tiling along the second axis)
        return Sound(np.tile(data, (1, nchannels)), samplerate=samplerate)
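Sample usage with a named vowel and with explicit formants (the formant values here are illustrative, not taken from the table above)::

    from brian2 import Hz, ms
    from brian2hears import vowel

    a = vowel('a', pitch=120*Hz, duration=500*ms)
    custom = vowel(formants=(300*Hz, 2200*Hz, 3000*Hz), pitch=100*Hz,
                   duration=500*ms)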
    @staticmethod
    def sequence(*args, **kwds):
        '''
        Returns the sequence of sounds in the list sounds joined together.
        '''
        samplerate = kwds.pop('samplerate', None)
        if len(kwds):
            raise TypeError('Unexpected keywords to function sequence()')
        sounds = []
        for arg in args:
            if isinstance(arg, (list, tuple)):
                sounds.extend(arg)
            else:
                sounds.append(arg)
        if samplerate is None:
            samplerate = max(s.samplerate for s in sounds)
            rates = np.unique([int(s.samplerate) for s in sounds])
            if len(rates) > 1:
                sounds = tuple(s.resample(samplerate) for s in sounds)
        x = np.vstack(sounds)
        return Sound(x, samplerate)
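Sample usage, joining three tones end to end (frequencies and durations arbitrary)::

    from brian2 import Hz, ms
    from brian2hears import tone, sequence

    melody = sequence(tone(400*Hz, 100*ms), tone(500*Hz, 100*ms),
                      tone(600*Hz, 100*ms))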
    def save(self, filename, normalise=False, samplewidth=2):
        '''
        Save the sound as a WAV.

        If the normalise keyword is set to True, the amplitude of the sound
        will be normalised to 1. The samplewidth keyword can be 1 or 2 to
        save the data as 8 or 16 bit samples.
        '''
        ext = filename.split('.')[-1].lower()
        if ext == 'wav':
            import wave as sndmodule
        else:
            raise NotImplementedError('Can only save as wav soundfiles')
        if samplewidth != 1 and samplewidth != 2:
            raise ValueError('Sample width must be 1 or 2 bytes.')
        scale = {2: 2 ** 15 - 1, 1: 2 ** 7 - 1}[samplewidth]
        meanval = {2: 0, 1: 2 ** 7}[samplewidth]
        dtype = {2: np.int16, 1: np.uint8}[samplewidth]
        typecode = {2: 'h', 1: 'B'}[samplewidth]
        w = sndmodule.open(filename, 'wb')
        w.setnchannels(self.nchannels)
        w.setsampwidth(samplewidth)
        w.setframerate(int(self.samplerate))
        x = np.array(self, copy=True)
        am = np.amax(x)
        z = np.zeros(x.shape[0] * self.nchannels, dtype=x.dtype)
        x.shape = (x.shape[0], self.nchannels)
        for i in range(self.nchannels):
            if normalise:
                x[:, i] /= am
            x[:, i] = x[:, i] * scale + meanval
            z[i::self.nchannels] = x[:, i]  # interleave the channels
        data = np.array(z, dtype=dtype)
        data = pyarray.array(typecode, data)
        try:
            out = data.tobytes()
        except AttributeError:  # Python 2 fallback
            out = data.tostring()
        w.writeframes(out)
        w.close()
    @staticmethod
    def load(filename):
        '''
        Load the file given by filename and returns a Sound object. Sound
        file can be either a .wav or a .aif file.
        '''
        ext = filename.split('.')[-1].lower()
        if ext == 'wav':
            import wave as sndmodule
        elif ext == 'aif' or ext == 'aiff':
            import aifc as sndmodule
        else:
            raise NotImplementedError('Can only load aif or wav soundfiles')
        wav = sndmodule.open(filename, "r")
        nchannels, sampwidth, framerate, nframes, comptype, compname = wav.getparams()
        frames = wav.readframes(nframes * nchannels)
        typecode = {2: 'h', 1: 'B'}[sampwidth]
        out = np.frombuffer(frames, dtype=np.dtype(typecode))
        scale = {2: 2 ** 15, 1: 2 ** 7 - 1}[sampwidth]
        meanval = {2: 0, 1: 2 ** 7}[sampwidth]
        data = np.zeros((nframes, nchannels))
        for i in range(nchannels):
            data[:, i] = out[i::nchannels]
            # remove the offset before scaling, so 8-bit (unsigned) data
            # ends up in [-1, 1] like 16-bit data
            data[:, i] -= meanval
            data[:, i] /= scale
        return Sound(data, samplerate=framerate*Hz)
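A save/load round-trip sketch (the filename is illustrative)::

    from brian2 import Hz, ms
    from brian2hears import tone, loadsound

    sound = tone(500*Hz, 200*ms)
    sound.save('example_tone.wav', normalise=True, samplewidth=2)
    sound2 = loadsound('example_tone.wav')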
    def __repr__(self):
        arrayrep = repr(np.asarray(self))
        arrayrep = '\n'.join('    ' + l for l in arrayrep.split('\n'))
        return 'Sound(\n' + arrayrep + ',\n    ' + repr(self.samplerate) + ')'

    def __str__(self):
        return 'Sound duration %s, channels %s, samplerate %s' % (self.duration,
                                                                  self.nchannels,
                                                                  self.samplerate)

    def __reduce__(self):
        return (_load_Sound_from_pickle,
                (np.asarray(self), float(self.samplerate)))
def _load_Sound_from_pickle(arr, samplerate):
    return Sound(arr, samplerate=samplerate*Hz)
def play(*sounds, **kwds):
    '''
    Plays a sound or sequence of sounds. For example::

        play(sound)
        play(sound1, sound2)
        play([sound1, sound2, sound3])

    If ``normalise=True``, the sequence of sounds will be normalised to the
    maximum range (-1 to 1), and if ``sleep=True`` the function will wait
    until the sounds have finished playing before returning.
    '''
    normalise = kwds.pop('normalise', False)
    sleep = kwds.pop('sleep', False)
    if len(kwds):
        raise TypeError('Unexpected keyword arguments to function play()')
    sound = sequence(*sounds)
    sound.play(normalise=normalise, sleep=sleep)
play.__doc__ = Sound.play.__doc__
def savesound(sound, filename, normalise=False, samplewidth=2):
    sound.save(filename, normalise=normalise, samplewidth=samplewidth)
savesound.__doc__ = Sound.save.__doc__


def get_duration(duration, samplerate):
    if not isinstance(duration, int):
        duration = int(np.rint(duration * samplerate))
    return duration


whitenoise = Sound.whitenoise
powerlawnoise = Sound.powerlawnoise
pinknoise = Sound.pinknoise
brownnoise = Sound.brownnoise
irns = Sound.irns
irno = Sound.irno
tone = Sound.tone
harmoniccomplex = Sound.harmoniccomplex
click = Sound.click
clicks = Sound.clicks
silence = Sound.silence
sequence = Sound.sequence
vowel = Sound.vowel
loadsound = Sound.load