from builtins import range
import array as pyarray
import time
import numpy as np
from numpy.fft import fft, ifft, fftfreq
from numpy.random import randn
import matplotlib.pyplot as plt
from scipy.signal import fftconvolve, lfilter
from scipy.special import factorial
try:
from samplerate import resample
have_resample = True
except (ImportError):
have_resample = False
from brian2 import (Hz, ms, second, check_units,
have_same_dimensions, get_unit, DimensionMismatchError)
from .bufferable import Bufferable
from .prefs import get_samplerate
from .db import dB, dB_type, dB_error
pygame_loaded = False
__all__ = ['BaseSound', 'Sound',
'pinknoise','brownnoise','powerlawnoise',
'whitenoise', 'irns', 'irno',
'tone', 'click', 'clicks', 'silence', 'sequence', 'harmoniccomplex',
'loadsound', 'savesound', 'play', 'vowel'
]
_mixer_status = [-1,-1]
[docs]class BaseSound(Bufferable):
'''
Base class for Sound and OnlineSound
'''
pass
[docs]class Sound(BaseSound, np.ndarray):
'''
Class for working with sounds, including loading/saving, manipulating and playing.
For an overview, see :ref:`sounds_overview`.
**Initialisation**
The following arguments are used to initialise a sound object
``data``
Can be a filename, an array, a function or a sequence (list or tuple).
If its a filename, the sound file (WAV or AIFF) will be loaded. If its
an array, it should have shape ``(nsamples, nchannels)``. If its a
function, it should be a function f(t). If its a sequence, the items
in the sequence can be filenames, functions, arrays or Sound objects.
The output will be a multi-channel sound with channels the corresponding
sound for each element of the sequence.
``samplerate=None``
The samplerate, if necessary, will use the default (for an array or
function) or the samplerate of the data (for a filename).
``duration=None``
The duration of the sound, if initialising with a function.
**Loading, saving and playing**
.. automethod:: load
.. automethod:: save
.. automethod:: play
**Properties**
.. autoattribute:: duration
.. autoattribute:: nsamples
.. autoattribute:: nchannels
.. autoattribute:: times
.. autoattribute:: left
.. autoattribute:: right
.. automethod:: channel
**Generating sounds**
All sound generating methods can be used with durations arguments in samples (int) or units (e.g. 500*ms). One can also set the number of channels by setting the keyword argument nchannels to the desired value. Notice that for noise the channels will be generated independantly.
.. automethod:: tone
.. automethod:: whitenoise
.. automethod:: powerlawnoise
.. automethod:: brownnoise
.. automethod:: pinknoise
.. automethod:: silence
.. automethod:: click
.. automethod:: clicks
.. automethod:: harmoniccomplex
.. automethod:: vowel
**Timing and sequencing**
.. automethod:: sequence(*sounds, samplerate=None)
.. automethod:: repeat
.. automethod:: extended
.. automethod:: shifted
.. automethod:: resized
**Slicing**
One can slice sound objects in various ways, for example ``sound[100*ms:200*ms]``
returns the part of the sound between 100 ms and 200 ms (not including the
right hand end point). If the sound is less than 200 ms long it will be
zero padded. You can also set values using slicing, e.g.
``sound[:50*ms] = 0`` will silence the first 50 ms of the sound. The syntax
is the same as usual for Python slicing. In addition, you can select a
subset of the channels by doing, for example, ``sound[:, -5:]`` would be
the last 5 channels. For time indices, either times or samples can be given,
e.g. ``sound[:100]`` gives the first 100 samples. In addition, steps can
be used for example to reverse a sound as ``sound[::-1]``.
Note that slicing with units of time rather than samples will only work in
Python 3. In Python 2, you can get the same effect by writing, for example,
``sound[slice(0*ms, 10*ms)]``. This is a change from the original version of
``brian.hears``.
**Arithmetic operations**
Standard arithemetical operations and numpy functions work as you would
expect with sounds, e.g. ``sound1+sound2``, ``3*sound`` or ``abs(sound)``.
**Level**
.. autoattribute:: level
.. automethod:: atlevel
.. autoattribute:: maxlevel
.. automethod:: atmaxlevel
**Ramping**
.. automethod:: ramp
.. automethod:: ramped
**Plotting**
.. automethod:: spectrogram
.. automethod:: spectrum
'''
duration = property(fget=lambda self:len(self) / self.samplerate,
doc='The length of the sound in seconds.')
nsamples = property(fget=lambda self:len(self),
doc='The number of samples in the sound.')
times = property(fget=lambda self:np.arange(len(self), dtype=float) / self.samplerate,
doc='An array of times (in seconds) corresponding to each sample.')
nchannels = property(fget=lambda self:self.shape[1],
doc='The number of channels in the sound.')
left = property(fget=lambda self:self.channel(0),
doc='The left channel for a stereo sound.')
right = property(fget=lambda self:self.channel(1),
doc='The right channel for a stereo sound.')
@check_units(samplerate=Hz, duration=second)
def __new__(cls, data, samplerate=None, duration=None):
if isinstance(data, np.ndarray):
samplerate = get_samplerate(samplerate)
# if samplerate is None:
# raise ValueError('Must specify samplerate to initialise Sound with array.')
if duration is not None:
raise ValueError('Cannot specify duration when initialising Sound with array.')
x = np.array(data, dtype=float)
elif isinstance(data, str):
if duration is not None:
raise ValueError('Cannot specify duration when initialising Sound from file.')
if samplerate is not None:
raise ValueError('Cannot specify samplerate when initialising Sound from a file.')
x = Sound.load(data)
samplerate = x.samplerate
elif callable(data):
samplerate = get_samplerate(samplerate)
# if samplerate is None:
# raise ValueError('Must specify samplerate to initialise Sound with function.')
if duration is None:
raise ValueError('Must specify duration to initialise Sound with function.')
L = int(np.rint(duration * samplerate))
t = np.arange(L, dtype=float) / samplerate
x = data(t)
elif isinstance(data, (list, tuple)):
kwds = {}
if samplerate is not None:
kwds['samplerate'] = samplerate
if duration is not None:
kwds['duration'] = duration
channels = tuple(Sound(c, **kwds) for c in data)
x = np.hstack(channels)
samplerate = channels[0].samplerate
else:
raise TypeError('Cannot initialise Sound with data of class ' + str(data.__class__))
if len(x.shape)==1:
x.shape = (len(x), 1)
x = x.view(cls)
x.samplerate = samplerate
x.buffer_init()
return x
def __array_wrap__(self, obj, context=None):
handled = False
x = np.ndarray.__array_wrap__(self, obj, context)
if not hasattr(x, 'samplerate') and hasattr(self, 'samplerate'):
x.samplerate = self.samplerate
if context is not None:
ufunc = context[0]
args = context[1]
return x
def __array_finalize__(self,obj):
if obj is None: return
self.samplerate = getattr(obj, 'samplerate', None)
def buffer_init(self):
pass
def buffer_fetch(self, start, end):
if start<0:
raise IndexError('Can only use positive indices in buffer.')
samples = end-start
X = np.asarray(self)[start:end, :]
if X.shape[0]<samples:
X = np.vstack((X, np.zeros((samples-X.shape[0], X.shape[1]))))
return X
[docs] def channel(self, n):
'''
Returns the nth channel of the sound.
'''
return Sound(self[:, n], self.samplerate)
def __add__(self, other):
if isinstance(other, Sound):
if int(other.samplerate) > int(self.samplerate):
self = self.resample(other.samplerate)
elif int(other.samplerate) < int(self.samplerate):
other = other.resample(self.samplerate)
if len(self) > len(other):
other = other.resized(len(self))
elif len(self) < len(other):
self = self.resized(len(other))
return Sound(np.ndarray.__add__(self, other), samplerate=self.samplerate)
else:
x = np.ndarray.__add__(self, other)
return Sound(x, self.samplerate)
__radd__ = __add__
def __getitem__(self, key):
channel = slice(None)
if isinstance(key, tuple):
channel = key[1]
key = key[0]
if isinstance(key, int):
return np.ndarray.__getitem__(self, key)
if isinstance(key, float):
return np.ndarray.__getitem__(self, round(key*self.samplerate))
sliceattr = [v for v in [key.start, key.stop] if v is not None]
slicedims = np.array([have_same_dimensions(flag, second) for flag in sliceattr])
attrisint = np.array([isinstance(v, int) for v in sliceattr])
s = sum(attrisint)
if s!=0 and s!=len(sliceattr):
raise ValueError('Slice attributes must be all ints or all times')
if s==len(sliceattr): # all ints
start = key.start or 0
stop = key.stop or self.shape[0]
step = key.step or 1
if start>=0 and stop<=self.shape[0]:
return Sound(np.ndarray.__getitem__(self, (key, channel)),
self.samplerate)
else:
startpad = max(-start, 0)
endpad = max(stop-self.shape[0], 0)
startmid = max(start, 0)
endmid = min(stop, self.shape[0])
atstart = np.zeros((startpad, self.shape[1]))
atend = np.zeros((endpad, self.shape[1]))
return Sound(np.vstack((atstart,
np.asarray(self)[startmid:endmid:step],
atend)), self.samplerate)
if not slicedims.all():
raise DimensionMismatchError('Slicing',
*[get_unit(d) for d in sliceattr])
start = key.start or 0*ms
stop = key.stop or self.duration
step = key.step or 1
if int(step)!=step:
#resampling
raise NotImplementedError
start = int(np.rint(start*self.samplerate))
stop = int(np.rint(stop*self.samplerate))
return self.__getitem__((slice(start,stop,step),channel))
def __setitem__(self,key,value):
channel=slice(None)
if isinstance(key,tuple):
channel=key[1]
key=key[0]
if isinstance(key,int):
return np.ndarray.__setitem__(self,(key,channel),value)
if isinstance(key,float):
return np.ndarray.__setitem__(self,(int(np.rint(key*self.samplerate)),channel),value)
sliceattr = [v for v in [key.start, key.step, key.stop] if v is not None]
slicedims = np.array([have_same_dimensions(flag, second) for flag in sliceattr])
attrisint = np.array([isinstance(v, int) for v in sliceattr])
s = sum(attrisint)
if s!=0 and s!=len(sliceattr):
raise ValueError('Slice attributes must be all ints or all times')
if s==len(sliceattr): # all ints
# If value is a mono sound its shape will be (N, 1) but the numpy
# setitem will have shape (N,) so in this case it's a shape mismatch
# so we squeeze the array to make sure this doesn't happen.
if isinstance(value,Sound) and channel!=slice(None):
value=value.squeeze()
return np.asarray(self).__setitem__((key,channel),value) # returns None
if not slicedims.all():
raise DimensionMismatchError('Slicing',
*[get_unit(d) for d in sliceattr])
if key.__getattribute__('step') is not None:
# resampling?
raise NotImplementedError
start = key.start
stop = key.stop or self.duration
if (start is not None and start<0*ms) or stop > self.duration:
raise IndexError('Slice bigger than Sound object')
if start is not None: start = int(np.rint(start*self.samplerate))
stop = int(np.rint(stop*self.samplerate))
return self.__setitem__((slice(start,stop),channel),value)
[docs] def extended(self, duration):
'''
Returns the Sound with length extended by the given duration, which
can be the number of samples or a length of time in seconds.
'''
duration = get_duration(duration, self.samplerate)
return self[:self.nsamples+duration]
[docs] def resized(self, L):
'''
Returns the Sound with length extended (or contracted) to have L samples.
'''
if L == len(self):
return self
elif L < len(self):
return Sound(self[:L, :], samplerate=self.samplerate)
else:
padding = np.zeros((L - len(self), self.nchannels))
return Sound(np.concatenate((self, padding)), samplerate=self.samplerate)
[docs] def shifted(self, duration, fractional=False, filter_length=2048):
'''
Returns the sound delayed by duration, which can be the number of
samples or a length of time in seconds. Normally, only integer
numbers of samples will be used, but if ``fractional=True`` then
the filtering method from
`http://www.labbookpages.co.uk/audio/beamforming/fractionalDelay.html <http://www.labbookpages.co.uk/audio/beamforming/fractionalDelay.html>`__
will be used (introducing some small numerical errors). With this
method, you can specify the ``filter_length``, larger values are
slower but more accurate, especially at higher frequencies. The large
default value of 2048 samples provides good accuracy for sounds with
frequencies above 20 Hz, but not for lower frequency sounds. If you are
restricted to high frequency sounds, a smaller value will be more
efficient. Note that if ``fractional=True`` then
``duration`` is assumed to be a time not a number of samples.
'''
if not fractional:
if not isinstance(duration, int):
duration = int(np.rint(duration*self.samplerate))
if duration>=0:
y = np.vstack((np.zeros((duration, self.nchannels)), self))
return Sound(y, samplerate=self.samplerate)
else:
return self[-duration:, :]
else:
if self.nchannels>1:
sounds = [self.channel(i).shifted(duration, fractional=True, filter_length=filter_length) for i in range(self.nchannels)]
return Sound(np.hstack(sounds), samplerate=self.samplerate)
# Adapted from
# http://www.labbookpages.co.uk/audio/beamforming/fractionalDelay.html
delay = duration*self.samplerate
if delay>=0:
idelay = int(delay)
elif delay<0:
idelay = -int(-delay)
delay -= idelay
centre_tap = filter_length // 2
t = np.arange(filter_length)
x = t-delay
if abs(round(delay)-delay)<1e-10:
tap_weight = np.array(x==centre_tap, dtype=float)
else:
sinc = np.sin(np.pi*(x-centre_tap))/(np.pi*(x-centre_tap))
window = 0.54-0.46*np.cos(2.0*np.pi*(x+0.5)/filter_length) # Hamming window
tap_weight = window*sinc
if filter_length<256:
y = np.convolve(tap_weight, self.flatten())
else:
y = fftconvolve(tap_weight, self.flatten())
y = y[filter_length/2:-filter_length/2]
sound = Sound(y, self.samplerate)
sound = sound.shifted(idelay)
return sound
[docs] def repeat(self, n):
'''
Repeats the sound n times
'''
x = np.vstack((self,)*n)
return Sound(x, samplerate=self.samplerate)
@check_units(samplerate=Hz)
def resample(self, samplerate, resample_type='sinc_best'):
'''
Returns a resampled version of the sound.
'''
if not have_resample:
raise ImportError('Need samplerate package for resampling')
y = np.array(resample(self, float(samplerate / self.samplerate), resample_type),
dtype=np.float64)
return Sound(y, samplerate=samplerate)
def _init_mixer(self):
global _mixer_status
if _mixer_status==[-1,-1] or _mixer_status[0]!= self.nchannels or _mixer_status[1] != self.samplerate:
pygame.mixer.quit()
pygame.mixer.init(int(self.samplerate), -16, self.nchannels)
_mixer_status=[self.nchannels,self.samplerate]
[docs] def play(self, normalise=False, sleep=False):
'''
Plays the sound (normalised to avoid clipping if required). If
sleep=True then the function will wait until the sound has finished
playing before returning.
'''
global pygame, pygame_loaded
if not pygame_loaded:
try:
import pygame
except ImportError:
raise ImportError("Playing sounds requires the pygame module to be installed")
pygame_loaded = True
if self.nchannels>2:
raise ValueError("Can only play sounds with 1 or 2 channels.")
self._init_mixer()
if normalise:
a = np.amax(np.abs(self))
else:
a = 1
x = np.array((2 ** 15 - 1) * np.clip(self / a, -1, 1), dtype=np.int16)
if self.nchannels==1:
x.shape = x.size
# Make sure pygame receives an array in C-order
x = pygame.sndarray.make_sound(np.ascontiguousarray(x))
x.play()
if sleep:
time.sleep(float(self.duration))
[docs] def spectrogram(self, low=None, high=None, log_power=True, other = None, **kwds):
'''
Plots a spectrogram of the sound
Arguments:
``low=None``, ``high=None``
If these are left unspecified, it shows the full spectrogram,
otherwise it shows only between ``low`` and ``high`` in Hz.
``log_power=True``
If True the colour represents the log of the power.
``**kwds``
Are passed to Pylab's ``specgram`` command.
Returns the values returned by pylab's ``specgram``, namely
``(pxx, freqs, bins, im)`` where ``pxx`` is a 2D array of powers,
``freqs`` is the corresponding frequencies, ``bins`` are the time bins,
and ``im`` is the image axis.
'''
if self.nchannels>1:
raise ValueError('Can only plot spectrograms for mono sounds.')
if other is not None:
x = self.flatten()-other.flatten()
else:
x = self.flatten()
pxx, freqs, bins, im = plt.specgram(x, Fs=float(self.samplerate), **kwds)
if low is not None or high is not None:
restricted = True
if low is None:
low = 0*Hz
if high is None:
high = np.amax(freqs)*Hz
I = np.logical_and(low <= freqs, freqs <= high)
I2 = np.where(I)[0]
I2 = [max(min(I2) - 1, 0), min(max(I2) + 1, len(freqs) - 1)]
Z = pxx[I2[0]:I2[-1], :]
else:
restricted = False
Z = pxx
if log_power:
Z[Z < 1e-20] = 1e-20 # no zeros because we take logs
Z = 10 * np.log10(Z)
Z = np.flipud(Z)
if restricted:
plt.imshow(Z, extent=(0, np.amax(bins), freqs[I2[0]], freqs[I2[-1]]),
origin='upper', aspect='auto')
else:
plt.imshow(Z, extent=(0, np.amax(bins), freqs[0], freqs[-1]),
origin='upper', aspect='auto')
plt.xlabel('Time (s)')
plt.ylabel('Frequency (Hz)')
return (pxx, freqs*Hz, bins*second, im)
[docs] @check_units(low=Hz, high=Hz)
def spectrum(self, low=None, high=None, log_power=True, display=False):
'''
Returns the spectrum of the sound and optionally plots it.
Arguments:
``low``, ``high``
If these are left unspecified, it shows the full spectrum,
otherwise it shows only between ``low`` and ``high`` in Hz.
``log_power=True``
If True it returns the log of the power.
``display=False``
Whether to plot the output.
Returns ``(Z, freqs, phase)``
where ``Z`` is a 1D array of powers, ``freqs`` is the corresponding
frequencies, ``phase`` is the unwrapped phase of spectrum.
'''
if self.nchannels>1:
raise ValueError('Can only plot spectrum for mono sounds.')
# Flatten array, fft operates on the last axis by default
sp = fft(np.array(self).flatten())
freqs = np.array(range(len(sp)), dtype=np.float64) / len(sp) * np.float64(self.samplerate)
pxx = abs(sp) ** 2
phase = np.unwrap(np.mod(np.angle(sp), 2 * np.pi))
if low is not None or high is not None:
restricted = True
if low is None:
low = 0*Hz
if high is None:
high = np.amax(freqs)*Hz
I = np.logical_and(low <= freqs, freqs <= high)
I2 = np.where(I)[0]
Z = pxx[I2]
freqs = freqs[I2]
phase = phase[I2]
else:
restricted = False
Z = pxx
if log_power:
Z[Z < 1e-20] = 1e-20 # no zeros because we take logs
Z = 10 * np.log10(Z)
if display:
Zp = Z[freqs>0]
phasep = phase[freqs>0]
freqsp = freqs[freqs>0]
plt.subplot(211)
plt.semilogx(freqsp, Zp)
plt.grid()
plt.xlim((freqsp[0], freqsp[-1]))
plt.xlabel('Frequency (Hz)')
plt.ylabel('Power (dB/Hz)') if log_power else plt.ylabel('Power')
plt.subplot(212)
plt.semilogx(freqsp, phasep)
plt.grid()
plt.xlim((freqsp[0], freqsp[-1]))
plt.xlabel('Frequency (Hz)')
plt.ylabel('Phase (rad)')
plt.tight_layout()
return (Z, freqs*Hz, phase)
def get_level(self):
'''
Returns level in dB SPL (RMS) assuming array is in Pascals.
In the case of multi-channel sounds, returns an array of levels
for each channel, otherwise returns a float.
'''
if self.nchannels==1:
rms_value = np.sqrt(np.mean((np.asarray(self)-np.mean(np.asarray(self)))**2))
rms_dB = 20.0*np.log10(rms_value/2e-5)
return rms_dB*dB
else:
return np.array(tuple(self.channel(i).get_level() for i in range(self.nchannels)))
def set_level(self, level):
'''
Sets level in dB SPL (RMS) assuming array is in Pascals. ``level``
should be a value in dB, or a tuple of levels, one for each channel.
'''
rms_dB = self.get_level()
if self.nchannels>1:
level = np.array(level)
if level.size==1:
level = level.repeat(self.nchannels)
level = np.reshape(level, (1, self.nchannels))
rms_dB = np.reshape(rms_dB, (1, self.nchannels))
else:
if not isinstance(level, dB_type):
raise dB_error('Must specify level in dB')
rms_dB = float(rms_dB)
level = float(level)
gain = 10**((level-rms_dB)/20.)
self *= gain
level = property(fget=get_level, fset=set_level, doc='''
Can be used to get or set the level of a sound, which should be in dB.
For single channel sounds a value in dB is used, for multiple channel
sounds a value in dB can be used for setting the level (all channels
will be set to the same level), or a list/tuple/array of levels. It
is assumed that the unit of the sound is Pascals.
''')
[docs] def atlevel(self, level):
'''
Returns the sound at the given level in dB SPL (RMS) assuming array is
in Pascals. ``level`` should be a value in dB, or a tuple of levels,
one for each channel.
'''
newsound = self.copy()
newsound.level = level
return newsound
def get_maxlevel(self):
return np.amax(self.level)*dB
def set_maxlevel(self, level):
self.level += level-self.maxlevel
maxlevel = property(fget=get_maxlevel, fset=set_maxlevel, doc='''
Can be used to set or get the maximum level of a sound. For mono
sounds, this is the same as the level, but for multichannel sounds
it is the maximum level across the channels. Relative level differences
will be preserved. The specified level should be a value in dB, and it
is assumed that the unit of the sound is Pascals.
''')
[docs] def atmaxlevel(self, level):
'''
Returns the sound with the maximum level across channels set to the
given level. Relative level differences will be preserved. The specified
level should be a value in dB and it is assumed that the unit of the
sound is Pascals.
'''
newsound = self.copy()
newsound.maxlevel = level
return newsound
[docs] def ramp(self, when='onset', duration=10*ms, envelope=None, inplace=True):
'''
Adds a ramp on/off to the sound
``when='onset'``
Can take values 'onset', 'offset' or 'both'
``duration=10*ms``
The time over which the ramping happens
``envelope``
A ramping function, if not specified uses ``sin(pi*t/2)**2``. The
function should be a function of one variable ``t`` ranging from
0 to 1, and should increase from ``f(0)=0`` to ``f(0)=1``. The
reverse is applied for the offset ramp.
``inplace``
Whether to apply ramping to current sound or return a new array.
'''
when = when.lower().strip()
if envelope is None: envelope = lambda t:np.sin(np.pi * t / 2) ** 2
if not isinstance(duration, int):
sz = int(np.rint(duration * self.samplerate))
else:
sz = duration
multiplier = envelope(np.reshape(np.linspace(0.0, 1.0, sz), (sz, 1)))
if inplace:
target = self
else:
target = Sound(np.copy(self), self.samplerate)
if when == 'onset' or when == 'both':
target[:sz, :] *= multiplier
if when == 'offset' or when == 'both':
target[target.nsamples-sz:, :] *= multiplier[::-1]
return target
[docs] @check_units(duration=second)
def ramped(self, when='onset', duration=10*ms, envelope=None):
'''
Returns a ramped version of the sound (see :meth:`Sound.ramp`).
'''
return self.ramp(when=when, duration=duration, envelope=envelope, inplace=False)
def fft(self,n=None):
'''
Performs an n-point FFT on the sound object, that is an array of the same size containing the DFT of each
channel. n defaults to the number of samples of the sound, but can be changed manually setting the ``n`` keyword
argument
'''
if n is None:
n=self.shape[0]
res=np.zeros(n,self.nchannels)
for i in range(self.nchannels):
res[:,i]=fft(np.asarray(self)[:,i].flatten(),n=n)
return res
[docs] @staticmethod
@check_units(frequency=Hz, duration=second, samplerate=Hz)
def tone(frequency, duration, phase=0, samplerate=None, nchannels=1):
'''
Returns a pure tone at frequency for duration, using the default
samplerate or the given one. The ``frequency`` and ``phase`` parameters
can be single values, in which case multiple channels can be
specified with the ``nchannels`` argument, or they can be sequences
(lists/tuples/arrays) in which case there is one frequency or phase for
each channel.
'''
samplerate = get_samplerate(samplerate)
duration = get_duration(duration, samplerate)
frequency = np.asarray(frequency)*Hz
phase = np.array(phase)
if frequency.size>nchannels and nchannels==1:
nchannels = frequency.size
if phase.size>nchannels and nchannels==1:
nchannels = phase.size
if frequency.size==nchannels:
frequency.shape = (1, nchannels)
if phase.size==nchannels:
phase.shape =(nchannels, 1)
t = np.arange(0, duration, 1)/samplerate
t.shape = (t.size, 1) # ensures C-order (in contrast to tile(...).T )
x = np.sin(phase + 2.0 * np.pi * frequency * np.tile(t, (1, nchannels)))
return Sound(x, samplerate)
[docs] @staticmethod
@check_units(f0=Hz, duration=second, samplerate=Hz)
def harmoniccomplex(f0, duration, amplitude=1, phase=0, samplerate=None, nchannels=1):
'''
Returns a harmonic complex composed of pure tones at integer multiples
of the fundamental frequency ``f0``.
The ``amplitude`` and
``phase`` keywords can be set to either a single value or an
array of values. In the former case the value is set for all
harmonics, and harmonics up to the sampling frequency are
generated. In the latter each harmonic parameter is set
separately, and the number of harmonics generated corresponds
to the length of the array.
'''
samplerate=get_samplerate(samplerate)
phases = np.array(phase).flatten()
amplitudes = np.array(amplitude).flatten()
if len(phases)>1 or len(amplitudes)>1:
if (len(phases)>1 and len(amplitudes)>1) and (len(phases) != len(amplitudes)):
raise ValueError('Please specify the same number of phases and amplitudes')
Nharmonics = max(len(phases),len(amplitudes))
else:
Nharmonics = int(np.floor( samplerate/(2*f0) ) )
if len(phases) == 1:
phases = np.tile(phase, Nharmonics)
if len(amplitudes) == 1:
amplitudes = np.tile(amplitude, Nharmonics)
x = amplitudes[0]*tone(f0, duration, phase = phases[0],
samplerate = samplerate, nchannels = nchannels)
for i in range(1,Nharmonics):
x += amplitudes[i]*tone((i+1)*f0, duration, phase = phases[i],
samplerate = samplerate, nchannels = nchannels)
return Sound(x,samplerate)
[docs] @staticmethod
@check_units(duration=second, samplerate=Hz)
def whitenoise(duration, samplerate=None, nchannels=1):
'''
Returns a white noise. If the samplerate is not specified, the global
default value will be used.
'''
samplerate = get_samplerate(samplerate)
duration = get_duration(duration,samplerate)
x = randn(duration,nchannels)
return Sound(x, samplerate)
[docs] @staticmethod
@check_units(duration=second, samplerate=Hz)
def powerlawnoise(duration, alpha, samplerate=None, nchannels=1,normalise=False):
'''
Returns a power-law noise for the given duration. Spectral density per unit of bandwidth scales as 1/(f**alpha).
Sample usage::
noise = powerlawnoise(200*ms, 1, samplerate=44100*Hz)
Arguments:
``duration``
Duration of the desired output.
``alpha``
Power law exponent.
``samplerate``
Desired output samplerate
'''
samplerate = get_samplerate(samplerate)
duration = get_duration(duration,samplerate)
# Adapted from http://www.eng.ox.ac.uk/samp/software/powernoise/powernoise.m
# Little MA et al. (2007), "Exploiting nonlinear recurrence and fractal
# scaling properties for voice disorder detection", Biomed Eng Online, 6:23
n=duration
n2=int(n/2)
f=np.array(fftfreq(n,d=1.0/samplerate), dtype=complex)
f.shape=(len(f),1)
f=np.tile(f,(1,nchannels))
if n%2==1:
z=(randn(n2,nchannels)+1j*randn(n2,nchannels))
a2=1.0/( f[1:(n2+1),:]**(alpha/2.0))
else:
z=(randn(n2-1,nchannels)+1j*randn(n2-1,nchannels))
a2=1.0/(f[1:n2,:]**(alpha/2.0))
a2*=z
if n%2==1:
d=np.vstack((np.ones((1,nchannels)),a2,
np.flipud(np.conj(a2))))
else:
d=np.vstack((np.ones((1,nchannels)),a2,
1.0/( abs(f[n2])**(alpha/2.0) )*
randn(1,nchannels),
np.flipud(np.conj(a2))))
x=np.real(ifft(d.flatten()))
x.shape=(n,nchannels)
if normalise:
for i in range(nchannels):
#x[:,i]=normalise_rms(x[:,i])
x[:,i] = ((x[:,i] - np.amin(x[:,i]))/(np.amax(x[:,i]) - np.amin(x[:,i])) - 0.5) * 2;
return Sound(x,samplerate)
[docs] @staticmethod
@check_units(duration=second, samplerate=Hz)
def pinknoise(duration, samplerate=None, nchannels=1, normalise=False):
'''
Returns pink noise, i.e :func:`powerlawnoise` with alpha=1
'''
return Sound.powerlawnoise(duration, 1.0, samplerate=samplerate,
nchannels=nchannels, normalise=normalise)
[docs] @staticmethod
@check_units(duration=second, samplerate=Hz)
def brownnoise(duration, samplerate=None, nchannels=1, normalise=False):
'''
Returns brown noise, i.e :func:`powerlawnoise` with alpha=2
'''
return Sound.powerlawnoise(duration, 2.0, samplerate=samplerate,
nchannels=nchannels, normalise=normalise)
@staticmethod
@check_units(duration=second, samplerate=Hz)
def irns(delay, gain, niter, duration, samplerate=None, nchannels=1):
'''
Returns an IRN_S noise. The iterated ripple noise is obtained trough
a cascade of gain and delay filtering.
For more details: see Yost 1996 or chapter 15 in Hartman Sound Signal Sensation.
'''
if nchannels!=1:
raise ValueError("nchannels!=1 not supported.")
samplerate = get_samplerate(samplerate)
noise=Sound.whitenoise(duration)
splrate=noise.samplerate
x=np.array(noise.T)[0]
IRNfft=fft(x)
Nspl,spl_dur=len(IRNfft),float(1.0/splrate)
w=2*np.pi*fftfreq(Nspl,spl_dur)
d=float(delay)
for k in range(1,niter+1):
nchoosek=factorial(niter)/(factorial(niter-k)*factorial(k))
IRNfft+=nchoosek*(gain**k)*IRNfft*np.exp(-1j*w*k*d)
IRNadd = ifft(IRNfft)
x=np.real(IRNadd)
return Sound(x,samplerate)
@staticmethod
@check_units(duration=second, samplerate=Hz)
def irno(delay, gain, niter, duration, samplerate=None, nchannels=1):
'''
Returns an IRN_O noise. The iterated ripple noise is obtained many attenuated and
delayed version of the original broadband noise.
For more details: see Yost 1996 or chapter 15 in Hartman Sound Signal Sensation.
'''
samplerate = get_samplerate(samplerate)
noise=Sound.whitenoise(duration)
splrate=noise.samplerate
x=np.array(noise.T)[0]
IRNadd=fft(x)
Nspl,spl_dur=len(IRNadd),float(1.0/splrate)
w=2*np.pi*fftfreq(Nspl,spl_dur)
d=float(delay)
for k in range(1,niter+1):
IRNadd+=(gain**k)*IRNadd*np.exp(-1j*w*k*d)
IRNadd = ifft(IRNadd)
x=np.real(IRNadd)
return Sound(x, samplerate)
[docs] @staticmethod
@check_units(samplerate=Hz)
def click(duration=1, peak=None, samplerate=None, nchannels=1):
'''
Returns a click of the given duration (in time or samples)
If ``peak`` is not specified, the amplitude will be 1, otherwise
``peak`` refers to the peak dB SPL of the click, according to the
formula ``28e-6*10**(peak/20.)``.
'''
samplerate = get_samplerate(samplerate)
duration = get_duration(duration, samplerate)
if peak is not None:
if not isinstance(peak, dB_type):
raise dB_error('Peak must be given in dB')
amplitude = 28e-6*10**(float(peak)/20.)
else:
amplitude = 1
x = amplitude*np.ones((duration,nchannels))
return Sound(x, samplerate)
[docs] @staticmethod
@check_units(duration=second, samplerate=Hz)
def clicks(duration, n, interval, peak=None, samplerate=None, nchannels=1):
'''
Returns a series of n clicks (see :func:`click`) separated by interval.
'''
oneclick = Sound.click(duration, peak=peak, samplerate=samplerate)
return oneclick[slice(None, interval)].repeat(n)
[docs] @staticmethod
@check_units(duration=second, samplerate=Hz)
def silence(duration, samplerate=None, nchannels=1):
'''
Returns a silent, zero sound for the given duration. Set nchannels to set the number of channels.
'''
samplerate = get_samplerate(samplerate)
duration = get_duration(duration,samplerate)
x=np.zeros((duration,nchannels))
return Sound(x, samplerate)
[docs] @staticmethod
@check_units(pitch=Hz, duration=second, samplerate=Hz)
def vowel(vowel=None, formants=None, pitch=100*Hz, duration=1*second,
samplerate=None, nchannels=1):
'''
Returns an artifically created spoken vowel sound (following the
source-filter model of speech production) with a given ``pitch``.
The vowel can be specified by either providing ``vowel`` as a string
('a', 'i' or 'u') or by setting ``formants`` to a sequence of formant
frequencies.
The returned sound is normalized to a maximum amplitude of 1.
The implementation is based on the MakeVowel function written by Richard
O. Duda, part of the Auditory Toolbox for Matlab by Malcolm Slaney:
https://engineering.purdue.edu/~malcolm/interval/1998-010/
'''
samplerate = get_samplerate(samplerate)
duration = get_duration(duration, samplerate)
if not (vowel or formants):
raise ValueError('Need either a vowel or a list of formants')
elif (vowel and formants):
raise ValueError('Cannot use both vowel and formants')
if vowel:
if vowel == 'a' or vowel == '/a/':
formants = (730.0*Hz, 1090.0*Hz, 2440.0*Hz)
elif vowel == 'i' or vowel == '/i/':
formants = (270.0*Hz, 2290.0*Hz, 3010.0*Hz)
elif vowel == 'u' or vowel == '/u/':
formants = (300.0*Hz, 870.0*Hz, 2240.0*Hz)
else:
raise ValueError('Unknown vowel: "%s"' % (vowel))
points = np.arange(0, duration - 1, samplerate / pitch)
indices = np.floor(points).astype(int)
y = np.zeros(duration)
y[indices] = (indices + 1) - points
y[indices + 1] = points - indices
# model the sound source (periodic glottal excitation)
a = np.exp(-250.*Hz * 2 * np.pi / samplerate)
y = lfilter([1],[1, 0, -a * a], y.copy())
# model the filtering by the vocal tract
bandwidth = 50.*Hz
for f in formants:
cft = f / samplerate
q = f / bandwidth
rho = np.exp(-np.pi * cft / q)
theta = 2 * np.pi * cft * np.sqrt(1 - 1/(4.0 * q * q))
a2 = -2 * rho * np.cos(theta)
a3 = rho * rho
y = lfilter([1 + a2 + a3], [1, a2, a3], y.copy())
#normalize sound
data = y / np.max(np.abs(y), axis=0)
data.shape = (data.size, 1)
return Sound(np.tile(data, (nchannels, 1)), samplerate=samplerate)
[docs] @staticmethod
def sequence(*args, **kwds):
'''
Returns the sequence of sounds in the list sounds joined together
'''
samplerate = kwds.pop('samplerate', None)
if len(kwds):
raise TypeError('Unexpected keywords to function sequence()')
sounds = []
for arg in args:
if isinstance(arg, (list, tuple)):
sounds.extend(arg)
else:
sounds.append(arg)
if samplerate is None:
samplerate = max(s.samplerate for s in sounds)
rates = np.unique([int(s.samplerate) for s in sounds])
if len(rates)>1:
sounds = tuple(s.resample(samplerate) for s in sounds)
x = np.vstack(sounds)
return Sound(x, samplerate)
[docs] def save(self, filename, normalise=False, samplewidth=2):
'''
Save the sound as a WAV.
If the normalise keyword is set to True, the amplitude of the sound will be
normalised to 1. The samplewidth keyword can be 1 or 2 to save the data as
8 or 16 bit samples.
'''
ext = filename.split('.')[-1].lower()
if ext=='wav':
import wave as sndmodule
elif ext=='aiff' or ext=='aifc':
import aifc as sndmodule
raise NotImplementedError('Can only save as wav soundfiles')
else:
raise NotImplementedError('Can only save as wav soundfiles')
if samplewidth != 1 and samplewidth != 2:
raise ValueError('Sample width must be 1 or 2 bytes.')
scale = {2:2**15-1, 1: 2**7-1}[samplewidth]
if ext=='wav':
meanval = {2:0, 1:2**7}[samplewidth]
dtype = {2:np.int16, 1:np.uint8}[samplewidth]
typecode = {2:'h', 1:'B'}[samplewidth]
else:
meanval = {2:0, 1:2**7}[samplewidth]
dtype = {2:np.int16, 1:np.uint8}[samplewidth]
typecode = {2:'h', 1:'B'}[samplewidth]
w = sndmodule.open(filename, 'wb')
w.setnchannels(self.nchannels)
w.setsampwidth(samplewidth)
w.setframerate(int(self.samplerate))
x = np.array(self,copy=True)
am=np.amax(x)
z = np.zeros(x.shape[0]*self.nchannels, dtype=x.dtype)
x.shape=(x.shape[0],self.nchannels)
for i in range(self.nchannels):
if normalise:
x[:,i] /= am
x[:,i] = (x[:,i]) * scale + meanval
z[i::self.nchannels] = x[::1,i]
data = np.array(z, dtype=dtype)
data = pyarray.array(typecode, data)
try:
out = data.tobytes()
except AttributeError:
out = data.tostring()
w.writeframes(out)
w.close()
[docs] @staticmethod
def load(filename):
'''
Load the file given by filename and returns a Sound object.
Sound file can be either a .wav or a .aif file.
'''
ext = filename.split('.')[-1].lower()
if ext=='wav':
import wave as sndmodule
elif ext=='aif' or ext=='aiff':
import aifc as sndmodule
else:
raise NotImplementedError('Can only load aif or wav soundfiles')
wav = sndmodule.open(filename, "r")
nchannels, sampwidth, framerate, nframes, comptype, compname = wav.getparams()
frames = wav.readframes(nframes * nchannels)
typecode = {2:'h', 1:'B'}[sampwidth]
out = np.frombuffer(frames, dtype=np.dtype(typecode))
scale = {2:2 ** 15, 1:2 ** 7-1}[sampwidth]
meanval = {2:0, 1:2**7}[sampwidth]
data = np.zeros((nframes, nchannels))
for i in range(nchannels):
data[:, i] = out[i::nchannels]
data[:, i] /= scale
data[:, i] -= meanval
return Sound(data, samplerate=framerate*Hz)
def __repr__(self):
arrayrep = repr(np.asarray(self))
arrayrep = '\n'.join(' '+l for l in arrayrep.split('\n'))
return 'Sound(\n'+arrayrep+',\n '+repr(self.samplerate)+')'
def __str__(self):
return 'Sound duration %s, channels %s, samplerate %s' % (self.duration,
self.nchannels,
self.samplerate)
def __reduce__(self):
return (_load_Sound_from_pickle, (np.asarray(self), float(self.samplerate)))
def _load_Sound_from_pickle(arr, samplerate):
return Sound(arr, samplerate=samplerate*Hz)
[docs]def play(*sounds, **kwds):
'''
Plays a sound or sequence of sounds. For example::
play(sound)
play(sound1, sound2)
play([sound1, sound2, sound3])
If ``normalise=True``, the sequence of sounds will be normalised to the
maximum range (-1 to 1), and if ``sleep=True`` the function will wait
until the sounds have finished playing before returning.
'''
normalise = kwds.pop('normalise', False)
sleep = kwds.pop('sleep', False)
if len(kwds):
raise TypeError('Unexpected keyword arguments to function play()')
sound = sequence(*sounds)
sound.play(normalise=normalise, sleep=sleep)
play.__doc__ = Sound.play.__doc__
[docs]def savesound(sound, filename, normalise=False, samplewidth=2):
sound.save(filename, normalise=normalise, samplewidth=samplewidth)
savesound.__doc__ = Sound.save.__doc__
def get_duration(duration,samplerate):
if not isinstance(duration, int):
duration = int(np.rint(duration * samplerate))
return duration
whitenoise = Sound.whitenoise
powerlawnoise = Sound.powerlawnoise
pinknoise = Sound.pinknoise
brownnoise = Sound.brownnoise
irns = Sound.irns
irno = Sound.irno
tone = Sound.tone
harmoniccomplex = Sound.harmoniccomplex
click = Sound.click
clicks = Sound.clicks
silence = Sound.silence
sequence = Sound.sequence
vowel = Sound.vowel
loadsound = Sound.load