Source code for sox.transform

#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
Python wrapper around the SoX library.
This module requires that SoX is installed.
'''

from __future__ import print_function

import os
import random
from pathlib import Path
from typing import List, Optional, Dict, Union, Tuple

import numpy as np
from typing_extensions import Literal

from . import file_info
from .core import ENCODING_VALS, EncodingValue
from .core import SoxError
from .core import VALID_FORMATS
from .core import is_number
from .core import play
from .core import sox
from .log import logger

# Verbosity levels accepted by Transformer.set_globals; the chosen level is
# forwarded to SoX as the ``-V<level>`` global argument.
VERBOSITY_VALS = [0, 1, 2, 3, 4]

# Maps a numpy sample dtype to the SoX file-type string used when audio is
# exchanged with SoX as a raw array (see Transformer._parse_inputs and
# Transformer.build_array).
ENCODINGS_MAPPING = {
    np.int16: 's16',
    np.int8: 's8',
    np.float32: 'f32',
    np.float64: 'f64',
}

# Allowed gain-type names. NOTE(review): not referenced in the visible part
# of this file; presumably used by an effect defined further down.
GainType = Literal['amplitude', 'power', 'db']


class Transformer:
    '''Audio file transformer.

    Collects a chain of SoX effects and renders the result to an output
    file or to a numpy array.

    Methods
    -------
    set_globals
        Overwrite the default global arguments.
    build
        Execute the current chain of commands to create an output file.
    build_file
        Alias of build.
    build_array
        Execute the current chain of commands to create an output array.

    '''

    def __init__(self):
        '''
        Attributes
        ----------
        input_format : dict
            Input file format settings; turned into SoX arguments when the
            transformer is built.
        output_format : dict
            Output file format settings; turned into SoX arguments when the
            transformer is built.
        effects : list of str
            Effects arguments that will be passed to SoX.
        effects_log : list of str
            Ordered sequence of the names of the effects applied.
        globals : list of str
            Global arguments that will be passed to SoX.

        '''
        self.effects: List[str] = []
        self.effects_log: List[str] = []
        self.input_format: Dict = {}
        self.output_format: Dict = {}
        self.globals: List[str] = []
        # Populate ``globals`` with the default SoX global arguments.
        self.set_globals()
def set_globals(self,
                dither: bool = False,
                guard: bool = False,
                multithread: bool = False,
                replay_gain: bool = False,
                verbosity: int = 2):
    '''Set SoX's global arguments, replacing any set previously.

    When this method is never called explicitly, the globals hold the
    values produced by its defaults.

    Parameters
    ----------
    dither : bool, default=False
        If True, dithering is left enabled (SoX applies it for low bit
        depths). If False, ``-D`` is passed to disable it.
    guard : bool, default=False
        If True, invokes the gain effect to guard against clipping.
    multithread : bool, default=False
        If True, each channel is processed in parallel.
    replay_gain : bool, default=False
        If True, applies replay-gain adjustment to input files.
    verbosity : int, default=2
        SoX's verbosity level. One of:
            * 0 : No messages are shown at all.
            * 1 : Only error messages are shown.
            * 2 : Warning messages are also shown.
            * 3 : Descriptions of SoX's processing phases are also shown.
            * 4 : Messages to help with debugging SoX are also shown.

    Returns
    -------
    self
        The transformer itself, so calls can be chained.

    Raises
    ------
    ValueError
        If a flag is not a boolean or the verbosity is not supported.

    '''
    boolean_flags = (
        ('dither', dither),
        ('guard', guard),
        ('multithread', multithread),
        ('replay_gain', replay_gain),
    )
    for flag_name, flag_value in boolean_flags:
        if not isinstance(flag_value, bool):
            raise ValueError('{} must be a boolean.'.format(flag_name))

    if verbosity not in VERBOSITY_VALS:
        raise ValueError(
            'Invalid value for VERBOSITY. Must be one {}'.format(
                VERBOSITY_VALS)
        )

    # Each optional switch contributes either its arguments or nothing;
    # note that -D is emitted when dithering is *not* requested.
    sox_globals = (
        ([] if dither else ['-D'])
        + (['-G'] if guard else [])
        + (['--multi-threaded'] if multithread else [])
        + (['--replay-gain', 'track'] if replay_gain else [])
        + ['-V{}'.format(verbosity)]
    )

    self.globals = sox_globals
    return self
def _validate_input_format(self, input_format): '''Private helper function for validating input formats ''' file_type = input_format.get('file_type') rate = input_format.get('rate') bits = input_format.get('bits') channels = input_format.get('channels') encoding = input_format.get('encoding') ignore_length = input_format.get('ignore_length', False) if file_type not in VALID_FORMATS + [None]: raise ValueError( 'Invalid file_type. Must be one of {}'.format(VALID_FORMATS) ) if not is_number(rate) and rate is not None: raise ValueError('rate must be a float or None') if rate is not None and rate <= 0: raise ValueError('rate must be a positive number') if not isinstance(bits, int) and bits is not None: raise ValueError('bits must be an int or None') if bits is not None and bits <= 0: raise ValueError('bits must be a positive number') if not isinstance(channels, int) and channels is not None: raise ValueError('channels must be an int or None') if channels is not None and channels <= 0: raise ValueError('channels must be a positive number') if encoding not in ENCODING_VALS + [None]: raise ValueError( 'Invalid encoding {}. 
Must be one of {}'.format( encoding, ENCODING_VALS) ) if not isinstance(ignore_length, bool): raise ValueError('ignore_length must be a boolean') def _input_format_args(self, input_format): '''Private helper function for set_input_format ''' self._validate_input_format(input_format) file_type = input_format.get('file_type') rate = input_format.get('rate') bits = input_format.get('bits') channels = input_format.get('channels') encoding = input_format.get('encoding') ignore_length = input_format.get('ignore_length', False) input_format_args = [] if file_type is not None: input_format_args.extend(['-t', '{}'.format(file_type)]) if rate is not None: input_format_args.extend(['-r', '{:f}'.format(rate)]) if bits is not None: input_format_args.extend(['-b', '{}'.format(bits)]) if channels is not None: input_format_args.extend(['-c', '{}'.format(channels)]) if encoding is not None: input_format_args.extend(['-e', '{}'.format(encoding)]) if ignore_length: input_format_args.append('--ignore-length') return input_format_args
def set_input_format(self,
                     file_type: Optional[str] = None,
                     rate: Optional[float] = None,
                     bits: Optional[int] = None,
                     channels: Optional[int] = None,
                     encoding: Optional[EncodingValue] = None,
                     ignore_length: bool = False):
    '''Sets input file format arguments.

    This is primarily useful when dealing with audio files without a file
    extension. Overwrites any previously set input file arguments.

    If this function is not explicitly called the input format is inferred
    from the file extension or the file's header.

    Parameters
    ----------
    file_type : str or None, default=None
        The file type of the input audio file. Should be the same as what
        the file extension would be, for ex. 'mp3' or 'wav'.
    rate : float or None, default=None
        The sample rate of the input audio file. If None the sample rate
        is inferred.
    bits : int or None, default=None
        The number of bits per sample. If None, the number of bits per
        sample is inferred.
    channels : int or None, default=None
        The number of channels in the audio file. If None the number of
        channels is inferred.
    encoding : str or None, default=None
        The audio encoding type. Sometimes needed with file-types that
        support more than one encoding type. One of:
            * signed-integer : PCM data stored as signed ('two's
              complement') integers. Commonly used with a 16 or 24-bit
              encoding size. A value of 0 represents minimum signal power.
            * unsigned-integer : PCM data stored as unsigned integers.
              Commonly used with an 8-bit encoding size. A value of 0
              represents maximum signal power.
            * floating-point : PCM data stored as IEEE 754 single
              precision (32-bit) or double precision (64-bit)
              floating-point ('real') numbers. A value of 0 represents
              minimum signal power.
            * a-law : International telephony standard for logarithmic
              encoding to 8 bits per sample. Precision roughly equivalent
              to 13-bit PCM.
            * u-law : North American telephony standard for logarithmic
              encoding to 8 bits per sample (a.k.a. mu-law). Precision
              roughly equivalent to 14-bit PCM.
            * oki-adpcm : OKI (a.k.a. VOX, Dialogic, or Intel) 4-bit
              ADPCM; precision roughly equivalent to 12-bit PCM.
            * ima-adpcm : IMA (a.k.a. DVI) 4-bit ADPCM; precision roughly
              equivalent to 13-bit PCM.
            * ms-adpcm : Microsoft 4-bit ADPCM; precision roughly
              equivalent to 14-bit PCM.
            * gsm-full-rate : GSM's original 13kbps 'Full Rate' audio
              format. It is usually CPU-intensive to work with GSM audio.
    ignore_length : bool, default=False
        If True, overrides an (incorrect) audio length given in an audio
        file's header. SoX will then keep reading audio until it reaches
        the end of the input file.

    Returns
    -------
    self
        The transformer itself, so the call can be chained like the other
        configuration methods (set_globals, clear_effects).

    Raises
    ------
    ValueError
        If any of the supplied values fails validation.

    '''
    input_format = {
        'file_type': file_type,
        'rate': rate,
        'bits': bits,
        'channels': channels,
        'encoding': encoding,
        'ignore_length': ignore_length
    }
    # Validate before storing so an invalid call leaves the previous
    # settings untouched.
    self._validate_input_format(input_format)
    self.input_format = input_format
    return self
def _validate_output_format(self, output_format): '''Private helper function for validating input formats ''' file_type = output_format.get('file_type') rate = output_format.get('rate') bits = output_format.get('bits') channels = output_format.get('channels') encoding = output_format.get('encoding') comments = output_format.get('comments') append_comments = output_format.get('append_comments', True) if file_type not in VALID_FORMATS + [None]: raise ValueError( 'Invalid file_type. Must be one of {}'.format(VALID_FORMATS) ) if not is_number(rate) and rate is not None: raise ValueError('rate must be a float or None') if rate is not None and rate <= 0: raise ValueError('rate must be a positive number') if not isinstance(bits, int) and bits is not None: raise ValueError('bits must be an int or None') if bits is not None and bits <= 0: raise ValueError('bits must be a positive number') if not isinstance(channels, int) and channels is not None: raise ValueError('channels must be an int or None') if channels is not None and channels <= 0: raise ValueError('channels must be a positive number') if encoding not in ENCODING_VALS + [None]: raise ValueError( 'Invalid encoding. 
Must be one of {}'.format(ENCODING_VALS) ) if comments is not None and not isinstance(comments, str): raise ValueError('comments must be a string or None') if not isinstance(append_comments, bool): raise ValueError('append_comments must be a boolean') def _output_format_args(self, output_format): '''Private helper function for set_output_format ''' self._validate_output_format(output_format) file_type = output_format.get('file_type') rate = output_format.get('rate') bits = output_format.get('bits') channels = output_format.get('channels') encoding = output_format.get('encoding') comments = output_format.get('comments') append_comments = output_format.get('append_comments', True) output_format_args = [] if file_type is not None: output_format_args.extend(['-t', '{}'.format(file_type)]) if rate is not None: output_format_args.extend(['-r', '{:f}'.format(rate)]) if bits is not None: output_format_args.extend(['-b', '{}'.format(bits)]) if channels is not None: output_format_args.extend(['-c', '{}'.format(channels)]) if encoding is not None: output_format_args.extend(['-e', '{}'.format(encoding)]) if comments is not None: if append_comments: output_format_args.extend(['--add-comment', comments]) else: output_format_args.extend(['--comment', comments]) return output_format_args
def set_output_format(self,
                      file_type: Optional[str] = None,
                      rate: Optional[float] = None,
                      bits: Optional[int] = None,
                      channels: Optional[int] = None,
                      encoding: Optional[EncodingValue] = None,
                      comments: Optional[str] = None,
                      append_comments: bool = True):
    '''Sets output file format arguments.

    These arguments will overwrite any format related arguments supplied
    by other effects (e.g. rate).

    If this function is not explicitly called the output format is
    inferred from the file extension or the file's header.

    Parameters
    ----------
    file_type : str or None, default=None
        The file type of the output audio file. Should be the same as what
        the file extension would be, for ex. 'mp3' or 'wav'.
    rate : float or None, default=None
        The sample rate of the output audio file. If None the sample rate
        is inferred.
    bits : int or None, default=None
        The number of bits per sample. If None, the number of bits per
        sample is inferred.
    channels : int or None, default=None
        The number of channels in the audio file. If None the number of
        channels is inferred.
    encoding : str or None, default=None
        The audio encoding type. Sometimes needed with file-types that
        support more than one encoding type. One of:
            * signed-integer : PCM data stored as signed ('two's
              complement') integers. Commonly used with a 16 or 24-bit
              encoding size. A value of 0 represents minimum signal power.
            * unsigned-integer : PCM data stored as unsigned integers.
              Commonly used with an 8-bit encoding size. A value of 0
              represents maximum signal power.
            * floating-point : PCM data stored as IEEE 754 single
              precision (32-bit) or double precision (64-bit)
              floating-point ('real') numbers. A value of 0 represents
              minimum signal power.
            * a-law : International telephony standard for logarithmic
              encoding to 8 bits per sample. Precision roughly equivalent
              to 13-bit PCM.
            * u-law : North American telephony standard for logarithmic
              encoding to 8 bits per sample (a.k.a. mu-law). Precision
              roughly equivalent to 14-bit PCM.
            * oki-adpcm : OKI (a.k.a. VOX, Dialogic, or Intel) 4-bit
              ADPCM; precision roughly equivalent to 12-bit PCM.
            * ima-adpcm : IMA (a.k.a. DVI) 4-bit ADPCM; precision roughly
              equivalent to 13-bit PCM.
            * ms-adpcm : Microsoft 4-bit ADPCM; precision roughly
              equivalent to 14-bit PCM.
            * gsm-full-rate : GSM's original 13kbps 'Full Rate' audio
              format. It is usually CPU-intensive to work with GSM audio.
    comments : str or None, default=None
        If not None, the string is added as a comment in the header of the
        output audio file. If None, no comments are added.
    append_comments : bool, default=True
        If True, comment strings are appended to SoX's default comments.
        If False, the supplied comment replaces the existing comment.

    Returns
    -------
    self
        The transformer itself, so the call can be chained like the other
        configuration methods (set_globals, clear_effects).

    Raises
    ------
    ValueError
        If any of the supplied values fails validation.

    '''
    output_format = {
        'file_type': file_type,
        'rate': rate,
        'bits': bits,
        'channels': channels,
        'encoding': encoding,
        'comments': comments,
        'append_comments': append_comments
    }
    # Validate before storing so an invalid call leaves the previous
    # settings untouched.
    self._validate_output_format(output_format)
    self.output_format = output_format
    return self
def clear_effects(self):
    '''Discard every queued effect together with the effects log.

    Both attributes are rebound to fresh empty lists (the previous list
    objects are left unmodified).

    Returns
    -------
    self
        The transformer itself, so calls can be chained.

    '''
    self.effects, self.effects_log = [], []
    return self
def _parse_inputs(self, input_filepath, input_array, sample_rate_in): '''Private helper function for parsing inputs to build and build_array Parameters ---------- input_filepath : str or None Either path to input audio file or None. input_array : np.ndarray or None A np.ndarray of an waveform with shape (n_samples, n_channels) or None sample_rate_in : int or None Sample rate of input_array or None Returns ------- input_format : dict Input format dictionary input_filepath : str Formatted input filepath. ''' if input_filepath is not None and input_array is not None: raise ValueError( "Only one of input_filepath and input_array may be specified" ) # set input parameters if input_filepath is not None: file_info.validate_input_file(input_filepath) input_format = self.input_format if input_format.get('channels') is None: input_format['channels'] = file_info.channels(input_filepath) elif input_array is not None: if not isinstance(input_array, np.ndarray): raise TypeError("input_array must be a numpy array or None") if sample_rate_in is None: raise ValueError( "sample_rate_in must be specified for array inputs" ) input_filepath = '-' input_format = { 'file_type': ENCODINGS_MAPPING[input_array.dtype.type], 'rate': sample_rate_in, 'bits': None, 'channels': ( input_array.shape[-1] if len(input_array.shape) > 1 else 1 ), 'encoding': None, 'ignore_length': False } else: raise ValueError( "One of input_filepath or input_array must be specified" ) return input_format, input_filepath
def build(self,
          input_filepath: Optional[Union[str, Path]] = None,
          output_filepath: Optional[Union[str, Path]] = None,
          input_array: Optional[np.ndarray] = None,
          sample_rate_in: Optional[float] = None,
          extra_args: Optional[List[str]] = None,
          return_output: bool = False):
    '''Given an input file or array, creates an output_file on disk by
    executing the current set of commands.

    Parameters
    ----------
    input_filepath : str, Path or None
        Either path to input audio file or None for array input.
    output_filepath : str or Path
        Path to desired output file. If a file already exists at
        the given path, the file will be overwritten.
        If '-n', no file is created. Must be provided.
    input_array : np.ndarray or None
        An np.ndarray of a waveform with shape (n_samples, n_channels).
        sample_rate_in must also be provided.
        If None, input_filepath must be specified.
    sample_rate_in : int
        Sample rate of input_array.
        This argument is ignored if input_array is None.
    extra_args : list or None, default=None
        If a list is given, these additional arguments are passed to SoX
        at the end of the list of effects.
        Don't use this argument unless you know exactly what you're doing!
    return_output : bool, default=False
        If True, returns the status and the information sent to stdout and
        stderr as a tuple (status, stdout, stderr).
        If False, returns True on success.

    Returns
    -------
    status : bool
        True on success (when return_output is False). When return_output
        is True the first element of the returned tuple is the SoX exit
        status, which is 0 on success.
    out : str (optional)
        Only returned when return_output is True; the stdout produced by
        sox.
    err : str (optional)
        Only returned when return_output is True; the stderr produced by
        sox.

    Raises
    ------
    ValueError
        If output_filepath is None, if it names the same path as
        input_filepath, or if extra_args is not a list.
    SoxError
        If SoX exits with a non-zero status.

    Examples
    --------
    >>> import numpy as np
    >>> import sox
    >>> tfm = sox.Transformer()
    >>> sample_rate = 44100
    >>> y = np.sin(2 * np.pi * 440.0 * np.arange(sample_rate * 1.0) / sample_rate)

    file in, file out - basic usage

    >>> status = tfm.build('path/to/input.wav', 'path/to/output.mp3')

    file in, file out - equivalent usage

    >>> status = tfm.build(
            input_filepath='path/to/input.wav',
            output_filepath='path/to/output.mp3'
        )

    array in, file out

    >>> status = tfm.build(
            input_array=y, sample_rate_in=sample_rate,
            output_filepath='path/to/output.mp3'
        )

    '''
    input_format, input_filepath = self._parse_inputs(
        input_filepath, input_array, sample_rate_in
    )

    if output_filepath is None:
        raise ValueError("output_filepath is not specified!")

    # set output parameters
    # Compare the string forms: a pathlib.Path never compares equal to a
    # str, so a direct == would miss identical paths of different types.
    if str(input_filepath) == str(output_filepath):
        raise ValueError(
            "input_filepath must be different from output_filepath."
        )
    file_info.validate_output_file(output_filepath)

    # SoX argument order: globals, input format + input, output format +
    # output, then the effects chain.
    args = []
    args.extend(self.globals)
    args.extend(self._input_format_args(input_format))
    args.append(input_filepath)
    args.extend(self._output_format_args(self.output_format))
    args.append(output_filepath)
    args.extend(self.effects)

    if extra_args is not None:
        if not isinstance(extra_args, list):
            raise ValueError("extra_args must be a list.")
        args.extend(extra_args)

    status, out, err = sox(args, input_array, True)
    if status != 0:
        raise SoxError(
            "Stdout: {}\nStderr: {}".format(out, err)
        )

    logger.info(
        "Created %s with effects: %s",
        output_filepath,
        " ".join(self.effects_log)
    )

    if return_output:
        return status, out, err

    return True
def build_file(self,
               input_filepath: Optional[Union[str, Path]] = None,
               output_filepath: Optional[Union[str, Path]] = None,
               input_array: Optional[np.ndarray] = None,
               sample_rate_in: Optional[float] = None,
               extra_args: Optional[List[str]] = None,
               return_output: bool = False):
    '''An alias for build.

    Forwards every argument unchanged to ``build`` and returns whatever
    ``build`` returns.

    Parameters
    ----------
    input_filepath : str or None
        Either path to input audio file or None for array input.
    output_filepath : str
        Path to desired output file. If a file already exists at
        the given path, the file will be overwritten.
        If '-n', no file is created.
    input_array : np.ndarray or None
        An np.ndarray of a waveform with shape (n_samples, n_channels).
        sample_rate_in must also be provided.
        If None, input_filepath must be specified.
    sample_rate_in : int
        Sample rate of input_array.
        This argument is ignored if input_array is None.
    extra_args : list or None, default=None
        If a list is given, these additional arguments are passed to SoX
        at the end of the list of effects.
        Don't use this argument unless you know exactly what you're doing!
    return_output : bool, default=False
        If True, a tuple (status, stdout, stderr) is returned.
        If False, returns True on success.

    Returns
    -------
    status : bool
        True on success.
    out : str (optional)
        Only returned when return_output is True; the stdout produced by
        sox.
    err : str (optional)
        Only returned when return_output is True; the stderr produced by
        sox.

    Examples
    --------
    >>> import sox
    >>> tfm = sox.Transformer()
    >>> status = tfm.build_file('path/to/input.wav', 'path/to/output.mp3')

    '''
    return self.build(
        input_filepath=input_filepath,
        output_filepath=output_filepath,
        input_array=input_array,
        sample_rate_in=sample_rate_in,
        extra_args=extra_args,
        return_output=return_output,
    )
def build_array(self,
                input_filepath: Optional[Union[str, Path]] = None,
                input_array: Optional[np.ndarray] = None,
                sample_rate_in: Optional[float] = None,
                extra_args: Optional[List[str]] = None):
    '''Given an input file or array, returns the output as a numpy array
    by executing the current set of commands. By default the array will
    have the same sample rate as the input file unless otherwise specified
    using set_output_format. Functions such as rate, channels and convert
    will be ignored!

    Parameters
    ----------
    input_filepath : str, Path or None
        Either path to input audio file or None.
    input_array : np.ndarray or None
        A np.ndarray of a waveform with shape (n_samples, n_channels).
        If this argument is passed, sample_rate_in must also be provided.
        If None, input_filepath must be specified.
    sample_rate_in : int
        Sample rate of input_array.
        This argument is ignored if input_array is None.
    extra_args : list or None, default=None
        If a list is given, these additional arguments are passed to SoX
        at the end of the list of effects.
        Don't use this argument unless you know exactly what you're doing!

    Returns
    -------
    output_array : np.ndarray
        Output audio as a numpy array. Multi-channel output has shape
        (n_samples, n_channels); single-channel output is 1-D.

    Raises
    ------
    ValueError
        If the requested number of output bits is not 8, 16, 32 or 64, or
        if extra_args is not a list.
    SoxError
        If SoX exits with a non-zero status.

    Examples
    --------
    >>> import numpy as np
    >>> import sox
    >>> tfm = sox.Transformer()
    >>> sample_rate = 44100
    >>> y = np.sin(2 * np.pi * 440.0 * np.arange(sample_rate * 1.0) / sample_rate)

    file in, array out

    >>> output_array = tfm.build_array(input_filepath='path/to/input.wav')

    array in, array out

    >>> output_array = tfm.build_array(input_array=y, sample_rate_in=sample_rate)

    specifying the output sample rate

    >>> tfm.set_output_format(rate=8000)
    >>> output_array = tfm.build_array(input_array=y, sample_rate_in=sample_rate)

    if an effect changes the number of channels, you must explicitly
    specify the number of output channels

    >>> tfm.remix(remix_dictionary={1: [1], 2: [1], 3: [1]})
    >>> tfm.set_output_format(channels=3)
    >>> output_array = tfm.build_array(input_array=y, sample_rate_in=sample_rate)

    '''
    input_format, input_filepath = self._parse_inputs(
        input_filepath, input_array, sample_rate_in
    )

    # check if any of the below commands are part of the effects chain
    ignored_commands = ['rate', 'channels', 'convert']
    if set(ignored_commands) & set(self.effects_log):
        logger.warning(
            "When outputting to an array, rate, channels and convert " +
            "effects may be ignored. Use set_output_format() to " +
            "specify output formats."
        )

    # '-' is passed to SoX in place of an output file name.
    output_filepath = '-'

    # Pick the output dtype from the input's raw file type. Default to
    # int16 both when no file type is known and when the file type (e.g.
    # 'wav' set through set_input_format) has no dtype mapping; the latter
    # previously crashed with an IndexError.
    encoding_out = np.int16
    input_file_type = input_format.get('file_type')
    if input_file_type is not None:
        for dtype, sox_type in ENCODINGS_MAPPING.items():
            if sox_type == input_file_type:
                encoding_out = dtype
                break

    n_bits = np.dtype(encoding_out).itemsize * 8

    output_format = {
        'file_type': 'raw',
        'rate': sample_rate_in,
        'bits': n_bits,
        'channels': input_format['channels'],
        'encoding': None,
        'comments': None,
        'append_comments': True,
    }

    # Explicit user settings override the values derived from the input.
    if self.output_format.get('rate') is not None:
        output_format['rate'] = self.output_format['rate']

    if self.output_format.get('channels') is not None:
        output_format['channels'] = self.output_format['channels']

    if self.output_format.get('bits') is not None:
        n_bits = self.output_format['bits']
        output_format['bits'] = n_bits

    # The dtype used to decode SoX's raw output is tied to the bit depth.
    if n_bits == 8:
        encoding_out = np.int8
    elif n_bits == 16:
        encoding_out = np.int16
    elif n_bits == 32:
        encoding_out = np.float32
    elif n_bits == 64:
        encoding_out = np.float64
    else:
        raise ValueError("invalid n_bits {}".format(n_bits))

    args = []
    args.extend(self.globals)
    args.extend(self._input_format_args(input_format))
    args.append(input_filepath)
    args.extend(self._output_format_args(output_format))
    args.append(output_filepath)
    args.extend(self.effects)

    if extra_args is not None:
        if not isinstance(extra_args, list):
            raise ValueError("extra_args must be a list.")
        args.extend(extra_args)

    status, out, err = sox(args, input_array, False)
    if status != 0:
        raise SoxError(
            "Stdout: {}\nStderr: {}".format(out, err)
        )

    out = np.frombuffer(out, dtype=encoding_out)
    if output_format['channels'] > 1:
        # Samples are de-interleaved into (n_samples, n_channels). Integer
        # division keeps the frame count exact for very long buffers.
        out = out.reshape(
            (
                output_format['channels'],
                len(out) // output_format['channels']
            ),
            order='F'
        ).T
    logger.info(
        "Created array with effects: %s",
        " ".join(self.effects_log)
    )

    return out
def preview(self, input_filepath: Union[str, Path]):
    '''Play a preview of the output with the current set of effects

    Parameters
    ----------
    input_filepath : str
        Path to input audio file.

    '''
    args = ["play", "--no-show-progress"]
    args.extend(self.globals)
    # self.input_format is a dict; extending the argument list with it
    # directly would append its *keys* ('file_type', 'rate', ...) rather
    # than SoX format switches, so convert it to arguments first.
    args.extend(self._input_format_args(self.input_format))
    args.append(input_filepath)
    args.extend(self.effects)

    play(args)
def allpass(self, frequency: float, width_q: float = 2.0):
    '''Apply a two-pole all-pass filter.

    An all-pass filter alters the frequency-to-phase relationship of the
    audio while leaving its frequency-to-amplitude relationship untouched.
    The filter is described in detail at
    http://musicdsp.org/files/Audio-EQ-Cookbook.txt

    Parameters
    ----------
    frequency : float
        The filter's center frequency in Hz.
    width_q : float, default=2.0
        The filter's width as a Q-factor.

    Returns
    -------
    self
        The transformer itself, so effects can be chained.

    Raises
    ------
    ValueError
        If frequency or width_q is not a positive number.

    See Also
    --------
    equalizer, highpass, lowpass, sinc

    '''
    for label, value in (('frequency', frequency), ('width_q', width_q)):
        if not is_number(value) or value <= 0:
            raise ValueError('{} must be a positive number.'.format(label))

    # The trailing 'q' tells SoX the width is given as a Q-factor.
    self.effects.extend(
        ['allpass', '{:f}'.format(frequency), '{:f}q'.format(width_q)]
    )
    self.effects_log.append('allpass')
    return self
def bandpass(self,
             frequency: float,
             width_q: float = 2.0,
             constant_skirt: bool = False):
    '''Apply a two-pole Butterworth band-pass filter with the given
    central frequency and (3dB-point) band-width. The filter rolls off at
    6dB per octave (20dB per decade) and is described in detail in
    http://musicdsp.org/files/Audio-EQ-Cookbook.txt

    Parameters
    ----------
    frequency : float
        The filter's center frequency in Hz.
    width_q : float, default=2.0
        The filter's width as a Q-factor.
    constant_skirt : bool, default=False
        If True, selects constant skirt gain (peak gain = width_q).
        If False, selects constant 0dB peak gain.

    Returns
    -------
    self
        The transformer itself, so effects can be chained.

    Raises
    ------
    ValueError
        If frequency or width_q is not a positive number, or if
        constant_skirt is not a boolean.

    See Also
    --------
    bandreject, sinc

    '''
    for label, value in (('frequency', frequency), ('width_q', width_q)):
        if not is_number(value) or value <= 0:
            raise ValueError('{} must be a positive number.'.format(label))

    if not isinstance(constant_skirt, bool):
        raise ValueError("constant_skirt must be a boolean.")

    # '-c' must come directly after the effect name, before the numbers.
    skirt_switch = ['-c'] if constant_skirt else []
    effect_args = (
        ['bandpass']
        + skirt_switch
        + ['{:f}'.format(frequency), '{:f}q'.format(width_q)]
    )

    self.effects.extend(effect_args)
    self.effects_log.append('bandpass')
    return self
def bandreject(self,
               frequency: float,
               width_q: float = 2.0,
               constant_skirt: bool = False):
    '''Apply a two-pole Butterworth band-reject filter with the given
    central frequency and (3dB-point) band-width. The filter rolls off at
    6dB per octave (20dB per decade) and is described in detail in
    http://musicdsp.org/files/Audio-EQ-Cookbook.txt

    Parameters
    ----------
    frequency : float
        The filter's center frequency in Hz.
    width_q : float, default=2.0
        The filter's width as a Q-factor.
    constant_skirt : bool, default=False
        If True, selects constant skirt gain (peak gain = width_q).
        If False, selects constant 0dB peak gain.

    Returns
    -------
    self
        The transformer itself, so effects can be chained.

    Raises
    ------
    ValueError
        If frequency or width_q is not a positive number, or if
        constant_skirt is not a boolean.

    See Also
    --------
    bandpass, sinc

    '''
    for label, value in (('frequency', frequency), ('width_q', width_q)):
        if not is_number(value) or value <= 0:
            raise ValueError('{} must be a positive number.'.format(label))

    if not isinstance(constant_skirt, bool):
        raise ValueError("constant_skirt must be a boolean.")

    # '-c' must come directly after the effect name, before the numbers.
    skirt_switch = ['-c'] if constant_skirt else []
    effect_args = (
        ['bandreject']
        + skirt_switch
        + ['{:f}'.format(frequency), '{:f}q'.format(width_q)]
    )

    self.effects.extend(effect_args)
    self.effects_log.append('bandreject')
    return self
def bass(self, gain_db: float, frequency: float = 100.0, slope: float = 0.5):
    '''Boost or cut the bass (lower) frequencies of the audio using a
    two-pole shelving filter with a response similar to that of a standard
    hi-fi's tone-controls. This is also known as shelving equalisation.

    The filters are described in detail in
    http://musicdsp.org/files/Audio-EQ-Cookbook.txt

    Parameters
    ----------
    gain_db : float
        The gain at 0 Hz.
        For a large cut use -20, for a large boost use 20.
    frequency : float, default=100.0
        The filter's cutoff frequency in Hz.
    slope : float, default=0.5
        The steepness of the filter's shelf transition, in the range
        (0, 1.0]. For a gentle slope use 0.3, and use 1.0 for a steep
        slope.

    Returns
    -------
    self
        The transformer itself, so effects can be chained.

    Raises
    ------
    ValueError
        If gain_db is not a number, frequency is not a positive number,
        or slope is not a number in (0, 1.0].

    See Also
    --------
    treble, equalizer

    '''
    if not is_number(gain_db):
        raise ValueError("gain_db must be a number")

    if not is_number(frequency) or frequency <= 0:
        raise ValueError("frequency must be a positive number.")

    if not is_number(slope) or slope <= 0 or slope > 1.0:
        # The message used to blame 'width_q', which is not a parameter
        # of this effect.
        raise ValueError(
            "slope must be a positive number less than or equal to 1.0."
        )

    # The trailing 's' tells SoX the width is given as a slope.
    effect_args = [
        'bass', '{:f}'.format(gain_db), '{:f}'.format(frequency),
        '{:f}s'.format(slope)
    ]

    self.effects.extend(effect_args)
    self.effects_log.append('bass')
    return self
def bend(self,
         n_bends: int,
         start_times: List[float],
         end_times: List[float],
         cents: List[float],
         frame_rate: int = 25,
         oversample_rate: int = 16):
    '''Changes pitch by specified amounts at specified times.
    The pitch-bending algorithm utilises the Discrete Fourier Transform
    (DFT) at a particular frame rate and over-sampling rate.

    Parameters
    ----------
    n_bends : int
        The number of intervals to pitch shift
    start_times : list of floats
        A list of absolute start times (in seconds), in order
    end_times : list of floats
        A list of absolute end times (in seconds) in order.
        [start_time, end_time] intervals may not overlap!
    cents : list of floats
        A list of pitch shifts in cents. A positive value shifts the pitch
        up, a negative value shifts the pitch down.
    frame_rate : int, default=25
        The number of DFT frames to process per second, between 10 and 80
    oversample_rate: int, default=16
        The number of frames to over sample per second, between 4 and 32

    Returns
    -------
    self
        The transformer itself, so effects can be chained.

    Raises
    ------
    ValueError
        If any argument fails the constraints described above.

    See Also
    --------
    pitch

    '''
    if not isinstance(n_bends, int) or n_bends < 1:
        raise ValueError("n_bends must be a positive integer.")

    # start_times and end_times obey the same three rules, checked in
    # that order: right length, positive numbers, sorted ascending.
    for label, times in (('start_times', start_times),
                         ('end_times', end_times)):
        if not isinstance(times, list) or len(times) != n_bends:
            raise ValueError(
                "{} must be a list of length n_bends.".format(label)
            )
        if any([(not is_number(t) or t <= 0) for t in times]):
            raise ValueError("{} must be positive floats.".format(label))
        if sorted(times) != times:
            raise ValueError(
                "{} must be in increasing order.".format(label)
            )

    if any([e <= s for s, e in zip(start_times, end_times)]):
        raise ValueError(
            "end_times must be element-wise greater than start_times."
        )

    # Each interval must end no later than the next one starts.
    if any([e > s for s, e in zip(start_times[1:], end_times[:-1])]):
        raise ValueError(
            "[start_time, end_time] intervals must be non-overlapping."
        )

    if not isinstance(cents, list) or len(cents) != n_bends:
        raise ValueError("cents must be a list of length n_bends.")
    if any([not is_number(c) for c in cents]):
        raise ValueError("elements of cents must be floats.")

    if not (isinstance(frame_rate, int) and 10 <= frame_rate <= 80):
        raise ValueError("frame_rate must be an integer between 10 and 80")

    if not (isinstance(oversample_rate, int)
            and 4 <= oversample_rate <= 32):
        raise ValueError(
            "oversample_rate must be an integer between 4 and 32."
        )

    effect_args = [
        'bend',
        '-f', '{}'.format(frame_rate),
        '-o', '{}'.format(oversample_rate)
    ]

    # Each bend is emitted as "delay,cents,duration", where delay is
    # measured from the end of the previous bend (0 for the first one).
    previous_end = 0
    for start, end, shift in zip(start_times, end_times, cents):
        delay = round(start - previous_end, 2)
        duration = round(end - start, 2)
        effect_args.append(
            '{:f},{:f},{:f}'.format(delay, shift, duration)
        )
        previous_end = end

    self.effects.extend(effect_args)
    self.effects_log.append('bend')
    return self
def biquad(self, b: List[float], a: List[float]):
    '''Apply a biquad IIR filter with the given coefficients.

    Parameters
    ----------
    b : list of floats
        Numerator coefficients. Must be length 3.
    a : list of floats
        Denominator coefficients. Must be length 3.

    Returns
    -------
    self : Transformer
        The transformer itself, with the effect appended.

    Raises
    ------
    ValueError
        If `b` or `a` is not a list of exactly three numbers.

    See Also
    --------
    fir, treble, bass, equalizer

    '''
    # Type checks for both arguments come first, then lengths, then the
    # element checks.
    if not isinstance(b, list):
        raise ValueError('b must be a list.')
    if not isinstance(a, list):
        raise ValueError('a must be a list.')

    if len(b) != 3:
        raise ValueError('b must be a length 3 list.')
    if len(a) != 3:
        raise ValueError('a must be a length 3 list.')

    for coefficient in b:
        if not is_number(coefficient):
            raise ValueError('all elements of b must be numbers.')
    for coefficient in a:
        if not is_number(coefficient):
            raise ValueError('all elements of a must be numbers.')

    # The numerator coefficients are emitted first, then the denominator.
    biquad_args = ['biquad']
    biquad_args.extend('{:f}'.format(coefficient) for coefficient in b + a)

    self.effects.extend(biquad_args)
    self.effects_log.append('biquad')
    return self
def channels(self, n_channels: int):
    '''Change the number of channels in the audio signal. If decreasing the
    number of channels it mixes channels together, if increasing the number
    of channels it duplicates.

    Note: This overrides arguments used in the convert effect!

    Parameters
    ----------
    n_channels : int
        Desired number of channels. Must be a positive integer; booleans
        are rejected.

    Returns
    -------
    self : Transformer
        The transformer itself, with the effect appended.

    Raises
    ------
    ValueError
        If `n_channels` is not a positive integer.

    See Also
    --------
    convert

    '''
    # bool is a subclass of int, so True would otherwise pass the check and
    # be formatted into the effect arguments as the string "True".
    if (isinstance(n_channels, bool) or
            not isinstance(n_channels, int) or n_channels <= 0):
        raise ValueError('n_channels must be a positive integer.')

    effect_args = ['channels', '{}'.format(n_channels)]

    self.effects.extend(effect_args)
    self.effects_log.append('channels')
    return self
def chorus(self, gain_in: float = 0.5, gain_out: float = 0.9,
           n_voices: int = 3, delays: Optional[List[float]] = None,
           decays: Optional[List[float]] = None,
           speeds: Optional[List[float]] = None,
           depths: Optional[List[float]] = None,
           shapes: Optional[List[Literal['s', 't']]] = None):
    '''Add a chorus effect to the audio. This can make a single vocal sound
    like a chorus, but can also be applied to instrumentation.

    Chorus resembles an echo effect with a short delay, but whereas with
    echo the delay is constant, with chorus, it is varied using sinusoidal
    or triangular modulation. The modulation depth defines the range the
    modulated delay is played before or after the delay. Hence the delayed
    sound will sound slower or faster, that is the delayed sound tuned
    around the original one, like in a chorus where some vocals are
    slightly off key.

    Parameters
    ----------
    gain_in : float, default=0.5
        Input gain. Must be greater than 0 and at most 1.
    gain_out : float, default=0.9
        Output gain. Must be greater than 0 and at most 1.
    n_voices : int, default=3
        The number of voices in the chorus effect.
    delays : list of floats >= 20 or None, default=None
        If a list, the list of delays (in milliseconds) of length n_voices.
        If None, the individual delay parameters are chosen at random
        between 40 and 60 milliseconds.
    decays : list of floats or None, default=None
        If a list, the list of decays (as a fraction of gain_in) of length
        n_voices, each greater than 0 and at most 1.
        If None, the individual decay parameters are chosen at random
        between 0.3 and 0.4.
    speeds : list of floats or None, default=None
        If a list, the list of modulation speeds (in Hz) of length
        n_voices, each greater than 0.
        If None, the individual speed parameters are chosen at random
        between 0.25 and 0.4 Hz.
    depths : list of floats or None, default=None
        If a list, the list of depths (in milliseconds) of length n_voices,
        each greater than 0.
        If None, the individual depth parameters are chosen at random
        between 1 and 3 milliseconds.
    shapes : list of 's' or 't' or None, default=None
        If a list, the list of modulation shapes - 's' for sinusoidal or
        't' for triangular - of length n_voices.
        If None, the individual shapes are chosen at random.

    Returns
    -------
    self : Transformer
        The transformer itself, with the effect appended.

    Raises
    ------
    ValueError
        If any argument fails validation.

    '''
    if not is_number(gain_in) or gain_in <= 0 or gain_in > 1:
        raise ValueError("gain_in must be a number between 0 and 1.")
    if not is_number(gain_out) or gain_out <= 0 or gain_out > 1:
        raise ValueError("gain_out must be a number between 0 and 1.")
    if not isinstance(n_voices, int) or n_voices <= 0:
        raise ValueError("n_voices must be a positive integer.")

    # NOTE: the per-voice parameters are validated/generated in a fixed
    # order (delays, decays, speeds, depths, shapes) so that the sequence
    # of calls into `random` stays the same for seeded runs.

    # validate delays
    if not (delays is None or isinstance(delays, list)):
        raise ValueError("delays must be a list or None")
    if delays is not None:
        if len(delays) != n_voices:
            raise ValueError("the length of delays must equal n_voices")
        if any((not is_number(p) or p < 20) for p in delays):
            # A delay of exactly 20 is accepted, hence ">=" in the message.
            raise ValueError("the elements of delays must be numbers >= 20")
    else:
        delays = [random.uniform(40, 60) for _ in range(n_voices)]

    # validate decays
    if not (decays is None or isinstance(decays, list)):
        raise ValueError("decays must be a list or None")
    if decays is not None:
        if len(decays) != n_voices:
            raise ValueError("the length of decays must equal n_voices")
        if any((not is_number(p) or p <= 0 or p > 1) for p in decays):
            raise ValueError(
                "the elements of decays must be between 0 and 1"
            )
    else:
        decays = [random.uniform(0.3, 0.4) for _ in range(n_voices)]

    # validate speeds
    if not (speeds is None or isinstance(speeds, list)):
        raise ValueError("speeds must be a list or None")
    if speeds is not None:
        if len(speeds) != n_voices:
            raise ValueError("the length of speeds must equal n_voices")
        if any((not is_number(p) or p <= 0) for p in speeds):
            raise ValueError("the elements of speeds must be numbers > 0")
    else:
        speeds = [random.uniform(0.25, 0.4) for _ in range(n_voices)]

    # validate depths
    if not (depths is None or isinstance(depths, list)):
        raise ValueError("depths must be a list or None")
    if depths is not None:
        if len(depths) != n_voices:
            raise ValueError("the length of depths must equal n_voices")
        if any((not is_number(p) or p <= 0) for p in depths):
            raise ValueError("the elements of depths must be numbers > 0")
    else:
        depths = [random.uniform(1.0, 3.0) for _ in range(n_voices)]

    # validate shapes
    if not (shapes is None or isinstance(shapes, list)):
        raise ValueError("shapes must be a list or None")
    if shapes is not None:
        if len(shapes) != n_voices:
            raise ValueError("the length of shapes must equal n_voices")
        if any((p not in ['t', 's']) for p in shapes):
            raise ValueError("the elements of shapes must be 's' or 't'")
    else:
        shapes = [random.choice(['t', 's']) for _ in range(n_voices)]

    effect_args = ['chorus', '{}'.format(gain_in), '{}'.format(gain_out)]

    # One "delay decay speed depth -shape" group per voice.
    for delay, decay, speed, depth, shape in zip(
            delays, decays, speeds, depths, shapes):
        effect_args.extend([
            '{:f}'.format(delay),
            '{:f}'.format(decay),
            '{:f}'.format(speed),
            '{:f}'.format(depth),
            '-{}'.format(shape),
        ])

    self.effects.extend(effect_args)
    self.effects_log.append('chorus')
    return self
def compand(self, attack_time: float = 0.3, decay_time: float = 0.8,
            soft_knee_db: float = 6.0,
            tf_points: List[Tuple[float, float]] = [(-70, -70), (-60, -20), (0, 0)],
            ):
    '''Compand (compress or expand) the dynamic range of the audio.

    Parameters
    ----------
    attack_time : float, default=0.3
        The time in seconds over which the instantaneous level of the input
        signal is averaged to determine increases in volume.
    decay_time : float, default=0.8
        The time in seconds over which the instantaneous level of the input
        signal is averaged to determine decreases in volume.
    soft_knee_db : float or None, default=6.0
        The amount (in dB) by which the points where adjacent line segments
        of the transfer function meet will be rounded.
        If None, no soft_knee is applied.
    tf_points : list of tuples
        Transfer function points as a list of (dB, dB) tuples defining the
        compander's transfer function. Values must be <= 0 and the first
        elements must be unique. Points are sorted by their first element
        before being passed on.

    Returns
    -------
    self : Transformer
        The transformer itself, with the effect appended.

    Raises
    ------
    TypeError
        If `tf_points` is not a list.
    ValueError
        If any other validation fails.

    See Also
    --------
    mcompand, contrast

    '''
    if not is_number(attack_time) or attack_time <= 0:
        raise ValueError("attack_time must be a positive number.")

    if not is_number(decay_time) or decay_time <= 0:
        raise ValueError("decay_time must be a positive number.")

    # Unusual, but not an error: only warn.
    if attack_time > decay_time:
        logger.warning(
            "attack_time is larger than decay_time.\n"
            "For most situations, attack_time should be shorter than "
            "decay time because the human ear is more sensitive to sudden "
            "loud music than sudden soft music."
        )

    if not (is_number(soft_knee_db) or soft_knee_db is None):
        raise ValueError("soft_knee_db must be a number or None.")

    if not isinstance(tf_points, list):
        raise TypeError("tf_points must be a list.")
    if len(tf_points) == 0:
        raise ValueError("tf_points must have at least one point.")
    for pair in tf_points:
        if not isinstance(pair, tuple):
            raise ValueError("elements of tf_points must be pairs")
    for pair in tf_points:
        if len(pair) != 2:
            raise ValueError("Tuples in tf_points must be length 2")
    for pair in tf_points:
        if not (is_number(pair[0]) and is_number(pair[1])):
            raise ValueError("Tuples in tf_points must be pairs of numbers.")
    for pair in tf_points:
        if pair[0] > 0 or pair[1] > 0:
            raise ValueError("Tuple values in tf_points must be <= 0 (dB).")
    distinct_inputs = {pair[0] for pair in tf_points}
    if len(tf_points) > len(distinct_inputs):
        raise ValueError("Found duplicate x-value in tf_points.")

    # sorted() builds a new list, so the caller's list (and the shared
    # default) is never modified.
    ordered_points = sorted(tf_points, key=lambda pair: pair[0])
    transfer_function = ",".join(
        "{:f},{:f}".format(level_in, level_out)
        for level_in, level_out in ordered_points
    )

    # With a soft knee the transfer function is prefixed by "<knee>:".
    if soft_knee_db is not None:
        curve_arg = "{:f}:{}".format(soft_knee_db, transfer_function)
    else:
        curve_arg = transfer_function

    compand_args = [
        'compand',
        "{:f},{:f}".format(attack_time, decay_time),
        curve_arg,
    ]

    self.effects.extend(compand_args)
    self.effects_log.append('compand')
    return self
def contrast(self, amount=75):
    '''Comparable with compression, this effect modifies an audio signal to
    make it sound louder.

    Parameters
    ----------
    amount : float, default=75
        Amount of enhancement between 0 and 100.

    Returns
    -------
    self : Transformer
        The transformer itself, with the effect appended.

    Raises
    ------
    ValueError
        If `amount` is not a number between 0 and 100.

    See Also
    --------
    compand, mcompand

    '''
    if not is_number(amount):
        raise ValueError('amount must be a number between 0 and 100.')
    if amount < 0 or amount > 100:
        raise ValueError('amount must be a number between 0 and 100.')

    self.effects.extend(['contrast', '{:f}'.format(amount)])
    self.effects_log.append('contrast')
    return self
def convert(self, samplerate: Optional[float] = None,
            n_channels: Optional[int] = None,
            bitdepth: Optional[int] = None):
    '''Converts output audio to the specified format.

    All arguments are validated before anything is changed, so a failed
    call leaves the transformer untouched.

    Parameters
    ----------
    samplerate : float, default=None
        Desired samplerate. If None, defaults to the same as input.
    n_channels : int, default=None
        Desired number of channels. If None, defaults to the same as input.
    bitdepth : int, default=None
        Desired bitdepth. Must be one of 8, 16, 24, 32 or 64.
        If None, defaults to the same as input.

    Returns
    -------
    self : Transformer
        The transformer itself.

    Raises
    ------
    ValueError
        If any given argument is outside its allowed values.

    See Also
    --------
    rate

    '''
    bitdepths = [8, 16, 24, 32, 64]

    # --- validate everything first ---------------------------------------
    if bitdepth is not None and bitdepth not in bitdepths:
        raise ValueError(
            "bitdepth must be one of {}.".format(str(bitdepths))
        )

    if n_channels is not None:
        # bool is a subclass of int; reject it explicitly so that True is
        # not stored as a channel count.
        if (isinstance(n_channels, bool) or
                not isinstance(n_channels, int) or n_channels <= 0):
            raise ValueError(
                "n_channels must be a positive integer."
            )

    if samplerate is not None:
        if not is_number(samplerate) or samplerate <= 0:
            raise ValueError("samplerate must be a positive number.")

    # --- then apply, in the original order -------------------------------
    if bitdepth is not None:
        self.output_format['bits'] = bitdepth
    if n_channels is not None:
        self.output_format['channels'] = n_channels
    if samplerate is not None:
        # The sample rate is applied through the rate effect rather than
        # stored in output_format.
        self.rate(samplerate)

    return self
def dcshift(self, shift: float = 0.0):
    '''Apply a DC shift to the audio.

    Parameters
    ----------
    shift : float, default=0.0
        Amount to shift audio between -2 and 2. (Audio is between -1 and 1)

    Returns
    -------
    self : Transformer
        The transformer itself, with the effect appended.

    Raises
    ------
    ValueError
        If `shift` is not a number between -2 and 2.

    See Also
    --------
    highpass

    '''
    if not is_number(shift):
        raise ValueError('shift must be a number between -2 and 2.')
    if shift < -2 or shift > 2:
        raise ValueError('shift must be a number between -2 and 2.')

    self.effects.extend(['dcshift', '{:f}'.format(shift)])
    self.effects_log.append('dcshift')
    return self
def deemph(self):
    '''Apply Compact Disc (IEC 60908) de-emphasis (a treble attenuation
    shelving filter).

    Pre-emphasis was applied in the mastering of some CDs issued in the
    early 1980s. These included many classical music albums, as well as now
    sought-after issues of albums by The Beatles, Pink Floyd and others.
    Pre-emphasis should be removed at playback time by a de-emphasis filter
    in the playback device. However, not all modern CD players have this
    filter, and very few PC CD drives have it; playing pre-emphasised audio
    without the correct de-emphasis filter results in audio that sounds
    harsh and is far from what its creators intended.

    The de-emphasis filter is implemented as a biquad and requires the
    input audio sample rate to be either 44.1kHz or 48kHz. Maximum
    deviation from the ideal response is only 0.06dB (up to 20kHz).

    Returns
    -------
    self : Transformer
        The transformer itself, with the effect appended.

    See Also
    --------
    bass, treble

    '''
    # The effect takes no arguments, so only its name is queued.
    name = 'deemph'
    self.effects.append(name)
    self.effects_log.append(name)
    return self
def delay(self, positions: List[float]):
    '''Delay one or more audio channels such that they start at the given
    positions.

    Parameters
    ----------
    positions : list of floats
        List of times (in seconds) to delay each audio channel. Each value
        must be a number greater than or equal to 0.
        If fewer positions are given than the number of channels, the
        remaining channels will be unaffected.

    Returns
    -------
    self : Transformer
        The transformer itself, with the effect appended.

    Raises
    ------
    ValueError
        If `positions` is not a list of nonnegative numbers.

    '''
    if not isinstance(positions, list):
        raise ValueError("positions must be a list of numbers")

    if not all((is_number(p) and p >= 0) for p in positions):
        # Zero is accepted, so the requirement is "nonnegative" rather
        # than "positive".
        raise ValueError("positions must be nonnegative numbers")

    effect_args = ['delay']
    effect_args.extend(['{:f}'.format(p) for p in positions])

    self.effects.extend(effect_args)
    self.effects_log.append('delay')
    return self
def downsample(self, factor: int = 2):
    '''Downsample the signal by an integer factor. Only the first out of
    each factor samples is retained, the others are discarded.

    No decimation filter is applied. If the input is not a properly
    bandlimited baseband signal, aliasing will occur. This may be desirable
    e.g., for frequency translation.

    For a general resampling effect with anti-aliasing, see rate.

    Parameters
    ----------
    factor : int, default=2
        Downsampling factor. Must be a positive integer; booleans are
        rejected.

    Returns
    -------
    self : Transformer
        The transformer itself, with the effect appended.

    Raises
    ------
    ValueError
        If `factor` is not a positive integer.

    See Also
    --------
    rate, upsample

    '''
    # bool is a subclass of int, so True would otherwise pass the check and
    # be formatted into the effect arguments as the string "True".
    if (isinstance(factor, bool) or
            not isinstance(factor, int) or factor < 1):
        raise ValueError('factor must be a positive integer.')

    effect_args = ['downsample', '{}'.format(factor)]

    self.effects.extend(effect_args)
    self.effects_log.append('downsample')
    return self
def earwax(self):
    '''Makes audio easier to listen to on headphones. Adds 'cues' to 44.1kHz
    stereo audio so that when listened to on headphones the stereo image is
    moved from inside your head (standard for headphones) to outside and in
    front of the listener (standard for speakers).

    Warning: Will only work properly on 44.1kHz stereo audio!

    Returns
    -------
    self : Transformer
        The transformer itself, with the effect appended.

    '''
    # The effect takes no arguments, so only its name is queued.
    name = 'earwax'
    self.effects.append(name)
    self.effects_log.append(name)
    return self
def echo(self, gain_in: float = 0.8, gain_out: float = 0.9,
         n_echos: int = 1, delays: List[float] = [60],
         decays: List[float] = [0.4]):
    '''Add echoing to the audio.

    Echoes are reflected sound and can occur naturally amongst mountains
    (and sometimes large buildings) when talking or shouting; digital echo
    effects emulate this behaviour and are often used to help fill out the
    sound of a single instrument or vocal. The time difference between the
    original signal and the reflection is the 'delay' (time), and the
    loudness of the reflected signal is the 'decay'. Multiple echoes can
    have different delays and decays.

    Parameters
    ----------
    gain_in : float, default=0.8
        Input volume, between 0 and 1
    gain_out : float, default=0.9
        Output volume, between 0 and 1
    n_echos : int, default=1
        Number of reflections
    delays : list, default=[60]
        List of delays in milliseconds, of length n_echos
    decays : list, default=[0.4]
        List of decays, relative to gain in between 0 and 1, of length
        n_echos

    Returns
    -------
    self : Transformer
        The transformer itself, with the effect appended.

    Raises
    ------
    ValueError
        If any argument fails validation.

    See Also
    --------
    echos, reverb, chorus

    '''
    if not is_number(gain_in) or gain_in <= 0 or gain_in > 1:
        raise ValueError("gain_in must be a number between 0 and 1.")

    if not is_number(gain_out) or gain_out <= 0 or gain_out > 1:
        raise ValueError("gain_out must be a number between 0 and 1.")

    if not isinstance(n_echos, int) or n_echos <= 0:
        raise ValueError("n_echos must be a positive integer.")

    # validate delays
    if not isinstance(delays, list):
        raise ValueError("delays must be a list")
    if len(delays) != n_echos:
        raise ValueError("the length of delays must equal n_echos")
    for value in delays:
        if not is_number(value) or value <= 0:
            raise ValueError("the elements of delays must be numbers > 0")

    # validate decays
    if not isinstance(decays, list):
        raise ValueError("decays must be a list")
    if len(decays) != n_echos:
        raise ValueError("the length of decays must equal n_echos")
    for value in decays:
        if not is_number(value) or value <= 0 or value > 1:
            raise ValueError(
                "the elements of decays must be between 0 and 1"
            )

    echo_args = ['echo', '{:f}'.format(gain_in), '{:f}'.format(gain_out)]

    # One "delay decay" pair per reflection; the default lists are only
    # read here, never modified.
    for delay_ms, decay in zip(delays, decays):
        echo_args.append('{}'.format(delay_ms))
        echo_args.append('{}'.format(decay))

    self.effects.extend(echo_args)
    self.effects_log.append('echo')
    return self
def echos(self, gain_in: float = 0.8, gain_out: float = 0.9,
          n_echos: int = 1, delays: List[float] = [60],
          decays: List[float] = [0.4]):
    '''Add a sequence of echoes to the audio.

    Like the echo effect, echos stand for 'ECHO in Sequel', that is the
    first echos takes the input, the second the input and the first echos,
    the third the input and the first and the second echos, ... and so on.
    Care should be taken using many echos; a single echos has the same
    effect as a single echo.

    Parameters
    ----------
    gain_in : float, default=0.8
        Input volume, between 0 and 1
    gain_out : float, default=0.9
        Output volume, between 0 and 1
    n_echos : int, default=1
        Number of reflections
    delays : list, default=[60]
        List of delays in milliseconds, of length n_echos
    decays : list, default=[0.4]
        List of decays, relative to gain in between 0 and 1, of length
        n_echos

    Returns
    -------
    self : Transformer
        The transformer itself, with the effect appended.

    Raises
    ------
    ValueError
        If any argument fails validation.

    See Also
    --------
    echo, reverb, chorus

    '''
    if not is_number(gain_in) or gain_in <= 0 or gain_in > 1:
        raise ValueError("gain_in must be a number between 0 and 1.")

    if not is_number(gain_out) or gain_out <= 0 or gain_out > 1:
        raise ValueError("gain_out must be a number between 0 and 1.")

    if not isinstance(n_echos, int) or n_echos <= 0:
        raise ValueError("n_echos must be a positive integer.")

    # validate delays
    if not isinstance(delays, list):
        raise ValueError("the delays must be a list ")
    if len(delays) != n_echos:
        raise ValueError("the length of delays must equal n_echos")
    for value in delays:
        if not is_number(value) or value <= 0:
            raise ValueError("the elements of delays must be numbers > 0")

    # validate decays
    if not isinstance(decays, list):
        raise ValueError("the decays must be a list ")
    if len(decays) != n_echos:
        raise ValueError("the length of decays must equal n_echos")
    for value in decays:
        if not is_number(value) or value <= 0 or value > 1:
            raise ValueError(
                "the elements of decays must be between 0 and 1"
            )

    echos_args = ['echos', '{:f}'.format(gain_in), '{:f}'.format(gain_out)]

    # One "delay decay" pair per reflection; the default lists are only
    # read here, never modified.
    for delay_ms, decay in zip(delays, decays):
        echos_args.append('{:f}'.format(delay_ms))
        echos_args.append('{:f}'.format(decay))

    self.effects.extend(echos_args)
    self.effects_log.append('echos')
    return self
def equalizer(self, frequency: float, width_q: float, gain_db: float):
    '''Apply a two-pole peaking equalisation (EQ) filter to boost or
    reduce around a given frequency.
    This effect can be applied multiple times to produce complex EQ curves.

    Parameters
    ----------
    frequency : float
        The filter's central frequency in Hz.
    width_q : float
        The filter's width as a Q-factor.
    gain_db : float
        The filter's gain in dB.

    Returns
    -------
    self : Transformer
        The transformer itself, with the effect appended.

    Raises
    ------
    ValueError
        If `frequency` or `width_q` is not a positive number, or `gain_db`
        is not a number.

    See Also
    --------
    bass, treble

    '''
    if not is_number(frequency) or frequency <= 0:
        raise ValueError("frequency must be a positive number.")
    if not is_number(width_q) or width_q <= 0:
        raise ValueError("width_q must be a positive number.")
    if not is_number(gain_db):
        raise ValueError("gain_db must be a number.")

    centre = '{:f}'.format(frequency)
    width = '{:f}q'.format(width_q)  # trailing "q" marks a Q-factor width
    gain_text = '{:f}'.format(gain_db)

    self.effects.extend(['equalizer', centre, width, gain_text])
    self.effects_log.append('equalizer')
    return self
def fade(self, fade_in_len: float = 0.0, fade_out_len: float = 0.0,
         fade_shape: Literal['q', 'h', 't', 'l', 'p'] = 'q'):
    '''Add a fade in and/or fade out to an audio file.
    Default fade shape is 1/4 sine wave.

    If both lengths are 0, nothing is added to the effect chain.

    Parameters
    ----------
    fade_in_len : float, default=0.0
        Length of fade-in (seconds). If fade_in_len = 0,
        no fade in is applied.
    fade_out_len : float, default=0.0
        Length of fade-out (seconds). If fade_out_len = 0,
        no fade out is applied.
    fade_shape : str, default='q'
        Shape of fade. Must be one of
         * 'q' for quarter sine (default),
         * 'h' for half sine,
         * 't' for linear,
         * 'l' for logarithmic
         * 'p' for inverted parabola.

    Returns
    -------
    self : Transformer
        The transformer itself.

    Raises
    ------
    ValueError
        If `fade_shape` is not a known shape or a length is not a
        nonnegative number.

    See Also
    --------
    splice

    '''
    fade_shapes = ['q', 'h', 't', 'l', 'p']
    if fade_shape not in fade_shapes:
        raise ValueError(
            "Fade shape must be one of {}".format(" ".join(fade_shapes))
        )

    if not is_number(fade_in_len) or fade_in_len < 0:
        raise ValueError("fade_in_len must be a nonnegative number.")

    if not is_number(fade_out_len) or fade_out_len < 0:
        raise ValueError("fade_out_len must be a nonnegative number.")

    shape_arg = '{}'.format(fade_shape)
    fade_args = []

    if fade_in_len > 0:
        fade_args += ['fade', shape_arg, '{:f}'.format(fade_in_len)]

    if fade_out_len > 0:
        # The fade-out is expressed as a fade applied to the reversed
        # signal, which is then reversed back.
        fade_args += [
            'reverse', 'fade', shape_arg,
            '{:f}'.format(fade_out_len), 'reverse',
        ]

    # Only record the effect when at least one fade was requested.
    if fade_args:
        self.effects.extend(fade_args)
        self.effects_log.append('fade')

    return self
def fir(self, coefficients: List[float]):
    '''Use SoX's FFT convolution engine with given FIR filter coefficients.

    Parameters
    ----------
    coefficients : list
        FIR filter coefficients. Must be a non-empty list of numbers.

    Returns
    -------
    self : Transformer
        The transformer itself, with the effect appended.

    Raises
    ------
    ValueError
        If `coefficients` is not a list, is empty, or contains
        non-numbers.

    '''
    if not isinstance(coefficients, list):
        raise ValueError("coefficients must be a list")

    # An empty list would emit a bare "fir" with no arguments; per the SoX
    # manual the effect then reads its coefficients from standard input,
    # which is never what a caller of this method wants.
    if not coefficients:
        raise ValueError("coefficients must not be empty.")

    # NOTE(review): the SoX manual says a single argument is treated as the
    # name of a coefficient file - confirm whether one-element lists should
    # be rejected as well.
    if not all([is_number(c) for c in coefficients]):
        raise ValueError("coefficients must be numbers.")

    effect_args = ['fir']
    effect_args.extend(['{:f}'.format(c) for c in coefficients])

    self.effects.extend(effect_args)
    self.effects_log.append('fir')
    return self
def flanger(self, delay: float = 0, depth: float = 2, regen: float = 0,
            width: float = 71, speed: float = 0.5,
            shape: Literal['sine', 'triangle'] = 'sine', phase: float = 25,
            interp: Literal['linear', 'quadratic'] = 'linear'):
    '''Apply a flanging effect to the audio.

    Parameters
    ----------
    delay : float, default=0
        Base delay (in milliseconds) between 0 and 30.
    depth : float, default=2
        Added swept delay (in milliseconds) between 0 and 10.
    regen : float, default=0
        Percentage regeneration between -95 and 95.
    width : float, default=71,
        Percentage of delayed signal mixed with original between 0 and 100.
    speed : float, default=0.5
        Sweeps per second (in Hz) between 0.1 and 10.
    shape : 'sine' or 'triangle', default='sine'
        Swept wave shape
    phase : float, default=25
        Swept wave percentage phase-shift for multi-channel flange between
        0 and 100. 0 = 100 = same phase on each channel
    interp : 'linear' or 'quadratic', default='linear'
        Digital delay-line interpolation type.

    Returns
    -------
    self : Transformer
        The transformer itself, with the effect appended.

    Raises
    ------
    ValueError
        If any argument is outside its allowed range or set of values.

    See Also
    --------
    tremolo

    '''
    # (value, lower bound, upper bound, error message), checked in order.
    leading_ranges = (
        (delay, 0, 30, "delay must be a number between 0 and 30."),
        (depth, 0, 10, "depth must be a number between 0 and 10."),
        (regen, -95, 95, "regen must be a number between -95 and 95."),
        (width, 0, 100, "width must be a number between 0 and 100."),
        (speed, 0.1, 10, "speed must be a number between 0.1 and 10."),
    )
    for value, lowest, highest, message in leading_ranges:
        if not is_number(value) or value < lowest or value > highest:
            raise ValueError(message)

    if shape not in ['sine', 'triangle']:
        raise ValueError("shape must be one of 'sine' or 'triangle'.")
    if not is_number(phase) or phase < 0 or phase > 100:
        raise ValueError("phase must be a number between 0 and 100.")
    if interp not in ['linear', 'quadratic']:
        raise ValueError("interp must be one of 'linear' or 'quadratic'.")

    flanger_args = ['flanger']
    flanger_args.extend(
        '{:f}'.format(value)
        for value in (delay, depth, regen, width, speed)
    )
    flanger_args.append('{}'.format(shape))
    flanger_args.append('{:f}'.format(phase))
    flanger_args.append('{}'.format(interp))

    self.effects.extend(flanger_args)
    self.effects_log.append('flanger')
    return self
def gain(self, gain_db: float = 0.0, normalize: bool = True,
         limiter: bool = False,
         balance: Optional[Literal['e', 'B', 'b']] = None):
    '''Apply amplification or attenuation to the audio signal.

    Parameters
    ----------
    gain_db : float, default=0.0
        Gain adjustment in decibels (dB).
    normalize : bool, default=True
        If True, audio is normalized to gain_db relative to full scale.
        If False, simply adjusts the audio power level by gain_db.
    limiter : bool, default=False
        If True, a simple limiter is invoked to prevent clipping.
    balance : str or None, default=None
        Balance gain across channels. Can be one of:
         * None applies no balancing (default)
         * 'e' applies gain to all channels other than that with the
            highest peak level, such that all channels attain the same
            peak level
         * 'B' applies gain to all channels other than that with the
            highest RMS level, such that all channels attain the same
            RMS level
         * 'b' applies gain with clipping protection to all channels other
            than that with the highest RMS level, such that all channels
            attain the same RMS level
        If normalize=True, 'B' and 'b' are equivalent.

    Returns
    -------
    self : Transformer
        The transformer itself, with the effect appended.

    Raises
    ------
    ValueError
        If any argument has the wrong type or value.

    See Also
    --------
    loudness

    '''
    if not is_number(gain_db):
        raise ValueError("gain_db must be a number.")
    if not isinstance(normalize, bool):
        raise ValueError("normalize must be a boolean.")
    if not isinstance(limiter, bool):
        raise ValueError("limiter must be a boolean.")
    if balance not in [None, 'e', 'B', 'b']:
        raise ValueError("balance must be one of None, 'e', 'B', or 'b'.")

    # Option flags go between the effect name and the gain value, in the
    # order: balance, normalize, limiter.
    flags = []
    if balance is not None:
        flags.append('-{}'.format(balance))
    if normalize:
        flags.append('-n')
    if limiter:
        flags.append('-l')

    self.effects.extend(['gain'] + flags + ['{:f}'.format(gain_db)])
    self.effects_log.append('gain')
    return self
def highpass(self, frequency: float, width_q: float = 0.707,
             n_poles: int = 2):
    '''Apply a high-pass filter with 3dB point frequency. The filter can be
    either single-pole or double-pole. The filters roll off at 6dB per pole
    per octave (20dB per pole per decade).

    Parameters
    ----------
    frequency : float
        The filter's cutoff frequency in Hz.
    width_q : float, default=0.707
        The filter's width as a Q-factor. Applies only when n_poles=2.
        The default gives a Butterworth response.
    n_poles : int, default=2
        The number of poles in the filter. Must be either 1 or 2

    Returns
    -------
    self : Transformer
        The transformer itself, with the effect appended.

    Raises
    ------
    ValueError
        If `frequency` or `width_q` is not a positive number, or `n_poles`
        is not 1 or 2.

    See Also
    --------
    lowpass, equalizer, sinc, allpass

    '''
    if not is_number(frequency) or frequency <= 0:
        raise ValueError("frequency must be a positive number.")
    if not is_number(width_q) or width_q <= 0:
        raise ValueError("width_q must be a positive number.")
    if n_poles not in [1, 2]:
        raise ValueError("n_poles must be 1 or 2.")

    # The Q-factor width is only passed for the two-pole filter.
    width_part = ['{:f}q'.format(width_q)] if n_poles == 2 else []
    filter_args = (
        ['highpass', '-{}'.format(n_poles), '{:f}'.format(frequency)]
        + width_part
    )

    self.effects.extend(filter_args)
    self.effects_log.append('highpass')
    return self
def lowpass(self, frequency: float, width_q: float = 0.707,
            n_poles: int = 2):
    '''Apply a low-pass filter with 3dB point frequency. The filter can be
    either single-pole or double-pole. The filters roll off at 6dB per pole
    per octave (20dB per pole per decade).

    Parameters
    ----------
    frequency : float
        The filter's cutoff frequency in Hz.
    width_q : float, default=0.707
        The filter's width as a Q-factor. Applies only when n_poles=2.
        The default gives a Butterworth response.
    n_poles : int, default=2
        The number of poles in the filter. Must be either 1 or 2

    Returns
    -------
    self : Transformer
        The transformer itself, with the effect appended.

    Raises
    ------
    ValueError
        If `frequency` or `width_q` is not a positive number, or `n_poles`
        is not 1 or 2.

    See Also
    --------
    highpass, equalizer, sinc, allpass

    '''
    if not is_number(frequency) or frequency <= 0:
        raise ValueError("frequency must be a positive number.")
    if not is_number(width_q) or width_q <= 0:
        raise ValueError("width_q must be a positive number.")
    if n_poles not in [1, 2]:
        raise ValueError("n_poles must be 1 or 2.")

    # The Q-factor width is only passed for the two-pole filter.
    width_part = ['{:f}q'.format(width_q)] if n_poles == 2 else []
    filter_args = (
        ['lowpass', '-{}'.format(n_poles), '{:f}'.format(frequency)]
        + width_part
    )

    self.effects.extend(filter_args)
    self.effects_log.append('lowpass')
    return self
def hilbert(self, num_taps: Optional[int] = None):
    '''Apply an odd-tap Hilbert transform filter, phase-shifting the signal
    by 90 degrees.

    This is used in many matrix coding schemes and for analytic signal
    generation. The process is often written as a multiplication by i
    (or j), the imaginary unit. An odd-tap Hilbert transform filter has a
    bandpass characteristic, attenuating the lowest and highest
    frequencies.

    Parameters
    ----------
    num_taps : int or None, default=None
        Number of filter taps - must be a positive odd integer. If None,
        it is chosen to have a cutoff frequency of about 75 Hz.

    Returns
    -------
    self : Transformer
        The transformer itself, with the effect appended.

    Raises
    ------
    ValueError
        If `num_taps` is neither None nor a positive odd integer.

    '''
    if num_taps is not None:
        # bool is a subclass of int and True % 2 == 1, so it has to be
        # rejected explicitly.
        if isinstance(num_taps, bool) or not isinstance(num_taps, int):
            raise ValueError("num taps must be None or an odd integer.")
        # In Python -3 % 2 == 1, so the parity test alone lets negative
        # odd values through; a tap count must be positive.
        if num_taps <= 0 or num_taps % 2 == 0:
            raise ValueError("num_taps must be a positive odd integer.")

    effect_args = ['hilbert']

    if num_taps is not None:
        effect_args.extend(['-n', '{}'.format(num_taps)])

    self.effects.extend(effect_args)
    self.effects_log.append('hilbert')
    return self
def loudness(self, gain_db: float = -10.0, reference_level: float = 65.0):
    '''Loudness control. Similar to the gain effect, but provides
    equalisation for the human auditory system.

    The gain is adjusted by gain_db and the signal is equalised according
    to ISO 226 w.r.t. reference_level.

    Parameters
    ----------
    gain_db : float, default=-10.0
        Loudness adjustment amount (in dB)
    reference_level : float, default=65.0
        Reference level (in dB) according to which the signal is equalized.
        Must be between 50 and 75 (dB)

    Returns
    -------
    self : Transformer
        The transformer itself, with the effect appended.

    Raises
    ------
    ValueError
        If an argument is not a number or `reference_level` is out of
        range.

    See Also
    --------
    gain

    '''
    if not is_number(gain_db):
        raise ValueError('gain_db must be a number.')

    if not is_number(reference_level):
        raise ValueError('reference_level must be a number')

    if reference_level > 75 or reference_level < 50:
        raise ValueError('reference_level must be between 50 and 75')

    loudness_args = ['loudness']
    loudness_args.append('{:f}'.format(gain_db))
    loudness_args.append('{:f}'.format(reference_level))

    self.effects.extend(loudness_args)
    self.effects_log.append('loudness')
    return self
def mcompand(self,
             n_bands: int = 2,
             crossover_frequencies: List[float] = [1600],
             attack_time: List[float] = [0.005, 0.000625],
             decay_time: List[float] = [0.1, 0.0125],
             soft_knee_db: List[Optional[float]] = [6.0, None],
             tf_points: List[List[Tuple[float, float]]] = [
                 [(-47, -40), (-34, -34), (-17, -33), (0, 0)],
                 [(-47, -40), (-34, -34), (-15, -33), (0, 0)]
             ],
             gain: List[Optional[float]] = [None, None]):
    '''Append a multi-band compander (``mcompand``) to the effects chain.

    One compander specification is emitted per band; every band after the
    first is preceded by its crossover frequency.

    Parameters
    ----------
    n_bands : int, default=2
        The number of bands. Must be a positive integer.
    crossover_frequencies : list of float, default=[1600]
        Crossover frequencies in Hz; must have length ``n_bands - 1``.
    attack_time : list of float, default=[0.005, 0.000625]
        Per-band attack time in seconds (length `n_bands`, elements > 0).
    decay_time : list of float, default=[0.1, 0.0125]
        Per-band decay time in seconds (length `n_bands`, elements > 0).
    soft_knee_db : list of float or None, default=[6.0, None]
        Per-band soft-knee amount in dB. A None entry means no soft-knee
        value is emitted for that band.
    tf_points : list of list of tuples
        Per-band transfer function, given as a non-empty list of
        ``(dB, dB)`` tuples. All values must be <= 0 and the x-values
        within a band must be unique. Points are sorted by x-value before
        being passed to SoX.
    gain : list of float or None, default=[None, None]
        Per-band gain. A None entry means no gain value is emitted for
        that band.

    Returns
    -------
    self
        This object, so that effect calls can be chained.

    Raises
    ------
    ValueError
        If any argument fails the validation described above.

    See Also
    --------
    compand, contrast

    '''
    # NOTE: the list defaults are shared between calls, but they are only
    # ever read here (never mutated), so sharing them is harmless.
    if not isinstance(n_bands, int) or n_bands < 1:
        raise ValueError("n_bands must be a positive integer.")

    crossover_ok = (isinstance(crossover_frequencies, list) and
                    len(crossover_frequencies) == n_bands - 1)
    if not crossover_ok:
        raise ValueError(
            "crossover_frequences must be a list of length n_bands - 1"
        )
    if any(not is_number(freq) or freq < 0
           for freq in crossover_frequencies):
        raise ValueError(
            "crossover_frequencies elements must be positive floats."
        )

    if not isinstance(attack_time, list) or len(attack_time) != n_bands:
        raise ValueError("attack_time must be a list of length n_bands")
    if any(not is_number(attack) or attack <= 0 for attack in attack_time):
        raise ValueError("attack_time elements must be positive numbers.")

    if not isinstance(decay_time, list) or len(decay_time) != n_bands:
        raise ValueError("decay_time must be a list of length n_bands")
    if any(not is_number(decay) or decay <= 0 for decay in decay_time):
        raise ValueError("decay_time elements must be positive numbers.")

    # Not an error, only a hint: attack is normally shorter than decay.
    if any(attack > decay for attack, decay in zip(attack_time, decay_time)):
        logger.warning(
            "Elements of attack_time are larger than decay_time.\n"
            "For most situations, attack_time should be shorter than "
            "decay time because the human ear is more sensitive to sudden "
            "loud music than sudden soft music."
        )

    if not isinstance(soft_knee_db, list) or len(soft_knee_db) != n_bands:
        raise ValueError("soft_knee_db must be a list of length n_bands.")
    if any(knee is not None and not is_number(knee)
           for knee in soft_knee_db):
        raise ValueError(
            "elements of soft_knee_db must be a number or None."
        )

    if not isinstance(tf_points, list) or len(tf_points) != n_bands:
        raise ValueError("tf_points must be a list of length n_bands.")
    if any(not isinstance(band, list) or len(band) == 0
           for band in tf_points):
        raise ValueError(
            "tf_points must be a list with at least one point."
        )

    for band in tf_points:
        if not all(isinstance(pt, tuple) for pt in band):
            raise ValueError("elements of tf_points lists must be pairs")
        if not all(len(pt) == 2 for pt in band):
            raise ValueError("Tuples in tf_points lists must be length 2")
        if not all(is_number(pt[0]) and is_number(pt[1]) for pt in band):
            raise ValueError(
                "Tuples in tf_points lists must be pairs of numbers."
            )
        if any(pt[0] > 0 or pt[1] > 0 for pt in band):
            raise ValueError(
                "Tuple values in tf_points lists must be <= 0 (dB)."
            )
        distinct_x = {pt[0] for pt in band}
        if len(band) > len(distinct_x):
            raise ValueError("Found duplicate x-value in tf_points list.")

    if not isinstance(gain, list) or len(gain) != n_bands:
        raise ValueError("gain must be a list of length n_bands")
    if not all(is_number(g) or g is None for g in gain):
        raise ValueError("gain elements must be numbers or None.")

    effect_args = ['mcompand']
    # Every per-band list has been validated to hold exactly n_bands items.
    per_band = zip(attack_time, decay_time, soft_knee_db, tf_points, gain)
    for band_idx, (attack, decay, knee, points, band_gain) in \
            enumerate(per_band):
        if band_idx > 0:
            effect_args.append(
                format(crossover_frequencies[band_idx - 1], 'f')
            )

        ordered_points = sorted(points, key=lambda pt: pt[0])
        transfer = ",".join(
            "{:f},{:f}".format(pt[0], pt[1]) for pt in ordered_points
        )
        if knee is not None:
            transfer = "{:f}:{}".format(knee, transfer)

        band_args = ["{:f},{:f}".format(attack, decay), transfer]
        if band_gain is not None:
            band_args.append(format(band_gain, 'f'))

        # Each band is handed to SoX as one space-separated argument.
        effect_args.append(' '.join(band_args))

    self.effects.extend(effect_args)
    self.effects_log.append('mcompand')
    return self
def noiseprof(self,
              input_filepath: Union[str, Path],
              profile_path: Union[str, Path]):
    '''Compute a noise profile of an audio file and save it to disk.

    This does not modify the Transformer's effects chain; it runs a
    build with the ``noiseprof`` effect passed as extra arguments and
    ``'-n'`` as the output.

    Parameters
    ----------
    input_filepath : str
        Path to the audio file from which to compute a noise profile.
    profile_path : str
        Path where the noise profile file is written.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If `profile_path` is an existing directory.
    IOError
        If the directory that would contain `profile_path` is not
        writeable.

    See Also
    --------
    noisered

    '''
    if os.path.isdir(profile_path):
        raise ValueError(
            "profile_path {} is a directory.".format(profile_path))

    # A bare file name has no directory part; resolve it against the
    # current working directory so its parent can be checked.
    is_bare_name = (os.path.dirname(profile_path) == ''
                    and profile_path != '')
    _abs_profile_path = (
        os.path.join(os.getcwd(), profile_path) if is_bare_name
        else profile_path
    )

    parent_dir = os.path.dirname(_abs_profile_path)
    if not os.access(parent_dir, os.W_OK):
        raise IOError(
            "profile_path {} is not writeable.".format(_abs_profile_path))

    self.build(
        input_filepath, '-n', extra_args=['noiseprof', profile_path]
    )
    return None
def noisered(self, profile_path: Union[str, Path], amount: float = 0.5):
    '''Append the ``noisered`` (noise reduction) effect to the chain.

    Parameters
    ----------
    profile_path : str
        Path to an existing noise profile file, e.g. one generated with
        `noiseprof`.
    amount : float, default=0.5
        How much noise to remove, between 0 and 1. Higher values remove
        more noise but are more likely to remove wanted parts of the
        signal.

    Returns
    -------
    self
        This object, so that effect calls can be chained.

    Raises
    ------
    IOError
        If `profile_path` does not exist.
    ValueError
        If `amount` is not a number between 0 and 1.

    See Also
    --------
    noiseprof

    '''
    profile_exists = os.path.exists(profile_path)
    if not profile_exists:
        raise IOError(
            "profile_path {} does not exist.".format(profile_path))

    if not is_number(amount) or amount < 0 or amount > 1:
        raise ValueError("amount must be a number between 0 and 1.")

    self.effects.extend(['noisered', profile_path, format(amount, 'f')])
    self.effects_log.append('noisered')
    return self
def norm(self, db_level: float = -3.0):
    '''Append the ``norm`` effect, normalizing audio to a given dB level.

    Parameters
    ----------
    db_level : float, default=-3.0
        Output volume (dB).

    Returns
    -------
    self
        This object, so that effect calls can be chained.

    Raises
    ------
    ValueError
        If `db_level` is not a number.

    See Also
    --------
    gain, loudness

    '''
    if not is_number(db_level):
        raise ValueError('db_level must be a number.')

    self.effects.extend(['norm', format(db_level, 'f')])
    self.effects_log.append('norm')
    return self
def oops(self):
    '''Append the Out Of Phase Stereo (``oops``) effect to the chain.

    Mixes stereo to twin-mono where each mono channel contains the
    difference between the left and right stereo channels. Sometimes
    called the 'karaoke' effect, as it often removes most or all of the
    vocals from a recording.

    Returns
    -------
    self
        This object, so that effect calls can be chained.

    '''
    self.effects.append('oops')
    self.effects_log.append('oops')
    return self
def overdrive(self, gain_db: float = 20.0, colour: float = 20.0):
    '''Append the ``overdrive`` (non-linear distortion) effect.

    Parameters
    ----------
    gain_db : float, default=20
        Controls the amount of distortion (dB).
    colour : float, default=20
        Controls the amount of even harmonic content in the output (dB).

    Returns
    -------
    self
        This object, so that effect calls can be chained.

    Raises
    ------
    ValueError
        If `gain_db` or `colour` is not a number.

    '''
    if not is_number(gain_db):
        # The message names the parameter that was actually checked
        # (it previously referred to a non-existent 'db_level').
        raise ValueError('gain_db must be a number.')

    if not is_number(colour):
        raise ValueError('colour must be a number.')

    effect_args = [
        'overdrive',
        '{:f}'.format(gain_db),
        '{:f}'.format(colour)
    ]
    self.effects.extend(effect_args)
    self.effects_log.append('overdrive')

    return self
def pad(self, start_duration: float = 0.0, end_duration: float = 0.0):
    '''Append the ``pad`` effect, adding silence at the start and/or end.

    With the default arguments, zero seconds of silence are requested at
    both ends.

    Parameters
    ----------
    start_duration : float, default=0.0
        Number of seconds of silence to add to the beginning.
        Must be >= 0.
    end_duration : float, default=0.0
        Number of seconds of silence to add to the end. Must be >= 0.

    Returns
    -------
    self
        This object, so that effect calls can be chained.

    Raises
    ------
    ValueError
        If a duration is not a number or is negative.

    See Also
    --------
    delay

    '''
    start_invalid = not is_number(start_duration) or start_duration < 0
    if start_invalid:
        raise ValueError("Start duration must be a positive number.")

    end_invalid = not is_number(end_duration) or end_duration < 0
    if end_invalid:
        raise ValueError("End duration must be positive.")

    self.effects.extend(
        ['pad', format(start_duration, 'f'), format(end_duration, 'f')]
    )
    self.effects_log.append('pad')
    return self
def phaser(self,
           gain_in: float = 0.8,
           gain_out: float = 0.74,
           delay: int = 3,
           decay: float = 0.4,
           speed: float = 0.5,
           modulation_shape: Literal['sinusoidal', 'triangular'] = 'sinusoidal'):
    '''Append a phasing effect (``phaser``) to the effects chain.

    Parameters
    ----------
    gain_in : float, default=0.8
        Input volume, greater than 0 and at most 1.
    gain_out : float, default=0.74
        Output volume, greater than 0 and at most 1.
    delay : float, default=3
        Delay in milliseconds, greater than 0 and at most 5.
    decay : float, default=0.4
        Decay relative to gain_in, between 0.1 and 0.5.
    speed : float, default=0.5
        Modulation speed in Hz, between 0.1 and 2.
    modulation_shape : str, default='sinusoidal'
        Modulation shape. One of 'sinusoidal' or 'triangular'.

    Returns
    -------
    self
        This object, so that effect calls can be chained.

    Raises
    ------
    ValueError
        If any argument is of the wrong type or outside its range.

    See Also
    --------
    flanger, tremolo

    '''
    if not is_number(gain_in) or gain_in <= 0 or gain_in > 1:
        raise ValueError("gain_in must be a number between 0 and 1.")

    if not is_number(gain_out) or gain_out <= 0 or gain_out > 1:
        raise ValueError("gain_out must be a number between 0 and 1.")

    # The messages below state the enforced range; they previously only
    # said "positive number" although an upper bound is checked as well.
    if not is_number(delay) or delay <= 0 or delay > 5:
        raise ValueError("delay must be a number between 0 and 5.")

    if not is_number(decay) or decay < 0.1 or decay > 0.5:
        raise ValueError("decay must be a number between 0.1 and 0.5.")

    if not is_number(speed) or speed < 0.1 or speed > 2:
        raise ValueError("speed must be a number between 0.1 and 2.")

    if modulation_shape not in ['sinusoidal', 'triangular']:
        raise ValueError(
            "modulation_shape must be one of 'sinusoidal', 'triangular'."
        )

    effect_args = [
        'phaser',
        '{:f}'.format(gain_in),
        '{:f}'.format(gain_out),
        '{:f}'.format(delay),
        '{:f}'.format(decay),
        '{:f}'.format(speed)
    ]

    # SoX selects the modulation shape with a trailing flag.
    if modulation_shape == 'sinusoidal':
        effect_args.append('-s')
    elif modulation_shape == 'triangular':
        effect_args.append('-t')

    self.effects.extend(effect_args)
    self.effects_log.append('phaser')

    return self
def pitch(self, n_semitones: float, quick: bool = False):
    '''Append a pitch shift (``pitch``) that leaves the tempo unchanged.

    This effect uses the WSOLA algorithm. The audio is chopped up into
    segments which are then shifted in the time domain and overlapped
    (cross-faded) at points where their waveforms are most similar as
    determined by measurement of least squares.

    Parameters
    ----------
    n_semitones : float
        The number of semitones to shift. Can be positive or negative.
        Values outside [-12, 12] are accepted but trigger a warning.
    quick : bool, default=False
        If True, this effect will run faster but with lower sound
        quality.

    Returns
    -------
    self
        This object, so that effect calls can be chained.

    Raises
    ------
    ValueError
        If `n_semitones` is not a number or `quick` is not a boolean.

    See Also
    --------
    bend, speed, tempo

    '''
    if not is_number(n_semitones):
        # Negative shifts are valid, so the message must not demand a
        # "positive" number (as it previously did).
        raise ValueError("n_semitones must be a number")

    if n_semitones < -12 or n_semitones > 12:
        logger.warning(
            "Using an extreme pitch shift. "
            "Quality of results will be poor"
        )

    if not isinstance(quick, bool):
        raise ValueError("quick must be a boolean.")

    effect_args = ['pitch']

    if quick:
        effect_args.append('-q')

    # The shift is passed to SoX in cents: 100 cents per semitone.
    effect_args.append('{:f}'.format(n_semitones * 100.))

    self.effects.extend(effect_args)
    self.effects_log.append('pitch')

    return self
def rate(self,
         samplerate: float,
         quality: Literal['q', 'l', 'm', 'h', 'v'] = 'h'):
    '''Append the ``rate`` effect, resampling audio to `samplerate`.

    Higher resampling quality means slower runtime.

    Parameters
    ----------
    samplerate : float
        Desired sample rate. Must be a positive number.
    quality : str, default='h'
        Resampling quality. One of:
            * q : Quick - very low quality,
            * l : Low,
            * m : Medium,
            * h : High (default),
            * v : Very high

    Returns
    -------
    self
        This object, so that effect calls can be chained.

    Raises
    ------
    ValueError
        If `samplerate` is not a positive number or `quality` is not one
        of the allowed values.

    See Also
    --------
    upsample, downsample, convert

    '''
    allowed_qualities = ['q', 'l', 'm', 'h', 'v']

    if not is_number(samplerate) or samplerate <= 0:
        raise ValueError("Samplerate must be a positive number.")

    if quality not in allowed_qualities:
        raise ValueError(
            "Quality must be one of {}.".format(' '.join(allowed_qualities))
        )

    # The quality letter is passed to SoX as a flag, e.g. '-h'.
    quality_flag = '-{}'.format(quality)
    self.effects.extend(['rate', quality_flag, format(samplerate, 'f')])
    self.effects_log.append('rate')
    return self
def remix(self,
          remix_dictionary: Optional[Dict[int, List[int]]] = None,
          num_output_channels: Optional[int] = None):
    '''Append the ``remix`` effect, remixing the channels of the audio.

    Note: volume options are not yet implemented

    Parameters
    ----------
    remix_dictionary : dict or None
        Dictionary mapping output channel to list of input channel(s).
        Output channels in range that are not keys of the dictionary are
        emitted as ``'0'``.
        If None, ``'-'`` is passed to SoX, which mixes all channels down
        to a single mono file.
    num_output_channels : int or None
        The number of channels in the output file.
        If None, the number of output channels is equal to the largest
        key in remix_dictionary.
        If remix_dictionary is None, this variable is ignored.

    Returns
    -------
    self
        This object, so that effect calls can be chained.

    Raises
    ------
    ValueError
        If an argument has the wrong type or contains non-positive or
        non-integer channel numbers, or if `remix_dictionary` is empty
        while `num_output_channels` is None (the number of output
        channels cannot be inferred in that case).

    Examples
    --------
    Remix a 4-channel input file. The output file will have
    input channel 2 in channel 1, a mixdown of input channels 1 an 3 in
    channel 2, an empty channel 3, and a copy of input channel 4 in
    channel 4.

    >>> import sox
    >>> tfm = sox.Transformer()
    >>> remix_dictionary = {1: [2], 2: [1, 3], 4: [4]}
    >>> tfm.remix(remix_dictionary)

    '''
    if not (isinstance(remix_dictionary, dict) or
            remix_dictionary is None):
        raise ValueError("remix_dictionary must be a dictionary or None.")

    if remix_dictionary is not None:

        if not all(isinstance(key, int) and key > 0
                   for key in remix_dictionary):
            raise ValueError(
                "remix dictionary must have positive integer keys."
            )

        if not all(isinstance(sources, list)
                   for sources in remix_dictionary.values()):
            raise ValueError("remix dictionary values must be lists.")

        for sources in remix_dictionary.values():
            if not all(isinstance(src, int) and src > 0
                       for src in sources):
                raise ValueError(
                    "elements of remix dictionary values must "
                    "be positive integers"
                )

    if not ((isinstance(num_output_channels, int) and
             num_output_channels > 0) or num_output_channels is None):
        raise ValueError(
            "num_output_channels must be a positive integer or None."
        )

    effect_args = ['remix']
    if remix_dictionary is None:
        effect_args.append('-')
    else:
        if num_output_channels is None:
            # Without keys there is nothing to infer the channel count
            # from; fail with a clear message instead of letting max()
            # raise on an empty sequence.
            if not remix_dictionary:
                raise ValueError(
                    "remix_dictionary must not be empty when "
                    "num_output_channels is None."
                )
            n_out = max(remix_dictionary)
        else:
            n_out = num_output_channels

        for channel in range(1, n_out + 1):
            if channel in remix_dictionary:
                out_channel = ','.join(
                    str(src) for src in remix_dictionary[channel]
                )
            else:
                # '0' is emitted for output channels with no mapping.
                out_channel = '0'

            effect_args.append(out_channel)

    self.effects.extend(effect_args)
    self.effects_log.append('remix')

    return self
def repeat(self, count: int = 1):
    '''Append the ``repeat`` effect, repeating the entire audio.

    Parameters
    ----------
    count : int, default=1
        The number of times to repeat the audio. Must be a positive
        integer.

    Returns
    -------
    self
        This object, so that effect calls can be chained like the other
        effect methods.

    Raises
    ------
    ValueError
        If `count` is not a positive integer.

    '''
    if not isinstance(count, int) or count < 1:
        raise ValueError("count must be a postive integer.")

    effect_args = ['repeat', '{}'.format(count)]
    self.effects.extend(effect_args)
    self.effects_log.append('repeat')

    # Returning self (previously an implicit None) keeps this method
    # chainable, consistent with the other effect methods.
    return self
def reverb(self,
           reverberance: float = 50,
           high_freq_damping: float = 50,
           room_scale: float = 100,
           stereo_depth: float = 100,
           pre_delay: float = 0,
           wet_gain: float = 0,
           wet_only: bool = False):
    '''Append reverberation (``reverb``, the 'freeverb' algorithm).

    Applying a small amount of stereo reverb to a (dry) mono signal will
    usually make it sound more natural.

    Parameters
    ----------
    reverberance : float, default=50
        Percentage of reverberance (0 to 100).
    high_freq_damping : float, default=50
        Percentage of high-frequency damping (0 to 100).
    room_scale : float, default=100
        Scale of the room as a percentage (0 to 100).
    stereo_depth : float, default=100
        Stereo depth as a percentage (0 to 100).
    pre_delay : float, default=0
        Pre-delay in milliseconds. Must be >= 0.
    wet_gain : float, default=0
        Amount of wet gain in dB.
    wet_only : bool, default=False
        If True, only outputs the wet signal.

    Returns
    -------
    self
        This object, so that effect calls can be chained.

    Raises
    ------
    ValueError
        If any argument has the wrong type or is out of range.

    See Also
    --------
    echo

    '''
    reverberance_bad = (not is_number(reverberance) or
                        reverberance < 0 or reverberance > 100)
    if reverberance_bad:
        raise ValueError("reverberance must be between 0 and 100")

    damping_bad = (not is_number(high_freq_damping) or
                   high_freq_damping < 0 or high_freq_damping > 100)
    if damping_bad:
        raise ValueError("high_freq_damping must be between 0 and 100")

    room_bad = (not is_number(room_scale) or
                room_scale < 0 or room_scale > 100)
    if room_bad:
        raise ValueError("room_scale must be between 0 and 100")

    depth_bad = (not is_number(stereo_depth) or
                 stereo_depth < 0 or stereo_depth > 100)
    if depth_bad:
        raise ValueError("stereo_depth must be between 0 and 100")

    if not is_number(pre_delay) or pre_delay < 0:
        raise ValueError("pre_delay must be a positive number")

    if not is_number(wet_gain):
        raise ValueError("wet_gain must be a number")

    if not isinstance(wet_only, bool):
        raise ValueError("wet_only must be a boolean.")

    # The optional '-w' flag comes before the numeric arguments.
    effect_args = ['reverb', '-w'] if wet_only else ['reverb']
    numeric_settings = (
        reverberance, high_freq_damping, room_scale,
        stereo_depth, pre_delay, wet_gain,
    )
    effect_args.extend(format(value, 'f') for value in numeric_settings)

    self.effects.extend(effect_args)
    self.effects_log.append('reverb')
    return self
def reverse(self):
    '''Append the ``reverse`` effect, reversing the audio completely.

    Returns
    -------
    self
        This object, so that effect calls can be chained.

    '''
    self.effects.append('reverse')
    self.effects_log.append('reverse')
    return self
def silence(self,
            location: Literal[0, 1, -1] = 0,
            silence_threshold: float = 0.1,
            min_silence_duration: float = 0.1,
            buffer_around_silence: bool = False):
    '''Append the ``silence`` effect, removing silent regions.

    Parameters
    ----------
    location : int, default=0
        Where to remove silence. One of:
            * 0 to remove silence throughout the file (default),
            * 1 to remove silence from the beginning,
            * -1 to remove silence from the end (implemented by
              wrapping the effect between two ``reverse`` effects).
    silence_threshold : float, default=0.1
        Silence threshold as percentage of maximum sample amplitude.
        Must be at least 0 and less than 100.
    min_silence_duration : float, default=0.1
        Duration in seconds passed to SoX together with the threshold.
        Must be a positive number.
    buffer_around_silence : bool, default=False
        If True, the ``-l`` flag is passed to SoX, leaving a buffer of
        min_silence_duration around removed silent regions.

    Returns
    -------
    self
        This object, so that effect calls can be chained.

    Raises
    ------
    ValueError
        If any argument has the wrong type or is out of range.

    See Also
    --------
    vad

    '''
    if location not in [-1, 0, 1]:
        raise ValueError("location must be one of -1, 0, 1.")

    threshold_bad = (not is_number(silence_threshold) or
                     silence_threshold < 0 or
                     silence_threshold >= 100)
    if threshold_bad:
        raise ValueError(
            "silence_threshold must be a number between 0 and 100"
        )

    if not is_number(min_silence_duration) or min_silence_duration <= 0:
        raise ValueError(
            "min_silence_duration must be a positive number."
        )

    if not isinstance(buffer_around_silence, bool):
        raise ValueError("buffer_around_silence must be a boolean.")

    duration_arg = format(min_silence_duration, 'f')
    threshold_arg = '{:f}%'.format(silence_threshold)
    from_end = location == -1

    # Trimming from the end is done by reversing, trimming from the
    # start, then reversing back.
    effect_args = ['reverse'] if from_end else []

    effect_args.append('silence')
    if buffer_around_silence:
        effect_args.append('-l')

    effect_args += ['1', duration_arg, threshold_arg]

    if location == 0:
        effect_args += ['-1', duration_arg, threshold_arg]

    if from_end:
        effect_args.append('reverse')

    self.effects.extend(effect_args)
    self.effects_log.append('silence')
    return self
def sinc(self,
         filter_type: Literal['high', 'low', 'pass', 'reject'] = 'high',
         cutoff_freq: Union[float, List[float]] = 3000,
         stop_band_attenuation: float = 120,
         transition_bw: Optional[Union[float, List[float]]] = None,
         phase_response: Optional[float] = None):
    '''Append a sinc kaiser-windowed filter (``sinc``) to the chain.

    Parameters
    ----------
    filter_type : str, default='high'
        Type of filter. One of:
            - 'high' for a high-pass filter
            - 'low' for a low-pass filter
            - 'pass' for a band-pass filter
            - 'reject' for a band-reject filter
    cutoff_freq : float or list, default=3000
        The filter's critical frequencies in Hz; must be positive.
        A scalar for 'high'/'low' filters, a length 2 list for
        'pass'/'reject' filters (the list is sorted before use).
    stop_band_attenuation : float, default=120
        The stop band attenuation in dB. Must be >= 0.
    transition_bw : float, list or None, default=None
        The transition band-width in Hz. If None, no ``-t`` option is
        passed to SoX. A float is emitted once; for a length 2 list
        (only allowed for 'pass'/'reject') the first element is emitted
        before the frequency range and the second after it.
    phase_response : float or None
        The filter's phase response between 0 and 100. If None, no
        ``-p`` option is passed to SoX.

    Returns
    -------
    self
        This object, so that effect calls can be chained.

    Raises
    ------
    ValueError
        If any argument has the wrong type or is out of range.

    See Also
    --------
    band, bandpass, bandreject, highpass, lowpass

    '''
    filter_types = ['high', 'low', 'pass', 'reject']
    if filter_type not in filter_types:
        raise ValueError(
            "filter_type must be one of {}".format(', '.join(filter_types))
        )

    # After the check above, "not single_edge" means 'pass' or 'reject'.
    single_edge = filter_type in ['high', 'low']
    cutoff_is_number = is_number(cutoff_freq)
    cutoff_is_list = isinstance(cutoff_freq, list)

    if not (cutoff_is_number or cutoff_is_list):
        raise ValueError("cutoff_freq must be a number or a list")

    if single_edge and cutoff_is_list:
        raise ValueError(
            "For filter types 'high' and 'low', "
            "cutoff_freq must be a float, not a list"
        )

    if not single_edge and cutoff_is_number:
        raise ValueError(
            "For filter types 'pass' and 'reject', "
            "cutoff_freq must be a list, not a float"
        )

    if cutoff_is_number and cutoff_freq <= 0:
        raise ValueError("cutoff_freq must be a postive number")

    band_edges = None
    if cutoff_is_list:
        if len(cutoff_freq) != 2:
            raise ValueError(
                "If cutoff_freq is a list it may only have 2 elements."
            )

        if any(not is_number(freq) or freq <= 0 for freq in cutoff_freq):
            raise ValueError(
                "elements of cutoff_freq must be positive numbers"
            )

        band_edges = sorted(cutoff_freq)

    if not is_number(stop_band_attenuation) or stop_band_attenuation < 0:
        raise ValueError("stop_band_attenuation must be a positive number")

    bw_is_number = is_number(transition_bw)
    bw_is_list = isinstance(transition_bw, list)

    if not (bw_is_number or bw_is_list or transition_bw is None):
        raise ValueError("transition_bw must be a number, a list or None.")

    if single_edge and bw_is_list:
        raise ValueError(
            "For filter types 'high' and 'low', "
            "transition_bw must be a float, not a list"
        )

    if bw_is_number and transition_bw <= 0:
        raise ValueError("transition_bw must be a postive number")

    if bw_is_list:
        if any(not is_number(bw) or bw <= 0 for bw in transition_bw):
            raise ValueError(
                "elements of transition_bw must be positive numbers"
            )
        if len(transition_bw) != 2:
            raise ValueError(
                "If transition_bw is a list it may only have 2 elements."
            )

    if phase_response is not None and not is_number(phase_response):
        raise ValueError("phase_response must be a number or None.")

    if (is_number(phase_response) and
            (phase_response < 0 or phase_response > 100)):
        raise ValueError("phase response must be between 0 and 100")

    effect_args = ['sinc', '-a', format(stop_band_attenuation, 'f')]

    if phase_response is not None:
        effect_args += ['-p', format(phase_response, 'f')]

    if filter_type == 'high':
        # High-pass: the optional band-width precedes the frequency.
        if transition_bw is not None:
            effect_args += ['-t', format(transition_bw, 'f')]
        effect_args.append(format(cutoff_freq, 'f'))
    elif filter_type == 'low':
        # Low-pass: the frequency carries a leading '-' and the optional
        # band-width follows it.
        effect_args.append('-{:f}'.format(cutoff_freq))
        if transition_bw is not None:
            effect_args += ['-t', format(transition_bw, 'f')]
    else:
        if bw_is_number:
            leading_bw, trailing_bw = transition_bw, None
        elif bw_is_list:
            leading_bw, trailing_bw = transition_bw
        else:
            leading_bw, trailing_bw = None, None

        if bw_is_number or bw_is_list:
            effect_args += ['-t', format(leading_bw, 'f')]

        # Band-pass is written low-high, band-reject high-low.
        if filter_type == 'pass':
            effect_args.append(
                '{:f}-{:f}'.format(band_edges[0], band_edges[1])
            )
        elif filter_type == 'reject':
            effect_args.append(
                '{:f}-{:f}'.format(band_edges[1], band_edges[0])
            )

        if bw_is_list:
            effect_args += ['-t', format(trailing_bw, 'f')]

    self.effects.extend(effect_args)
    self.effects_log.append('sinc')
    return self
def speed(self, factor: float):
    '''Append the ``speed`` effect (pitch and tempo change together).

    Technically, the speed effect only changes the sample rate
    information, leaving the samples themselves untouched. The rate
    effect is invoked automatically to resample to the output sample
    rate, using its default quality/speed. For higher quality or higher
    speed resampling, in addition to the speed effect, specify the rate
    effect with the desired quality option.

    Parameters
    ----------
    factor : float
        The ratio of the new speed to the old speed.
        For ex. 1.1 speeds up the audio by 10%; 0.9 slows it down by 10%.
        Must be positive; values outside [0.5, 2] are accepted but
        trigger a warning. The value is passed to SoX unchanged.

    Returns
    -------
    self
        This object, so that effect calls can be chained.

    Raises
    ------
    ValueError
        If `factor` is not a positive number.

    See Also
    --------
    rate, tempo, pitch

    '''
    factor_invalid = not is_number(factor) or factor <= 0
    if factor_invalid:
        raise ValueError("factor must be a positive number")

    if factor < 0.5 or factor > 2:
        logger.warning(
            "Using an extreme factor. Quality of results will be poor"
        )

    self.effects.extend(['speed', format(factor, 'f')])
    self.effects_log.append('speed')
    return self
def stat(self,
         input_filepath: Union[str, Path],
         scale: Optional[float] = None,
         rms: Optional[bool] = False):
    '''Compute time and frequency domain statistics with SoX ``stat``.

    Unlike the effect methods, this does not modify the transformer
    effects chain. It runs a build with ``channels 1 stat`` passed as
    extra arguments and parses the text output that build returns.

    Note: The file is downmixed to mono prior to computation.

    Parameters
    ----------
    input_filepath : str
        Path to input file to compute stats on.
    scale : float or None, default=None
        If not None, passed to SoX via ``-s``; must be a positive number.
    rms : bool, default=False
        If True, the ``-rms`` flag is passed to SoX.

    Returns
    -------
    stat_dict : dict
        Dictionary of statistics. For every non-blank output line the
        last whitespace-separated token is the value (kept as a string)
        and the preceding tokens, joined by single spaces and stripped
        of leading/trailing colons, form the key.

    Raises
    ------
    ValueError
        If `scale` is given but is not a positive number.

    See Also
    --------
    stats, power_spectrum, sox.file_info

    '''
    effect_args = ['channels', '1', 'stat']

    if scale is not None:
        scale_invalid = not is_number(scale) or scale <= 0
        if scale_invalid:
            raise ValueError("scale must be a positive number.")
        effect_args += ['-s', format(scale, 'f')]

    if rms:
        effect_args.append('-rms')

    # Only the third element of the build result (the text) is used.
    _, _, stat_output = self.build(
        input_filepath, '-n', extra_args=effect_args, return_output=True
    )

    tokenized = (row.split() for row in stat_output.split('\n'))
    return {
        ' '.join(tokens[:-1]).strip(':'): tokens[-1]
        for tokens in tokenized
        if tokens
    }
def power_spectrum(self, input_filepath: Union[str, Path]):
    '''Compute the power spectrum using SoX ``stat -freq``.

    Runs a build with ``channels 1 stat -freq`` passed as extra
    arguments and parses the text output that build returns.

    Note: The file is downmixed to mono prior to computation.

    Parameters
    ----------
    input_filepath : str
        Path to input file to compute stats on.

    Returns
    -------
    power_spectrum : list
        List of ``[frequency, amplitude]`` pairs (floats), one for every
        output line consisting of exactly two whitespace-separated
        tokens. All other lines are ignored.

    See Also
    --------
    stat, stats, sox.file_info

    '''
    _, _, stat_output = self.build(
        input_filepath,
        '-n',
        extra_args=['channels', '1', 'stat', '-freq'],
        return_output=True
    )

    spectrum = []
    for row in stat_output.split('\n'):
        fields = row.split()
        # Spectrum rows are exactly "<freq> <amp>"; skip everything else.
        if len(fields) == 2:
            spectrum.append([float(fields[0]), float(fields[1])])

    return spectrum
def stats(self, input_filepath: Union[str, Path]):
    '''Compute time domain statistics using SoX ``stats``.

    Unlike the effect methods, this does not modify the transformer
    effects chain. It runs a build with ``channels 1 stats`` passed as
    extra arguments and parses the text output that build returns.

    Note: The file is downmixed to mono prior to computation.

    Parameters
    ----------
    input_filepath : str
        Path to input file to compute stats on.

    Returns
    -------
    stats_dict : dict
        Dictionary of statistics. For every non-blank output line the
        last whitespace-separated token is the value (kept as a string)
        and the preceding tokens, joined by single spaces, form the key.

    See Also
    --------
    stat, sox.file_info

    '''
    _, _, stats_output = self.build(
        input_filepath,
        '-n',
        extra_args=['channels', '1', 'stats'],
        return_output=True
    )

    tokenized = (row.split() for row in stats_output.split('\n'))
    return {
        ' '.join(tokens[:-1]): tokens[-1]
        for tokens in tokenized
        if len(tokens) != 0
    }
def stretch(self, factor: float, window: float = 20):
    '''Append the ``stretch`` effect (duration change, pitch unchanged).

    **Unless factor is close to 1, use the tempo effect instead.**

    This effect is broadly equivalent to the tempo effect with search
    set to zero, so in general, its results are comparatively poor; it
    is retained as it can sometimes out-perform tempo for small factors.

    Parameters
    ----------
    factor : float
        Stretch factor; must be positive. It is forwarded to the SoX
        stretch effect unchanged. Values outside [0.5, 2], or more than
        0.1 away from 1, are accepted but trigger a warning.
        NOTE(review): earlier documentation described this as the ratio
        of the new tempo to the old tempo, i.e. the inverse of what SoX
        expects, but no inversion is performed here - confirm the
        intended semantics.
    window : float, default=20
        Window size in milliseconds. Must be positive.

    Returns
    -------
    self
        This object, so that effect calls can be chained.

    Raises
    ------
    ValueError
        If `factor` or `window` is not a positive number.

    See Also
    --------
    tempo, speed, pitch

    '''
    factor_invalid = not is_number(factor) or factor <= 0
    if factor_invalid:
        raise ValueError("factor must be a positive number")

    if factor < 0.5 or factor > 2:
        logger.warning(
            "Using an extreme time stretching factor. "
            "Quality of results will be poor"
        )

    distance_from_unity = abs(factor - 1.0)
    if distance_from_unity > 0.1:
        logger.warning(
            "For this stretch factor, "
            "the tempo effect has better performance."
        )

    window_invalid = not is_number(window) or window <= 0
    if window_invalid:
        raise ValueError(
            "window must be a positive number."
        )

    self.effects.extend(
        ['stretch', format(factor, 'f'), format(window, 'f')]
    )
    self.effects_log.append('stretch')
    return self
def swap(self):
    '''Append the ``swap`` effect, swapping stereo channels.

    If the input is not stereo, pairs of channels are swapped, and a
    possible odd last channel passed through. E.g., for seven channels,
    the output order will be 2, 1, 4, 3, 6, 5, 7.

    Returns
    -------
    self
        This object, so that effect calls can be chained.

    See Also
    --------
    remix

    '''
    self.effects.append('swap')
    self.effects_log.append('swap')
    return self
def tempo(self,
          factor: float,
          audio_type: Optional[Literal['m', 's', 'l']] = None,
          quick: bool = False):
    '''Append the ``tempo`` effect (time stretch, pitch unchanged).

    This effect uses the WSOLA algorithm. The audio is chopped up into
    segments which are then shifted in the time domain and overlapped
    (cross-faded) at points where their waveforms are most similar as
    determined by measurement of least squares.

    Parameters
    ----------
    factor : float
        The ratio of new tempo to the old tempo.
        For ex. 1.1 speeds up the tempo by 10%; 0.9 slows it down by 10%.
        Must be positive. Values outside [0.5, 2], or within 0.1 of 1,
        are accepted but trigger a warning.
    audio_type : str or None, default=None
        Type of audio, which optimizes algorithm parameters. One of:
            * m : Music,
            * s : Speech,
            * l : Linear (useful when factor is close to 1),
        If None, no audio type flag is passed to SoX.
    quick : bool, default=False
        If True, this effect will run faster but with lower sound
        quality.

    Returns
    -------
    self
        This object, so that effect calls can be chained.

    Raises
    ------
    ValueError
        If any argument has the wrong type or an invalid value.

    See Also
    --------
    stretch, speed, pitch

    '''
    factor_invalid = not is_number(factor) or factor <= 0
    if factor_invalid:
        raise ValueError("factor must be a positive number")

    if factor < 0.5 or factor > 2:
        logger.warning(
            "Using an extreme time stretching factor. "
            "Quality of results will be poor"
        )

    distance_from_unity = abs(factor - 1.0)
    if distance_from_unity <= 0.1:
        logger.warning(
            "For this stretch factor, "
            "the stretch effect has better performance."
        )

    if audio_type not in [None, 'm', 's', 'l']:
        raise ValueError(
            "audio_type must be one of None, 'm', 's', or 'l'."
        )

    if not isinstance(quick, bool):
        raise ValueError("quick must be a boolean.")

    # Flags precede the factor: optional '-q', then optional audio type.
    effect_args = ['tempo', '-q'] if quick else ['tempo']
    if audio_type is not None:
        effect_args.append('-{}'.format(audio_type))
    effect_args.append(format(factor, 'f'))

    self.effects.extend(effect_args)
    self.effects_log.append('tempo')
    return self
def treble(self,
           gain_db: float,
           frequency: float = 3000.0,
           slope: float = 0.5):
    '''Boost or cut the treble (upper) frequencies of the audio using a
    two-pole shelving filter with a response similar to that of a
    standard hi-fi's tone-controls. This is also known as shelving
    equalisation.

    The filters are described in detail in
    http://musicdsp.org/files/Audio-EQ-Cookbook.txt

    Parameters
    ----------
    gain_db : float
        The gain at the Nyquist frequency.
        For a large cut use -20, for a large boost use 20.
    frequency : float, default=3000.0
        The filter's cutoff frequency in Hz. Must be positive.
    slope : float, default=0.5
        The steepness of the filter's shelf transition.
        For a gentle slope use 0.3, and use 1.0 for a steep slope.
        Must be greater than 0 and at most 1.0.

    Returns
    -------
    self
        This object, so that effect calls can be chained.

    Raises
    ------
    ValueError
        If any argument is not a number or is out of range.

    See Also
    --------
    bass, equalizer

    '''
    if not is_number(gain_db):
        raise ValueError("gain_db must be a number")

    if not is_number(frequency) or frequency <= 0:
        raise ValueError("frequency must be a positive number.")

    if not is_number(slope) or slope <= 0 or slope > 1.0:
        # The message names the checked parameter and its real range
        # (it previously referred to a non-existent 'width_q').
        raise ValueError(
            "slope must be a positive number no greater than 1.0."
        )

    effect_args = [
        'treble',
        '{:f}'.format(gain_db),
        '{:f}'.format(frequency),
        # The trailing 's' marks the value as a slope for SoX.
        '{:f}s'.format(slope)
    ]

    self.effects.extend(effect_args)
    self.effects_log.append('treble')

    return self
def tremolo(self, speed: float = 6.0, depth: float = 40.0):
    '''Add a tremolo (low frequency amplitude modulation) effect to the
    audio. The modulation frequency in Hz is set by `speed` and the
    modulation depth, as a percentage, by `depth`.

    Parameters
    ----------
    speed : float, default=6.0
        Tremolo speed in Hz. Must be greater than 0.
    depth : float, default=40.0
        Tremolo depth as a percentage of the total amplitude. Must be
        greater than 0 and at most 100.

    Returns
    -------
    self
        The transformer itself, so calls can be chained.

    Raises
    ------
    ValueError
        If `speed` or `depth` is not a number in its accepted range.

    See Also
    --------
    flanger

    Examples
    --------
    >>> tfm = sox.Transformer()

    For a growl-type effect

    >>> tfm.tremolo(speed=100.0)

    '''
    if not is_number(speed) or speed <= 0:
        raise ValueError("speed must be a positive number.")

    if not is_number(depth) or depth <= 0 or depth > 100:
        raise ValueError("depth must be a positive number less than 100.")

    # Both numeric parameters are passed to SoX in fixed-point notation.
    numeric_args = [format(value, 'f') for value in (speed, depth)]

    self.effects.extend(['tremolo'] + numeric_args)
    self.effects_log.append('tremolo')
    return self
def trim(self, start_time: float, end_time: Optional[float] = None):
    '''Cut a clip out of the audio, delimited by a start and an end
    timestamp given in seconds from the beginning of the file.

    When `end_time` is None (or omitted), only the start position is
    passed to SoX, so the clip runs to the end of the audio.

    Parameters
    ----------
    start_time : float
        Start time of the clip (seconds). Must not be negative.
    end_time : float or None, default=None
        End time of the clip (seconds). Must not be negative and must be
        strictly greater than `start_time`.

    Returns
    -------
    self
        The transformer itself, so calls can be chained.

    Raises
    ------
    ValueError
        If `start_time` or `end_time` is not a non-negative number, or
        if `start_time` is not smaller than `end_time`.

    '''
    if not is_number(start_time) or start_time < 0:
        raise ValueError("start_time must be a positive number.")

    trim_args = ['trim', format(start_time, 'f')]

    if end_time is not None:
        if not is_number(end_time) or end_time < 0:
            raise ValueError("end_time must be a positive number.")
        if start_time >= end_time:
            raise ValueError("start_time must be smaller than end_time.")

        # The second argument handed to SoX is the clip length, not the
        # absolute end timestamp.
        clip_length = end_time - start_time
        trim_args.append(format(clip_length, 'f'))

    self.effects.extend(trim_args)
    self.effects_log.append('trim')
    return self
def upsample(self, factor: int = 2):
    '''Upsample the signal by an integer factor: zero-value samples are
    inserted between each pair of input samples. As a result, the
    original spectrum is replicated into the new frequency space
    (imaging) and attenuated. The upsample effect is typically used in
    combination with filtering effects.

    Parameters
    ----------
    factor : int, default=2
        Integer upsampling factor. Must be at least 1. Booleans are
        rejected even though `bool` is a subclass of `int`.

    Returns
    -------
    self
        The transformer itself, so calls can be chained.

    Raises
    ------
    ValueError
        If `factor` is not an integer greater than or equal to 1.

    See Also
    --------
    rate, downsample

    '''
    # bool is a subclass of int, so True would otherwise pass validation
    # and be formatted as the string 'True', which is not a valid factor.
    is_valid_int = isinstance(factor, int) and not isinstance(factor, bool)
    if not is_valid_int or factor < 1:
        raise ValueError('factor must be a positive integer.')

    effect_args = ['upsample', '{}'.format(factor)]

    self.effects.extend(effect_args)
    self.effects_log.append('upsample')
    return self
def vad(self,
        location: Literal[1, -1] = 1,
        normalize: bool = True,
        activity_threshold: float = 7.0,
        min_activity_duration: float = 0.25,
        initial_search_buffer: float = 1.0,
        max_gap: float = 0.25,
        initial_pad: float = 0.0):
    '''Voice Activity Detector. Attempts to trim silence and quiet
    background sounds from the ends of recordings of speech. The
    algorithm currently uses a simple cepstral power measurement to
    detect voice, so may be fooled by other things, especially music.

    The effect can trim only from the front of the audio, so in order to
    trim from the back, the reverse effect must also be used.

    Parameters
    ----------
    location : 1 or -1, default=1
        If 1, trims silence from the beginning
        If -1, trims silence from the end
    normalize : bool, default=True
        If true, normalizes audio before processing.
    activity_threshold : float, default=7.0
        The measurement level used to trigger activity detection. This
        may need to be changed depending on the noise level, signal
        level, and other characteristics of the input audio.
    min_activity_duration : float, default=0.25
        The time constant (in seconds) used to help ignore short bursts
        of sound.
    initial_search_buffer : float, default=1.0
        The amount of audio (in seconds) to search for quieter/shorter
        bursts of audio to include prior to the detected trigger point.
    max_gap : float, default=0.25
        The allowed gap (in seconds) between quieter/shorter bursts of
        audio to include prior to the detected trigger point
    initial_pad : float, default=0.0
        The amount of audio (in seconds) to preserve before the trigger
        point and any found quieter/shorter bursts.

    Returns
    -------
    self
        The transformer itself, so calls can be chained.

    Raises
    ------
    ValueError
        If `location` is not 1 or -1, `normalize` is not a boolean,
        `activity_threshold` is not a number, or any of the duration
        parameters is not a non-negative number.

    See Also
    --------
    silence

    Examples
    --------
    >>> tfm = sox.Transformer()

    Remove silence from the beginning of speech

    >>> tfm.vad(initial_pad=0.3)

    Remove silence from the end of speech

    >>> tfm.vad(location=-1, initial_pad=0.2)

    '''
    if location not in [-1, 1]:
        raise ValueError("location must be -1 or 1.")
    if not isinstance(normalize, bool):
        raise ValueError("normalize must be a boolean.")
    if not is_number(activity_threshold):
        raise ValueError("activity_threshold must be a number.")
    if not is_number(min_activity_duration) or min_activity_duration < 0:
        raise ValueError("min_activity_duration must be a positive number")
    if not is_number(initial_search_buffer) or initial_search_buffer < 0:
        raise ValueError("initial_search_buffer must be a positive number")
    if not is_number(max_gap) or max_gap < 0:
        raise ValueError("max_gap must be a positive number.")
    if not is_number(initial_pad) or initial_pad < 0:
        raise ValueError("initial_pad must be a positive number.")

    trim_from_end = location == -1

    effect_args = []

    if normalize:
        effect_args.append('norm')

    # vad only trims from the front, so to trim the end the audio is
    # reversed, trimmed, and reversed back.
    if trim_from_end:
        effect_args.append('reverse')

    effect_args.extend([
        'vad',
        '-t', '{:f}'.format(activity_threshold),
        '-T', '{:f}'.format(min_activity_duration),
        '-s', '{:f}'.format(initial_search_buffer),
        '-g', '{:f}'.format(max_gap),
        '-p', '{:f}'.format(initial_pad)
    ])

    if trim_from_end:
        effect_args.append('reverse')

    self.effects.extend(effect_args)
    self.effects_log.append('vad')
    return self
def vol(self,
        gain: float,
        gain_type: Literal['amplitude', 'power', 'db'] = 'amplitude',
        limiter_gain: Optional[float] = None):
    '''Amplify or attenuate the audio signal.

    Parameters
    ----------
    gain : float
        Interpreted according to the given `gain_type`.
        If `gain_type` = 'amplitude', `gain` is a positive amplitude
        ratio.
        If `gain_type` = 'power', `gain` is a power (voltage squared).
        If `gain_type` = 'db', `gain` is in decibels.
    gain_type : string, default='amplitude'
        Type of gain. One of:
            - 'amplitude'
            - 'power'
            - 'db'
    limiter_gain : float or None, default=None
        If specified, a limiter is invoked on peaks greater than
        `limiter_gain` to prevent clipping.
        `limiter_gain` should be a positive value much less than 1.
        It is only passed to SoX when the requested gain amplifies the
        signal (`gain` > 1 for 'amplitude'/'power', `gain` > 0 for
        'db').

    Returns
    -------
    self
        The transformer itself, so calls can be chained.

    Raises
    ------
    ValueError
        If `gain` is not a number, `limiter_gain` is not a number
        strictly between 0 and 1, `gain` is negative for an 'amplitude'
        or 'power' gain, or `gain_type` is not one of the allowed
        values.

    See Also
    --------
    gain, compand

    '''
    if not is_number(gain):
        raise ValueError('gain must be a number.')

    limiter_requested = limiter_gain is not None
    if limiter_requested:
        if (not is_number(limiter_gain) or
                limiter_gain <= 0 or limiter_gain >= 1):
            raise ValueError(
                'limiter gain must be a positive number less than 1'
            )

    is_ratio_gain = gain_type in ('amplitude', 'power')
    if is_ratio_gain and gain < 0:
        raise ValueError(
            "If gain_type = amplitude or power, gain must be positive."
        )

    vol_args = ['vol', format(gain, 'f')]

    # Map the public gain_type names onto the tokens SoX expects.
    sox_tokens = (
        ('amplitude', 'amplitude'),
        ('power', 'power'),
        ('db', 'dB'),
    )
    type_token = next(
        (token for name, token in sox_tokens if gain_type == name), None
    )
    if type_token is None:
        raise ValueError('gain_type must be one of amplitude power or db')
    vol_args.append(type_token)

    # The limiter is only relevant when the signal is being boosted:
    # above unity for ratio gains, above 0 for decibel gains.
    boost_threshold = 1 if is_ratio_gain else 0
    if limiter_requested and gain > boost_threshold:
        vol_args.append(format(limiter_gain, 'f'))

    self.effects.extend(vol_args)
    self.effects_log.append('vol')
    return self