00001 /* 00002 * SpanDSP - a series of DSP components for telephony 00003 * 00004 * time_scale.h - Time scaling for linear speech data 00005 * 00006 * Written by Steve Underwood <steveu@coppice.org> 00007 * 00008 * Copyright (C) 2004 Steve Underwood 00009 * 00010 * All rights reserved. 00011 * 00012 * This program is free software; you can redistribute it and/or modify 00013 * it under the terms of the GNU General Public License as published by 00014 * the Free Software Foundation; either version 2 of the License, or 00015 * (at your option) any later version. 00016 * 00017 * This program is distributed in the hope that it will be useful, 00018 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00019 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00020 * GNU General Public License for more details. 00021 * 00022 * You should have received a copy of the GNU General Public License 00023 * along with this program; if not, write to the Free Software 00024 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 00025 * 00026 * $Id: time_scale.h,v 1.5 2005/11/24 13:04:52 steveu Exp $ 00027 */ 00028 00029 #if !defined(_TIME_SCALE_H_) 00030 #define _TIME_SCALE_H_ 00031 00032 /*! \page time_scale_page Time scaling speech 00033 \section time_scale_page_sec_1 What does it do? 00034 The time scaling module allows speech files to be played back at a 00035 different speed, from the speed at which they were recorded. If this 00036 were done by simply speeding up or slowing down replay, the pitch of 00037 the voice would change, and sound very odd. This modules keeps the pitch 00038 of the voice normal. 00039 00040 \section time_scale_page_sec_2 How does it work? 00041 The time scaling module is based on the Pointer Interval Controlled 00042 OverLap and Add (PICOLA) method, developed by Morita Naotaka. 00043 Mikio Ikeda has an excellent web page on this subject at 00044 http://keizai.yokkaichi-u.ac.jp/~ikeda/research/picola.html 00045 There is also working code there. This implementation uses 00046 exactly the same algorithms, but the code is a complete rewrite. 00047 Mikio's code batch processes files. This version works incrementally 00048 on streams, and allows multiple streams to be processed concurrently. 00049 */ 00050 00051 #define TIME_SCALE_MIN_PITCH 60 00052 #define TIME_SCALE_MAX_PITCH 250 00053 #define TIME_SCALE_BUF_LEN (2*SAMPLE_RATE/TIME_SCALE_MIN_PITCH) 00054 00055 typedef struct 00056 { 00057 double rate; 00058 double rcomp; 00059 double rate_nudge; 00060 int fill; 00061 int lcp; 00062 int16_t buf[TIME_SCALE_BUF_LEN]; 00063 } time_scale_t; 00064 00065 #ifdef __cplusplus 00066 extern "C" { 00067 #endif 00068 00069 /*! Initialise a time scale context. This must be called before the first 00070 use of the context, to initialise its contents. 00071 \brief Initialise a time scale context. 00072 \param s The time scale context. 00073 \param rate The ratio between the output speed and the input speed. 00074 \return 0 if initialised OK, else -1. */ 00075 int time_scale_init(time_scale_t *s, float rate); 00076 00077 /*! Change the time scale rate. 00078 \brief Change the time scale rate. 00079 \param s The time scale context. 00080 \param rate The ratio between the output speed and the input speed. 00081 \return 0 if changed OK, else -1. */ 00082 int time_scale_rate(time_scale_t *s, float rate); 00083 00084 /*! Time scale a chunk of audio samples. 00085 \brief Time scale a chunk of audio samples. 00086 \param s The time sclae context. 00087 \param out The output audio sample buffer. 00088 \param in The input audio sample buffer. 00089 \param len The number of input samples. 00090 \return The number of output samples. 00091 */ 00092 int time_scale(time_scale_t *s, int16_t out[], int16_t in[], int len); 00093 00094 #ifdef __cplusplus 00095 } 00096 #endif 00097 00098 #endif 00099 /*- End of file ------------------------------------------------------------*/