/* Generate a vocoder sound. Like Kraftwerk and Doomo Arigato, Mr. Roboto. * * Not currently working; produces what sounds like a series of * impulses on output due to what I think is oscillator overflow. * * To use, on a Linux system with ALSA: arecord | ./vocoder | aplay * * The basic approach is that you use two identical filter banks; one * of them measures the spectral envelope of the voice signal, while * the other imposes that same spectral envelope on the carrier * signal. Most of the energy of a human voice is in a range of about * 75Hz to about 3000Hz, a ratio of 40, and a typical music vocoder * has about 10 bands, which suggests that each filter band should * cover somewhere around half an octave, for a Q of somewhere around * 2. * * I want to use a minimal amount of computation so that this is * practical to run even on a machine with fairly limited computing * power, such as an Arduino, which, however, does have a two-cycle * 8-bit multiply instruction. The approach that occurs to me, in my * ignorance of filters and DSP, is to use a kind of IIR filter that * simulates a damped resonator as the filter, which works like this: */ typedef short samp; static samp high_bits(samp in) { return in >> 8; } typedef struct { samp sf, df, s, c; } osc; static void run_osc(osc *o, samp s) { o->s += high_bits(o->c * o->sf); o->s += s >> 1; o->c -= high_bits(o->s * o->sf); o->c -= high_bits(o->c * o->df); } /* s and c are the sine and cosine coordinates of a rotating phasor, * which follows a slightly elliptical path here due to the fact that * we subtract the scaled new sine and add the scaled old cosine. * Perhaps they should be called x and y. sf is the "sine factor", * which is basically the sine of the angle through which the phasor * rotates on each sample, scaled with respect to high_bits (i.e. 1.0 * would be represented as 256, and 128 would represent 0.5.) df is * the "decay factor", which determines the Q factor of the system; * like sf, it's scaled. 0 would mean no dissipation, i.e. an * infinite Q, while 256 (representing 1.0) would zero the entire * cosine part of the phasor in every cycle, decaying the entire * energy to zero every quarter-turn of the phasor, which I think * would make it "critically damped". I'm not sure exactly how to * convert between Q and df. * * If we're dealing with 8ksps and we want a resonant frequency of * 90Hz, we want the phasor to rotate 90/8000 = 0.0113 rotations on * every sample, or about 0.071 radians, whose sine is pretty much * 0.071 too. So the relevant sf would be 0.071*256 = 18. Repeating * this calculation in Python: >>> [int(round(math.sin(90*((3000/75)**(1/10))**i/8000*math.pi*2)*256)) for i in range(10)] [18, 26, 38, 54, 78, 111, 154, 206, 250, 237] * * The last value there is a bit dubious because it's totally not * going to work the way we want it to; it's going to rotate the * phasor by *less* than π/4 radians, not *more*. I'm not really sure * the last several will work either, actually. * * Wikipedia tells me that Q is 2π(energy stored / energy dissipated * per cycle), which I guess is just (energy stored / energy * dissipated per radian). If we want a Q of around 2, and we're * rotating 0.071 radians per sample, then we want to diminish the * amplitude by about 1/√2 in a radian, which is about 44 samples, * which I think means we should reduce the amplitude by about 0.8% * each sample, except df is only reducing the cosine half of it, so * maybe it should be 2·0.8% or √2·0.8%, so like 1.5% or something. * Anyway that works out to a df of about 4. That makes me think that * maybe we're going to be sad about using 256 instead of 512 or * something as the scaling factor for our fixed-point math, since we * can't adjust our Q very precisely. But maybe that's only a problem * for the low-frequency resonators, since you want to increase the * decay factor proportional with the sine factor to keep the same Q, * right? >>> [int(round(.015 * 256 / 18 * int(round(math.sin(90*((3000/75)**(1/10))**i/8000*math.pi*2)*256)))) for i in range(10)] [4, 6, 8, 12, 17, 24, 33, 44, 53, 51] */ osc filter_bank[] = { {18, 4}, {26, 6}, {38, 8}, {54, 12}, {78, 17}, {111, 24}, {154, 33}, {206, 44}, {250, 53}, }; /* Okay, so let's see if that gives us some kind of reasonable * analysis of a voice's formants. */ #define RHO(x) (sizeof(x)/sizeof((x)[0])) #include #include void draw_equalizer() { fprintf(stderr, "\033[0H"); for (int ii = 0; ii < RHO(filter_bank); ii++) { osc *o = &filter_bank[ii]; long amp = sqrt(o->s*o->s + o->c*o->c) / 400.0; int jj = 0; for (; jj < amp; jj++) fprintf(stderr, "#"); for (; jj < 100; jj++) fprintf(stderr, " "); fprintf(stderr, "\n"); } } void graphic_equalizer() { int tt = 0; for (;;) { samp ss = (unsigned char)getchar(); for (int ii = 0; ii < RHO(filter_bank); ii++) { run_osc(&filter_bank[ii], ss); } if ((tt++ & 255) == 0) draw_equalizer(); putchar(high_bits(filter_bank[0].s)); fflush(stdout); } } int main() { graphic_equalizer(); return 0; }