/* Generate a vocoder sound. Like Kraftwerk and Doomo Arigato, Mr. Roboto.
*
* Not currently working; produces what sounds like a series of
* impulses on output due to what I think is oscillator overflow.
*
* To use, on a Linux system with ALSA:
arecord | ./vocoder | aplay
*
* The basic approach is that you use two identical filter banks; one
* of them measures the spectral envelope of the voice signal, while
* the other imposes that same spectral envelope on the carrier
* signal. Most of the energy of a human voice is in a range of about
* 75Hz to about 3000Hz, a ratio of 40, and a typical music vocoder
* has about 10 bands, which suggests that each filter band should
* cover somewhere around half an octave, for a Q of somewhere around
* 2.
*
* I want to use a minimal amount of computation so that this is
* practical to run even on a machine with fairly limited computing
* power, such as an Arduino, which, however, does have a two-cycle
* 8-bit multiply instruction. The approach that occurs to me, in my
* ignorance of filters and DSP, is to use a kind of IIR filter that
* simulates a damped resonator as the filter, which works like this:
*/
typedef short samp;
static samp
high_bits(samp in) { return in >> 8; }
typedef struct { samp sf, df, s, c; }
osc;
static void
run_osc(osc *o, samp s)
{
o->s += high_bits(o->c * o->sf);
o->s += s >> 1;
o->c -= high_bits(o->s * o->sf);
o->c -= high_bits(o->c * o->df);
}
/* s and c are the sine and cosine coordinates of a rotating phasor,
* which follows a slightly elliptical path here due to the fact that
* we subtract the scaled new sine and add the scaled old cosine.
* Perhaps they should be called x and y. sf is the "sine factor",
* which is basically the sine of the angle through which the phasor
* rotates on each sample, scaled with respect to high_bits (i.e. 1.0
* would be represented as 256, and 128 would represent 0.5.) df is
* the "decay factor", which determines the Q factor of the system;
* like sf, it's scaled. 0 would mean no dissipation, i.e. an
* infinite Q, while 256 (representing 1.0) would zero the entire
* cosine part of the phasor in every cycle, decaying the entire
* energy to zero every quarter-turn of the phasor, which I think
* would make it "critically damped". I'm not sure exactly how to
* convert between Q and df.
*
* If we're dealing with 8ksps and we want a resonant frequency of
* 90Hz, we want the phasor to rotate 90/8000 = 0.0113 rotations on
* every sample, or about 0.071 radians, whose sine is pretty much
* 0.071 too. So the relevant sf would be 0.071*256 = 18. Repeating
* this calculation in Python:
>>> [int(round(math.sin(90*((3000/75)**(1/10))**i/8000*math.pi*2)*256))
for i in range(10)]
[18, 26, 38, 54, 78, 111, 154, 206, 250, 237]
*
* The last value there is a bit dubious because it's totally not
* going to work the way we want it to; it's going to rotate the
* phasor by *less* than π/4 radians, not *more*. I'm not really sure
* the last several will work either, actually.
*
* Wikipedia tells me that Q is 2π(energy stored / energy dissipated
* per cycle), which I guess is just (energy stored / energy
* dissipated per radian). If we want a Q of around 2, and we're
* rotating 0.071 radians per sample, then we want to diminish the
* amplitude by about 1/√2 in a radian, which is about 44 samples,
* which I think means we should reduce the amplitude by about 0.8%
* each sample, except df is only reducing the cosine half of it, so
* maybe it should be 2·0.8% or √2·0.8%, so like 1.5% or something.
* Anyway that works out to a df of about 4. That makes me think that
* maybe we're going to be sad about using 256 instead of 512 or
* something as the scaling factor for our fixed-point math, since we
* can't adjust our Q very precisely. But maybe that's only a problem
* for the low-frequency resonators, since you want to increase the
* decay factor proportional with the sine factor to keep the same Q,
* right?
>>> [int(round(.015 * 256 / 18 *
int(round(math.sin(90*((3000/75)**(1/10))**i/8000*math.pi*2)*256))))
for i in range(10)]
[4, 6, 8, 12, 17, 24, 33, 44, 53, 51]
*/
osc
filter_bank[] = {
{18, 4},
{26, 6},
{38, 8},
{54, 12},
{78, 17},
{111, 24},
{154, 33},
{206, 44},
{250, 53},
};
/* Okay, so let's see if that gives us some kind of reasonable
* analysis of a voice's formants.
*/
#define RHO(x) (sizeof(x)/sizeof((x)[0]))
#include
#include
void draw_equalizer()
{
fprintf(stderr, "\033[0H");
for (int ii = 0; ii < RHO(filter_bank); ii++) {
osc *o = &filter_bank[ii];
long amp = sqrt(o->s*o->s + o->c*o->c) / 400.0;
int jj = 0;
for (; jj < amp; jj++) fprintf(stderr, "#");
for (; jj < 100; jj++) fprintf(stderr, " ");
fprintf(stderr, "\n");
}
}
void
graphic_equalizer()
{
int tt = 0;
for (;;) {
samp ss = (unsigned char)getchar();
for (int ii = 0; ii < RHO(filter_bank); ii++) {
run_osc(&filter_bank[ii], ss);
}
if ((tt++ & 255) == 0) draw_equalizer();
putchar(high_bits(filter_bank[0].s));
fflush(stdout);
}
}
int
main()
{
graphic_equalizer();
return 0;
}