#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Haar transform code.
I’d heard about wavelets for a long time. What I’d forgotten, or
perhaps never understood, was that the fast wavelet transform with the
Haar wavelet is not only linear-time, but doesn’t involve any
multiplications or divisions; so you could quite plausibly envision
doing it to large quantities of data on a microcontroller.
It seems like distinguishing vowels in speech requires better than an
octave of discrimination: more like a ratio of 1:1.4 or so, i.e. √2.
I suspect you can get that much discrimination as follows:
- Do the Haar transform once on the original signal.
- Do it a second time on the signal downsampled by 1:1.4. (You can do
this just by dropping samples, using Bresenham’s line-drawing
algorithm to choose the samples.)
- Downsample the Haar transform outputs to the timescale at which
you’re interested in the frequency data. This should substantially
increase the frequency precision of the result by looking at more
than one oscillation at a time.
- Intersperse the two different sets of frequency data.
It may be useful to do this at four sets of frequencies instead of
two.
If you analyze a 30th of a second at 16ksps, you have 533 samples. If
I round that to 512 (how?) then doing the Haar transform on it takes
256 additions, 256 doublings, and 256 subtractions for the first pass,
and each value thereafter produced (511 in all) must similarly be
added, doubled, and subtracted one-half-time each. This is a total of
1533 or so basic operations, or about three per sample, and this can
be done in 1024 bytes of RAM if the samples are 8 bits.
If we quadruple that, we end up with a dozen double-precision
operations per sample, and probably 50 machine instructions. That’s
about 25000 machine instructions in 33ms, or 750 000 per second.
That’s within the computational capacity of even the dinkiest PIC,
although they don’t have enough RAM. Even the ATMega8 or ATMega168
used on the original Arduino Duemilanove couldn’t handle it, but an
ATMega324 should be able to. Too bad those are 40-pin packages instead
of 28-pin. The ATMega328P is 28-pin and has 2K of RAM, and although
Digi-Key doesn’t stock them, lots of other places do, and it looks
like it’s the new standard chip for the Duemilanove. (It’s
pin-compatible with the ATMega168PA.)
...this is all maybe pointless because those chips now have 2-cycle
multipliers.
You can buy an Arduino-compatible kit for US$12.50:
There’s a local distributor, but with terrible prices:
.
"""
from __future__ import division
import sys
def haar(vals):
"Perform the 1-D fast Haar transform."
if len(vals) == 1:
return vals
avgs = [(a+b)/2 for a, b in zip(vals[::2], vals[1::2])]
residues = [a - avg for a, avg in zip(vals[::2], avgs)]
return haar(avgs) + residues
def invhaar(vals):
"Invert the 1-D fast Haar transform."
if len(vals) == 1:
return vals
avgs = invhaar(vals[:len(vals)//2])
rv = [0] * len(vals)
residues = vals[len(vals)//2:]
rv[::2] = [avg + res for avg, res in zip(avgs, residues)]
rv[1::2] = [avg - res for avg, res in zip(avgs, residues)]
return rv
def inthaar(vals):
"""Perform a modification of the 1-D fast Haar transform without fractions.
The idea here is that each successive step of 'averaging' is done at
a larger scale, to avoid any truncation errors.
"""
if len(vals) == 1:
return vals
avgs = [a+b for a, b in zip(vals[::2], vals[1::2])]
residues = [2*a - avg for a, avg in zip(vals[::2], avgs)]
return inthaar(avgs) + residues
def invinthaar(vals):
"Invert inthaar."
if len(vals) == 1:
return vals
avgs = invinthaar(vals[:len(vals)//2])
rv = [0] * len(vals)
residues = vals[len(vals)//2:]
rv[::2] = [(avg + res)//2 for avg, res in zip(avgs, residues)]
rv[1::2] = [(avg - res)//2 for avg, res in zip(avgs, residues)]
return rv
def numinthaar(vals):
"Version of inthaar using Numeric for speed and concision."
import Numeric
vals = Numeric.asarray(vals)
if len(vals) == 1:
return vals
avgs = vals[::2] + vals[1::2]
return Numeric.concatenate((numinthaar(avgs), 2*vals[::2] - avgs))
_vals = [3,2,13,2,1,0,15,10]
_hvals = haar(_vals)
_ihvals = invhaar(_hvals)
assert _ihvals == _vals, (_ihvals, _vals)
_hvals = inthaar(_vals)
_ihvals = invinthaar(_hvals)
assert _ihvals == _vals, (_ihvals, _vals)
if __name__ == '__main__':
data = map(ord, file(sys.argv[1]).read())
print inthaar(data)