// Approximate a sine wave as closely as possible with samples from
// the set {-1, 0, +1}, using genetic algorithms.  But what does
// “closely” mean?

// Sine power divided by total power, i.e., dot product divided by
// squared magnitude, turns out to be the wrong answer, because it
// produces a positive impulse and a negative one:
// Winner (quality 1.000):
// +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0

// Sine power by itself is also wrong, producing a square wave:
// Winner (quality 33.731):
// +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1

// Halfway in between, sine power divided by magnitude (square root of
// total power) seems more reasonable:

// Winner (quality 4.945):
// +0 +0 +0 +0 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +0 +0 +0 +0 +0 +0 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 +0 +0 +0

// Or, with 64 points:
// Winner (quality 5.433):
// +0 +0 +0 +0 +0 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 +0 +0 +0 +0

// That’s 5 zeroes, 23 +1s, 9 zeroes, 23 -1s, and 4 zeroes.  Those
// were not numbers I was expecting.  For 100 points I got 37% +1s,
// 37% -1s, and two 13% runs of zeroes.  For 1024, 66 0s, 381 +1s
// (37.2%), 131 0s, 381 -1s, and 65 zeroes.  These don’t seem to be
// any sort of simple rational expression.  They’re not that far from
// ⅜ +1, ⅛ 0, ⅜ -1, and another ⅛ 0, but that’s definitely not the
// exact value.

// Upon thinking about it a bit more, I decided that clearly the
// correct answer is the sum of the squares of the *differences*
// between the target wave and this one.  So I've deleted all the
// dot-product code.

// That gives these answers, which are almost the same as before, but not
// quite:
// +0 +0 +0 +0 +0 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +0 +0 +0 +0 +0 +0 +0 +0 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 +0 +0 +0 +0
// +0 +0 +0 +0 +0 +0 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 +0 +0 +0 +0 +0
// +0 +0 +0 +0 +0 +0 +0 +0 +0 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 +0 +0 +0 +0 +0 +0 +0 +0

// That last one is 9 0s, 33 +1s, 17 0s, 33 -1s, and another 8 0s.  So
// it’s one-third high, one-third low, and one-third in between.

// But what if we suppose our signal gets low-pass filtered?  In that
// case we can do better.  Our total squared error for the 256 case is
// 16.382:
// $ ./brutewave 256
// Winner (quality -16.382):
// +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0

// But if each new sample only shifts the level by 7%, we can do 100×
// better (40 dB lower THD):
// $ ./brutewave -s .93 256
// Winner (quality -0.163):
// +0 +1 +0 +1 +0 +0 +1 +0 +1 +0 +1 +1 +0 +1 +1 +0 +1 +1 +0 +1 +1 +1 +0 +1 +1 +1 +1 +0 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +0 +1 +1 +1 +0 +1 +0 +1 +1 +0 +1 +1 +0 +1 +0 +1 +1 +1 +0 +0 +0 +1 +0 +1 +0 +1 +0 +0 +0 +0 +0 +1 +0 +0 +0 +0 +0 -1 +1 +0 -1 +1 +0 -1 +0 -1 +1 -1 +0 +0 -1 -1 +0 +0 -1 +0 +0 -1 +0 -1 -1 +0 -1 +0 -1 -1 -1 -1 +0 -1 -1 -1 +0 -1 -1 -1 +0 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 +0 -1 -1 -1 -1 +0 -1 -1 -1 -1 +0 -1 -1 +0 -1 -1 +0 +0 -1 -1 +0 +0 -1 +0 -1 +0 -1 +0 +0 -1 +0 +0 +0 +0 -1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +1 +0 +0 +0 +0 +1

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

enum { pop_size = 1024, num_generations = 1024 };
static float quality[pop_size];
static int rank[pop_size];

// For qsort.
static int compar(const void *a, const void *b)
{
  const int *ai = a, *bi = b;
  float result = quality[*bi] - quality[*ai];
  return result < 0 ? -1 : result > 0 ? 1 : 0;
}

int main(int argc, char **argv)
{
  char **args = argv + 1;
  double smoothing = 0;
  if (argc > 2 && 0 == strcmp(args[0], "-s")) {
    smoothing = atof(args[1]);
    args += 2;
    argc -= 2;
  }

  if (argc != 2) {
    fprintf(stderr, "Usage: %s [-s .5] 53\n"
            "53 is the number of points in the sample interval.\n"
            ".5 is a single-pole RC filter smoothing factor, default 0.\n",
            argv[0]);
    return 1;
  }
  
  int n = atoi(args[0]);
  int *pop = malloc(sizeof(*pop) * n * pop_size);
  if (!pop) {
    perror("malloc");
    return 2;
  }

  double amplitude = 1;
  double *ref = malloc(sizeof(*ref) * n);
  if (!ref) {
    perror("malloc 2");
    return 3;
  }

  for (int i = 0; i < n; i++) {
    ref[i] = amplitude * sin(i * 2 * M_PI / n);
  }

  // Initialize random population members.
  srandom(53);
  for (int j = 0; j < pop_size; j++) {
    int *a = pop + j * n;
    for (int i = 0; i < n; i++) {
      a[i] = random() % 3 - 1;
    }
  }

  for (int k = 0; k < num_generations; k++) {
    // Measure quality of all entrants.
    for (int j = 0; j < pop_size; j++) {
      rank[j] = j;              // Initialize array for later sorting
      int *a = pop + j * n;
      double diffsq = 0, v = 0;
      for (int i = 0; i < n; i++) {
        v = a[i] * (1 - smoothing) + v * smoothing;
        float diff = v - ref[i];
        diffsq += diff * diff;
      }
      quality[j] = -diffsq;
    }

    qsort(rank, pop_size, sizeof *rank, compar);

    if (k == num_generations - 1) break;

    // Respawn the lower-scoring half of the population by crossover
    // on the higher-scoring half.
    int cutoff = pop_size / 2;
    for (int j = cutoff; j < pop_size; j++) {
      int *parent1 = pop + rank[random() % cutoff] * n,
        *parent2 = pop + rank[random() % cutoff] * n,
        *dest = pop + rank[j] * n;
      for (int i = 0; i < n; i++) {
        dest[i] = random() % 2 ? parent1[i] : parent2[i];
      }
    }

    // Now mutate everything.
    for (int i = 0; i < n * pop_size; i++) {
      if (random() % (n * 4) == 0) {
        pop[i] = random() % 3 - 1;
      }
    }
  }

  int *winner = pop + rank[0] * n;
  printf("Winner (quality %.3f):\n", quality[rank[0]]);
  for (int i = 0; i < n; i++) {
    printf("%s%+d", i ? " " : "", winner[i]);
  }
  printf("\n");

  return 0;
}