#!/usr/bin/python3
"""Matrix multiplication using only addition.

I ran across <https://news.ycombinator.com/item?id=36640148> today,
linking <https://arxiv.org/abs/2307.01415>, and there's an interesting
idea there I hadn't seen before:

1. Forward differences are linear: k times the forward differences of
a vector are the forward differences of k times the vector.

2. Permutation is linear: k times some permutation of a vector is the
same permutation of k times the vector.

3. Expansion by duplicating particular elements is linear: k times the
expansion of a vector v produced by duplicating each item vᵢ some
number of times nᵢ is the corresponding expansion of k times that
vector.

4. If your precision is finite, forward differences, sorting, and
duplicate elimination reduces the number of values in your vector by a
lot.

So fairly quickly you can reduce a sizeable vector down to a single
number, which you can then multiply by some factor with a normal
multiplier (not sure if this last step is in the paper); reversing the
process yields a scaled version of the original vector, using a number
of addition and permutation operations per input item.

The author explains:

> fgemm only really makes sense when matrices contain over 1000
  elements per row or column, not sure how much more than 1000 per
  vector but more than that. Small and in particular small and dense
  matrices are still best multiplied exactly the way GPUs multiply
  them, with many floating-point multiplier circuits in parallel

"""
import random

def randvec(n, maxval=None):
    if maxval is None:
        maxval = n
    return [random.randrange(maxval) for i in range(n)]

def diffs(v):
    prev = 0
    for vi in v:
        yield vi - prev
        prev = vi

def uniq(v):
    v = iter(v)
    prev = next(v)
    yield prev
    for vi in v:
        if vi == prev:
            continue
        yield vi
        prev = vi

def sortu(v):
    return uniq(sorted(v))

def reductions(v):
    while len(v) >= 3:
        v = list(sortu(v))
        yield v
        v = list(diffs(v))

def normalize(v):
    for vi in v:
        while vi < 2**256:
            vi *= 2
        yield vi

def align(v):
    for vi in v:
        while not vi % 2:
            vi //= 2
        yield vi

def nreductions(v):
    while True:
        v = list(sortu(normalize(v)))
        yield v
        if len(v) == 1:
            break
        v = list(diffs(v))

def areductions(v):
    while True:
        v = list(sortu(align(v)))
        yield v
        if len(v) == 1:
            break
        v = list(diffs(v))
    

if __name__ == '__main__':
    v = randvec(10000, maxval=2**256)#10**10)#2**32)
    vj = areductions(v)

    for i in range(20000):
        n = len(v)
        if n > 20:
            print(n)
        else:
            print(n, v)
        if len(v) < 2:
            break
        v = next(vj)