sophiaas
diff --git a/‎README.md
+25-1 b/‎README.md
+25-1
diff --git a/‎datasets.py
+13-43 b/‎datasets.py
+13-43
diff --git a/‎groups.py
+44-27 b/‎groups.py
+44-27
diff --git a/‎models_JAX.py
+93 b/‎models_JAX.py
+93
@@ -1 +1,25 @@
-# spectral-universality
+# Harmonics of Learning
+<p align="center">
+<img src="weights_rot.png" alt="Rotational harmonics" width="700" />
+</p>
+
+## Description
+Implementation of a complex-valued Power-Spectral Network trained via contrastive learning on an invariance objective for a finite group.
+As shown in the companion paper, at convergence the network learns all the irreducible unitary representations of the group. In particular, the group's multiplication (Cayley) table can be extracted from its weights.
+
+
+We provide implementations of the model and its training in both `PyTorch` and `JAX`.
+
+
+## Setup
+```
+python 3.8+
+pip install -r requirements.txt
+```
+
+
+## Groups
+The file `groups.py` provides implementations of various finite groups, including cyclic, dihedral, and symmetric groups, as well as direct products of groups.
+
+## Train
+To train the model in `PyTorch` or in `JAX`, run `train_torch.py` or `train_JAX.py`, respectively. The training parameters are set at the beginning of each file.
@@ -1,61 +1,31 @@
 import torch
-from torchvision import transforms, datasets
-import numpy as np
+from numpy import random
 
 from groups import *
 
 
+"""
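+Contrastive learning dataset for groups. A datapoint is a pair (x, y) of complex vectors, where y is obtained by acting on x with a random group element (so y lies in the orbit of x); optional Gaussian noise is then added to x.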
+"""
 class group_dset(torch.utils.data.Dataset):
-    def __init__(self, group, std=1.):
+    def __init__(self, group, std=1., noise=0.):
         self.group = group
         self.std = std
+        self.noise = noise
 
     def __getitem__(self, index):
-        x_re = self.std * torch.randn((self.group.order,))
-        x_im = self.std * torch.randn((self.group.order,))
-        # x_re = 2 * torch.rand((self.group.order,)) - 1.
-        # x_im = 2 * torch.rand((self.group.order,)) - 1.
-        x = torch.complex(x_re, x_im)
+        x_re = self.std * random.randn(self.group.order) 
+        x_im = self.std * random.randn(self.group.order)
+        x = x_re + 1j * x_im
         y = self.group.act(x)
+        
+        perturb_re = self.noise * random.randn(self.group.order) 
+        perturb_im = self.noise * random.randn(self.group.order) 
+        x += perturb_re + 1j * perturb_im
 
         return x, y
 
     def __len__(self):
         return 1000
 
 
-
-# class RegBiCyclic(torch.utils.data.Dataset):
-#     def __init__(self, A, B):
-#         self.A = A
-#         self.B = B
-
-#     def __getitem__(self, index):
-#         x_re_A = torch.randn((self.A,))
-#         x_im_A = torch.randn((self.A,))
-
-#         shift = torch.randint(low=0, high=self.A, size=(1,)).item()  #The index needs to start from 0 since in a product of groups the identities matter
-
-#         y_re_A = torch.roll(x_re_A, shift)
-#         y_im_A = torch.roll(x_im_A, shift)
-
-#         x_re_B = torch.randn((self.B,))
-#         x_im_B = torch.randn((self.B,))
-
-#         shift = torch.randint(low=0, high=self.B, size=(1,)).item()  #The index needs to start from 0 since in a product of groups the identities matter
-
-#         y_re_B = torch.roll(x_re_B, shift)
-#         y_im_B = torch.roll(x_im_B, shift)
-
-
-#         x_re = torch.cat([x_re_A, x_re_B], dim=-1)
-#         x_im =torch.cat([x_im_A, x_im_B], dim=-1)
-#         y_re = torch.cat([y_re_A, y_re_B], dim=-1)
-#         y_im =torch.cat([y_im_A, y_im_B], dim=-1)
-#         x = torch.complex(x_re, x_im)
-#         y = torch.complex(y_re, y_im)
-
-#         return x, y
-
-#     def __len__(self):
-#         return 1000
@@ -1,39 +1,47 @@
 import numpy as np
-import torch
+from numpy import random
 import math
 import itertools as it
 
 from utils import *
 
+
+"""
+Abstract class representing a finite group. 
+"""
 class abstr_group():
     def __init__(self, order, cayley_table, irrep_dims):
         self.order = order
         self.cayley_table = cayley_table
         self.irrep_dims = irrep_dims
 
     def act(self, x):
-        g = torch.randint(low=0, high=self.order, size=(1,)).item()
+        g = random.randint(low=0, high=self.order)
         return x[self.cayley_table[g]]
 
     def check_dims(self):
-        irrep_dims = torch.tensor(self.irrep_dims)
-        assert (irrep_dims**2).sum().item() == self.order
+        irrep_dims = np.array(self.irrep_dims)
+        assert (irrep_dims**2).sum() == self.order
 
 
-     
+"""
+Cyclic groups
+"""
 class cyclic(abstr_group):
     def __init__(self, N): 
         self.order = N
         self.irrep_dims = [1]*N
 
-        self.cayley_table = torch.zeros(N, N)
+        self.cayley_table = np.zeros((N, N))
         for i in range(N):
-            self.cayley_table[i] = torch.roll(torch.arange(0, N), -i)
-        self.cayley_table = self.cayley_table.long()
+            self.cayley_table[i] = np.roll(np.arange(0, N), -i)
+        self.cayley_table = self.cayley_table.astype(int)
 
 
 
-
+"""
+Dihedral groups
+"""
 class dihedral(abstr_group):
     def __init__(self, N):
         self.order = 2*N
@@ -44,19 +52,19 @@ def __init__(self, N):
             self.irrep_dims = [1]*2 + [2]*int((N - 1) / 2)
 
 
-        reflection = torch.Tensor([0] + [N-i for i in range(1, N)]).long()
-        self.group_elems = torch.zeros(2*N, N)
+        reflection = np.array([0] + [N-i for i in range(1, N)]).astype(int)
+        self.group_elems = np.zeros((2*N, N))
         for i in range(N):
-            cycle = torch.roll(torch.arange(0, N), i)
+            cycle = np.roll(np.arange(0, N), i)
             self.group_elems[i] = cycle
             self.group_elems[N+i] = cycle[reflection]
-        self.group_elems = self.group_elems.long()
+        self.group_elems = self.group_elems.astype(int)
 
-        self.cayley_table = torch.zeros(2*N, 2*N)
+        self.cayley_table = np.zeros((2*N, 2*N))
         for i in range(2*N):
             for j in range(2*N):
                 comp = self.group_elems[i][self.group_elems[j]]
-                self.cayley_table[i, j] = torch.argmin( ((comp.unsqueeze(0) - self.group_elems)**2).sum(-1) )
+                self.cayley_table[i, j] = np.argmin( ((np.expand_dims(comp, 0) - self.group_elems)**2).sum(-1) )
 
         if N == 2:
             C = [
@@ -65,48 +73,54 @@ def __init__(self, N):
                 [2, 3, 0, 1],
                 [3, 2, 1, 0]
                  ]
-            self.cayley_table = torch.Tensor(C)
-
-        self.cayley_table = self.cayley_table.long()
+            self.cayley_table = np.array(C)
 
+        self.cayley_table = self.cayley_table.astype(int)
 
 
+"""
+Symmetric groups
+"""
 class symmetric(abstr_group):
     def __init__(self, N):
         self.order = math.factorial(N)
 
         self.irrep_dims = [hook_length(P, N) for P in list(gen_partitions(N))]
 
-        self.group_elems = torch.zeros(self.order, N)
+        self.group_elems = np.zeros((self.order, N))
         for i, perm in enumerate(it.permutations(range(N))):
-            self.group_elems[i] = torch.Tensor(list(perm))
-        self.group_elems = self.group_elems.long()
+            self.group_elems[i] = np.array(list(perm))
+        self.group_elems = self.group_elems.astype(int)
 
-        self.cayley_table = torch.zeros(self.order, self.order)
+        self.cayley_table = np.zeros((self.order, self.order))
         for i in range(self.order):
             for j in range(self.order):
                 comp = self.group_elems[i][self.group_elems[j]]
-                self.cayley_table[i, j] = torch.argmin( ((comp.unsqueeze(0) - self.group_elems)**2).sum(-1) )
+                self.cayley_table[i, j] = np.argmin( ((np.expand_dims(comp, 0) - self.group_elems)**2).sum(-1) )
+
+        self.cayley_table = self.cayley_table.astype(int)
 
-        self.cayley_table = self.cayley_table.long()
 
 
+"""
+Direct product of groups
+"""
 def direct_product(group_1, group_2): 
     order_1 = group_1.order    
     order_2 = group_2.order
     order_res = order_1 * order_2
 
     cayley_1 = group_1.cayley_table
     cayley_2 = group_2.cayley_table
-    cayley_res = torch.zeros(order_res, order_res)
+    cayley_res = np.zeros((order_res, order_res))
     for i_1 in range(order_1):
         for i_2 in range(order_2):
                 for j_1 in range(order_1):
                     for j_2 in range(order_2):
                         g_1 = cayley_1[i_1, j_1]
                         g_2 = cayley_2[i_2, j_2]
                         cayley_res[i_1*order_2 + i_2, j_1*order_2 + j_2] = g_1*order_2 + g_2
-    cayley_res = cayley_res.long()
+    cayley_res = cayley_res.astype(int)
 
     irrep_dims_1 = group_1.irrep_dims
     irrep_dim_2 = group_2.irrep_dims
@@ -115,4 +129,7 @@ def direct_product(group_1, group_2):
         for d_2 in irrep_dim_2:
             irrep_dims_res.append(d_1 * d_2)
 
-    return abstr_group(order_res, cayley_res, irrep_dims_res)
+    return abstr_group(order_res, cayley_res, irrep_dims_res)
+
+
+
@@ -0,0 +1,93 @@
+import jax.numpy as jnp
+import jax
+from jax.lax import complex
+
+from utils import *
+
+
+
+# initializer = jax.nn.initializers.glorot_uniform(in_axis=-3, out_axis=-2)
+
+initializer = jax.nn.initializers.uniform(scale=1.)
+
+def init_weights(group_order, irrep_dims):
+    keys = jax.random.split(jax.random.PRNGKey(42), len(irrep_dims))
+    return [(2. / d_i) * initializer(k, (group_order - 1, d_i, d_i, 2), jnp.float32) - (1. / d_i)
+            for k, d_i in zip(keys, irrep_dims)
+        ]
+
+
+def pad_eye(W_i):
+    d_i = W_i.shape[-1]
+    eyecm = complex(jnp.eye(d_i), jnp.zeros((d_i, d_i)))
+    return jnp.concatenate([jnp.expand_dims(eyecm, 0), W_i], 0)
+
+
+def total_weight(W, irrep_dims, group_order):
+    W_list = []
+    for W_i, d_i in zip(W, irrep_dims):
+        Wcm = complex(W_i[..., 0], W_i[..., 1])
+        W_cm_ext = jnp.reshape(pad_eye(Wcm), (group_order, d_i * d_i))
+        W_list.append(W_cm_ext)
+    return jnp.concatenate(W_list, -1)
+
+
+
+def forward(W, x):
+    res = []
+    for W_i in W:
+        Wcm = complex(W_i[..., 0], W_i[..., 1])
+        W_cm_ext = pad_eye(Wcm)
+
+        W_i_x = (jnp.expand_dims(W_cm_ext, 0) * jnp.expand_dims(jnp.expand_dims(x, -1), -1)).sum(1)
+        W_i_x_T = jnp.conjugate(jnp.transpose(W_i_x, axes=(0, -1, -2)))
+
+        res.append(W_i_x @ W_i_x_T)
+    return res
+   
+
+
+def loss(W, x, y):
+    res_x = forward(W, x)
+    res_y = forward(W, y)
+
+    res_loss = jnp.zeros(x.shape[0])
+    for (res_x_i, res_y_i) in zip(res_x, res_y):
+        res_loss += (jnp.abs((res_x_i - res_y_i))**2).mean(-1).mean(-1)
+    
+    return res_loss / len(res_x)
+    
+
+def reg(W, irrep_dims, group_order):
+    d_tot = jnp.array(irrep_dims).sum()
+    eyecm = (d_tot) * complex(jnp.eye(group_order), jnp.zeros((group_order, group_order)))
+
+    W_tot = total_weight(W, irrep_dims, group_order)
+    W_tot_T = jnp.conjugate(jnp.transpose(W_tot, axes=(-1, -2)))
+    return (jnp.abs((eyecm - W_tot @ W_tot_T ))**2).mean()
+
+
+
+"""
+Function recovering the Cayley table from the weights of the model
+"""
+def get_table(W, group_order):
+
+    res = jnp.zeros((group_order, group_order))
+    for g in range(group_order):
+        for h in range(group_order):
+                
+            diffs = jnp.zeros(group_order)
+            for W_i in W:
+                Wcm = complex(W_i[..., 0], W_i[..., 1])
+                W_cm_ext = jnp.conjugate(jnp.transpose(pad_eye(Wcm), axes=(0, -1, -2)))
+                W_gh = W_cm_ext[g] @ W_cm_ext[h]
+                diffs += (jnp.abs(jnp.expand_dims(W_gh, 0) - W_cm_ext)**2).mean(-1).mean(-1)
+            
+            res = res.at[g, h].set(jnp.argmin(diffs))
+
+    return res
+
+
+
+