# Import modules
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score

# Import PySwarms
import pyswarms as ps
import misc

def logits_function(p):
    """Unroll a flat parameter vector and compute the network logits.

    The layer sizes are derived from the module-level exponent ``n`` and the
    forward pass is evaluated on the module-level ``x_train_ho`` matrix.

    Inputs
    ------
    p: np.ndarray
        Flat vector holding, in this order, the first-layer weights, the
        first-layer biases, the second-layer weights and the second-layer
        biases.

    Returns
    -------
    numpy.ndarray of logits for layer 2

    """
    # Layer sizes of the network
    input_dim = 2 ** n
    hidden_dim = 2 ** (n + 1)
    output_dim = 2 ** n

    # End offsets of the four consecutive segments stored in ``p``
    w1_end = input_dim * hidden_dim
    b1_end = w1_end + hidden_dim
    w2_end = b1_end + hidden_dim * output_dim
    b2_end = w2_end + output_dim

    # Cut each segment out of the flat vector and restore its shape
    weights_1 = p[:w1_end].reshape((input_dim, hidden_dim))
    bias_1 = p[w1_end:b1_end].reshape((hidden_dim,))
    weights_2 = p[b1_end:w2_end].reshape((hidden_dim, output_dim))
    bias_2 = p[w2_end:b2_end].reshape((output_dim,))

    # Forward pass: affine map, tanh activation, affine map
    hidden = np.tanh(x_train_ho.dot(weights_1) + bias_1)
    return hidden.dot(weights_2) + bias_2

# Forward propagation
def forward_prop(params):
    """Forward propagation as objective function

    Runs the network for one particle via ``logits_function`` and returns the
    mean negative log-likelihood of the target classes listed in the
    module-level ``hot_one_indices``.

    Inputs
    ------
    params: np.ndarray
        The dimensions should include an unrolled version of the
        weights and biases.

    Returns
    -------
    float
        The negative log-likelihood loss, averaged over the rows of the
        logits matrix.
    """
    logits = logits_function(params)
    n_rows = logits.shape[0]

    # Log-softmax with the row maximum subtracted first. This is
    # mathematically identical to log(exp(z) / sum(exp(z))), but exp() can no
    # longer overflow and log() never receives an underflowed zero.
    shifted = logits - np.max(logits, axis=1, keepdims=True)
    log_probs = shifted - np.log(np.sum(np.exp(shifted), axis=1, keepdims=True))

    # Negative log-probability assigned to the target class of every row
    # (expects one target index per row of the logits).
    correct_logprobs = -log_probs[np.arange(n_rows), hot_one_indices]

    # Average over the number of rows. Writing this as ``sum / samples/n``
    # evaluates left to right as ``(sum / samples) / n`` and therefore
    # divides by samples * n instead of by the row count.
    loss = np.sum(correct_logprobs) / n_rows

    return loss

def f(x):
    """Evaluate the objective function for every particle of the swarm.

    Inputs
    ------
    x: numpy.ndarray of shape (n_particles, dimensions)
        Current positions of all particles; each row is passed to
        ``forward_prop`` on its own.

    Returns
    -------
    numpy.ndarray of shape (n_particles, )
        The loss computed for each particle, in row order
    """
    return np.array([forward_prop(particle) for particle in x])

def predict(X, pos, n):
    """
    Use the trained weights to perform class predictions.

    Inputs
    ------
    X: numpy.ndarray
        Input samples, one per row, with 2 ** n columns.
    pos: numpy.ndarray
        Flat position vector found by the swarm. Will be rolled
        into weights and biases (first-layer weights, first-layer biases,
        second-layer weights, second-layer biases, in that order).
    n: int
        Exponent fixing the layer sizes: 2 ** n inputs, 2 ** (n + 1) hidden
        units and 2 ** n classes.

    Returns
    -------
    numpy.ndarray of bool, shape (n_samples, 2 ** n)
        One row per sample with a single True entry in the column of the
        predicted class.
    """
    # Neural network architecture
    n_inputs = 2 ** n
    n_hidden = 2 ** (n + 1)
    n_classes = 2 ** n

    # End offsets of the four consecutive segments stored in ``pos``
    w1_end = n_inputs * n_hidden
    b1_end = w1_end + n_hidden
    w2_end = b1_end + n_hidden * n_classes
    b2_end = w2_end + n_classes

    # Roll-back the weights and biases
    W1 = pos[:w1_end].reshape((n_inputs, n_hidden))
    b1 = pos[w1_end:b1_end].reshape((n_hidden,))
    W2 = pos[b1_end:w2_end].reshape((n_hidden, n_classes))
    b2 = pos[w2_end:b2_end].reshape((n_classes,))

    # Perform forward propagation
    a1 = np.tanh(X.dot(W1) + b1)  # Activation in Layer 1
    logits = a1.dot(W2) + b2      # Logits for Layer 2

    # The predicted class is the column with the largest logit; ties go to
    # the lowest column index, as with np.argmax.
    y_pred = np.argmax(logits, axis=1)

    # One-hot encode all predictions in one indexing step. The output is
    # sized by the number of classes rather than by the input's shape.
    y_pred_ho = np.zeros((y_pred.shape[0], n_classes), dtype=bool)
    y_pred_ho[np.arange(y_pred.shape[0]), y_pred] = True
    return y_pred_ho


# Problem size: the bit arrays are reshaped into rows of n bits and the
# layer sizes are powers of two derived from n.
n = 4
n_inputs = 2 ** n
n_hidden = 2 ** (n + 1)
n_classes = 2 ** n

# Number of bits requested for the training set. Kept as an integer because
# it is used as a count (it was previously the float 1e4).
samples = 10000

# NOTE(review): misc is a project-local module; the helpers are assumed to
# accept an integer size and to return NumPy arrays -- confirm.
x_train = misc.generate_random_bit_array(samples).reshape((-1, n))
x_train_ho = misc.bit_matrix2one_hot(x_train)
# The test set asks for 30% of the training size, computed in integer
# arithmetic so the requested size stays an exact integer.
x_test_array = misc.generate_random_bit_array(samples * 3 // 10)
x_test = x_test_array.reshape((-1, n))
x_test_ho = misc.bit_matrix2one_hot(x_test)

# Column index of every truthy entry of the training matrix, row by row.
# forward_prop uses these as the target class of each row.
hot_one_indices = [
    column
    for row in x_train_ho
    for column, digit in enumerate(row)
    if digit
]

# Initialize swarm
options = {'c1': 0.5, 'c2': 0.3, 'w': 0.9}

# Call instance of PSO; one dimension per weight and bias of the network
dimensions = (n_inputs * n_hidden) + (n_hidden * n_classes) + n_hidden + n_classes
optimizer = ps.single.GlobalBestPSO(n_particles=100, dimensions=dimensions, options=options)

# Perform optimization
cost, pos = optimizer.optimize(f, iters=80)

# Evaluate the best position found on the held-out test set
results = predict(x_test_ho, pos, n)
print("Accuracy: %.4f" % accuracy_score(x_test_ho, results))