3. MLP NKIPy Tutorial

This tutorial uses a simple Multi-Layer Perceptron (MLP) NKIPy kernel to demonstrate how NKIPy works with more complex neural network operations.

import numpy as np

from nkipy.core.trace import NKIPyKernel
from nkipy.runtime.execute import simulate_traced_kernel, baremetal_run_traced_kernel

3.1. Defining a Simple MLP NKIPy Kernel

A simple MLP consists of:

  1. A linear transformation (matrix multiplication + bias)

  2. An activation function (SiLU/Swish)

  3. Another linear transformation

This is a basic two-layer feedforward network using SiLU, an activation function commonly used in modern neural networks.

def silu_kernel(x):
    """SiLU (Swish) activation function: x * sigmoid(x)."""
    return x * (1 / (1 + np.exp(-x)))


def mlp_kernel(x, weight1, bias1, weight2, bias2):
    """Simple MLP with two linear layers and SiLU activation.
    
    Args:
        x: Input tensor [batch_size, input_dim]
        weight1: First layer weight [input_dim, hidden_dim]
        bias1: First layer bias [hidden_dim]
        weight2: Second layer weight [hidden_dim, output_dim]
        bias2: Second layer bias [output_dim]
    
    Returns:
        Output tensor [batch_size, output_dim]
    """
    # First linear layer
    hidden = np.matmul(x, weight1) + bias1
    
    # SiLU (Swish) activation
    hidden_activated = silu_kernel(hidden)
    
    # Second linear layer
    output = np.matmul(hidden_activated, weight2) + bias2
    
    return output
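
Evaluating silu_kernel at a few sample points illustrates the shape of the activation: it is close to zero for large negative inputs, passes through zero at the origin, and approaches the identity for large positive inputs. The inputs below are arbitrary illustrative values.

# SiLU acts as a smooth gate: ~0 for very negative inputs, ~x for large positive inputs
pts = np.array([-10.0, -1.0, 0.0, 1.0, 10.0], dtype=np.float32)
print(silu_kernel(pts))  # approximately [-0.0005, -0.2689, 0.0, 0.7311, 9.9995]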

3.2. Running the MLP Kernel as a NumPy Function

# Create test data
batch_size = 2
input_dim = 2048
hidden_dim = 8192
output_dim = 2048

# Input data
x = np.random.rand(batch_size, input_dim).astype(np.float32)
print(f"Input shape: {x.shape}")

# Network parameters
weight1 = np.random.rand(input_dim, hidden_dim).astype(np.float32) * 0.1
bias1 = np.random.rand(hidden_dim).astype(np.float32) * 0.1
weight2 = np.random.rand(hidden_dim, output_dim).astype(np.float32) * 0.1
bias2 = np.random.rand(output_dim).astype(np.float32) * 0.1

print(f"Weight1 shape: {weight1.shape}")
print(f"Bias1 shape: {bias1.shape}")
print(f"Weight2 shape: {weight2.shape}")
print(f"Bias2 shape: {bias2.shape}")

# Run as NumPy function
out_numpy = mlp_kernel(x, weight1, bias1, weight2, bias2)
print(f"\nNumPy output shape: {out_numpy.shape}")
print(f"NumPy output range: [{np.min(out_numpy):.4f}, {np.max(out_numpy):.4f}]")
Input shape: (2, 2048)
Weight1 shape: (2048, 8192)
Bias1 shape: (8192,)
Weight2 shape: (8192, 2048)
Bias2 shape: (2048,)

NumPy output shape: (2, 2048)
NumPy output range: [20500.1797, 21870.7598]
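
The output values in the tens of thousands are expected for this initialization. A rough mean-value estimate, using the means of the distributions above (0.5 for the inputs, 0.05 for the scaled weights and biases) and the fact that SiLU is nearly the identity for such large positive values, lands close to the printed range. This is a back-of-envelope sketch, not an exact calculation:

# Back-of-envelope estimate of the output magnitude
mean_x, mean_w, mean_b = 0.5, 0.05, 0.05                # means of U(0, 1) and 0.1 * U(0, 1)
hidden_est = input_dim * mean_x * mean_w + mean_b       # ~51.25; SiLU(51.25) is ~51.25
output_est = hidden_dim * hidden_est * mean_w + mean_b  # ~21000
print(f"Estimated output magnitude: ~{output_est:.0f}")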

3.3. Tracing the MLP Kernel

NKIPyKernel.trace captures the operations performed by mlp_kernel so that the same computation can be simulated or executed on Trainium hardware in the next two sections.

# Trace the kernel
traced_kernel = NKIPyKernel.trace(mlp_kernel)

3.4. Running the Traced Kernel with Simulation

out_nkipy = simulate_traced_kernel(traced_kernel, x, weight1, bias1, weight2, bias2)
print(f"Simulated output shape: {out_nkipy.shape}")
print(f"Simulated output range: [{np.min(out_nkipy):.4f}, {np.max(out_nkipy):.4f}]")
print(f"Is the simulated output the same as NumPy? {np.allclose(out_nkipy, out_numpy)}")
Simulated output shape: (2, 2048)
Simulated output range: [20500.2129, 21870.7559]
Is the simulated output the same as NumPy? True
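
The simulated values match NumPy only up to small float32 rounding differences (compare the printed maxima 21870.7559 vs. 21870.7598); np.allclose still reports True because its default tolerances (rtol=1e-05, atol=1e-08) comfortably cover deviations of that size at this magnitude. A quick way to quantify the deviation explicitly:

# Quantify the simulation-vs-NumPy difference
abs_diff = np.abs(out_nkipy - out_numpy)
print(f"Max absolute difference: {np.max(abs_diff):.6f}")
print(f"Max relative difference: {np.max(abs_diff / np.abs(out_numpy)):.2e}")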

3.5. Running It on Trainium Hardware

# Run on Trainium hardware
out_baremetal = baremetal_run_traced_kernel(traced_kernel, x, weight1, bias1, weight2, bias2)
print(f"Baremetal output shape: {out_baremetal.shape}")
print(f"Baremetal output range: [{np.min(out_baremetal):.4f}, {np.max(out_baremetal):.4f}]")
print(f"Is the baremetal output the same as NumPy? {np.allclose(out_baremetal, out_numpy)}")
Baremetal output shape: (2, 2048)
Baremetal output range: [20500.1797, 21870.7598]
Is the baremetal output the same as NumPy? True
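
The printed baremetal range matches the NumPy range exactly, while the simulated range differed slightly, so it can also be worth comparing the hardware result against the simulation directly. A minimal cross-check using only NumPy:

# Cross-check the hardware output against the simulated output
print(f"Baremetal vs. simulated allclose: {np.allclose(out_baremetal, out_nkipy)}")
print(f"Max |baremetal - simulated|: {np.max(np.abs(out_baremetal - out_nkipy)):.6f}")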