3. MLP NKIPy Tutorial#
This tutorial uses a simple Multi-Layer Perceptron (MLP) NKIPy kernel to demonstrate how NKIPy works with more complex neural network operations.
import numpy as np
from nkipy.core.trace import NKIPyKernel
from nkipy.runtime.execute import simulate_traced_kernel, baremetal_run_traced_kernel
3.1. Defining a Simple MLP NKIPy Kernel#
A simple MLP consists of:
A linear transformation (matrix multiplication + bias)
An activation function (SiLU/Swish)
Another linear transformation
This is a basic two-layer feedforward network with a SiLU activation between the layers, a pattern commonly used in modern neural networks.
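In equation form, the kernel computes y = silu(x @ W1 + b1) @ W2 + b2, where silu(z) = z * sigmoid(z) = z / (1 + exp(-z)).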
def silu_kernel(x):
    """SiLU (Swish) activation function: x * sigmoid(x)."""
    return x * (1 / (1 + np.exp(-x)))


def mlp_kernel(x, weight1, bias1, weight2, bias2):
    """Simple MLP with two linear layers and SiLU activation.

    Args:
        x: Input tensor [batch_size, input_dim]
        weight1: First layer weight [input_dim, hidden_dim]
        bias1: First layer bias [hidden_dim]
        weight2: Second layer weight [hidden_dim, output_dim]
        bias2: Second layer bias [output_dim]

    Returns:
        Output tensor [batch_size, output_dim]
    """
    # First linear layer
    hidden = np.matmul(x, weight1) + bias1

    # SiLU (Swish) activation
    hidden_activated = silu_kernel(hidden)

    # Second linear layer
    output = np.matmul(hidden_activated, weight2) + bias2

    return output
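As a quick optional sanity check, you can call the kernel with tiny, arbitrary dimensions to confirm the shape flow (batch_size x input_dim -> hidden_dim -> output_dim) before running the full-size example below:
# Optional shape check with tiny, arbitrary dimensions
toy_x = np.ones((1, 3), dtype=np.float32)
toy_w1 = np.ones((3, 4), dtype=np.float32)
toy_b1 = np.zeros(4, dtype=np.float32)
toy_w2 = np.ones((4, 5), dtype=np.float32)
toy_b2 = np.zeros(5, dtype=np.float32)
print(mlp_kernel(toy_x, toy_w1, toy_b1, toy_w2, toy_b2).shape)  # (1, 5)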
3.2. Running the MLP Kernel as a NumPy Function#
# Create test data
batch_size = 2
input_dim = 2048
hidden_dim = 8192
output_dim = 2048
# Input data
x = np.random.rand(batch_size, input_dim).astype(np.float32)
print(f"Input shape: {x.shape}")
# Network parameters
weight1 = np.random.rand(input_dim, hidden_dim).astype(np.float32) * 0.1
bias1 = np.random.rand(hidden_dim).astype(np.float32) * 0.1
weight2 = np.random.rand(hidden_dim, output_dim).astype(np.float32) * 0.1
bias2 = np.random.rand(output_dim).astype(np.float32) * 0.1
print(f"Weight1 shape: {weight1.shape}")
print(f"Bias1 shape: {bias1.shape}")
print(f"Weight2 shape: {weight2.shape}")
print(f"Bias2 shape: {bias2.shape}")
# Run as NumPy function
out_numpy = mlp_kernel(x, weight1, bias1, weight2, bias2)
print(f"\nNumPy output shape: {out_numpy.shape}")
print(f"NumPy output range: [{np.min(out_numpy):.4f}, {np.max(out_numpy):.4f}]")
Input shape: (2, 2048)
Weight1 shape: (2048, 8192)
Bias1 shape: (8192,)
Weight2 shape: (8192, 2048)
Bias2 shape: (2048,)
NumPy output shape: (2, 2048)
NumPy output range: [20500.1797, 21870.7598]
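The output values are large and strictly positive because both the inputs and the weights are drawn from [0, 1). As an optional check, you can recompute the same MLP in float64 and compare it with the float32 result to see how small the rounding error is:
# Optional: float64 reference to gauge float32 rounding error
out_fp64 = mlp_kernel(
    x.astype(np.float64),
    weight1.astype(np.float64),
    bias1.astype(np.float64),
    weight2.astype(np.float64),
    bias2.astype(np.float64),
)
rel_err = np.max(np.abs(out_numpy - out_fp64) / np.abs(out_fp64))
print(f"Max relative error vs. float64 reference: {rel_err:.2e}")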
3.3. Tracing the MLP Kernel#
# Trace the kernel
traced_kernel = NKIPyKernel.trace(mlp_kernel)
3.4. Running the Traced Kernel with Simulation#
out_nkipy = simulate_traced_kernel(traced_kernel, x, weight1, bias1, weight2, bias2)
print(f"Simulated output shape: {out_nkipy.shape}")
print(f"Simulated output range: [{np.min(out_nkipy):.4f}, {np.max(out_nkipy):.4f}]")
print(f"Is the simulated output the same as NumPy? {np.allclose(out_nkipy, out_numpy)}")
Simulated output shape: (2, 2048)
Simulated output range: [20500.2129, 21870.7559]
Is the simulated output the same as NumPy? True
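The simulated range differs from the NumPy range only in the trailing digits (for example 20500.2129 vs. 20500.1797), and np.allclose still passes because its default tolerances (rtol=1e-05, atol=1e-08) comfortably cover a difference of that size at this magnitude. To inspect the difference explicitly:
# Optional: inspect the actual numerical difference
abs_diff = np.max(np.abs(out_nkipy - out_numpy))
rel_diff = np.max(np.abs(out_nkipy - out_numpy) / np.abs(out_numpy))
print(f"Max absolute difference: {abs_diff:.4f}")
print(f"Max relative difference: {rel_diff:.2e}")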
3.5. Running It on Trainium Hardware#
# Run on Trainium hardware
out_baremetal = baremetal_run_traced_kernel(traced_kernel, x, weight1, bias1, weight2, bias2)
print(f"Baremetal output shape: {out_baremetal.shape}")
print(f"Baremetal output range: [{np.min(out_baremetal):.4f}, {np.max(out_baremetal):.4f}]")
print(f"Is the baremetal output the same as NumPy? {np.allclose(out_baremetal, out_numpy)}")
Baremetal output shape: (2, 2048)
Baremetal output range: [20500.1797, 21870.7598]
Is the baremetal output the same as NumPy? True
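For a rough timing comparison, you can wrap the call in time.perf_counter. This is only a sketch: the first baremetal call may include one-time compilation and data-transfer overhead, so warm up before timing.
import time

# Warm-up call (may include one-time compilation / transfer overhead)
baremetal_run_traced_kernel(traced_kernel, x, weight1, bias1, weight2, bias2)

start = time.perf_counter()
baremetal_run_traced_kernel(traced_kernel, x, weight1, bias1, weight2, bias2)
elapsed = time.perf_counter() - start
print(f"Baremetal MLP call: {elapsed * 1000:.2f} ms")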