import numpy as np
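
# A minimal, self-contained NumPy demo of scaled dot-product self-attention.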
# Input X: 3 tokens (words), each represented as a 4-dimensional embedding
X = np.array([
    [1, 0, 1, 0],
    [0, 2, 0, 2],
    [1, 1, 1, 1]
])
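
# Randomly initialized projection matrices (learned parameters in a real model);
# each maps a 4-dimensional embedding to a 2-dimensional query, key, or value.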
W_q = np.random.rand(4, 2)
W_k = np.random.rand(4, 2)
W_v = np.random.rand(4, 2)
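
# Project the inputs into query, key, and value spaces (each of shape 3 x 2)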
Q = X @ W_q
K = X @ W_k
V = X @ W_v
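
# Raw attention scores: dot product of every query with every key (3 x 3 matrix)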
attention_scores = Q @ K.T
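
# Scale by sqrt(d_k), the query/key dimension, so large dot products do not
# push the softmax into regions with extremely small gradients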
dk = Q.shape[-1]
attention_scores = attention_scores / np.sqrt(dk)
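
# Numerically stable softmax over the last axis (subtract the row-wise max before exponentiating)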
def softmax(x):
    e_x = np.exp(x - np.max(x, axis=-1, keepdims=True))
    return e_x / e_x.sum(axis=-1, keepdims=True)
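
# Normalize the scaled scores; each row of attention_weights sums to 1 and
# says how much token i attends to token j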
attention_weights = softmax(attention_scores)
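
# Each output row is a weighted sum of the value vectors: a context-aware
# representation of the corresponding token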
output = attention_weights @ V
print("Input X:\n", X)
print("\nAttention Weights:\n", attention_weights)
print("\nOutput:\n", output)