Quantum Mechanics
Thermal Noise
Key proceed mouse move
Peseudo-Random Number Generator
counter 0, 1, 2 -> Encrypt
Pool of Randomness seed-> key
Storing a file securely
Electronic Codebook Mode
ソフトウェアエンジニアの技術ブログ:Software engineer tech blog
随机应变 ABCD: Always Be Coding and … : хороший
Quantum Mechanics
Thermal Noise
Key proceed mouse move
Peseudo-Random Number Generator
counter 0, 1, 2 -> Encrypt
Pool of Randomness seed-> key
Storing a file securely
Electronic Codebook Mode
def generate_sequence(f, n): return map(f, range(n)) def generate_fake_random(n): repetition = 0 previous = None res = [] for i in range(n): x = random.choice([0, 1]) if x == previous: repetition += 1 if repetition > 2: x = (x + 1) % 2 repetition = 1 previous = x else: previous = x repetition = 1 res.append(x) return res length = 88 print display_bits(generate_sequence(lambda n: 0 if n % 3 == 0 else 1, length))
s i random if K(s) = |s| + c
BITS = ('0', '1')
ASCII_BITS = 7
def display_bits(b):
"""converts list of {0, 1}* to string"""
return ''.join([BITS[e] for e in b])
def seq_to_bits(seq):
return [0 if b == '0' else 1 for b in seq]
def pad_bits(bits, pad):
"""pads seq with leading 0s up to length pad"""
assert len(bits) <= pad
return [0] * (pad - len(bits)) + bits
def convert_to_bits(n):
"""converts an integer 'n' to bit array"""
result = []
if n == 0:
return [0]
while n > 0:
result = [(n % 2)] + result
n = n / 2
return result
def string_to_bits(s):
def chr_to_bit(c):
return pad_bits(convert_to_bits(ord(c)), ASCII_BITS)
return [b for group in
map(chr_to_bit, s)
for b in group]
def bits_to_char(b):
assert len(b) == ASCII_BITS
value = 0
for e in b:
value = (value * 2) + e
return chr(value)
def list_to_string(p):
return ''.join(p)
def bits_to_string(b):
return ''.join([bits_to_char(b[i:i + ASCII_BITS])
for i in range(0, len(b), ASCII_BITS)])
Machine generates key sequence based on its configuration
key would not repeat for 10^19 letters
Event: subset of outcomes from a probability space
Head = { H }
Valid = { H, T }
P(A) = ΣweA P(w)
1 – P(E) = P(Valid)
Complementary Event Prbability
VA: P(a) + p(A) = 1
Conditional probability
P(B|A) = P(Aub)/ p(A)
Perfect Cipher
the ciphertext provides an attacker with no additional information about the plaintext.
m -> enc -> c
m e M
m* e M
Eki(mi) = c
P(m=m*).P(k=k*) = 1/k
P(Ek(m)= c) = 1/|k|
P(m=m* Ek(m)=c) = P(m = m*)/|k|
Malleable
alice Ek(m)=c active attacker
Impratical |k| = |m|
Shamon’s Theorem: if a cipher is perfect, it must be impractical
Perfect cipher: P(m=m*|Ek(m)=c) = P(m=m*)
Ek(m) = c0 c1 … Cn-1
m = ‘CS’ = 1010011 1000011
k = 11001000100110
c = 01101111100101
Proving Security
here is a mathematical proof, accepted by experts, that shows the cipher is secure.
Probability(Review)
Ω:set of all possible outcomes
probability space
Ω = {h, t}
uniform distribution each outcome is equally likely
p: outcome -> [0, 1]
p(h) = 1/2 p(t) = 1/2
Ω = {H, T, E}
p(h) = 0.49999
p(t) = 0.49999
ΣweΩ p(w) = 1
plaintext -> encryption -> ciphertext -> insecure channel -> decryptron
VmeM:D(E(m))=m
Kerck hoff’s Principle CIS
meM ceC keK
E:MxK -> c D:c*k -> M
correctness property: Vm;k: Dk(Ek(m))
Security property:
Ideal: ciphertext reveals nothing about key or message
def convert_to_bits(n, pad): result = [] while n > 0: if n % 2 == 0: result = [0] + result else: result = [1] + result n = n / 2 while len(result) < pad: result = [0] + result return result
cryptography
crypto -> secret
graphy -> writing
cryptology
symmetric cryptography and applications
asymmetric cryptography and applications
m -> enc(<-k) -> ciphertext
timing side-channel
__global__ void Hello(){
printf("Hello");
}
void main(){
Hello<<<1,1>>>();
cudaDeviceSynchronize();
printf("World");
}
$define CUB_STDERR
#include <stdio.h>
#include <iostream>
#include <cub/cub.cuh>
using namespace cub;
bool g_verbose = false;
int g_iterations = 100;
// ---------
// Kernels
// ---------
template <
int BLOCK_THREADS,
int ITEMS_PER_THREAD>
__global__ void BlockPrefixSumKernel(
int *d_in,
int *d_out,
clock_t *d_elapsed)
{
typedef BlockScan<int, BLOCK_THREADS > BlockScanT;
__shared__ typename BlockScanT::SmemStorage smem_storage;
int data[ITEMS_PER_THREAD];
BlockLoadVectorized(d_in, data);
clock_t start = clock();
int aggregate;
BlockScanT::ExclusiveSum(smem_storage, data, data, aggregate);
clock_t stop = clock();
BlockStoreVectorized(d_out, data);
if(threadIdx.x == 0)
{
*d_elapsed = (start > stop)? start - stop : stop - start;
d_out[BLOCK_THREADS * ITEMS_PER_THREAD] = aggregate;
}
}
int Initialize(
int *h_in,
int *h_reference,
int num_elements)
{
int inclusive = 0;
for (int i = 0; i< num_elements; ++i)
{
h_in[i] = i % 17;
h_reference[i] = inclusive;
inclusive += h_in[i];
}
return inclusive;
}
template <
int BLOCK_THREADS,
int ITEMS_PER_THREAD>
void Test()
{
const int TILE_SIZE = BLOCJ_THREAD * ITEMS_PER_THREAD;
int *h_in = new int[TILE_SIZE];
int *h_reference = new int[TILE_SIZE];
int *h_gpu = new int[TILE_SIZE + 1];
int h_aggregate = Initialize(h_in, h_reference, TILE_SIZE);
int *d_in = NULL;
int *d_out = NULL;
clock_t *d_elapsed = NULL;
cudaMalloc((void**)&d_in, sizeof(int)* TILE_SIZE);
cudaMalloc((void**)&d_out, sizeof(int) * (TILE_SIZE + 1));
cudaMalloc((void**)&d_elapsed, sizeof(clock_t));
if (g_verbose)
{
printf("Input data: ");
for (int i = 0; i < TILE_SIZE; i++)
printf("%d, ", h_in[i]);
printf("\n\n");
}
cudaMemcpy(d_in, h_in, sizeof(int) * TILE_SIZE, cudaMemcpyHostToDevice);
printf("BlockScan %d items (%d threads, %d items per thread):",
TILE_SIZE, BLOCK_THREADS, ITEMS_PER_THREAD);
clock_t elapsed_scan_clocks = 0;
for (int i = 0; i < g_iterations; ++i)
{
BlockPrefixSumKernel<BLOCK_THREADS, ITEMS_PER_THREAD><<<1, BLOCK_THREADS>>>(
d_in,
d_otu,
d_elapsed);
clock_t scan_clocks;
cudaMemcpy(h_gpu, d_out, sizeof(int) * (TILE_SIZE + 1), cudaMemcpyDeviceToHost);
cudaMemcpy(&scan_clocks, d_elapsed, sizeof(clock_t), cudaMemcpyDeviceToHost);
elapsed_scan_clocks += scan_clocks;
}
bool correct = true;
for (int i = 0; i < TILE_SIZE; i++)
{
if (h_gpu[i] != h_reference[i])
{
printf("Incorrect result @ offset %d (%d != %d)\n",
i, h_gpu[i], h_reference[i]);
correct = false;
break;
}
}
if (h_gpu[TILE_SIZE] != h_aggregate)
{
printf("Incorrect aggregate (%d != %d)\n", h_gpu[TILE_SIZE], h_aggregate);
correct = false;
}
if (correct) printf("Correct!\n");
if (g_verbose)
{
printf("GPu output(reference output): ");
for (int i = 0; i < TILE_SIZE; i++)
printf("%d (%d), ", h_gpu[i], h_reference[i]);
printf("\n");
printf("GPU aggregate (reference aggregate)", h_gpu[TILE_SIZE], h_aggregate);
printf("\n\n");
}
printf("Average clock per 32-bit int scanned: %.3f\n\n", float(elapsed_scan_clocks) / TILE_SIZE / g_iterations);
if (h_in) delete[] h_in;
if (h_reference) delete[] h_reference;
if (h_gpu) delete[] h_gpu;
if (d_in) cudaFree(d_in);
if (d_out) cudaFree(d_out);
if (d_elapsed) cudaFree(d_elapsed);
}
int main(int argc, char** argv)
{
cudaDeviceProp props;
cudaGetDeviceProperties(&props, 0);
printf("Using device %s\n", props.name);
Test<1024, 1>();
Test<512, 2>();
Test<256, 4>();
Test<128, 8>();
Test<64, 16>();
Test<32, 32>();
Test<16, 64>();
return 0;
}