Skip to content
Snippets Groups Projects
Commit 2224c9d4 authored by Pierre Kunzli's avatar Pierre Kunzli
Browse files

ajout correction premiere serie exo

parent ddf055ad
No related branches found
No related tags found
No related merge requests found
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdlib>
#include <iostream>
#include <vector>
// CUDA kernel for element-wise vector addition: c[i] = a[i] + b[i] for 0 <= i < N.
//
// Launch layout: 1D only (just the x components of blockIdx/blockDim/gridDim/threadIdx
// are used). Any grid size produces a complete result: each thread starts at its global
// index and then advances by the total number of launched threads until N is reached.
// No shared memory is used.
//
// Parameters:
//   a, b  input arrays, dereferenced on the device for indices [0, N)
//   c     output array, written on the device for indices [0, N)
//   N     number of elements to process; N <= 0 means nothing is done
__global__ void vectorAdd(const int* a, const int* b, int* c, int N) {
    // Index math is done in size_t: a signed int index would wrap negative once the
    // launch holds more than INT_MAX threads, pass an `index < N` guard and touch
    // memory outside the arrays.
    const size_t count = (N > 0) ? static_cast<size_t>(N) : 0;
    const size_t stride = static_cast<size_t>(gridDim.x) * blockDim.x;
    for (size_t i = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
         i < count;
         i += stride) {
        c[i] = a[i] + b[i];
    }
}
// Terminates the program with a diagnostic when a CUDA runtime call did not succeed.
// `what` names the operation that was attempted and is only used in the message.
static void checkCuda(cudaError_t status, const char* what) {
    if (status != cudaSuccess) {
        std::cerr << "CUDA error during " << what << ": "
                  << cudaGetErrorString(status) << std::endl;
        std::exit(EXIT_FAILURE);
    }
}

// Adds two vectors of random integers on the GPU and verifies the result on the host.
// Returns 0 when every element matches, EXIT_FAILURE on a CUDA error or a mismatch.
int main() {
    // Number of elements per vector (a power of two).
    const int N = 1 << 28;
    const size_t n_bytes = sizeof(int) * N;
    std::cout << "Number of integers: " << N << std::endl;
    std::cout << "Vector size: " << (double) n_bytes/1000000000.0 << " [GB]" << std::endl;

    // The three host vectors: C = A + B. std::vector owns the memory, so it is
    // released on every exit path and an allocation failure is reported via
    // std::bad_alloc instead of a null pointer being dereferenced.
    std::vector<int> h_a(N);
    std::vector<int> h_b(N);
    std::vector<int> h_c(N);

    // Random inputs in the range 0..99.
    for (int i = 0; i < N; i++) {
        h_a[i] = std::rand() % 100;
        h_b[i] = std::rand() % 100;
    }

    // Device allocations.
    int* d_a = nullptr;
    int* d_b = nullptr;
    int* d_c = nullptr;
    checkCuda(cudaMalloc(&d_a, n_bytes), "cudaMalloc(d_a)");
    checkCuda(cudaMalloc(&d_b, n_bytes), "cudaMalloc(d_b)");
    checkCuda(cudaMalloc(&d_c, n_bytes), "cudaMalloc(d_c)");

    // Host -> Device.
    checkCuda(cudaMemcpy(d_a, h_a.data(), n_bytes, cudaMemcpyHostToDevice),
              "cudaMemcpy(h_a -> d_a)");
    checkCuda(cudaMemcpy(d_b, h_b.data(), n_bytes, cudaMemcpyHostToDevice),
              "cudaMemcpy(h_b -> d_b)");

    // Threads per block.
    const int NUM_THREADS = 1024;
    // Blocks per grid: at least as many threads as elements are needed, so the
    // division is rounded up to add one extra block when N is not a multiple of
    // the block size.
    const int NUM_BLOCKS = (N + NUM_THREADS - 1) / NUM_THREADS;

    // Kernel launch. A launch does not return a status itself, so an invalid
    // configuration is picked up through cudaGetLastError().
    vectorAdd<<<NUM_BLOCKS, NUM_THREADS>>>(d_a, d_b, d_c, N);
    checkCuda(cudaGetLastError(), "vectorAdd launch");

    // Device -> Host: bring C back. This copy is checked as well, so a failure
    // reported at this point (including one raised while the kernel ran) stops
    // the program instead of being ignored.
    checkCuda(cudaMemcpy(h_c.data(), d_c, n_bytes, cudaMemcpyDeviceToHost),
              "cudaMemcpy(d_c -> h_c)");

    // Verify against a host reference. An explicit comparison is used rather than
    // assert() so that the check is still performed when NDEBUG is defined.
    bool ok = true;
    for (int i = 0; i < N; i++) {
        const int expected = h_a[i] + h_b[i];
        if (h_c[i] != expected) {
            std::cerr << "Mismatch at index " << i << ": got " << h_c[i]
                      << ", expected " << expected << std::endl;
            ok = false;
            break;
        }
    }

    // Release device memory (host vectors clean up after themselves).
    checkCuda(cudaFree(d_a), "cudaFree(d_a)");
    checkCuda(cudaFree(d_b), "cudaFree(d_b)");
    checkCuda(cudaFree(d_c), "cudaFree(d_c)");

    if (!ok) {
        return EXIT_FAILURE;
    }
    std::cout << "Addition de vecteurs: OK" << std::endl;
    return 0;
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment