Commit adda0de1 authored by Robin Kobus
Browse files

added L08 exercise

parent 8ad5a41a
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"%%openmp L08_dynamic_schedule\n",
"#include <iostream> // std::cout\n",
"#include <cstdint> // uint64_t\n",
"#include <vector> // std::vector\n",
"#include <thread>\n",
"#include <mutex>\n",
"\n",
"#include \"hpc_helpers/include/hpc_helpers.h\"\n",
"#include \"hpc_helpers/include/timers.cuh\"\n",
"#include \"hpc_helpers/include/mnist_IO.hpp\"\n",
"#include \"hpc_helpers/include/check_results.h\"\n",
"\n",
"using index_t = uint32_t;\n",
"using data_t = int32_t;\n",
"\n",
"std::mutex mutex;\n",
"\n",
"std::vector<data_t> static_all_pairs(\n",
" const std::vector<unsigned char>& mnist,\n",
" index_t num_images,\n",
" index_t num_pixels,\n",
" index_t num_threads=8,\n",
" index_t chunk_size=16)\n",
"{\n",
" std::vector<data_t> all_pairs(num_images*num_images);\n",
"\n",
" auto block_cyclic = [&] (const index_t& id) -> void {\n",
"\n",
" // cycle through blocks\n",
" for (index_t lower = id * chunk_size;\n",
" lower < num_images;\n",
" lower += num_threads * chunk_size)\n",
" {\n",
" // compute the upper border of the block (exclusive)\n",
" const index_t upper = std::min(lower+chunk_size,num_images);\n",
"\n",
" // for all entries on or below the diagonal (i' = I, with I <= i)\n",
" for (index_t i = lower; i < upper; i++) {\n",
" for (index_t I = 0; I <= i; I++) {\n",
"\n",
" // compute squared Euclidean distance\n",
" data_t accum = data_t(0);\n",
" for (index_t j = 0; j < num_pixels; j++) {\n",
" data_t residue = data_t(mnist[i*num_pixels+j])\n",
" - data_t(mnist[I*num_pixels+j]);\n",
" accum += residue * residue;\n",
" }\n",
"\n",
" // write Delta[i,i'] = Delta[i',i]\n",
" all_pairs[i*num_images+I] =\n",
" all_pairs[I*num_images+i] = accum;\n",
" }\n",
" }\n",
" }\n",
" };\n",
"\n",
" // business as usual\n",
" std::vector<std::thread> threads;\n",
"\n",
" for (index_t id = 0; id < num_threads; id++)\n",
" threads.emplace_back(block_cyclic, id);\n",
"\n",
" for (auto& thread : threads)\n",
" thread.join();\n",
"\n",
" return all_pairs;\n",
"}\n",
"\n",
"///////////////////////////////////////////////////////////////////////////////\n",
"// STUDENTS PART (fill in the gaps)\n",
"///////////////////////////////////////////////////////////////////////////////\n",
"\n",
"std::vector<data_t> dynamic_all_pairs(\n",
" const std::vector<unsigned char>& mnist,\n",
" index_t num_images,\n",
" index_t num_pixels,\n",
" index_t num_threads=8,\n",
" index_t chunk_size=16)\n",
"{\n",
" std::vector<data_t> all_pairs(num_images*num_images);\n",
"\n",
" // lower index of the next block to hand out; it is shared by all threads,\n",
" // so access it only while holding the global mutex\n",
" index_t global_lower = 0;\n",
"\n",
" auto dynamic_block_cyclic = [&] () -> void {\n",
"\n",
" // assume we have not done anything\n",
" index_t lower = 0;\n",
"\n",
" // while there are still images left to process\n",
" while (lower < num_images) {\n",
"\n",
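" // TODO: while holding the mutex, copy global_lower into lower and then\n",
" // advance global_lower by chunk_size (until this is filled in, lower\n",
" // never changes and this loop does not terminate)\n",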
" {\n",
"\n",
" }\n",
"\n",
" // compute the upper border of the block (exclusive)\n",
" const index_t upper = std::min(lower+chunk_size,num_images);\n",
"\n",
" // for all entries on or below the diagonal (i' = I, with I <= i)\n",
" for (index_t i = lower; i < upper; i++) {\n",
" for (index_t I = 0; I <= i; I++) {\n",
"\n",
" // compute squared Euclidean distance\n",
" data_t accum = data_t(0);\n",
" for (index_t j = 0; j < num_pixels; j++) {\n",
" data_t residue = data_t(mnist[i*num_pixels+j])\n",
" - data_t(mnist[I*num_pixels+j]);\n",
" accum += residue * residue;\n",
" }\n",
"\n",
" // write Delta[i,i'] = Delta[i',i]\n",
" all_pairs[i*num_images+I] =\n",
" all_pairs[I*num_images+i] = accum;\n",
" }\n",
" }\n",
" }\n",
" };\n",
"\n",
" // business as usual\n",
" std::vector<std::thread> threads;\n",
"\n",
" for (index_t id = 0; id < num_threads; id++)\n",
" threads.emplace_back(dynamic_block_cyclic);\n",
"\n",
" for (auto& thread : threads)\n",
" thread.join();\n",
"\n",
" return all_pairs;\n",
"}\n",
"\n",
"///////////////////////////////////////////////////////////////////////////////\n",
"\n",
"int main() {\n",
" const index_t num_images = 4096;\n",
" const index_t num_pixels = 28*28;\n",
"\n",
" helpers::CpuTimer load_data_from_disk_timer(\"load data from disk\");\n",
" load_data_from_disk_timer.start();\n",
" auto mnist = helpers::load_mnist_images(\"/data/hpc/t10k-images-idx3-ubyte\");\n",
" load_data_from_disk_timer.print();\n",
"\n",
" helpers::CpuTimer static_all_pairs_timer(\"compute distances static\");\n",
" static_all_pairs_timer.start();\n",
" auto truth = static_all_pairs(mnist, num_images, num_pixels);\n",
" static_all_pairs_timer.print();\n",
"\n",
" helpers::CpuTimer dynamic_all_pairs_tiemr(\"compute distances dynamic\");\n",
" dynamic_all_pairs_tiemr.start();\n",
" auto test = dynamic_all_pairs(mnist, num_images, num_pixels);\n",
" dynamic_all_pairs_tiemr.print();\n",
"\n",
" helpers::CpuTimer check_results_timer(\"check results\");\n",
" check_results_timer.start();\n",
" bool no_errors = helpers::check_results(test, truth);\n",
" check_results_timer.print();\n",
"\n",
" if(no_errors)\n",
" std::cout << \"Parallel programming is fun!\" << std::endl;\n",
"}\n",
""
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment