143 lines
6.0 KiB
C++
143 lines
6.0 KiB
C++
// Boost.uBLAS
|
|
//
|
|
// Copyright (c) 2018 Fady Essam
|
|
// Copyright (c) 2018 Stefan Seefeld
|
|
//
|
|
// Distributed under the Boost Software License, Version 1.0.
|
|
// (See accompanying file LICENSE_1_0.txt or
|
|
// copy at http://www.boost.org/LICENSE_1_0.txt)
|
|
|
|
#ifndef boost_numeric_ublas_opencl_transpose_hpp_
|
|
#define boost_numeric_ublas_opencl_transpose_hpp_
|
|
|
|
#include <boost/numeric/ublas/opencl/library.hpp>
|
|
#include <boost/numeric/ublas/opencl/vector.hpp>
|
|
#include <boost/numeric/ublas/opencl/matrix.hpp>
|
|
|
|
// Kernel for transposition of various data types
|
|
#define OPENCL_TRANSPOSITION_KERNEL(DATA_TYPE) \
|
|
"__kernel void transpose(__global " #DATA_TYPE "* in, __global " #DATA_TYPE "* result, unsigned int width, unsigned int height) \n" \
|
|
"{ \n" \
|
|
" unsigned int column_index = get_global_id(0); \n" \
|
|
" unsigned int row_index = get_global_id(1); \n" \
|
|
" if (column_index < width && row_index < height) \n" \
|
|
" { \n" \
|
|
" unsigned int index_in = column_index + width * row_index; \n" \
|
|
" unsigned int index_result = row_index + height * column_index; \n" \
|
|
" result[index_result] = in[index_in]; \n" \
|
|
" } \n" \
|
|
"} \n"
|
|
|
|
|
|
namespace boost { namespace numeric { namespace ublas { namespace opencl {
|
|
|
|
template<class T, class L1, class L2>
|
|
typename std::enable_if<is_numeric<T>::value>::type
|
|
change_layout(ublas::matrix<T, L1, opencl::storage> const &m,
|
|
ublas::matrix<T, L2, opencl::storage> &result,
|
|
compute::command_queue& queue)
|
|
{
|
|
assert(m.size1() == result.size1() && m.size2() == result.size2());
|
|
assert(m.device() == result.device() && m.device() == queue.get_device());
|
|
assert(!(std::is_same<L1, L2>::value));
|
|
char const *kernel;
|
|
if (std::is_same<T, float>::value)
|
|
kernel = OPENCL_TRANSPOSITION_KERNEL(float);
|
|
else if (std::is_same<T, double>::value)
|
|
kernel = OPENCL_TRANSPOSITION_KERNEL(double);
|
|
else if (std::is_same<T, std::complex<float>>::value)
|
|
kernel = OPENCL_TRANSPOSITION_KERNEL(float2);
|
|
else if (std::is_same<T, std::complex<double>>::value)
|
|
kernel = OPENCL_TRANSPOSITION_KERNEL(double2);
|
|
size_t len = strlen(kernel);
|
|
cl_int err;
|
|
cl_context c_context = queue.get_context().get();
|
|
cl_program program = clCreateProgramWithSource(c_context, 1, &kernel, &len, &err);
|
|
clBuildProgram(program, 1, &queue.get_device().get(), NULL, NULL, NULL);
|
|
cl_kernel c_kernel = clCreateKernel(program, "transpose", &err);
|
|
size_t width = std::is_same < L1, ublas::basic_row_major<>>::value ? m.size2() : m.size1();
|
|
size_t height = std::is_same < L1, ublas::basic_row_major<>>::value ? m.size1() : m.size2();
|
|
size_t global_size[2] = { width , height };
|
|
clSetKernelArg(c_kernel, 0, sizeof(T*), &m.begin().get_buffer().get());
|
|
clSetKernelArg(c_kernel, 1, sizeof(T*), &result.begin().get_buffer().get());
|
|
clSetKernelArg(c_kernel, 2, sizeof(unsigned int), &width);
|
|
clSetKernelArg(c_kernel, 3, sizeof(unsigned int), &height);
|
|
cl_command_queue c_queue = queue.get();
|
|
cl_event event = NULL;
|
|
clEnqueueNDRangeKernel(c_queue, c_kernel, 2, NULL, global_size, NULL, 0, NULL, &event);
|
|
clWaitForEvents(1, &event);
|
|
}
|
|
|
|
template<class T, class L1, class L2, class A>
|
|
typename std::enable_if<is_numeric<T>::value>::type
|
|
change_layout(ublas::matrix<T, L1, A> const &m,
|
|
ublas::matrix<T, L2, A> &result,
|
|
compute::command_queue& queue)
|
|
{
|
|
ublas::matrix<T, L1, opencl::storage> mdev(m, queue);
|
|
ublas::matrix<T, L2, opencl::storage> rdev(result.size1(), result.size2(), queue.get_context());
|
|
change_layout(mdev, rdev, queue);
|
|
rdev.to_host(result, queue);
|
|
}
|
|
|
|
template<class T, class L>
|
|
typename std::enable_if<is_numeric<T>::value>::type
|
|
trans(ublas::matrix<T, L, opencl::storage> const &m,
|
|
ublas::matrix<T, L, opencl::storage> &result,
|
|
compute::command_queue& queue)
|
|
{
|
|
assert(m.size1() == result.size2() && m.size2() == result.size1());
|
|
assert(m.device() == result.device() && m.device() == queue.get_device());
|
|
char const *kernel;
|
|
if (std::is_same<T, float>::value)
|
|
kernel = OPENCL_TRANSPOSITION_KERNEL(float);
|
|
else if (std::is_same<T, double>::value)
|
|
kernel = OPENCL_TRANSPOSITION_KERNEL(double);
|
|
else if (std::is_same<T, std::complex<float>>::value)
|
|
kernel = OPENCL_TRANSPOSITION_KERNEL(float2);
|
|
else if (std::is_same<T, std::complex<double>>::value)
|
|
kernel = OPENCL_TRANSPOSITION_KERNEL(double2);
|
|
size_t len = strlen(kernel);
|
|
cl_int err;
|
|
cl_context c_context = queue.get_context().get();
|
|
cl_program program = clCreateProgramWithSource(c_context, 1, &kernel, &len, &err);
|
|
clBuildProgram(program, 1, &queue.get_device().get(), NULL, NULL, NULL);
|
|
cl_kernel c_kernel = clCreateKernel(program, "transpose", &err);
|
|
size_t width = std::is_same <L, ublas::basic_row_major<>>::value ? m.size2() : m.size1();
|
|
size_t height = std::is_same <L, ublas::basic_row_major<>>::value ? m.size1() : m.size2();
|
|
size_t global_size[2] = { width , height };
|
|
clSetKernelArg(c_kernel, 0, sizeof(T*), &m.begin().get_buffer().get());
|
|
clSetKernelArg(c_kernel, 1, sizeof(T*), &result.begin().get_buffer().get());
|
|
clSetKernelArg(c_kernel, 2, sizeof(unsigned int), &width);
|
|
clSetKernelArg(c_kernel, 3, sizeof(unsigned int), &height);
|
|
cl_command_queue c_queue = queue.get();
|
|
cl_event event = NULL;
|
|
clEnqueueNDRangeKernel(c_queue, c_kernel, 2, NULL, global_size, NULL, 0, NULL, &event);
|
|
clWaitForEvents(1, &event);
|
|
}
|
|
|
|
template<class T, class L, class A>
|
|
typename std::enable_if<is_numeric<T>::value>::type
|
|
trans(ublas::matrix<T, L, A> const &m,
|
|
ublas::matrix<T, L, A> &result,
|
|
compute::command_queue& queue)
|
|
{
|
|
ublas::matrix<T, L, opencl::storage> mdev(m, queue);
|
|
ublas::matrix<T, L, opencl::storage> rdev(result.size1(), result.size2(), queue.get_context());
|
|
trans(mdev, rdev, queue);
|
|
rdev.to_host(result, queue);
|
|
}
|
|
|
|
template<class T, class L, class A>
|
|
typename std::enable_if<is_numeric<T>::value, ublas::matrix<T, L, A>>::type
|
|
trans(ublas::matrix<T, L, A>& m, compute::command_queue& queue)
|
|
{
|
|
ublas::matrix<T, L, A> result(m.size2(), m.size1());
|
|
trans(m, result, queue);
|
|
return result;
|
|
}
|
|
|
|
}}}}
|
|
|
|
#endif
|