/*
 * This software is governed by the CeCILL-B license under French law and
 * abiding by the rules of distribution of free software.  You can  use, 
 * modify and/ or redistribute the software under the terms of the CeCILL-B
 * license as circulated by CEA, CNRS and INRIA at the following URL
 * "http://www.cecill.info" or the LICENCE.txt file present in this project.
*/

#ifndef CUDA_CURRENT_DEVICE_HPP_
#define CUDA_CURRENT_DEVICE_HPP_

#include <cassert>
#include <cstdio>

#include <cuda.h>
#include "memory_debug.hpp"

/**
 * @file cuda_current_device.hpp
 * @brief Some CUDA macro/func utilities related to the currently used GPU
 */

// TODO use Cuda_utils namespace:

/// @brief Check a kernel launch configuration against the limits of a device.
/// @param device     driver API handle of the device to check against
/// @param block_size requested block dimensions
/// @param grid_size  requested grid dimensions
/// @return true if the sizes are under the GPU limits
bool check_kernel_size(CUdevice device, const int3 block_size, const int3 grid_size);
/// @brief Overload of check_kernel_size() taking scalar sizes.
/// @note Presumably meant for one-dimensional launches; the implementation is
/// not visible from this header -- TODO confirm.
/// @return true if the sizes are under the GPU limits
bool check_kernel_size(CUdevice device, int block_size, int grid_size);

/// @brief Current device
/// @return current active device (driver identifier), i.e. a CUdevice handle
/// that can be handed to check_kernel_size()
CUdevice get_cu_device();

// -----------------------------------------------------------------------------

#ifndef NDEBUG
/// @def CUDA_CHECK_KERNEL_SIZE
/// @brief Check block and grid size according to current device assert(false)
/// if too large
#define CUDA_CHECK_KERNEL_SIZE(block, grid) do{                           \
    if(!check_kernel_size(get_cu_device(), (block), (grid))){             \
        fprintf(stderr,"CUDA error: wrong kernel size at %s, line %d\n",  \
        __FILE__, __LINE__);                                              \
        fprintf(stderr,"block size: %d grid_size: %d\n", block, grid);    \
        fflush(stderr);                                                   \
        Mem_debug::cuda_print_memory_trace();                             \
        assert(false);                                                    \
    }                                                                     \
} while(0)

#else

#define CUDA_CHECK_KERNEL_SIZE(block, grid) do{}while(0)

#endif

// -----------------------------------------------------------------------------

/** @def CUDA_LAUNCH_ARRAY
    @brief macro shortcut to launch a CUDA kernel on a linear array
    with error checking

    @param kernel_name name of the __global__ function to launch
    @param block_size  number of threads per block (evaluated exactly once,
                       converted to int)
    @param array_size  number of elements to cover (evaluated exactly once)
    @param ...         arguments forwarded as is to the kernel

    The grid size is the ceiling of array_size / block_size, so the last block
    may contain threads past the end of the array: the kernel must guard its
    accesses (see the example below).

    The launch configuration is validated with CUDA_CHECK_KERNEL_SIZE before
    the launch and CUDA_CHECK_ERRORS() is invoked right after it.
    CUDA_CHECK_ERRORS is not defined in this header and has to be provided by
    the including code.

    @note block_size must not be zero (it is used as a divisor). An
    array_size of zero produces a grid of zero blocks, which the CUDA runtime
    is expected to reject as an invalid configuration.

    @note The temporaries of the macro use the reserved-looking prefix
    'cuda_launch_array_' so that they cannot shadow a caller variable used in
    array_size or in the kernel arguments.

    Usage:
    @code
    __global__
    void a_kernel(int* array, int size_array){
        int p = blockIdx.x * blockDim.x + threadIdx.x;
        if(p < size_array){
            // Do stuff with array
        }
    }

    int main(){
        int size_of_array = 100;
        int* an_array_of_ints = 0;
        // Allocate device ptr 'an_array_of_ints'
        CUDA_LAUNCH_ARRAY(a_kernel, 8, size_of_array, an_array_of_ints, size_of_array);
        return 0;
    }
    @endcode
*/
#define CUDA_LAUNCH_ARRAY(kernel_name, block_size, array_size, ...)          \
    do{                                                                      \
        const int cuda_launch_array_bl_ = (block_size);                      \
        const int cuda_launch_array_gr_ =                                    \
            (((array_size) + cuda_launch_array_bl_ - 1) /                    \
             cuda_launch_array_bl_);                                         \
        CUDA_CHECK_KERNEL_SIZE(cuda_launch_array_bl_,                        \
                               cuda_launch_array_gr_);                       \
        kernel_name<<<cuda_launch_array_gr_,                                 \
                      cuda_launch_array_bl_>>>(__VA_ARGS__);                 \
        CUDA_CHECK_ERRORS();                                                 \
    }while(0)

// -----------------------------------------------------------------------------

#endif // CUDA_CURRENT_DEVICE_HPP_
