/*
 * This software is governed by the CeCILL-B license under French law and
 * abiding by the rules of distribution of free software.  You can  use, 
 * modify and/ or redistribute the software under the terms of the CeCILL-B
 * license as circulated by CEA, CNRS and INRIA at the following URL
 * "http://www.cecill.info" or the LICENCE.txt file present in this project.
*/

#ifndef MEMORY_DEBUG_HPP__
#define MEMORY_DEBUG_HPP__

#include <stdio.h>

// Define only if you want to debug device memory
//#define TRACE_MEMORY

/// Default size value for the memory stack.
/// NOTE(review): not used anywhere in this header; presumably the initial
/// capacity of Memory_stack's entry array -- confirm in memory_debug.inl.
#define DEFAULT_STACK_SIZE 32
/// Size in bytes of the fixed name buffer stored in every stack entry
/// (Memory_stack::Mem_s::name). The last byte is reserved for the terminating
/// '\0', so at most MAX_NAME_LEN-1 characters of a name are kept.
#define MAX_NAME_LEN 64

/** @brief Memory toolkit designed to keep CUDA memory traces

    usage:
    @code
        // In every header that uses cudaMalloc/cudaFree-like functions,
        // define this before including the toolkit:
        #define TRACE_MEMORY
        #include "memory_debug.hpp"

        // Wherever you want to check the device memory state:
        void fun() {

            Mem_debug::cuda_print_memory_trace();
        }
    @endcode

    When TRACE_MEMORY is defined, every call to cudaMallocXXX() and
    cudaFreeXXX() is replaced with a similar function which performs the call
    and registers it. This toolkit provides the means to register those calls,
    wherever they are, through static methods.

    A call to cudaMallocXXX() is registered by pushing it onto a global stack
    and a call to cudaFreeXXX() pops it. The stack can be printed whenever
    needed with Memory_stack::print().

    N.B: If TRACE_MEMORY is defined, the convenience macro CUDA_SAFE_CALL(x)
    defined in cuda_assert.hpp prints the memory stack when a CUDA error occurs.

    @see cuda_utils.hpp CUDA_SAFE_CALL(x)
*/
struct Memory_stack{

    /// Tag stored with every entry to tell which kind of allocation it is.
    /// NOTE(review): presumably LINEAR_MEMORY is for cudaMalloc()-style buffers
    /// and CUDA_ARRAY for cudaArray allocations -- confirm against the callers.
    typedef enum{
        LINEAR_MEMORY,
        CUDA_ARRAY
    } Mem_kind;

    /// Register an allocation in the global stack.
    /// @param address address of the allocation to register
    /// @param size    size associated with the allocation
    ///                (NOTE(review): presumably in bytes -- confirm)
    /// @param name    label stored with the entry; only the first
    ///                MAX_NAME_LEN-1 characters are kept
    /// @param type    kind of memory the entry refers to
    /// @note The definition is not visible in this header.
    static void push(const void* address,
                     size_t size,
                     const char* name,
                     Mem_kind type);

    /// Unregister the allocation located at @p address.
    /// @note The definition is not visible in this header.
    static void pop(const void* address);

    /// Print the content of the memory stack.
    /// @note The definition is not visible in this header.
    static void print();

private:
    /// One recorded allocation: address, size, kind and a fixed-size label.
    struct Mem_s{
        /// Build an empty entry: null address, zero size and an empty name, so
        /// that a default-constructed entry never exposes indeterminate values
        /// or an unterminated name.
        inline Mem_s() :
            address(0),
            size(0),
            kind(LINEAR_MEMORY)
        {
            for(int i = 0; i < MAX_NAME_LEN; i++)
                name[i] = '\0';
        }

        /// Build an entry describing one allocation.
        /// @param a address of the allocation
        /// @param s size associated with the allocation
        /// @param n label of the allocation; truncated to MAX_NAME_LEN-1
        ///          characters. A null pointer is treated as an empty label.
        /// @param k kind of memory
        inline Mem_s(const void* a,
                     size_t s,
                     const char* n,
                     Mem_kind k):
            address(a),
            size(s),
            kind(k)
        {
            int i = 0;
            // Copy at most MAX_NAME_LEN-1 characters; a null label is skipped
            // entirely instead of being dereferenced.
            if(n != 0){
                for(; i < MAX_NAME_LEN-1 && n[i] != '\0'; i++)
                    name[i] = n[i];
            }
            // Zero the rest of the buffer: this terminates the string and
            // keeps the bytes after the terminator deterministic.
            for(; i < MAX_NAME_LEN; i++)
                name[i] = '\0';
        }

        const void* address;      ///< address of the recorded allocation
        size_t size;              ///< size recorded for the allocation
        Mem_kind kind;            ///< kind of memory recorded
        char name[MAX_NAME_LEN];  ///< null-terminated label of the allocation

    };

    // NOTE(review): the three members below are only declared here; their
    // meaning is inferred from their names -- confirm in memory_debug.inl.
    static Mem_s* entries;   ///< presumably the storage of the stack entries
    static int stack_size;   ///< presumably the capacity of 'entries'
    static int n;            ///< presumably the number of entries in use

    /// NOTE(review): presumably grows 'entries' when the stack is full; the
    /// definition is not visible in this header.
    static void realloc();
};

// =============================================================================
/// Helper functions for inspecting device memory. Only the declarations are
/// given here; the definitions are expected to come from the file included at
/// the end of this header.
namespace Mem_debug {
// =============================================================================

/// Print the total, free and used device memory usage.
/// Takes no argument and returns nothing.
inline void cuda_print_rusage();

/// Print the device memory stack
/// Takes no argument and returns nothing.
/// NOTE(review): presumably forwards to Memory_stack::print() -- confirm in
/// the definition.
inline void cuda_print_memory_trace();

} // END NAMESPACE Mem_debug ===================================================

#include "memory_debug.inl"

#endif // MEMORY_DEBUG_HPP__
