/*
 * This software is governed by the CeCILL-B license under French law and
 * abiding by the rules of distribution of free software.  You can  use, 
 * modify and/ or redistribute the software under the terms of the CeCILL-B
 * license as circulated by CEA, CNRS and INRIA at the following URL
 * "http://www.cecill.info" or the LICENCE.txt file present in this project.
*/

#ifdef NDEBUG

namespace Cuda_utils {

/// Stub compiled when NDEBUG is defined: the self-tests are skipped and this
/// function does nothing, so callers can invoke it unconditionally.
void unit_tests()
{
}

} // namespace Cuda_utils
#else

#include <iostream>
#include <vector>

#include "transfo.hpp"
#include "vec3_cu.hpp"
#include <cuda.h>
#include <cuda_runtime.h>
#include <cuda_runtime_api.h>

#include "cuda_utils/cuda_utils.hpp"

// =============================================================================
namespace Cuda_utils {
// =============================================================================

using namespace Device;

// -----------------------------------------------------------------------------

/// Fills the array `a` with pseudo-random values.
///
/// Each element is computed as `rand() * (1000 / RAND_MAX) + 1` and converted
/// to `Real`. The values are first written into a host-side array of the same
/// size and then handed to `a.copy_from()`.
///
/// @param a  array to overwrite; its current size decides how many values
///           are generated.
template<typename Real>
void rand_init_reals(Cuda_utils::Device::Array<Real>& a)
{
    // Scale factor applied to the raw rand() output.
    const float scale = 1000.f / RAND_MAX;

    Cuda_utils::Host::Array<Real> host_values( a.size() );

    int idx = 0;
    while( idx < a.size() )
    {
        const int sample = rand();
        // The "+ 1" offset keeps every generated value at or above 1.
        host_values[idx] = static_cast<Real>(sample) * scale + 1;
        ++idx;
    }

    a.copy_from( host_values );
}

// -----------------------------------------------------------------------------

/// Fills the boolean array `a` with pseudo-random flags.
///
/// An element is `true` when a fresh `rand()` draw is strictly greater than
/// `RAND_MAX / 2`. Values are staged in a host-side array of the same size and
/// then handed to `a.copy_from()`.
///
/// @param a  array to overwrite; its current size decides how many flags
///           are generated.
void rand_init_bools(Cuda_utils::Device::Array<bool>& a)
{
    const int threshold = RAND_MAX / 2;

    Cuda_utils::Host::Array<bool> host_flags( a.size() );

    int idx = 0;
    while( idx < a.size() )
    {
        const int sample = rand();
        host_flags[idx] = threshold < sample;
        ++idx;
    }

    a.copy_from( host_flags );
}

// -----------------------------------------------------------------------------

/// Fills the array `a` with values produced by `T::random(1.f)`.
///
/// One call to `T::random(1.f)` is made per element; the results are staged in
/// a host-side array of the same size and then handed to `a.copy_from()`.
///
/// @tparam T  element type; must expose a static `random(float)` function
///            whose result is assignable to `T`.
/// @param a   array to overwrite; its current size decides how many elements
///            are generated.
template <typename T>
void rand_init_vecs(Cuda_utils::Device::Array<T>& a)
{
    Cuda_utils::Host::Array<T> host_vecs( a.size() );

    int idx = 0;
    while( idx < a.size() )
    {
        host_vecs[idx] = T::random(1.f);
        ++idx;
    }

    a.copy_from( host_vecs );
}

// -----------------------------------------------------------------------------

/// Prints the verdict of a test that tolerates small CPU/GPU deviations.
///
/// - `acc <= 0`: the test is reported as succeeded.
/// - otherwise the mean deviation `mean_error / acc` is computed; when it is
///   strictly below 0.00001 the test is reported as "considered successful"
///   together with the number of discrepancies and the mean deviation,
///   else it is reported as failed and `assert(false)` is triggered.
///
/// @param acc         number of elements whose CPU and GPU results differed.
/// @param mean_error  accumulated deviation over those elements (it is
///                    divided by `acc` here to obtain the mean).
/// @param test_name   label printed in front of the verdict.
void noise_tolerant_error(int acc, float mean_error, const std::string& test_name)
{
    // No discrepancy at all: nothing to average, plain success.
    if( acc <= 0 )
    {
        std::cout << test_name+" SUCCEED" << std::endl;
        return;
    }

    const float mean_deviation = mean_error / static_cast<float>(acc);

    if( !(mean_deviation < 0.00001f) )
    {
        // Deviation too large (or not a number): hard failure.
        std::cout << test_name+" FAILED" << std::endl;
        std::cout << "The mean deviation between CPU/GPU equals: ";
        std::cout << mean_deviation << std::endl;
        assert(false);
        return;
    }

    // Differences exist but stay within the accepted noise level.
    std::cout << test_name+" CONSIDERED SUCCESSFUL" << std::endl;
    std::cout << "However: " << acc;
    std::cout << " discrepancy were detected. ";
    std::cout << "\nThe mean deviation between CPU/GPU equals: ";
    std::cout << mean_deviation << std::endl;
}

// -----------------------------------------------------------------------------

/// User-defined element-wise operation returning its argument plus one.
/// It is plugged into the expression machinery through `map<MyIncr>::to(...)`
/// in unit_tests().
struct MyIncr {
    /// Result type of map().
    typedef int value_type;

    /// @return `val` incremented by one.
    IF_CUDA_DEVICE_HOST
    static value_type map(int val)
    {
        return 1 + val;
    }
};

// -----------------------------------------------------------------------------

/// Checks one element of an exact-comparison test and aborts on mismatch.
///
/// On mismatch the test label, the element index and both values are written
/// to std::cerr before `assert(false)` is triggered.
///
/// @param test_name   label of the running test (e.g. "TEST1").
/// @param index       index of the element being compared.
/// @param expr_value  value fetched from the array expression result.
/// @param cpu_value   reference value recomputed element by element.
template<typename T>
static void unit_test_expect_equal(const char* test_name,
                                   int index,
                                   T expr_value,
                                   T cpu_value)
{
    if( expr_value != cpu_value ) {
        std::cerr << test_name << " FAILED at index " << index
                  << ": expr=" << expr_value
                  << " cpu=" << cpu_value << std::endl;
        assert(false);
    }
}

/// Self-test of the array expression operators.
///
/// Arrays of 1000 elements are filled with pseudo-random data, then each test
/// assigns an array expression to a result array and compares every element
/// with the same formula recomputed from values read back with `fetch(i)`.
/// Tests 1-4, 6 and 7 require exact equality and abort through `assert` on the
/// first mismatch; tests 5, 8 and 9 accumulate the deviations and delegate the
/// verdict to noise_tolerant_error().
///
/// NOTE(review): the order of the rand_init_* calls fixes which rand() draws
/// each array receives; keep it stable if reproducible inputs matter.
void unit_tests()
{
    const int size = 1000;
    Device::Array<float>   scale1(size);
    Device::Array<float>   scale2(size);
    Device::Array<float>   scale3(size);
    Device::Array<int>     scale4(size);
    Device::Array<int>     scale5(size);
    Device::Array<bool>    bool1 (size);
    Device::Array<Vec3_cu> vec3_1(size);
    Device::Array<Vec3_cu> vec3_2(size);
    Device::Array<Vec2_cu> vec2_1(size);
    Device::Array<Vec2_cu> vec2_2(size);
    Device::Array<int>     d_resi(size);
    Device::Array<float>   d_resf(size);
    // rand_init_reals() only generates values >= 1, so scale1 and scale4 are
    // safe to use as divisor / modulus below.
    rand_init_reals( scale1 );
    rand_init_reals( scale2 );
    rand_init_reals( scale3 );
    rand_init_reals( scale4 );
    rand_init_reals( scale5 );
    rand_init_bools( bool1  );
    rand_init_vecs ( vec3_1 );
    rand_init_vecs ( vec3_2 );
    rand_init_vecs ( vec2_1 );
    rand_init_vecs ( vec2_2 );

    ////////
    //TEST 1: floats | bin_ops -> '*'  '/' | scale_op '3.f'
    d_resf = scale3 * (scale2 * 3.f / scale1);
    for(int i = 0; i < size; ++i) {
        float res1 = scale3.fetch(i) * (scale2.fetch(i) * 3.f / scale1.fetch(i));
        float res2 = d_resf.fetch(i);
        unit_test_expect_equal("TEST1", i, res2, res1);
    }
    std::cout << "TEST1 SUCCEED" << std::endl;


    ////////
    //TEST 2: int | bin_op '-' '%' '+' | scale_op '2'
    d_resi = (scale4 - scale5 + scale5 * 2) % scale4;
    for(int i = 0; i < size; ++i) {
        int res1 = (scale4.fetch(i) - scale5.fetch(i) + scale5.fetch(i) *2) % scale4.fetch(i);
        int res2 = d_resi.fetch(i);
        unit_test_expect_equal("TEST2", i, res2, res1);
    }
    std::cout << "TEST2 SUCCEED" << std::endl;

    ////////
    //TEST 3: int expression converted with cast_to<float>
    d_resf = cast_to<float>::expr( (scale4 + 1 - scale5 - 2) % scale4 );

    for(int i = 0; i < size; ++i) {
        float res1 = float((scale4.fetch(i) + 1 - scale5.fetch(i) - 2) % scale4.fetch(i));
        float res2 = float(d_resf.fetch(i));
        unit_test_expect_equal("TEST3", i, res2, res1);
    }
    std::cout << "TEST3 SUCCEED" << std::endl;

    ////////
    //TEST 4: user-defined map (MyIncr) combined with incr_expr
    d_resi = map<MyIncr>::to( (scale4 - incr_expr(scale5*2)) % scale4 );

    for(int i = 0; i < size; ++i) {
        int res1 = 1 + ((scale4.fetch(i) - (1+(scale5.fetch(i)*2))) % scale4.fetch(i));
        int res2 = d_resi.fetch(i);
        unit_test_expect_equal("TEST4", i, res2, res1);
    }
    std::cout << "TEST4 SUCCEED" << std::endl;

    ////////
    //TEST 5: one arg maps: sin cos tan (compared with a noise tolerance)
    d_resf = sin_expr(scale1 * -2.f) + cos_expr(scale2) - tan_expr(scale3);

    int acc = 0;             // number of elements that differ
    float mean_error = 0.f;  // sum of |cpu - expr| over those elements
    for(int i = 0; i < size; ++i) {
        float res1 = sin( scale1.fetch(i) * -2.f ) + cos(scale2.fetch(i)) - tan(scale3.fetch(i));
        float res2 = d_resf.fetch(i);

        if( res2 != res1 ) {
            acc++;
            mean_error += std::abs(res1 - res2);
        }
    }

    noise_tolerant_error(acc, mean_error, "TEST5");


    ////////
    //TEST 6 three args map: clamp
    d_resf = clamp_expr(scale1, 1.f, 20.f);

    for(int i = 0; i < size; ++i) {
        float res1 = details::clamp( scale1.fetch(i), 1.f, 20.f);
        float res2 = d_resf.fetch(i);
        unit_test_expect_equal("TEST6", i, res2, res1);
    }
    std::cout << "TEST6 SUCCEED" << std::endl;

    ////////
    //TEST 7 ternary selection: bool ? expr : expr
    d_resf = ternary_expr(bool1, scale1 + scale3, scale2);

    for(int i = 0; i < size; ++i) {
        float res1 = bool1.fetch(i) ? scale1.fetch(i) + scale3.fetch(i) : scale2.fetch(i);
        float res2 = d_resf.fetch(i);
        unit_test_expect_equal("TEST7", i, res2, res1);
    }
    std::cout << "TEST7 SUCCEED" << std::endl;

    ////////
    //TEST 8 one arg fun: norm norm_squared
    d_resf = norm_squared_expr( vec2_1 * norm_expr( vec3_1 * -2.f ) );

    acc = 0;
    mean_error = 0.f;
    for(int i = 0; i < size; ++i) {
        float res1 = details::norm_squared( vec2_1.fetch(i) * details::norm( vec3_1.fetch(i) * -2.f ) );
        float res2 = d_resf.fetch(i);

        if( res2 != res1 ) {
            acc++;
            mean_error += std::abs(res1 - res2);
        }
    }
    noise_tolerant_error(acc, mean_error, "TEST8");

    ////////
    //TEST 9 bin op fun: dot cross
    d_resf = dot_expr( cross_expr(vec3_1, vec3_2), vec3_2);

    acc = 0;
    mean_error = 0.f;
    for(int i = 0; i < size; ++i) {
        float res1 = details::dot( details::cross(vec3_1.fetch(i), vec3_2.fetch(i)), vec3_2.fetch(i));
        float res2 = d_resf.fetch(i);

        if( res2 != res1 ) {
            acc++;
            mean_error += std::abs(res1 - res2);
        }
    }
    noise_tolerant_error(acc, mean_error, "TEST9");
}

}// END NAMESPACE CUDA_UTILS ===================================================

#endif
