/*
* Copyright (c) 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files
* (the "Software"), to deal in the Software without restriction,
* including without limitation the rights to use, copy, modify, merge,
* publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

#include <trace.h>
#include <arch/x86.h>
#include <arch/fpu.h>
#include <string.h>
#include <kernel/thread.h>

#define LOCAL_TRACE 0

#if X86_WITH_FPU

/* Non-zero: fpu_init() masks every x87/SSE exception; zero: it unmasks them. */
#define FPU_MASK_ALL_EXCEPTIONS 1

/* Feature bits returned in ECX and EDX by CPUID with EAX = 1 */

#define ECX_SSE3    (0x00000001u << 0)
#define ECX_SSSE3   (0x00000001u << 9)
#define ECX_SSE4_1  (0x00000001u << 19)
#define ECX_SSE4_2  (0x00000001u << 20)
#define EDX_FXSR    (0x00000001u << 24)
#define EDX_SSE     (0x00000001u << 25)
#define EDX_SSE2    (0x00000001u << 26)
#define EDX_FPU     (0x00000001u << 0)

/*
 * Capability predicates over the (ecx, edx) pair read by CPUID leaf 1.
 * Every argument is parenthesized in the expansion so that callers may
 * pass arbitrary expressions (e.g. "a | b") without precedence surprises.
 */

/* True when the x87 FPU bit is set in edx (ecx is accepted but not used). */
#define FPU_CAP(ecx, edx) (((edx) & EDX_FPU) != 0)

/* True when at least one of the SSE-family bits listed above is set. */
#define SSE_CAP(ecx, edx) ( \
    (((ecx) & (ECX_SSE3 | ECX_SSSE3 | ECX_SSE4_1 | ECX_SSE4_2)) != 0) || \
    (((edx) & (EDX_SSE | EDX_SSE2)) != 0) \
    )

/* True when the FXSAVE/FXRSTOR bit is set in edx (ecx is accepted but not used). */
#define FXSAVE_CAP(ecx, edx) (((edx) & EDX_FXSR) != 0)

/* Set to 1 by fpu_init() once CPUID reports x87, SSE and FXSAVE support; 0 otherwise. */
static int fp_supported;
/* Thread recorded as FPU owner by fpu_dev_na_handler(); NULL when there is none. */
static thread_t *fp_owner;

/*
 * Template FXSAVE image: written once by fpu_init() and copied into each
 * thread's save area by fpu_init_thread_states().  An FXSAVE area is
 * 512 bytes long and must start on a 16-byte boundary.
 */
static uint8_t __ALIGNED(16) fpu_init_states[512]= {0};

/*
 * Execute CPUID leaf 1 and hand back the feature words.
 *
 * @param ecx  receives the ECX feature bits
 * @param edx  receives the EDX feature bits
 *
 * CPUID overwrites EAX, EBX, ECX and EDX.  All four registers are declared
 * as outputs so the compiler never assumes that EAX or EBX still hold
 * their previous values after the instruction.
 */
static void get_cpu_cap(uint32_t *ecx, uint32_t *edx)
{
    uint32_t eax = 1;   /* leaf selector on entry, overwritten on return */
    uint32_t ebx;       /* written by CPUID, not needed here */

    __asm__ __volatile__
    ("cpuid"
     : "+a" (eax), "=b" (ebx), "=c" (*ecx), "=d" (*edx));

    (void)eax;
    (void)ebx;
}

/*
 * One-time FPU/SSE bring-up.
 *
 * Resets fp_supported and fp_owner, then queries CPUID leaf 1.  If the
 * x87, SSE or FXSAVE capability is missing the function returns early and
 * fp_supported stays 0.  Otherwise it:
 *   1. adjusts CR0 (EM cleared, NE and MP set),
 *   2. initialises the x87 unit and rewrites its control word,
 *   3. adjusts CR4 (OSXMMEXPT and OSFXSR set, OSXSAVE cleared),
 *   4. rewrites MXCSR,
 *   5. captures the resulting register state into fpu_init_states,
 *   6. sets CR0.TS.
 */
void fpu_init(void)
{
    uint32_t ecx = 0, edx = 0;
    uint16_t fcw;
    uint32_t mxcsr;

    /* Scratch for control-register values; width depends on the build target. */
#ifdef ARCH_X86_64
    uint64_t x;
#else
    uint32_t x;
#endif

    fp_supported = 0;
    fp_owner = NULL;

    get_cpu_cap(&ecx, &edx);

    /* All three capabilities are required; otherwise leave the FPU untouched. */
    if (!FPU_CAP(ecx, edx) || !SSE_CAP(ecx, edx) || !FXSAVE_CAP(ecx, edx))
        return;

    fp_supported = 1;

    /* No x87 emul, monitor co-processor */

    x = x86_get_cr0();
    x &= ~X86_CR0_EM;
    x |= X86_CR0_NE;
    x |= X86_CR0_MP;
    x86_set_cr0(x);

    /* Init x87 */
    __asm__ __volatile__ ("finit");
    /* Read the control word left by finit, adjust its low six bits, write it back. */
    __asm__ __volatile__("fstcw %0" : "=m" (fcw));
#if FPU_MASK_ALL_EXCEPTIONS
    /* mask all exceptions */
    fcw |= 0x3f;
#else
    /* unmask all exceptions */
    fcw &= 0xffc0;
#endif
    __asm__ __volatile__("fldcw %0" : : "m" (fcw));

    /* Init SSE */
    x = x86_get_cr4();
    x |= X86_CR4_OSXMMEXPT;
    x |= X86_CR4_OSFXSR;
    x &= ~X86_CR4_OSXSAVE;
    x86_set_cr4(x);

    __asm__ __volatile__("stmxcsr %0" : "=m" (mxcsr));
#if FPU_MASK_ALL_EXCEPTIONS
    /* mask all exceptions */
    /* The value just read is discarded: only bits 7..12 end up set. */
    mxcsr = (0x3f << 7);
#else
    /* unmask all exceptions */
    /* NOTE(review): this keeps only bits 0..5 and clears every other field too. */
    mxcsr &= 0x0000003f;
#endif
    __asm__ __volatile__("ldmxcsr %0" : : "m" (mxcsr));

    /* Save the initial FPU state; it is copied into each new thread's save area */
    __asm__ __volatile__("fxsave %0" : "=m" (fpu_init_states));

    /* Leave CR0.TS set; fpu_dev_na_handler() is the code that clears it again. */
    x86_set_cr0(x86_get_cr0() | X86_CR0_TS);
    return;
}

/*
 * Prepare the FPU save area of thread t.
 *
 * Points t->arch.fpu_states at the first 16-byte aligned address inside
 * t->arch.fpu_buffer and fills it with the template image held in
 * fpu_init_states.
 */
void fpu_init_thread_states(thread_t *t)
{
    vaddr_t aligned_base = round_up((vaddr_t)t->arch.fpu_buffer, 16);

    t->arch.fpu_states = (vaddr_t *)aligned_base;
    memcpy(t->arch.fpu_states, fpu_init_states, sizeof fpu_init_states);
}

/*
 * FPU bookkeeping for a thread switch.  Does nothing when fp_supported is 0.
 *
 * CR0.TS is cleared when new_thread is the recorded FPU owner and set
 * otherwise.  Afterwards, if old_thread was the owner and is in state
 * THREAD_DEATH, the ownership record is dropped.
 */
void fpu_context_switch(thread_t *old_thread, thread_t *new_thread)
{
    if (!fp_supported)
        return;

    /* Only the owner may run with TS clear; everyone else gets TS set. */
    x86_set_cr0((new_thread == fp_owner)
                ? (x86_get_cr0() & ~X86_CR0_TS)
                : (x86_get_cr0() | X86_CR0_TS));

    if (old_thread != fp_owner)
        return;

    if (old_thread->state != THREAD_DEATH)
        return;

    /* The outgoing owner is dead, so forget it. */
    LTRACEF("dead fp_owner thread\n");
    fp_owner = NULL;
}

/*
 * Device-not-available handler: gives the FPU/SSE registers to the
 * current thread.
 *
 * CR0.TS is cleared unconditionally.  When FPU support was detected and
 * the current thread is not already the recorded owner:
 *   - the previous owner's registers (if there is an owner) are written
 *     to its save area with FXSAVE;
 *   - the current thread's save area is loaded with FXRSTOR.
 * The current thread is then recorded as owner.
 *
 * The restore must also happen when there is no previous owner
 * (fpu_context_switch() sets fp_owner to NULL once the owner dies);
 * otherwise the current thread would continue with whatever the dead
 * thread left in the registers instead of its own saved image.
 */
void fpu_dev_na_handler(void)
{
    thread_t *self;

    /* Clear TS first so the FXSAVE/FXRSTOR below cannot trap again. */
    x86_set_cr0(x86_get_cr0() & ~X86_CR0_TS);

    if (fp_supported == 0)
        return;

    self = get_current_thread();

    LTRACEF("owner %p self %p\n", fp_owner, self);
    if (fp_owner != self) {
        /*
         * The memory operand only names the first word of the save area;
         * the "memory" clobber tells the compiler the whole area is
         * written (FXSAVE) or read (FXRSTOR).
         */
        if (fp_owner != NULL) {
            __asm__ __volatile__("fxsave %0"
                                 : "=m" (*fp_owner->arch.fpu_states)
                                 :
                                 : "memory");
        }

        /*
         * NOTE(review): the NULL check covers a thread whose save area was
         * never set up by fpu_init_thread_states() - confirm whether such
         * threads exist.
         */
        if (self->arch.fpu_states != NULL) {
            __asm__ __volatile__("fxrstor %0"
                                 :
                                 : "m" (*self->arch.fpu_states)
                                 : "memory");
        }
    }

    fp_owner = self;
}
#endif

/* End of file */
