/*
 * Copyright © 2008 Nokia Corporation
 *
 * Permission to use, copy, modify, distribute and sell this software and its
 * documentation for any purpose is hereby granted without fee, provided that
 * the above copyright notice appear in all copies and that both that
 * copyright notice and this permission notice appear in supporting
 * documentation, and that the names of the authors and/or copyright holders
 * not be used in advertising or publicity pertaining to distribution of the
 * software without specific, written prior permission.  The authors and
 * copyright holders make no representations about the suitability of this
 * software for any purpose.  It is provided "as is" without any express
 * or implied warranty.
 *
 * THE AUTHORS AND COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO
 * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS, IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR
 * ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER
 * RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
 * CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 * Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
 *
 * Portions based on fbpict.c
 */
 
#include "fbarmv6.h"
#include "fb.h"
#include "fbpict.h"

#ifdef USE_ARMV6

#include "fbarmv6_internal.h"

/*
 * Composite a solid source colour through a mask onto the destination
 * picture by delegating to the ARMv6 inner routine.
 *
 * The solid colour is fetched from params->src in the destination's format.
 * The mask is addressed as 8-bit pixels and the destination as 16-bit
 * pixels (0565 according to the function name).  If the solid value is
 * zero the function returns before touching the mask or the destination.
 */
void fbCompositeSolidMask_nx8x0565_armv6 (FbComposeData *params)
{
    CARD32      src;
    CARD16      *dstLine;
    CARD8       *maskLine;
    FbStride    dstStride, maskStride;
    CARD16      w, h;

    fbComposeGetSolid(params->src, src, params->dest->format);

    /* An all-zero solid value: return without doing any work. */
    if (src == 0)
        return;

    fbComposeGetStart (params->dest, params->xDest, params->yDest, CARD16,
                       dstStride, dstLine, 1);
    fbComposeGetStart (params->mask, params->xMask, params->yMask, CARD8,
                       maskStride, maskLine, 1);

    /* Width and height are deliberately narrowed to 16 bits here. */
    h = params->height;
    w = params->width;

    /*
     * The inner routine receives (stride - width) for both surfaces,
     * presumably the number of pixels to skip at the end of each row
     * (confirm against fbarmv6_internal.h).
     */
    fbCompositeSolidMask_nx8x0565_internal_armv6(dstLine, maskLine, src, w,
                                                 dstStride - w, maskStride - w, h);

    fbFinishAccess (params->mask->pDrawable);
    fbFinishAccess (params->dest->pDrawable);
}

/*
 * Wrapper around the ARMv6 routine for the 8000x8000 SrcAdd case.
 *
 * Both the source and the destination pictures are addressed as 8-bit
 * pixels.  The start pointers and strides are resolved here and the pixel
 * work is delegated to fbCompositeSrcAdd_8000x8000_internal_armv6.
 */
void fbCompositeSrcAdd_8000x8000_armv6 (FbComposeData *params)
{
    CARD8       *srcStart;
    CARD8       *dstStart;
    FbStride    srcPitch, dstPitch;
    CARD16      width, height;

    /* Resolve the source first, then the destination. */
    fbComposeGetStart (params->src, params->xSrc, params->ySrc, CARD8,
                       srcPitch, srcStart, 1);
    fbComposeGetStart (params->dest, params->xDest, params->yDest, CARD8,
                       dstPitch, dstStart, 1);

    /* The dimensions are narrowed to 16 bits before being passed on. */
    width  = params->width;
    height = params->height;

    fbCompositeSrcAdd_8000x8000_internal_armv6(dstStart, srcStart, width,
                                               dstPitch, srcPitch, height);

    fbFinishAccess (params->dest->pDrawable);
    fbFinishAccess (params->src->pDrawable);
}

/*
 * Wrapper for the x8r8g8b8 -> r5g6b5 SRC operation.
 *
 * The source is addressed as 32-bit pixels and the destination as 16-bit
 * pixels.  Areas fewer than 7 pixels wide are handled by the "mixed C"
 * routine; everything else goes to the ARMv6 routine.
 */
void fbComposite_x8r8g8b8_src_r5g6b5_armv6 (FbComposeData *params)
{
    CARD32      *srcStart;
    CARD16      *dstStart;
    FbStride    srcPitch, dstPitch;
    CARD16      width, height;

    fbComposeGetStart (params->src, params->xSrc, params->ySrc, CARD32,
                       srcPitch, srcStart, 1);
    fbComposeGetStart (params->dest, params->xDest, params->yDest, CARD16,
                       dstPitch, dstStart, 1);

    /* The dimensions are narrowed to 16 bits before being passed on. */
    width  = params->width;
    height = params->height;

    if (width >= 7)
    {
        fbComposite_x8r8g8b8_src_r5g6b5_internal_armv6(dstStart, srcStart, width,
                                                       dstPitch, srcPitch, height);
    }
    else
    {
        /* Narrow areas use the mixed C variant. */
        fbComposite_x8r8g8b8_src_r5g6b5_internal_mixed_armv6_c(dstStart, srcStart, width,
                                                               dstPitch, srcPitch, height);
    }

    fbFinishAccess (params->dest->pDrawable);
    fbFinishAccess (params->src->pDrawable);
}

#if defined(__ARMEL__) && defined(__ARM_EABI__) && defined(__linux__)

/*
 * Hand-written ARM assembly copy of w bytes from src to dst.
 *
 * The function is "naked": the compiler emits no prologue or epilogue, so
 * the assembly reads its arguments straight from r0 (dst), r1 (src) and
 * r2 (w), saves and restores every callee-saved register it touches, and
 * returns with "bx lr".
 *
 * Layout of the code:
 *   - fewer than 20 bytes: plain byte loop (label 9);
 *   - otherwise the destination is first advanced to a 4-byte boundary and
 *     the code branches on the low two bits of the source address:
 *       * source word-aligned: ldm/stm copy, 32 bytes per iteration;
 *       * source off by 1, 2 or 3 bytes: UNALIGNED_MEMCPY 1/2/3, which
 *         loads aligned words and stitches output words together with
 *         shifts (little-endian byte order, see the enclosing #if).
 *
 * r0 is reloaded from the stack before returning, so it holds the original
 * dst value on exit.
 */
void __attribute__((naked)) fbmemcpy_arm(void *dst, void *src, int w)
{
    asm volatile(
        /*
         * Helper macro for the memcpy function: it copies data from the
         * source (r1) to the destination (r0) buffer, fixing up the source
         * alignment in the process.  The destination buffer must already
         * be aligned (4-byte alignment is required).
         * The size of the block to copy is in the r2 register.
         *
         * "shift" is the source misalignment in bytes (1..3).  ip always
         * holds the most recently loaded source word; each output word is
         * (previous >> shift*8) | (next << (32 - shift*8)).
         */
        ".macro  UNALIGNED_MEMCPY shift\n"
        "        sub     r1, #(\\shift)\n"
        "        ldr     ip, [r1], #4\n"

        /* copy 4 and/or 8 bytes so that the destination is 16-byte aligned */
        "        tst     r0, #4\n"
        "        movne   r3, ip, lsr #(\\shift * 8)\n"
        "        ldrne   ip, [r1], #4\n"
        "        subne   r2, r2, #4\n"
        "        orrne   r3, r3, ip, asl #(32 - \\shift * 8)\n"
        "        strne   r3, [r0], #4\n"

        "        tst     r0, #8\n"
        "        movne   r3, ip, lsr #(\\shift * 8)\n"
        "        ldmneia r1!, {r4, ip}\n"
        "        subne   r2, r2, #8\n"
        "        orrne   r3, r3, r4, asl #(32 - \\shift * 8)\n"
        "        movne   r4, r4, lsr #(\\shift * 8)\n"
        "        orrne   r4, r4, ip, asl #(32 - \\shift * 8)\n"
        "        stmneia r0!, {r3-r4}\n"
        "        cmp     r2, #32\n"
        "        blt     3f\n"
        "        pld     [r1, #48]\n"
        "        stmfd   sp!, {r7, r8, r10, r11}\n"
        /* main loop: 32 bytes per iteration */
        "1:\n"
        "        pld     [r1, #80]\n"
        "        subs    r2, r2, #32\n"
        "        movge   r3, ip, lsr #(\\shift * 8)\n"
        "        ldmgeia r1!, {r4-r6, r7, r8, r10, r11, ip}\n"
        "        orrge   r3, r3, r4, asl #(32 - \\shift * 8)\n"
        "        movge   r4, r4, lsr #(\\shift * 8)\n"
        "        orrge   r4, r4, r5, asl #(32 - \\shift * 8)\n"
        "        movge   r5, r5, lsr #(\\shift * 8)\n"
        "        orrge   r5, r5, r6, asl #(32 - \\shift * 8)\n"
        "        movge   r6, r6, lsr #(\\shift * 8)\n"
        "        orrge   r6, r6, r7, asl #(32 - \\shift * 8)\n"
        "        stmgeia r0!, {r3-r6}\n"
        "        movge   r7, r7, lsr #(\\shift * 8)\n"
        "        orrge   r7, r7, r8, asl #(32 - \\shift * 8)\n"
        "        movge   r8, r8, lsr #(\\shift * 8)\n"
        "        orrge   r8, r8, r10, asl #(32 - \\shift * 8)\n"
        "        movge   r10, r10, lsr #(\\shift * 8)\n"
        "        orrge   r10, r10, r11, asl #(32 - \\shift * 8)\n"
        "        movge   r11, r11, lsr #(\\shift * 8)\n"
        "        orrge   r11, r11, ip, asl #(32 - \\shift * 8)\n"
        "        stmgeia r0!, {r7, r8, r10, r11}\n"
        "        bgt     1b\n"
        "2:\n"
        "        ldmfd   sp!, {r7, r8, r10, r11}\n"
        "3:\n"   /* copy remaining data */
        /*
         * The low five bits of r2 still equal the number of bytes left
         * (subtracting 32 does not disturb them), so each tail chunk is
         * selected by testing a single bit.  Every instruction of a chunk
         * is predicated on the "ne" result of its own tst.
         */
        "        tst     r2, #16\n"
        "        movne   r3, ip, lsr #(\\shift * 8)\n"
        "        ldmneia r1!, {r4-r6, ip}\n"
        "        orrne   r3, r3, r4, asl #(32 - \\shift * 8)\n"
        "        movne   r4, r4, lsr #(\\shift * 8)\n"
        "        orrne   r4, r4, r5, asl #(32 - \\shift * 8)\n"
        "        movne   r5, r5, lsr #(\\shift * 8)\n"
        "        orrne   r5, r5, r6, asl #(32 - \\shift * 8)\n"
        "        movne   r6, r6, lsr #(\\shift * 8)\n"
        "        orrne   r6, r6, ip, asl #(32 - \\shift * 8)\n"
        "        stmneia r0!, {r3-r6}\n"

        "        tst     r2, #8\n"
        "        movne   r3, ip, lsr #(\\shift * 8)\n"
        "        ldmneia r1!, {r4, ip}\n"
        "        orrne   r3, r3, r4, asl #(32 - \\shift * 8)\n"
        "        movne   r4, r4, lsr #(\\shift * 8)\n"
        "        orrne   r4, r4, ip, asl #(32 - \\shift * 8)\n"
        "        stmneia r0!, {r3-r4}\n"

        "        tst     r2, #4\n"
        "        movne   r3, ip, lsr #(\\shift * 8)\n"
        "        ldrne   ip, [r1], #4\n"
        /* undo the word look-ahead: r1 points at the next unread byte again */
        "        sub     r1, r1, #(4 - \\shift)\n"
        "        orrne   r3, r3, ip, asl #(32 - \\shift * 8)\n"
        "        strne   r3, [r0], #4\n"

        /* last 0..3 bytes, interleaved with restoring r5, r6, r0 and r4 */
        "        tst     r2, #2\n"
        "        ldrneb  r3, [r1], #1\n"
        "        ldrneb  r4, [r1], #1\n"
        "        ldr     r5, [sp], #4\n"
        "        strneb  r3, [r0], #1\n"
        "        strneb  r4, [r0], #1\n"

        "        tst     r2, #1\n"
        "        ldrneb  r3, [r1], #1\n"
        "        ldr     r6, [sp], #4\n"
        "        strneb  r3, [r0], #1\n"

        "        ldmfd   sp!, {r0, r4}\n"

        "        bx      lr\n"
        ".endm\n"

        /* function entry: blocks shorter than 20 bytes use the byte loop */
        "    cmp     r2, #20\n"
        "    blt     9f\n"
             /* copy data until destination address is 4 bytes aligned */
             /* (r0/r4 and r5/r6 are pushed in between the copy steps) */
        "    tst     r0, #1\n"
        "    ldrneb  r3, [r1], #1\n"
        "    stmfd   sp!, {r0, r4}\n"
        "    subne   r2, r2, #1\n"
        "    strneb  r3, [r0], #1\n"
        "    tst     r0, #2\n"
        "    ldrneb  r3, [r1], #1\n"
        "    ldrneb  r4, [r1], #1\n"
        "    stmfd   sp!, {r5, r6}\n"
        "    subne   r2, r2, #2\n"
        "    orrne   r3, r3, r4, asl #8\n"
        "    strneh  r3, [r0], #2\n"
             /* destination address is 4 bytes aligned */
             /* now we should handle 4 cases of source address alignment */
        "    tst     r1, #1\n"
        "    bne     6f\n"
        "    tst     r1, #2\n"
        "    bne     7f\n"

             /* both source and destination are 4 bytes aligned */
        "    stmfd   sp!, {r7, r8, r10, r11}\n"
        "    tst     r0, #4\n"
        "    ldrne   r4, [r1], #4\n"
        "    subne   r2, r2, #4\n"
        "    strne   r4, [r0], #4\n"
        "    tst     r0, #8\n"
        "    ldmneia r1!, {r3-r4}\n"
        "    subne   r2, r2, #8\n"
        "    stmneia r0!, {r3-r4}\n"
    "1:\n"
        "    subs    r2, r2, #32\n"
        "    ldmgeia r1!, {r3-r6, r7, r8, r10, r11}\n"
        "    pld     [r1, #48]\n"
        "    stmgeia r0!, {r3-r6}\n"
        "    stmgeia r0!, {r7, r8, r10, r11}\n"
        "    bgt     1b\n"
    "2:\n"
        "    ldmfd   sp!, {r7, r8, r10, r11}\n"
             /* tail: the low five bits of r2 select the 16/8/4/2/1 chunks */
        "    tst     r2, #16\n"
        "    ldmneia r1!, {r3-r6}\n"
        "    stmneia r0!, {r3-r6}\n"
        "    tst     r2, #8\n"
        "    ldmneia r1!, {r3-r4}\n"
        "    stmneia r0!, {r3-r4}\n"
        "    tst     r2, #4\n"
        "    ldrne   r3, [r1], #4\n"
             /* continue through ip so that r0 can be restored below */
        "    mov     ip, r0\n"
        "    strne   r3, [ip], #4\n"
        "    tst     r2, #2\n"
        "    ldrneh  r3, [r1], #2\n"
        "    ldmfd   sp!, {r5, r6}\n"
        "    strneh  r3, [ip], #2\n"
        "    tst     r2, #1\n"
        "    ldrneb  r3, [r1], #1\n"
        "    ldmfd   sp!, {r0, r4}\n"
        "    strneb  r3, [ip], #1\n"

        "    bx      lr\n"

             /* source misaligned: odd address -> shift 1 or 3, else shift 2 */
    "6:\n"
        "    tst    r1, #2\n"
        "    bne    8f\n"
        "    UNALIGNED_MEMCPY 1\n"
    "7:\n"
        "    UNALIGNED_MEMCPY 2\n"
    "8:\n"
        "    UNALIGNED_MEMCPY 3\n"
             /* short copy: three bytes per iteration, then 0..2 leftovers */
    "9:\n"
        "    stmfd  sp!, {r0, r4}\n"
    "1:      subs   r2, r2, #3\n"
             /* the value read from [r0] is discarded (ip is reloaded below) */
        "    ldrgeb ip, [r0]\n"
        "    ldrgeb r3, [r1], #1\n"
        "    ldrgeb r4, [r1], #1\n"
        "    ldrgeb ip, [r1], #1\n"
        "    strgeb r3, [r0], #1\n"
        "    strgeb r4, [r0], #1\n"
        "    strgeb ip, [r0], #1\n"
        "    bge    1b\n"
             /* r2 is now -3..-1; after adding 2: ge -> one byte, gt -> two */
        "    adds   r2, r2, #2\n"
        "    ldrgeb r3, [r1], #1\n"
        "    mov    ip, r0\n"
        "    ldr    r0, [sp], #4\n"
        "    strgeb r3, [ip], #1\n"
        "    ldrgtb r3, [r1], #1\n"
        "    ldr    r4, [sp], #4\n"
        "    strgtb r3, [ip], #1\n"
        "    bx     lr\n"
    );
}

#else

/*
 * Portable fallback for fbmemcpy_arm, used when the ARM assembly variant
 * is not compiled in.  Copies w bytes from src to dst with memcpy.
 *
 * The count is a signed int.  A non-positive w copies nothing; without
 * this guard a negative value would be converted to a huge size_t.
 * The regions must not overlap (memcpy semantics).
 */
void fbmemcpy_arm(void *dst, void *src, int w)
{
    if (w <= 0)
        return;

    memcpy(dst, src, (size_t) w);
}

#endif

#endif
