pikapython/package/Arm2D/__arm_2d_utils_helium.h

/*
 * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* ----------------------------------------------------------------------
 * Project:      Arm-2D Library
 * Title:        arm-2d_utils_helium.h
 * Description:  Provides helium utility routines
 *
 * $Date:        20. May 2021
 * $Revision:    V 0.0.1
 *
 * Target Processor:  Cortex-M cores with Helium
 *
 * -------------------------------------------------------------------- */


#ifndef __ARM_2D_UTILS_HELIUM_H__
#define __ARM_2D_UTILS_HELIUM_H__


#if __ARM_2D_HAS_HELIUM_INTEGER__ == 1

/*============================ INCLUDES ======================================*/
#include "arm_2d.h"
#include <arm_math.h>

#ifdef   __cplusplus
extern "C" {
#endif

/*============================ MACROS ========================================*/
/*============================ MACROFIED FUNCTIONS ===========================*/

/*
 * Split eight packed 16-bit pixels into three per-field vectors.
 *
 * For every lane of `in`:
 *   - bits [4:0]   are written to *R, multiplied by 8 (result 0..248)
 *   - bits [10:5]  are written to *G, multiplied by 4 (result 0..252)
 *   - bits [15:11] are written to *B, multiplied by 8 (result 0..248)
 *
 * in       vector of eight packed 16-bit pixels
 * R, G, B  destinations receiving the scaled fields
 */
__STATIC_FORCEINLINE
void __arm_2d_rgb565_unpack_single_vec(uint16x8_t in,
                                            uint16x8_t * R, uint16x8_t * G, uint16x8_t * B)
{
    const uint16x8_t    vecFiveBits = vdupq_n_u16(0x001f);
    const uint16x8_t    vecSixBits  = vdupq_n_u16(0x003f);

    /* stores are issued in the order R, B, G */
    *R = vmulq_n_u16(vandq_u16(in, vecFiveBits), 8);
    *B = vmulq_n_u16(vshrq_n_u16(in, 11), 8);
    *G = vmulq_n_u16(vandq_u16(vshrq_n_u16(in, 5), vecSixBits), 4);
}


/*
 * Combine three per-field vectors into eight packed 16-bit pixels.
 *
 * For every lane the result is
 *     (R >> 3) | ((G & 0xfc) * 8) | ((B & 0xf8) * 256)
 * i.e. R lands in bits [4:0], G in bits [10:5] and B in bits [15:11] when
 * the inputs are 8-bit values. R is shifted without being masked first, so
 * any bit of R above bit 7 would spill into the upper fields.
 *
 * R, G, B  per-field input vectors
 * returns  vector of eight packed 16-bit pixels
 */
__STATIC_FORCEINLINE
uint16x8_t __arm_2d_rgb565_pack_single_vec(uint16x8_t R, uint16x8_t G, uint16x8_t B)
{
    const uint16x8_t    vecKeepTop6 = vdupq_n_u16(0x00fc);
    const uint16x8_t    vecKeepTop5 = vdupq_n_u16(0x00f8);

    uint16x8_t          vecLowField  = vshrq_n_u16(R, 3);
    uint16x8_t          vecMidField  = vmulq_n_u16(vandq_u16(G, vecKeepTop6), 8);
    uint16x8_t          vecHighField = vmulq_n_u16(vandq_u16(B, vecKeepTop5), 256);

    return vorrq_u16(vorrq_u16(vecLowField, vecMidField), vecHighField);
}

/*
 * Blend two vectors of eight packed 16-bit pixels with a scalar ratio.
 *
 * Every field of hwSource1 is weighted by (256 - chRatio) and the matching
 * field of hwSource2 by chRatio. The weights carry an extra x8 (5-bit
 * fields) or x4 (6-bit field) factor so that, after the >> 8, each blended
 * field is an 8-bit quantity that is then re-packed to 5/6/5 bits.
 * For chRatio in 0..255 every weighted sum fits in a 16-bit lane.
 *
 * hwSource1  first vector of packed pixels
 * hwSource2  second vector of packed pixels
 * chRatio    blending ratio applied to hwSource2
 * returns    vector of eight blended packed pixels
 */
__STATIC_FORCEINLINE
uint16x8_t __arm_2d_rgb565_alpha_blending_single_vec(
                                            uint16x8_t      hwSource1,
                                            uint16x8_t      hwSource2,
                                            uint_fast8_t    chRatio)
{
    /* scalar weights for the two inputs, pre-scaled per field width */
    uint16_t            hwWeight1x8 = (256 - chRatio) * 8;
    uint16_t            hwWeight1x4 = (256 - chRatio) * 4;
    uint16_t            hwWeight2x8 = (chRatio) * 8;
    uint16_t            hwWeight2x4 = (chRatio) * 4;

    const uint16x8_t    vecFiveBits = vdupq_n_u16(0x001f);
    const uint16x8_t    vecSixBits  = vdupq_n_u16(0x003f);
    const uint16x8_t    vecKeepTop5 = vdupq_n_u16(0x00f8);
    const uint16x8_t    vecKeepTop6 = vdupq_n_u16(0x00fc);

    /* bit fields of the first input: [4:0], [10:5], [15:11] */
    uint16x8_t          vecLow1  = vandq_u16(hwSource1, vecFiveBits);
    uint16x8_t          vecMid1  = vandq_u16(vshrq_n_u16(hwSource1, 5), vecSixBits);
    uint16x8_t          vecHigh1 = vshrq_n_u16(hwSource1, 11);

    /* bit fields of the second input */
    uint16x8_t          vecLow2  = vandq_u16(hwSource2, vecFiveBits);
    uint16x8_t          vecMid2  = vandq_u16(vshrq_n_u16(hwSource2, 5), vecSixBits);
    uint16x8_t          vecHigh2 = vshrq_n_u16(hwSource2, 11);

    /* weighted sum per field, then drop the 8 fractional bits */
    uint16x8_t          vecLow  = vmlaq_n_u16(vmulq_n_u16(vecLow1, hwWeight1x8),
                                              vecLow2, hwWeight2x8);
    vecLow = vshrq_n_u16(vecLow, 8);

    uint16x8_t          vecMid  = vmlaq_n_u16(vmulq_n_u16(vecMid1, hwWeight1x4),
                                              vecMid2, hwWeight2x4);
    vecMid = vshrq_n_u16(vecMid, 8);

    uint16x8_t          vecHigh = vmlaq_n_u16(vmulq_n_u16(vecHigh1, hwWeight1x8),
                                              vecHigh2, hwWeight2x8);
    vecHigh = vshrq_n_u16(vecHigh, 8);

    /* re-pack the three 8-bit fields into 5/6/5 bits */
    vecLow  = vshrq_n_u16(vecLow, 3);
    vecMid  = vmulq_n_u16(vandq_u16(vecMid, vecKeepTop6), 8);
    vecHigh = vmulq_n_u16(vandq_u16(vecHigh, vecKeepTop5), 256);

    return vorrq_u16(vorrq_u16(vecLow, vecMid), vecHigh);
}


/*
 * Blend two vectors of eight packed 16-bit pixels with a per-lane opacity.
 *
 * Both inputs are unpacked with __arm_2d_rgb565_unpack_single_vec(); each
 * field of hwSource1 is weighted by vecHwOpacity and the matching field of
 * hwSource2 by (256 - vecHwOpacity). The sums are shifted right by 8 and
 * re-packed with __arm_2d_rgb565_pack_single_vec().
 *
 * hwSource1     first vector of packed pixels (weighted by the opacity)
 * hwSource2     second vector of packed pixels (weighted by 256 - opacity)
 * vecHwOpacity  per-lane opacity
 * returns       vector of eight blended packed pixels
 */
__STATIC_FORCEINLINE
uint16x8_t __arm_2d_rgb565_blending_opacity_single_vec(
                                            uint16x8_t      hwSource1,
                                            uint16x8_t      hwSource2,
                                            uint16x8_t      vecHwOpacity)
{
    /* complementary weight applied to the second input */
    uint16x8_t      vecComplement = vsubq_u16(vdupq_n_u16(256), vecHwOpacity);
    uint16x8_t      vecR1, vecG1, vecB1;
    uint16x8_t      vecR2, vecG2, vecB2;

    __arm_2d_rgb565_unpack_single_vec(hwSource1, &vecR1, &vecG1, &vecB1);
    __arm_2d_rgb565_unpack_single_vec(hwSource2, &vecR2, &vecG2, &vecB2);

    /* per-field weighted sum followed by removal of the 8 fractional bits */
    uint16x8_t      vecOutR = vshrq_n_u16(
                                vaddq_u16(vmulq_u16(vecR1, vecHwOpacity),
                                          vmulq_u16(vecR2, vecComplement)), 8);

    uint16x8_t      vecOutG = vshrq_n_u16(
                                vaddq_u16(vmulq_u16(vecG1, vecHwOpacity),
                                          vmulq_u16(vecG2, vecComplement)), 8);

    uint16x8_t      vecOutB = vshrq_n_u16(
                                vaddq_u16(vmulq_u16(vecB1, vecHwOpacity),
                                          vmulq_u16(vecB2, vecComplement)), 8);

    return __arm_2d_rgb565_pack_single_vec(vecOutR, vecOutG, vecOutB);
}


/*
 * Blend two vectors of widened byte values with a scalar ratio.
 *
 * Every lane is computed as
 *     (wSource1 * chRatio + wSource2 * (256 - chRatio)) >> 8
 * in 16-bit arithmetic.
 *
 * wSource1  first input, bytes widened to 16-bit lanes (weighted by chRatio)
 * wSource2  second input, bytes widened to 16-bit lanes
 *           (weighted by 256 - chRatio)
 * chRatio   blending ratio
 * returns   blended values, still widened to 16-bit lanes
 */
__STATIC_FORCEINLINE
uint16x8_t __rgb888_alpha_blending_direct_single_vec(
                                        uint16x8_t      wSource1,  /* widened input bytes */
                                        uint16x8_t      wSource2,  /* widened input bytes */
                                        uint_fast8_t    chRatio)
{
    const uint16_t  hwWeight1 = (uint16_t) chRatio;
    const uint16_t  hwWeight2 = 256 - (uint16_t) chRatio;

    uint16x8_t      vecSum = wSource1 * hwWeight1 + wSource2 * hwWeight2;

    /* result stays widened: one value per 16-bit lane */
    return vecSum >> 8;
}


/*============================ TYPES =========================================*/
/*============================ GLOBAL VARIABLES ==============================*/
/*============================ PROTOTYPES ====================================*/

#ifdef   __cplusplus
}
#endif

#endif // __ARM_2D_HAS_HELIUM_INTEGER__ == 1

#endif // __ARM_2D_UTILS_HELIUM_H__
add arm2d lib 2021-11-09 22:19:51 +08:00			`/*`
			`* Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.`
			`*`
			`* SPDX-License-Identifier: Apache-2.0`
			`*`
			`* Licensed under the Apache License, Version 2.0 (the License); you may`
			`* not use this file except in compliance with the License.`
			`* You may obtain a copy of the License at`
			`*`
			`* www.apache.org/licenses/LICENSE-2.0`
			`*`
			`* Unless required by applicable law or agreed to in writing, software`
			`* distributed under the License is distributed on an AS IS BASIS, WITHOUT`
			`* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`* See the License for the specific language governing permissions and`
			`* limitations under the License.`
			`*/`

			`/* ----------------------------------------------------------------------`
			`* Project: Arm-2D Library`
			`* Title: arm-2d_utils_helium.h`
			`* Description: Provides helium utility routines`
			`*`
			`* $Date: 20. May 2021`
			`* $Revision: V 0.0.1`
			`*`
			`* Target Processor: Cortex-M cores with Helium`
			`*`
			`* -------------------------------------------------------------------- */`



			`#ifndef __ARM_2D_UTILS_HELIUM_H__`
			`#define __ARM_2D_UTILS_HELIUM_H__`


			`#if __ARM_2D_HAS_HELIUM_INTEGER__ == 1`

			`/============================ INCLUDES ======================================/`
			`#include "arm_2d.h"`
			`#include <arm_math.h>`

			`#ifdef __cplusplus`
			`extern "C" {`
			`#endif`

			`/============================ MACROS ========================================/`
			`/============================ MACROFIED FUNCTIONS ===========================/`

			`__STATIC_FORCEINLINE`
			`void __arm_2d_rgb565_unpack_single_vec(uint16x8_t in,`
			`uint16x8_t * R, uint16x8_t * G, uint16x8_t * B)`
			`{`
			`uint16x8_t vecMaskR = vdupq_n_u16(0x001f);`
			`uint16x8_t vecMaskG = vdupq_n_u16(0x003f);`

			`R = (in & vecMaskR) 8;`
			`B = ((in >> 11)) 8;`
			`G = ((in >> 5) & vecMaskG) 4;`
			`}`


			`__STATIC_FORCEINLINE`
			`uint16x8_t __arm_2d_rgb565_pack_single_vec(uint16x8_t R, uint16x8_t G, uint16x8_t B)`
			`{`
			`uint16x8_t vecMaskBpck = vdupq_n_u16(0x00f8);`
			`uint16x8_t vecMaskGpck = vdupq_n_u16(0x00fc);`

			`uint16x8_t vOut = vorrq(vshrq(R, 3),`
			`vmulq(vandq(G, vecMaskGpck), 8));`

			`vOut = vorrq(vOut, vmulq(vandq(B, vecMaskBpck), 256));`

			`return vOut;`
			`}`

			`__STATIC_FORCEINLINE`
			`uint16x8_t __arm_2d_rgb565_alpha_blending_single_vec(`
			`uint16x8_t hwSource1,`
			`uint16x8_t hwSource2,`
			`uint_fast8_t chRatio)`
			`{`
			`uint16_t ratio1x8 = (256 - chRatio) * 8;`
			`uint16_t ratio1x4 = (256 - chRatio) * 4;`
			`uint16_t ratio2x8 = (chRatio) * 8;`
			`uint16_t ratio2x4 = (chRatio) * 4;`
			`uint16x8_t vecMaskR = vdupq_n_u16(0x001f);`
			`uint16x8_t vecMaskG = vdupq_n_u16(0x003f);`
			`uint16x8_t vecMaskBpck = vdupq_n_u16(0x00f8);`
			`uint16x8_t vecMaskGpck = vdupq_n_u16(0x00fc);`
			`uint16x8_t vecR0, vecB0, vecG0;`
			`uint16x8_t vecR1, vecB1, vecG1;`

			`/* unpack 1st stream */`
			`vecR0 = hwSource1 & vecMaskR;`
			`vecB0 = hwSource1 >> 11;`
			`vecG0 = hwSource1 >> 5;`
			`vecG0 = vecG0 & vecMaskG;`


			`/* unpack 2nd stream */`
			`vecR1 = hwSource2 & vecMaskR;`
			`vecB1 = hwSource2 >> 11;`
			`vecG1 = hwSource2 >> 5;`
			`vecG1 = vecG1 & vecMaskG;`


			`/* merge */`
			`vecR0 = vecR0 * ratio1x8 + vecR1 * ratio2x8;`
			`vecR0 = vecR0 >> 8;`

			`vecG0 = vecG0 * ratio1x4 + vecG1 * ratio2x4;`
			`vecG0 = vecG0 >> 8;`

			`vecB0 = vecB0 * ratio1x8 + vecB1 * ratio2x8;`
			`vecB0 = vecB0 >> 8;`


			`/* pack */`
			`return vecR0 >> 3 \| vmulq((vecG0 & vecMaskGpck), 8)`
			`\| vmulq((vecB0 & vecMaskBpck), 256);`
			`}`


			`__STATIC_FORCEINLINE`
			`uint16x8_t __arm_2d_rgb565_blending_opacity_single_vec(`
			`uint16x8_t hwSource1,`
			`uint16x8_t hwSource2,`
			`uint16x8_t vecHwOpacity)`
			`{`
			`uint16x8_t vecAlpha = vsubq_u16(vdupq_n_u16(256), vecHwOpacity);`
			`uint16x8_t vecR, vecG, vecB;`
			`uint16x8_t vecSrcR, vecSrcG, vecSrcB;`

			`/* unpack sources */`
			`__arm_2d_rgb565_unpack_single_vec(hwSource1, &vecR, &vecG, &vecB);`
			`__arm_2d_rgb565_unpack_single_vec(hwSource2, &vecSrcR, &vecSrcG, &vecSrcB);`

			`/* merge */`
			`vecR = vecR * vecHwOpacity + vecSrcR * vecAlpha;`
			`vecR = vecR >> 8;`

			`vecG = vecG * vecHwOpacity + vecSrcG * vecAlpha;`
			`vecG = vecG >> 8;`

			`vecB = vecB * vecHwOpacity + vecSrcB * vecAlpha;`
			`vecB = vecB >> 8;`

			`/* pack */`
			`return __arm_2d_rgb565_pack_single_vec(vecR, vecG, vecB);`
			`}`


			`__STATIC_FORCEINLINE`
			`uint16x8_t __rgb888_alpha_blending_direct_single_vec(`
			`uint16x8_t wSource1, /* widened input bytes */`
			`uint16x8_t wSource2, /* widened input bytes */`
			`uint_fast8_t chRatio)`
			`{`
			`uint16_t chRatioCompl = 256 - (uint16_t) chRatio;`
			`uint16x8_t vecOut;`

			`vecOut = vmulq_n_u16(wSource1, (uint16_t) chRatio);`
			`vecOut = vmlaq_n_u16(vecOut, wSource2, chRatioCompl);`

			`/* widened output */`
			`return vecOut >> 8;`
			`}`



			`/============================ TYPES =========================================/`
			`/============================ GLOBAL VARIABLES ==============================/`
			`/============================ PROTOTYPES ====================================/`

			`#ifdef __cplusplus`
			`}`
			`#endif`

			`#endif // (ARM_MATH_HELIUM) \|\| defined(ARM_MATH_MVEF) \|\| defined(ARM_MATH_MVEI)`

			`#endif // __ARM_2D_UTILS_HELIUM_H__`