pikapython/package/Arm2D/__arm_2d_utils_helium.h
2021-11-09 22:19:51 +08:00

184 lines
5.5 KiB
C

/*
* Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* ----------------------------------------------------------------------
* Project: Arm-2D Library
* Title: __arm_2d_utils_helium.h
* Description: Provides helium utility routines
*
* $Date: 20. May 2021
* $Revision: V 0.0.1
*
* Target Processor: Cortex-M cores with Helium
*
* -------------------------------------------------------------------- */
#ifndef __ARM_2D_UTILS_HELIUM_H__
#define __ARM_2D_UTILS_HELIUM_H__
#if __ARM_2D_HAS_HELIUM_INTEGER__ == 1
/*============================ INCLUDES ======================================*/
#include "arm_2d.h"
#include <arm_math.h>
#ifdef __cplusplus
extern "C" {
#endif
/*============================ MACROS ========================================*/
/*============================ MACROFIED FUNCTIONS ===========================*/
/**
 * @brief Split a vector of packed 16-bit pixels into three channel vectors,
 *        each scaled up towards an 8-bit range.
 *
 * @param in  packed pixels, one per 16-bit lane
 * @param R   receives bits [4:0]   of every lane, multiplied by 8
 * @param G   receives bits [10:5]  of every lane, multiplied by 4
 * @param B   receives bits [15:11] of every lane, multiplied by 8
 */
__STATIC_FORCEINLINE
void __arm_2d_rgb565_unpack_single_vec(uint16x8_t in,
                                       uint16x8_t * R,
                                       uint16x8_t * G,
                                       uint16x8_t * B)
{
    const uint16x8_t vMask5 = vdupq_n_u16(0x001f);
    const uint16x8_t vMask6 = vdupq_n_u16(0x003f);

    /* isolate the three bit fields */
    uint16x8_t vLowField  = vandq_u16(in, vMask5);
    uint16x8_t vHighField = vshrq_n_u16(in, 11);
    uint16x8_t vMidField  = vandq_u16(vshrq_n_u16(in, 5), vMask6);

    /* 5-bit fields are scaled by 8, the 6-bit field by 4 */
    *R = vmulq_n_u16(vLowField, 8);
    *B = vmulq_n_u16(vHighField, 8);
    *G = vmulq_n_u16(vMidField, 4);
}
/**
 * @brief Pack three channel vectors into one vector of 16-bit pixels.
 *
 * Per lane the result is
 *   (R >> 3) | ((G & 0xfc) * 8) | ((B & 0xf8) * 256)
 * i.e. the top 5 bits of an 8-bit R land in bits [4:0], the top 6 bits of
 * an 8-bit G in bits [10:5] and the top 5 bits of an 8-bit B in bits [15:11].
 *
 * @note R is shifted but not masked, so lanes of R above 255 would spill
 *       into the higher fields.
 *
 * @param R  channel destined for the low field
 * @param G  channel destined for the middle field
 * @param B  channel destined for the high field
 * @return   the packed pixels
 */
__STATIC_FORCEINLINE
uint16x8_t __arm_2d_rgb565_pack_single_vec(uint16x8_t R, uint16x8_t G, uint16x8_t B)
{
    const uint16x8_t vKeepTop6 = vdupq_n_u16(0x00fc);
    const uint16x8_t vKeepTop5 = vdupq_n_u16(0x00f8);

    uint16x8_t vLowField  = R >> 3;
    uint16x8_t vMidField  = (G & vKeepTop6) * 8;        /* same as << 3 */
    uint16x8_t vHighField = (B & vKeepTop5) * 256;      /* same as << 8 */

    return (vLowField | vMidField) | vHighField;
}
/**
 * @brief Blend two vectors of packed 16-bit (5-6-5) pixels with one scalar ratio.
 *
 * Each bit field is extracted from both sources, combined as
 *   (field1 * (256 - chRatio) + field2 * chRatio) * scale >> 8
 * with scale = 8 for the two 5-bit fields and 4 for the 6-bit field, and the
 * three results are packed back into the 5-6-5 layout.
 *
 * The two weights add up to 256, so with 5/6-bit fields the weighted sum
 * stays within a 16-bit lane (largest value 63 * 4 * 256 = 64512).
 *
 * @param hwSource1  first pixel vector, weighted by (256 - chRatio)
 * @param hwSource2  second pixel vector, weighted by chRatio
 * @param chRatio    blending ratio
 * @return           blended, packed pixels
 */
__STATIC_FORCEINLINE
uint16x8_t __arm_2d_rgb565_alpha_blending_single_vec(
                                            uint16x8_t      hwSource1,
                                            uint16x8_t      hwSource2,
                                            uint_fast8_t    chRatio)
{
    /* weights with the field-to-8-bit scaling (x8 / x4) already folded in */
    const uint16_t hwWeight1x8 = (256 - chRatio) * 8;
    const uint16_t hwWeight1x4 = (256 - chRatio) * 4;
    const uint16_t hwWeight2x8 = (chRatio) * 8;
    const uint16_t hwWeight2x4 = (chRatio) * 4;

    const uint16x8_t vMask5    = vdupq_n_u16(0x001f);
    const uint16x8_t vMask6    = vdupq_n_u16(0x003f);
    const uint16x8_t vKeepTop5 = vdupq_n_u16(0x00f8);
    const uint16x8_t vKeepTop6 = vdupq_n_u16(0x00fc);

    /* unpack 1st stream */
    uint16x8_t vR1 = vandq_u16(hwSource1, vMask5);
    uint16x8_t vB1 = vshrq_n_u16(hwSource1, 11);
    uint16x8_t vG1 = vandq_u16(vshrq_n_u16(hwSource1, 5), vMask6);

    /* unpack 2nd stream */
    uint16x8_t vR2 = vandq_u16(hwSource2, vMask5);
    uint16x8_t vB2 = vshrq_n_u16(hwSource2, 11);
    uint16x8_t vG2 = vandq_u16(vshrq_n_u16(hwSource2, 5), vMask6);

    /* merge: weighted sum per field, then drop the 8 fractional bits */
    uint16x8_t vR = vmlaq_n_u16(vmulq_n_u16(vR1, hwWeight1x8), vR2, hwWeight2x8);
    vR = vshrq_n_u16(vR, 8);

    uint16x8_t vG = vmlaq_n_u16(vmulq_n_u16(vG1, hwWeight1x4), vG2, hwWeight2x4);
    vG = vshrq_n_u16(vG, 8);

    uint16x8_t vB = vmlaq_n_u16(vmulq_n_u16(vB1, hwWeight1x8), vB2, hwWeight2x8);
    vB = vshrq_n_u16(vB, 8);

    /* pack */
    uint16x8_t vPacked = vorrq_u16(vshrq_n_u16(vR, 3),
                                   vmulq_n_u16(vandq_u16(vG, vKeepTop6), 8));

    return vorrq_u16(vPacked, vmulq_n_u16(vandq_u16(vB, vKeepTop5), 256));
}
/**
 * @brief Blend two vectors of packed 16-bit pixels using a per-lane opacity.
 *
 * Both sources are unpacked with __arm_2d_rgb565_unpack_single_vec(), every
 * channel is combined per lane as
 *   (channel1 * opacity + channel2 * (256 - opacity)) >> 8
 * and the result is re-packed with __arm_2d_rgb565_pack_single_vec().
 *
 * @param hwSource1     first pixel vector, weighted by vecHwOpacity
 * @param hwSource2     second pixel vector, weighted by (256 - vecHwOpacity)
 * @param vecHwOpacity  per-lane opacity
 * @return              blended, packed pixels
 */
__STATIC_FORCEINLINE
uint16x8_t __arm_2d_rgb565_blending_opacity_single_vec(
                                            uint16x8_t      hwSource1,
                                            uint16x8_t      hwSource2,
                                            uint16x8_t      vecHwOpacity)
{
    /* complementary weight applied to the second source */
    const uint16x8_t vComplement = vdupq_n_u16(256) - vecHwOpacity;

    uint16x8_t vR1, vG1, vB1;
    uint16x8_t vR2, vG2, vB2;

    /* unpack sources */
    __arm_2d_rgb565_unpack_single_vec(hwSource1, &vR1, &vG1, &vB1);
    __arm_2d_rgb565_unpack_single_vec(hwSource2, &vR2, &vG2, &vB2);

    /* merge: weighted sum per channel, then drop the 8 fractional bits */
    uint16x8_t vR = vshrq_n_u16(vaddq_u16(vmulq_u16(vR1, vecHwOpacity),
                                          vmulq_u16(vR2, vComplement)), 8);
    uint16x8_t vG = vshrq_n_u16(vaddq_u16(vmulq_u16(vG1, vecHwOpacity),
                                          vmulq_u16(vG2, vComplement)), 8);
    uint16x8_t vB = vshrq_n_u16(vaddq_u16(vmulq_u16(vB1, vecHwOpacity),
                                          vmulq_u16(vB2, vComplement)), 8);

    /* pack */
    return __arm_2d_rgb565_pack_single_vec(vR, vG, vB);
}
/**
 * @brief Blend two vectors of widened bytes with one scalar ratio.
 *
 * Per lane the result is
 *   (wSource1 * chRatio + wSource2 * (256 - chRatio)) >> 8
 * computed in 16-bit lanes.
 *
 * @param wSource1  first operand (widened input bytes), weighted by chRatio
 * @param wSource2  second operand (widened input bytes), weighted by
 *                  (256 - chRatio)
 * @param chRatio   blending ratio
 * @return          blended values, still widened to 16-bit lanes
 */
__STATIC_FORCEINLINE
uint16x8_t __rgb888_alpha_blending_direct_single_vec(
                                        uint16x8_t      wSource1,   /* widened input bytes */
                                        uint16x8_t      wSource2,   /* widened input bytes */
                                        uint_fast8_t    chRatio)
{
    const uint16_t hwWeight1 = (uint16_t) chRatio;
    const uint16_t hwWeight2 = 256 - (uint16_t) chRatio;

    uint16x8_t vSum = wSource1 * hwWeight1 + wSource2 * hwWeight2;

    /* widened output */
    return vSum >> 8;
}
/*============================ TYPES =========================================*/
/*============================ GLOBAL VARIABLES ==============================*/
/*============================ PROTOTYPES ====================================*/
#ifdef __cplusplus
}
#endif
#endif // __ARM_2D_HAS_HELIUM_INTEGER__ == 1
#endif // __ARM_2D_UTILS_HELIUM_H__