/*
 * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* ----------------------------------------------------------------------
 * Project:      Arm-2D Library
 * Title:        arm-2d_utils_helium.h
 * Description:  Provides helium utility routines
 *
 * $Date:        20. May 2021
 * $Revision:    V 0.0.1
 *
 * Target Processor:  Cortex-M cores with Helium
 *
 * -------------------------------------------------------------------- */

#ifndef __ARM_2D_UTILS_HELIUM_H__
#define __ARM_2D_UTILS_HELIUM_H__

#if __ARM_2D_HAS_HELIUM_INTEGER__ == 1

/*============================ INCLUDES ======================================*/
#include "arm_2d.h"
/* MVE (Helium) vector types and intrinsics: uint16x8_t, vdupq_n_u16, ... */
#include <arm_mve.h>

#ifdef   __cplusplus
extern "C" {
#endif

/*============================ MACROS ========================================*/
/*============================ MACROFIED FUNCTIONS ===========================*/

/**
 * \brief  Split a vector of eight 16-bit 5:6:5 pixels into three channel
 *         vectors, each scaled up towards an 8-bit range.
 *
 * The channel names follow this file's convention:
 *   - R is taken from bits [4:0]   and multiplied by 8 (maximum 248)
 *   - G is taken from bits [10:5]  and multiplied by 4 (maximum 252)
 *   - B is taken from bits [15:11] and multiplied by 8 (maximum 248)
 *
 * \param[in]  in  eight packed 16-bit pixels
 * \param[out] R   receives the scaled bits [4:0] of every lane
 * \param[out] G   receives the scaled bits [10:5] of every lane
 * \param[out] B   receives the scaled bits [15:11] of every lane
 */
__STATIC_FORCEINLINE
void __arm_2d_rgb565_unpack_single_vec(uint16x8_t in,
                                       uint16x8_t * R,
                                       uint16x8_t * G,
                                       uint16x8_t * B)
{
    uint16x8_t      vecMaskR = vdupq_n_u16(0x001f);
    uint16x8_t      vecMaskG = vdupq_n_u16(0x003f);

    *R = (in & vecMaskR) * 8;
    /* no mask needed: after the shift only the top five bits remain */
    *B = ((in >> 11)) * 8;
    *G = ((in >> 5) & vecMaskG) * 4;
}

/**
 * \brief  Pack three channel vectors back into eight 16-bit 5:6:5 pixels.
 *
 * Inverse of __arm_2d_rgb565_unpack_single_vec():
 *   - R >> 3             lands in bits [4:0]
 *   - (G & 0xfc) * 8     lands in bits [10:5]
 *   - (B & 0xf8) * 256   lands in bits [15:11]
 *
 * R is shifted without being masked first, so lanes of R are assumed to be
 * at most 255; larger values would spill into the G field.
 *
 * \param[in] R  channel destined for bits [4:0]
 * \param[in] G  channel destined for bits [10:5]
 * \param[in] B  channel destined for bits [15:11]
 * \return       eight packed 16-bit pixels
 */
__STATIC_FORCEINLINE
uint16x8_t __arm_2d_rgb565_pack_single_vec(uint16x8_t R,
                                           uint16x8_t G,
                                           uint16x8_t B)
{
    uint16x8_t      vecMaskBpck = vdupq_n_u16(0x00f8);
    uint16x8_t      vecMaskGpck = vdupq_n_u16(0x00fc);

    uint16x8_t      vOut = vorrq(vshrq(R, 3),
                                 vmulq(vandq(G, vecMaskGpck), 8));

    vOut = vorrq(vOut, vmulq(vandq(B, vecMaskBpck), 256));

    return vOut;
}

/**
 * \brief  Blend two vectors of eight 5:6:5 pixels with one scalar ratio.
 *
 * Per channel the result is
 *     (source1 * (256 - chRatio) + source2 * chRatio) >> 8
 * computed on the 8-bit-scaled channel values and then re-packed.
 *
 * \param[in] hwSource1  first pixel vector, weighted by (256 - chRatio)
 * \param[in] hwSource2  second pixel vector, weighted by chRatio
 * \param[in] chRatio    blend weight; presumably in [0, 255] - values above
 *                       256 would make the complementary weight wrap
 * \return               eight blended, packed 16-bit pixels
 *
 * \note   NOTE(review): the weighting orientation is the opposite of
 *         __rgb888_alpha_blending_direct_single_vec(), which applies chRatio
 *         to its first source. Confirm against the callers before unifying.
 */
__STATIC_FORCEINLINE
uint16x8_t __arm_2d_rgb565_alpha_blending_single_vec(uint16x8_t hwSource1,
                                                     uint16x8_t hwSource2,
                                                     uint_fast8_t chRatio)
{
    /*
     * The x8 / x4 factors that expand the 5-bit and 6-bit channels to an
     * 8-bit scale are folded into the scalar weights, so the raw channel
     * fields can be multiplied directly. Worst cases (31 * 2048 = 63488 and
     * 63 * 1024 = 64512) still fit in a 16-bit lane.
     */
    uint16_t        ratio1x8 = (256 - chRatio) * 8;
    uint16_t        ratio1x4 = (256 - chRatio) * 4;
    uint16_t        ratio2x8 = (chRatio) * 8;
    uint16_t        ratio2x4 = (chRatio) * 4;

    uint16x8_t      vecMaskR = vdupq_n_u16(0x001f);
    uint16x8_t      vecMaskG = vdupq_n_u16(0x003f);
    uint16x8_t      vecMaskBpck = vdupq_n_u16(0x00f8);
    uint16x8_t      vecMaskGpck = vdupq_n_u16(0x00fc);
    uint16x8_t      vecR0, vecB0, vecG0;
    uint16x8_t      vecR1, vecB1, vecG1;

    /* unpack 1st stream */
    vecR0 = hwSource1 & vecMaskR;
    vecB0 = hwSource1 >> 11;
    vecG0 = hwSource1 >> 5;
    vecG0 = vecG0 & vecMaskG;

    /* unpack 2nd stream */
    vecR1 = hwSource2 & vecMaskR;
    vecB1 = hwSource2 >> 11;
    vecG1 = hwSource2 >> 5;
    vecG1 = vecG1 & vecMaskG;

    /* merge */
    vecR0 = vecR0 * ratio1x8 + vecR1 * ratio2x8;
    vecR0 = vecR0 >> 8;

    vecG0 = vecG0 * ratio1x4 + vecG1 * ratio2x4;
    vecG0 = vecG0 >> 8;

    vecB0 = vecB0 * ratio1x8 + vecB1 * ratio2x8;
    vecB0 = vecB0 >> 8;

    /* pack */
    return vecR0 >> 3 | vmulq((vecG0 & vecMaskGpck), 8)
                      | vmulq((vecB0 & vecMaskBpck), 256);
}

/**
 * \brief  Blend two vectors of eight 5:6:5 pixels with a per-lane opacity.
 *
 * Per channel and per lane the result is
 *     (source1 * opacity + source2 * (256 - opacity)) >> 8
 * computed on the 8-bit-scaled channel values and then re-packed.
 *
 * \param[in] hwSource1     pixel vector weighted by vecHwOpacity
 * \param[in] hwSource2     pixel vector weighted by (256 - vecHwOpacity)
 * \param[in] vecHwOpacity  per-lane opacity; presumably in [0, 256] - larger
 *                          values make the complementary weight wrap
 * \return                  eight blended, packed 16-bit pixels
 */
__STATIC_FORCEINLINE
uint16x8_t __arm_2d_rgb565_blending_opacity_single_vec(uint16x8_t hwSource1,
                                                       uint16x8_t hwSource2,
                                                       uint16x8_t vecHwOpacity)
{
    uint16x8_t      vecAlpha = vsubq_u16(vdupq_n_u16(256), vecHwOpacity);
    uint16x8_t      vecR, vecG, vecB;
    uint16x8_t      vecSrcR, vecSrcG, vecSrcB;

    /* unpack sources */
    __arm_2d_rgb565_unpack_single_vec(hwSource1, &vecR, &vecG, &vecB);
    __arm_2d_rgb565_unpack_single_vec(hwSource2, &vecSrcR, &vecSrcG, &vecSrcB);

    /* merge */
    vecR = vecR * vecHwOpacity + vecSrcR * vecAlpha;
    vecR = vecR >> 8;

    vecG = vecG * vecHwOpacity + vecSrcG * vecAlpha;
    vecG = vecG >> 8;

    vecB = vecB * vecHwOpacity + vecSrcB * vecAlpha;
    vecB = vecB >> 8;

    /* pack */
    return __arm_2d_rgb565_pack_single_vec(vecR, vecG, vecB);
}

/**
 * \brief  Blend two vectors of widened colour bytes with one scalar ratio.
 *
 * Each 16-bit lane carries one widened input byte. Per lane the result is
 *     (wSource1 * chRatio + wSource2 * (256 - chRatio)) >> 8
 * With lanes at most 255 the sum is at most 255 * 256 = 65280, so it fits
 * in a 16-bit lane.
 *
 * \param[in] wSource1  widened input bytes, weighted by chRatio
 * \param[in] wSource2  widened input bytes, weighted by (256 - chRatio)
 * \param[in] chRatio   blend weight; presumably in [0, 255]
 * \return              blended values, still widened to 16-bit lanes
 *
 * \note   NOTE(review): the weighting orientation is the opposite of
 *         __arm_2d_rgb565_alpha_blending_single_vec(), which applies chRatio
 *         to its second source. Confirm against the callers before unifying.
 */
__STATIC_FORCEINLINE
uint16x8_t __rgb888_alpha_blending_direct_single_vec(
                                        uint16x8_t wSource1,  /* widened input bytes */
                                        uint16x8_t wSource2,  /* widened input bytes */
                                        uint_fast8_t chRatio)
{
    uint16_t        chRatioCompl = 256 - (uint16_t) chRatio;
    uint16x8_t      vecOut;

    vecOut = vmulq_n_u16(wSource1, (uint16_t) chRatio);
    vecOut = vmlaq_n_u16(vecOut, wSource2, chRatioCompl);

    /* widened output */
    return vecOut >> 8;
}

/*============================ TYPES =========================================*/
/*============================ GLOBAL VARIABLES ==============================*/
/*============================ PROTOTYPES ====================================*/

#ifdef   __cplusplus
}
#endif

#endif // __ARM_2D_HAS_HELIUM_INTEGER__ == 1

#endif // __ARM_2D_UTILS_HELIUM_H__