/*
 * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The OpenAirInterface Software Alliance licenses this file to You under
 * the OAI Public License, Version 1.1  (the "License"); you may not use this file
 * except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.openairinterface.org/?page_id=698
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *-------------------------------------------------------------------------------
 * For more information about the OpenAirInterface (OAI) Software Alliance:
 *      contact@openairinterface.org
 */

/*! \file PHY/sse_intrin.h
 * \brief SSE includes and compatibility functions.
 *
 * This header collects all SSE compatibility functions. To use SSE inside a source file, include only sse_intrin.h.
 * The host CPU needs to support at least SSE2. SSE3 and SSE4.1 functions are emulated if the CPU lacks support for them.
 * This will slow down the softmodem, but may be valuable if only offline signal processing is required.
 *
 * Changed in August 2022 to rely on SIMD Everywhere (SIMDE) from MIT
 * by bruno.mongazon-cazavet@nokia-bell-labs.com
 *
 * All AVX2 code is mapped to SIMDE, which transparently relies on AVX2 hardware (avx2-capable host) or on SIMDE
 * emulation (non-avx2-capable host).
 * To force SIMDE emulation on an avx2-capable host, use the --noavx2 flag.
 * AVX512 code is not mapped to SIMDE; it depends on the --noavx512 flag.
 * If --noavx512 is set, the OAI AVX512 emulation based on AVX2 is used.
 * If --noavx512 is not set, AVX512 hardware is used on an avx512-capable host, while the OAI AVX512 emulation based
 * on AVX2 is used on a non-avx512-capable host.
 *
 * \author S. Held, Laurent THOMAS
 * \email sebastian.held@imst.de, laurent.thomas@open-cells.com
 * \company IMST GmbH, Open Cells Project
 * \date 2019
 * \version 0.2
 */

#ifndef SSE_INTRIN_H
#define SSE_INTRIN_H

#if defined(__x86_64) || defined(__i386__)

/* x86 processors */

#include <simde/x86/mmx.h>
#include <simde/x86/sse.h>
#include <simde/x86/sse2.h>
#include <simde/x86/sse3.h>
#include <simde/x86/ssse3.h>
#include <simde/x86/sse4.1.h>
#include <simde/x86/sse4.2.h>
#include <simde/x86/avx2.h>
#include <simde/x86/fma.h>

#if defined(__AVX512BW__) || defined(__AVX512F__)
#include <immintrin.h>
#endif

#elif defined(__arm__) || defined(__aarch64__)

/* ARM processors */

#include <simde/arm/neon.h>

#endif // x86_64 || i386

/*
 * OAI specific
 */

#if defined(__x86_64__) || defined(__i386__)
#define vect128 __m128i
#elif defined(__arm__) || defined(__aarch64__)
#define vect128 int16x8_t
#endif

/* Sign pattern used by mulByConjugate128() to negate the even int16 lanes when forming the conjugate. */
static const short minusConjug128[8] __attribute__((aligned(16))) = {-1, 1, -1, 1, -1, 1, -1, 1};

/* Multiply four complex int16 samples by the conjugate of four others:
   returns (a[i] * conj(b[i])) >> output_shift, packed back to saturated int16. */
static inline vect128 mulByConjugate128(vect128 *a, vect128 *b, int8_t output_shift)
{
#if defined(__x86_64__) || defined(__i386__)
  vect128 realPart = _mm_madd_epi16(*a, *b);
  realPart = _mm_srai_epi32(realPart, output_shift);
  vect128 imagPart = _mm_shufflelo_epi16(*b, _MM_SHUFFLE(2, 3, 0, 1));
  imagPart = _mm_shufflehi_epi16(imagPart, _MM_SHUFFLE(2, 3, 0, 1));
  imagPart = _mm_sign_epi16(imagPart, *(vect128 *)minusConjug128);
  imagPart = _mm_madd_epi16(imagPart, *a);
  imagPart = _mm_srai_epi32(imagPart, output_shift);
  vect128 lowPart = _mm_unpacklo_epi32(realPart, imagPart);
  vect128 highPart = _mm_unpackhi_epi32(realPart, imagPart);
  return (_mm_packs_epi32(lowPart, highPart));
#elif defined(__arm__) || defined(__aarch64__)
  AssertFatal(false, "not developed\n");
#endif
}

/* Print the eight int16 lanes of a 128-bit vector as four (re, im) pairs. */
#if defined(__x86_64__) || defined(__i386__)
#define displaySamples128(vect)                                             \
  {                                                                         \
    __m128i x = vect;                                                       \
    printf("vector: %s = (%hd,%hd) (%hd,%hd) (%hd,%hd) (%hd,%hd)\n", #vect, \
           _mm_extract_epi16(x, 0),                                         \
           _mm_extract_epi16(x, 1),                                         \
           _mm_extract_epi16(x, 2),                                         \
           _mm_extract_epi16(x, 3),                                         \
           _mm_extract_epi16(x, 4),                                         \
           _mm_extract_epi16(x, 5),                                         \
           _mm_extract_epi16(x, 6),                                         \
           _mm_extract_epi16(x, 7));                                        \
  }
#elif defined(__arm__) || defined(__aarch64__)
#define displaySamples128(vect) {} // TBD
#endif

#endif // SSE_INTRIN_H
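
/*
 * Usage sketch (illustrative only, not part of the original header): mulByConjugate128()
 * multiplies four complex int16 samples a[i] by the conjugate of b[i], shifts the 32-bit
 * intermediates right by output_shift, and packs them back to saturated int16.
 * The guard macro SSE_INTRIN_USAGE_EXAMPLE and the function below are hypothetical and
 * never built; they only document the intended call pattern on the x86 path.
 */
#ifdef SSE_INTRIN_USAGE_EXAMPLE
#include <stdio.h>
static inline void sse_intrin_usage_example(void)
{
  /* Four complex samples per vect128, stored as interleaved (re, im) int16 pairs:
     a = (1+2j, 3+4j, 5+6j, 7+8j), b = (1+1j, 2+2j, 3+3j, 4+4j). */
  short a[8] __attribute__((aligned(16))) = {1, 2, 3, 4, 5, 6, 7, 8};
  short b[8] __attribute__((aligned(16))) = {1, 1, 2, 2, 3, 3, 4, 4};
  vect128 y = mulByConjugate128((vect128 *)a, (vect128 *)b, 0);
  displaySamples128(y); /* first pair: (1+2j) * (1-1j) = 3+1j */
}
#endif // SSE_INTRIN_USAGE_EXAMPLE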