How do I compile C code with assembly with arm neon instructions in Android Studio?

Question

I am trying to compile a math library for a project that uses ARM NEON assembly instructions. However, a list of errors is generated whenever I try to build the file, such as:

/Users/steve/AndroidStudioProjects/AndroidTestModernTimeStretch/app/src/main/cpp/ModernTimeStretch/math_neon/math_atan2f.c:153:29: warning: value size does not match register size specified by the constraint and modifier [-Wasm-operand-widths] :: "r"(__atan2f_lut), "r"(__atan2f_pi_2) ^ /Users/steve/AndroidStudioProjects/AndroidTestModernTimeStretch/app/src/main/cpp/ModernTimeStretch/math_neon/math_atan2f.c:118:18: note: use constraint modifier "w" "vdup.f32 d4, %1 \n\t" //d4 = {pi/2, pi/2}; ^~ %w1 /Users/steve/AndroidStudioProjects/AndroidTestModernTimeStretch/app/src/main/cpp/ModernTimeStretch/math_neon/math_atan2f.c:157:1: warning: control reaches end of non-void function [-Wreturn-type] } ^
/Users/steve/AndroidStudioProjects/AndroidTestModernTimeStretch/app/src/main/cpp/ModernTimeStretch/math_neon/math_atan2f.c:170:1: warning: control reaches end of non-void function [-Wreturn-type] }; ^
/Users/steve/AndroidStudioProjects/AndroidTestModernTimeStretch/app/src/main/cpp/ModernTimeStretch/math_neon/math_atan2f.c:104:2: error: unexpected token in argument list "vdup.f32 d17, d0[1] \n\t" //d17 = {x, x}; ^ :1:20: note: instantiated into assembly here vdup.f32 d17, d0[1]
^ /Users/steve/AndroidStudioProjects/AndroidTestModernTimeStretch/app/src/main/cpp/ModernTimeStretch/math_neon/math_atan2f.c:104:30: error: unexpected token in argument list "vdup.f32 d17, d0[1] \n\t" //d17 = {x, x}; ^ :2:20: note: instantiated into assembly here vdup.f32 d16, d0[0]
^ /Users/steve/AndroidStudioProjects/AndroidTestModernTimeStretch/app/src/main/cpp/ModernTimeStretch/math_neon/math_atan2f.c:105:30: error: unrecognized instruction mnemonic, did you mean: frecpe, urecpe? "vdup.f32 d16, d0[0] \n\t" //d16 = {y, y}; ^ :3:2: note: instantiated into assembly here vrecpe.f32 d18, d17
^ /Users/steve/AndroidStudioProjects/AndroidTestModernTimeStretch/app/src/main/cpp/ModernTimeStretch/math_neon/math_atan2f.c:108:29: error: unrecognized instruction mnemonic, did you mean: frecps? "vrecpe.f32 d18, d17 \n\t" //d16 = ~ 1 / d1; ^ :4:2: note: instantiated into assembly here vrecps.f32 d19, d18, d17
^ /Users/steve/AndroidStudioProjects/AndroidTestModernTimeStretch/app/src/main/cpp/ModernTimeStretch/math_neon/math_atan2f.c:109:33: error: unrecognized instruction mnemonic, did you mean: fmul, mul, pmul? "vrecps.f32 d19, d18, d17 \n\t" //d17 = 2.0 - d16 * d1; ^ :5:2: note: instantia

The function in question is:

// Approximates atan2(y, x) with a hand-written 32-bit ARM NEON inline-assembly
// sequence: it forms y * (1/x) using a refined reciprocal estimate, then
// evaluates an odd polynomial whose coefficients are loaded from __atan2f_lut.
//
// The assembly never references the C parameters. It reads d0 directly
// (lane 0 as y, lane 1 as x) and leaves its result in s0.
// NOTE(review): this presumably relies on a hard-float calling convention that
// passes y in s0 and x in s1 and returns floats in s0 -- TODO confirm for the
// target ABI.
// NOTE(review): there is no `return` statement; when __MATH_NEON is undefined
// the body is empty, so the returned value is undefined (-Wreturn-type).
// NOTE(review): d16-d19 are written below but are not in the clobber list.
// NOTE(review): these are 32-bit ARM NEON mnemonics (vdup/vrecpe/vrecps/...);
// an AArch64 or x86 assembler is expected to reject them, so this file likely
// needs to be guarded or excluded for non-ARM32 ABIs -- confirm.
float atan2f_neon_hfp(float y, float x)
{
#ifdef __MATH_NEON
    asm volatile (

    "vdup.f32       d17, d0[1]              \n\t"   //d17 = {x, x} (x assumed to be in d0[1]);
    "vdup.f32       d16, d0[0]              \n\t"   //d16 = {y, y} (y assumed to be in d0[0]);
    
    //1.0 / x: reciprocal estimate followed by two vrecps/vmul refinement steps
    "vrecpe.f32     d18, d17                \n\t"   //d18 = ~ 1 / d17; 
    "vrecps.f32     d19, d18, d17           \n\t"   //d19 = 2.0 - d18 * d17; 
    "vmul.f32       d18, d18, d19           \n\t"   //d18 = d18 * d19; 
    "vrecps.f32     d19, d18, d17           \n\t"   //d19 = 2.0 - d18 * d17; 
    "vmul.f32       d18, d18, d19           \n\t"   //d18 = d18 * d19; 

    //y * (1.0 /x)
    "vmul.f32       d0, d16, d18            \n\t"   //d0 = d16 * d18; 


    "vdup.f32       d4, %1                  \n\t"   //d4 = {%1, %1}, %1 = __atan2f_pi_2 (presumably pi/2);
    "vmov.f32       d6, d0                  \n\t"   //d6 = d0 (signed quotient kept for the sign fix-up at the end);
    "vabs.f32       d0, d0                  \n\t"   //d0 = fabs(d0) ;

    //fast reciprocal approximation of |y/x| (same estimate + two refinements)
    "vrecpe.f32     d1, d0                  \n\t"   //d1 = ~ 1 / d0; 
    "vrecps.f32     d2, d1, d0              \n\t"   //d2 = 2.0 - d1 * d0; 
    "vmul.f32       d1, d1, d2              \n\t"   //d1 = d1 * d2; 
    "vrecps.f32     d2, d1, d0              \n\t"   //d2 = 2.0 - d1 * d0; 
    "vmul.f32       d1, d1, d2              \n\t"   //d1 = d1 * d2; 

    //intended: if |ax| > 1.0 -> ax = ax - (1/ax + ax) = -1/ax, r = pi/2
    //(holds only if d3 ends up as 1.0 or 0.0 after the conversion below)
    //NOTE(review): vcgt/vclt presumably yield an all-ones lane mask for true;
    //converting that mask directly as u32 would give ~2^32 rather than 1.0.
    //Confirm whether a reduction such as "vshr.u32 d3, #31" is missing.
    "vadd.f32       d1, d1, d0              \n\t"   //d1 = d1 + d0; 
    "vmov.f32       d2, #1.0                \n\t"   //d2 = 1.0;
    "vcgt.f32       d3, d0, d2              \n\t"   //d3 = (d0 > d2);
    "vcvt.f32.u32   d3, d3                  \n\t"   //d3 = (float) d3;
    "vmls.f32       d0, d1, d3              \n\t"   //d0 = d0 - d1 * d3;    
    "vmul.f32       d7, d3, d4              \n\t"   //d7 = d3 * d4 (offset added after the polynomial);     
        
    //polynomial (coefficient order below is the original author's note; the
    //layout of __atan2f_lut is not visible here -- TODO confirm):
    "vmul.f32       d2, d0, d0              \n\t"   //d2 = d0*d0 = {ax^2, ax^2} 
    "vld1.32        {d4, d5}, [%0]          \n\t"   //load from %0 = __atan2f_lut: d4 = {p7, p3}, d5 = {p5, p1}
    "vmul.f32       d3, d2, d2              \n\t"   //d3 = d2*d2 = {x^4, x^4}       
    "vmul.f32       q0, q2, d0[0]           \n\t"   //q0 = q2 * d0[0] = {p7x, p3x, p5x, p1x}
    "vmla.f32       d1, d0, d2[0]           \n\t"   //d1 = d1 + d0*d2[0] = {p5x + p7x^3, p1x + p3x^3}       
    "vmla.f32       d1, d3, d1[0]           \n\t"   //d1 = d1 + d3*d1[0] = {..., p1x + p3x^3 + p5x^5 + p7x^7}       
    "vadd.f32       d1, d1, d7              \n\t"   //d1 = d1 + d7      
    
    //sign fix-up: subtract 2*d1 where the signed quotient d6 is negative,
    //i.e. negate the result (again assuming d3 is 1.0 or 0.0)
    "vadd.f32       d2, d1, d1              \n\t"   //d2 = d1 + d1      
    "vclt.f32       d3, d6, #0              \n\t"   //d3 = (d6 < 0) 
    "vcvt.f32.u32   d3, d3                  \n\t"   //d3 = (float) d3   
    "vmls.f32       d1, d3, d2              \n\t"   //d1 = d1 - d3 * d2;

    "vmov.f32       s0, s3                  \n\t"   //s0 = s3 (upper lane of d1 holds the result)

    //no outputs; inputs: %0 = __atan2f_lut, %1 = __atan2f_pi_2
    :: "r"(__atan2f_lut),  "r"(__atan2f_pi_2) 
    : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7"
    );
#endif
}

Relevant build.gradle:

// ABIs requested for the native build: 32-bit ARM, 64-bit ARM, x86 and x86_64.
// NOTE(review): every native source is presumably compiled once per ABI listed
// here, so sources containing 32-bit ARM inline assembly would also be built
// for arm64-v8a, x86 and x86_64 unless they are guarded -- confirm.
ndk {
            abiFilters 'armeabi-v7a', 'arm64-v8a', 'x86', 'x86_64'
        }
        // Settings handed to the CMake-driven native build.
        externalNativeBuild {
            cmake {
                // -D definitions passed to CMake: platform level, the NEON
                // switch, the C++ runtime, and the project-specific
                // PATH_TO_LIBS taken from ${libs_path}.
                arguments '-DANDROID_PLATFORM=android-19', '-DANDROID_ARM_NEON=ON', '-DANDROID_STL=c++_shared', "-DPATH_TO_LIBS:STRING=${libs_path}"
                // Extra compiler flags for C sources.
                cFlags '-O3', '-fsigned-char'
                // Extra compiler flags for C++ sources.
                cppFlags '-fsigned-char', '-std=c++17'
            }
        }

And CMakeLists.txt:

# Oldest CMake version this script accepts.
cmake_minimum_required(VERSION 3.10.2)
project(ModernTimeStretch)

# Compile the C++ sources as C++17.
set(CMAKE_CXX_STANDARD 17)
# Static library built from the project's C++ sources, the kiss_fft C sources
# and the NEON math source.
# NOTE(review): math_neon/math_atan2f.c is listed unconditionally, so it is
# compiled for every ABI being built; if it contains 32-bit ARM assembly it
# presumably needs to be restricted to that ABI (e.g. by checking ANDROID_ABI)
# or guarded in the source -- confirm.
add_library(ModernTimeStretch STATIC
        ModernTimeStretch.cpp
        PhaseVocoder.cpp
        kiss_fft/kiss_fft.c
        kiss_fft/kiss_fftr.c
        math_neon/math_atan2f.c
        )

It looks to me like this is 32-bit ARM assembly, but that you are trying to compile it into a 64-bit program. — Nate Eldredge, Feb 02 '21 at 00:58

How do I compile C code with assembly with arm neon instructions in Android Studio?

0 Answers