I am trying to compile a math library for project that uses arm neon assembly instructions. However a list of errors is generated whenever I try to build the file, such as
/Users/steve/AndroidStudioProjects/AndroidTestModernTimeStretch/app/src/main/cpp/ModernTimeStretch/math_neon/math_atan2f.c:153:29: warning: value size does not match register size specified by the constraint and modifier [-Wasm-operand-widths] :: "r"(__atan2f_lut), "r"(__atan2f_pi_2) ^ /Users/steve/AndroidStudioProjects/AndroidTestModernTimeStretch/app/src/main/cpp/ModernTimeStretch/math_neon/math_atan2f.c:118:18: note: use constraint modifier "w" "vdup.f32 d4, %1 \n\t" //d4 = {pi/2, pi/2}; ^~ %w1 /Users/steve/AndroidStudioProjects/AndroidTestModernTimeStretch/app/src/main/cpp/ModernTimeStretch/math_neon/math_atan2f.c:157:1: warning: control reaches end of non-void function [-Wreturn-type] } ^
/Users/steve/AndroidStudioProjects/AndroidTestModernTimeStretch/app/src/main/cpp/ModernTimeStretch/math_neon/math_atan2f.c:170:1: warning: control reaches end of non-void function [-Wreturn-type] }; ^
/Users/steve/AndroidStudioProjects/AndroidTestModernTimeStretch/app/src/main/cpp/ModernTimeStretch/math_neon/math_atan2f.c:104:2: error: unexpected token in argument list "vdup.f32 d17, d0[1] \n\t" //d17 = {x, x}; ^ :1:20: note: instantiated into assembly here vdup.f32 d17, d0[1]
^ /Users/steve/AndroidStudioProjects/AndroidTestModernTimeStretch/app/src/main/cpp/ModernTimeStretch/math_neon/math_atan2f.c:104:30: error: unexpected token in argument list "vdup.f32 d17, d0[1] \n\t" //d17 = {x, x}; ^ :2:20: note: instantiated into assembly here vdup.f32 d16, d0[0]
^ /Users/steve/AndroidStudioProjects/AndroidTestModernTimeStretch/app/src/main/cpp/ModernTimeStretch/math_neon/math_atan2f.c:105:30: error: unrecognized instruction mnemonic, did you mean: frecpe, urecpe? "vdup.f32 d16, d0[0] \n\t" //d16 = {y, y}; ^ :3:2: note: instantiated into assembly here vrecpe.f32 d18, d17
^ /Users/steve/AndroidStudioProjects/AndroidTestModernTimeStretch/app/src/main/cpp/ModernTimeStretch/math_neon/math_atan2f.c:108:29: error: unrecognized instruction mnemonic, did you mean: frecps? "vrecpe.f32 d18, d17 \n\t" //d16 = ~ 1 / d1; ^ :4:2: note: instantiated into assembly here vrecps.f32 d19, d18, d17
^ /Users/steve/AndroidStudioProjects/AndroidTestModernTimeStretch/app/src/main/cpp/ModernTimeStretch/math_neon/math_atan2f.c:109:33: error: unrecognized instruction mnemonic, did you mean: fmul, mul, pmul? "vrecps.f32 d19, d18, d17 \n\t" //d17 = 2.0 - d16 * d1; ^ :5:2: note: instantia
The function in question is:
float atan2f_neon_hfp(float y, float x)
{
#ifdef __MATH_NEON
asm volatile (
"vdup.f32 d17, d0[1] \n\t" //d17 = {x, x};
"vdup.f32 d16, d0[0] \n\t" //d16 = {y, y};
//1.0 / x
"vrecpe.f32 d18, d17 \n\t" //d16 = ~ 1 / d1;
"vrecps.f32 d19, d18, d17 \n\t" //d17 = 2.0 - d16 * d1;
"vmul.f32 d18, d18, d19 \n\t" //d16 = d16 * d17;
"vrecps.f32 d19, d18, d17 \n\t" //d17 = 2.0 - d16 * d1;
"vmul.f32 d18, d18, d19 \n\t" //d16 = d16 * d17;
//y * (1.0 /x)
"vmul.f32 d0, d16, d18 \n\t" //d0 = d16 * d18;
"vdup.f32 d4, %1 \n\t" //d4 = {pi/2, pi/2};
"vmov.f32 d6, d0 \n\t" //d6 = d0;
"vabs.f32 d0, d0 \n\t" //d0 = fabs(d0) ;
//fast reciporical approximation
"vrecpe.f32 d1, d0 \n\t" //d1 = ~ 1 / d0;
"vrecps.f32 d2, d1, d0 \n\t" //d2 = 2.0 - d1 * d0;
"vmul.f32 d1, d1, d2 \n\t" //d1 = d1 * d2;
"vrecps.f32 d2, d1, d0 \n\t" //d2 = 2.0 - d1 * d0;
"vmul.f32 d1, d1, d2 \n\t" //d1 = d1 * d2;
//if |x| > 1.0 -> ax = 1/ax, r = pi/2
"vadd.f32 d1, d1, d0 \n\t" //d1 = d1 + d0;
"vmov.f32 d2, #1.0 \n\t" //d2 = 1.0;
"vcgt.f32 d3, d0, d2 \n\t" //d3 = (d0 > d2);
"vcvt.f32.u32 d3, d3 \n\t" //d3 = (float) d3;
"vmls.f32 d0, d1, d3 \n\t" //d0 = d0 - d1 * d3;
"vmul.f32 d7, d3, d4 \n\t" //d7 = d3 * d4;
//polynomial:
"vmul.f32 d2, d0, d0 \n\t" //d2 = d0*d0 = {ax^2, ax^2}
"vld1.32 {d4, d5}, [%0] \n\t" //d4 = {p7, p3}, d5 = {p5, p1}
"vmul.f32 d3, d2, d2 \n\t" //d3 = d2*d2 = {x^4, x^4}
"vmul.f32 q0, q2, d0[0] \n\t" //q0 = q2 * d0[0] = {p7x, p3x, p5x, p1x}
"vmla.f32 d1, d0, d2[0] \n\t" //d1 = d1 + d0*d2[0] = {p5x + p7x^3, p1x + p3x^3}
"vmla.f32 d1, d3, d1[0] \n\t" //d1 = d1 + d3*d1[0] = {..., p1x + p3x^3 + p5x^5 + p7x^7}
"vadd.f32 d1, d1, d7 \n\t" //d1 = d1 + d7
"vadd.f32 d2, d1, d1 \n\t" //d2 = d1 + d1
"vclt.f32 d3, d6, #0 \n\t" //d3 = (d6 < 0)
"vcvt.f32.u32 d3, d3 \n\t" //d3 = (float) d3
"vmls.f32 d1, d3, d2 \n\t" //d1 = d1 - d2 * d3;
"vmov.f32 s0, s3 \n\t" //s0 = s3
:: "r"(__atan2f_lut), "r"(__atan2f_pi_2)
: "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7"
);
#endif
}
Relevant build.gradle:
ndk {
abiFilters 'armeabi-v7a', 'arm64-v8a', 'x86', 'x86_64'
}
externalNativeBuild {
cmake {
arguments '-DANDROID_PLATFORM=android-19', '-DANDROID_ARM_NEON=ON', '-DANDROID_STL=c++_shared', "-DPATH_TO_LIBS:STRING=${libs_path}"
cFlags '-O3', '-fsigned-char'
cppFlags '-fsigned-char', '-std=c++17'
}
}
And CMakeLists.txt:
cmake_minimum_required(VERSION 3.10.2)
project(ModernTimeStretch)
set(CMAKE_CXX_STANDARD 17)
add_library(ModernTimeStretch STATIC
ModernTimeStretch.cpp
PhaseVocoder.cpp
kiss_fft/kiss_fft.c
kiss_fft/kiss_fftr.c
math_neon/math_atan2f.c
)