             AREA |C$$code|,CODE,READONLY

             GET "Hdr.Common"

             EXPORT vfp_arcsin
             ; vfp_arcsin: inverse sine, via arcsin(x) = arctan(x / sqrt(1 - x^2)).
             ; p1arg and pcall are macros from Hdr.Common (not visible in this
             ; file); presumably they fetch the argument into d0 and marshal d0
             ; for the callee respectively - confirm against the header.
             ; Writes d0, d4 and d5, then tail-calls vfp_arctan, which returns
             ; directly to this routine's caller.
vfp_arcsin   p1arg
             vmov.F64 d5,#1.0         ; d5 = 1
             vmul.F64 d4,d0,d0        ; d4 = x^2
             vsub.F64 d5,d5,d4        ; d5 = 1 - x^2
             vsqrt.F64 d4,d5          ; d4 = sqrt(1 - x^2)
             vdiv.F64 d0,d0,d4        ; d0 = x / sqrt(1 - x^2)
             pcall
             b vfp_arctan             ; tail call: result is arctan(d0)

             EXPORT vfp_arccos
             ; vfp_arccos: inverse cosine with a result in [0, pi].
             ;   x > 0 : arccos(x) = arctan(sqrt(1 - x^2) / x)        (tail call)
             ;   x < 0 : arccos(x) = arctan(sqrt(1 - x^2) / x) + pi
             ;   x = 0 : pi/2, returned directly so no division by zero occurs
             ; The bare arctan form is only valid for x > 0: for negative x the
             ; quotient is negative and arctan alone yields arccos(x) - pi.
             ; An APCS frame (same shape as vfp_atan2) is built because the
             ; x < 0 path must regain control after calling vfp_arctan.
             ; p1arg, pcall and pres are macros from Hdr.Common (not visible
             ; here); they are used exactly as vfp_atan2 uses them.
vfp_arccos   mov ip,sp
             stmdb sp!,{fp,ip,lr,pc}
             sub fp,ip,#4
             p1arg
             vmul.F64 d4,d0,d0
             vmov.F64 d5,#1.0
             vsub.F64 d5,d5,d4
             vsqrt.F64 d4,d5          ; d4 = sqrt(1 - x^2)
             vcmp.F64 d0,#0
             vmrs apsr_nzcv,fpscr
             beq %FT20                ; x = 0
             blt %FT10                ; x < 0
             vdiv.F64 d0,d4,d0        ; x > 0: arccos=arctan(sqrt(1-x^2)/x)
             pcall
             ldmdb fp,{fp,sp,lr}      ; drop the frame, keep lr for the tail call
             b vfp_arctan
10           vdiv.F64 d0,d4,d0        ; x < 0: quotient is negative (or -0)
             pcall
             bl vfp_arctan            ; d0 = arctan(quotient), in (-pi/2, 0]
             adr r2,piby2
             vldr.64 d1,[r2]
             vmov.F64 d4,#2.0
             vmul.F64 d1,d1,d4        ; d1 = pi
             vadd.F64 d0,d1,d0        ; shift the result into (pi/2, pi]
             pres
             ldmdb fp,{fp,sp,pc}
20           adr r2,piby2             ; x = 0 so return pi/2
             vldr.64 d0,[r2]
             pres
             ldmdb fp,{fp,sp,pc}

             EXPORT vfp_atan2
             ; vfp_atan2: two-argument arctangent built on vfp_arctan.
             ; After the p2arg/p1arg macros (defined in Hdr.Common, not visible
             ; here) the code treats d0 as x and d1 as y.
             ; Result by case:
             ;   x > 0           arctan(y/x)        (tail call to vfp_arctan)
             ;   x < 0, y >= 0   arctan(y/x) + pi
             ;   x < 0, y < 0    arctan(y/x) - pi
             ;   x = 0           +pi/2, or -pi/2 when y < 0
             ; NOTE(review): x = 0 with y = 0 returns +pi/2 rather than 0 -
             ; confirm this is the intended convention.
             ; pcall / pres are also Hdr.Common macros; presumably they marshal
             ; d0 for a call and for the return value respectively - confirm.
vfp_atan2    mov ip,sp
             stmdb sp!,{fp,ip,lr,pc}  ; build a stack frame: the x < 0 paths call vfp_arctan with bl
             sub fp,ip,#4             ; fp = address of the saved pc slot
             p2arg
             p1arg
             vcmp.F64 d0,#0
             vmrs apsr_nzcv,fpscr     ; copy the VFP comparison flags to the CPU flags
             beq %FT30                ; x = 0
             blt %FT10                ; x < 0
             vdiv.F64 d0,d1,d0        ; x > 0: d0 = y/x
             pcall
             ldmdb fp,{fp,sp,lr}      ; unwind the frame but reload lr, so vfp_arctan returns to our caller
             b vfp_arctan              ; first or fourth quadrant theta=arctan(y/x)
10           vcmp.F64 d1,#0
             vmrs apsr_nzcv,fpscr
             blt %FT20                ; y < 0: third quadrant
             vdiv.F64 d0,d1,d0
             pcall
             bl vfp_arctan             ; second quadrant theta=pi+arctan(y/x)
             adr r2,piby2
             vldr.64 d1,[r2]
             vmov.F64 d4,#2.0
             vmul.F64 d1,d1,d4        ; d1 = 2 * (pi/2) = pi
             vadd.F64 d0,d1,d0
             pres
             ldmdb fp,{fp,sp,pc}      ; unwind the frame and return
20           vdiv.F64 d0,d1,d0
             pcall
             bl vfp_arctan             ; third quadrant theta=arctan(y/x)-pi
             adr r2,piby2
             vldr.64 d1,[r2]
             vmov.F64 d4,#2.0
             vmul.F64 d1,d1,d4        ; d1 = 2 * (pi/2) = pi
             vsub.F64 d0,d0,d1
             pres
             ldmdb fp,{fp,sp,pc}      ; unwind the frame and return
30           adr r2,piby2              ; x=0 so return pi/2 or -pi/2
             vldr.64 d0,[r2]
             vcmp.F64 d1,#0
             vmrs apsr_nzcv,fpscr
             vneglt.F64 d0,d0         ; negate only when y < 0
             pres
             ldmdb fp,{fp,sp,pc}      ; unwind the frame and return

             EXPORT vfp_arctan
             ; vfp_arctan: arctangent of the value in d0 (after p1arg), result in d0.
             ; Method: work on |x|, reduce the range, evaluate a rational
             ; approximation in arctan_main, add a constant, then fix the sign.
             ;   |x| <= 0.66              arctan(|x|) evaluated directly
             ;   0.66 < |x| <= tan(3pi/8) pi/4 + arctan((|x|-1)/(|x|+1))
             ;   |x| > tan(3pi/8)         -( -pi/2 + arctan(1/|x|) )
             ; Working registers:
             ;   r1 = sign flag: +1 keeps the sign, a negative value negates the result
             ;   d1 = constant added to the approximation (0, pi/4 or -pi/2)
             ;   r2, r3 = table pointers; d4-d7 = scratch
             ; p1arg / pres are macros from Hdr.Common (not visible here).
vfp_arctan   p1arg
             mov r1,#1                ; assume a non-negative argument
             vcmp.F64 d0,#0
             vmrs apsr_nzcv,fpscr
             mvnmi r1,r1              ; x < 0: r1 = NOT 1 = -2, i.e. negate at the end
             vnegmi.F64 d0,d0         ; x = |x|
             ; NOTE(review): 0.0 is not representable as a VFPv3 VMOV immediate;
             ; presumably the assembler accepts or synthesises it - confirm.
             vmov.F64 d1,#0.0         ; default additive constant
             adr r2,tthirds
             vldr.64 d4,[r2]
             vcmp.F64 d0,d4
             vmrs apsr_nzcv,fpscr
             bgt %FT20
             b arctan_main            ; x<=0.66 so no range reduction
20           adr r2,tan3pi8
             vldr.64 d4,[r2]
             vcmp.F64 d0,d4
             vmrs apsr_nzcv,fpscr
             bgt %FT30
             adr r2,piby2             ; range reduction where x>0.66 and x<=tan(3pi/8)
             vldr.64 d1,[r2]
             vmov.F64 d4,#2.0
             vdiv.F64 d1,d1,d4        ; d1 = (pi/2)/2 = pi/4
             vmov.F64 d4,#1.0
             vsub.F64 d5,d0,d4        ; d5 = x - 1
             vadd.F64 d0,d0,d4        ; d0 = x + 1
             vdiv.F64 d0,d5,d0        ; x=(x-1)/(x+1)
             b arctan_main
30           adr r2,piby2             ; range reduction where x>tan(3pi/8)
             vldr.64 d1,[r2]
             vneg.F64 d1,d1           ; d1 = -pi/2
             mvn r1,r1                ; flip the sign flag: the sum below is the negative of arctan(|x|)
             vmov.F64 d4,#1.0
             vdiv.F64 d0,d4,d0        ; x=1/x and fall through to arctan_main

             ; arctan_main: evaluates x*(1 + x^2*P(x^2)/Q(x^2)) + d1, where P and Q
             ; are built by Horner's rule from the interleaved table at atncoeff.
             ; Q has an implicit leading coefficient of 1 (qsum starts as q[0] + x^2).
             ; NOTE(review): the loads below use post-indexed vldr, which is not an
             ; architectural VLDR addressing mode; presumably the assembler
             ; provides it - confirm.
arctan_main  vmul.F64 d5,d0,d0        ; d5=x^2
             adr r2,atncoeff
             add r3,r2,#80            ; r3 = end of the table (10 doubles * 8 bytes)
             vldr.64 d6,[r2],#8       ; d6=psum
             vldr.64 d7,[r2],#8       ; d7=qsum
             vadd.F64 d7,d7,d5        ; qsum = q[0] + x^2
             ; Each pass consumes one p term and one q term (16 bytes).
10           vldr.64 d4,[r2],#8
             vmla.F64 d4,d6,d5        ; psum=psum*x^2 + term
             vmov.F64 d6,d4
             vldr.64 d4,[r2],#8
             vmla.F64 d4,d7,d5        ; qsum=qsum*x^2 + term
             vmov.F64 d7,d4
             cmp r2,r3
             ; NOTE(review): blt is a signed comparison of two addresses.
             blt %BT10
             vmov.F64 d4,#1.0
             vmul.F64 d6,d6,d5        ; psum*x^2
             vdiv.F64 d6,d6,d7        ; psum*x^2 / qsum
             vadd.F64 d4,d6,d4
             vmul.F64 d4,d4,d0        ; x*(1 + psum*x^2 / qsum)
             vadd.F64 d0,d1,d4        ; add in constant term
             movs r1,r1               ; set N from the sign flag
             vnegmi.F64 d0,d0         ; change sign if necessary
             pres
             mov pc,r14               ; return to caller

             ; Double-precision constants used by vfp_atan2 and vfp_arctan.
tan3pi8      DCFD 2.41421356237309504880        ; tan(3pi/8)
piby2        DCFD 1.5707963267948966192         ; pi/2
tthirds      DCFD 0.66                          ; vfp_arctan skips range reduction at or below this (0.66, not exactly 2/3)

             ; Coefficients for the rational approximation in arctan_main, stored
             ; as interleaved pairs p[0],q[0],p[1],q[1],...,p[4],q[4] (10 doubles).
             ; arctan_main computes the table end as atncoeff+80, so that offset
             ; must change if entries are added or removed.
             ; NOTE(review): the values appear to match the P/Q tables of the
             ; Cephes atan.c routine - confirm the origin.
atncoeff     DCFD -8.750608600031904122785E-1   ; p[0]
             DCFD 2.485846490142306297962E1     ; q[0], coefficients are interleaved
             DCFD -1.615753718733365076637E1    ; p[1]
             DCFD 1.650270098316988542046E2     ; q[1]
             DCFD -7.500855792314704667340E1    ; p[2]
             DCFD 4.328810604912902668951E2     ; q[2]
             DCFD -1.228866684490136173410E2    ; p[3]
             DCFD 4.853903996359136964868E2     ; q[3]
             DCFD -6.485021904942025371773E1    ; p[4]
             DCFD 1.945506571482613964425E2     ; q[4]
             END
