             AREA |C$$code|,CODE,READONLY

             GET "Hdr.Common"

; range - reduce an angle to within roughly half a turn of zero.
;   $x  : double register holding the angle; replaced by the reduced angle
;         x - n*pi, where n is x/pi rounded to a nearby integer.
;         Must not be d5, d6 or d7 (all used as scratch below).
;   r1  : receives n as a signed 32-bit integer.
;   Also overwrites r2, d5, d6, d7, s2 (the low half of d1) and the flags.
;   n is produced by a float-to-int32 conversion, so arguments whose
;   quotient x/pi does not fit in 32 bits are not reduced correctly.
             MACRO
             range $x

             adr r2,piby2
             vldr.64 d5,[r2]
             vadd.F64 d5,d5,d5        ; pi/2 + pi/2 = pi (exact doubling)
             vdiv.F64 $x,$x,d5        ; x is now measured in units of pi
             vmov.F64 d6,#0.5
             vadd.F64 d7,$x,d6        ; bias by a half so truncation rounds to nearest
             vcmp.F64 d7,#0
             vmrs apsr_nzcv,fpscr
             vmovmi.F64 d6,#1.0       ; negative values: step down by one first,
             vsubmi.F64 d7,d7,d6      ; because the conversion truncates toward zero
             vcvt.S32.F64 s2,d7       ; s2 = n
             vmov r1,s2               ; keep n for the sign decision in the series code
             vcvt.F64.S32 d7,s2       ; d7 = n as a double
             vsub.F64 $x,$x,d7        ; fractional part of x/pi, centred on zero
             vmul.F64 $x,$x,d5        ; scale back to radians
             MEND

             ; vfp_cos - cosine of a double-precision angle.
             ; The p1arg macro comes from Hdr.Common; presumably it places the
             ; incoming argument in d0 - confirm against that header.
             ; After range, d0 holds the reduced angle x and r1 the multiple
             ; of pi that was removed; the series routines negate their
             ; result when r1 is odd.
             ;   x >  pi/4 : cos(x) = sin(pi/2 - x)
             ;   x < -pi/4 : cos(x) = sin(pi/2 + x)
             ;   otherwise : cosine series on x directly
             ; Every path leaves by a plain branch, so the series routine
             ; returns straight to our caller through r14 (assumes p1arg
             ; leaves r14 untouched).
             EXPORT vfp_cos
vfp_cos      p1arg
             range d0
             adr r2,piby2
             vldr.64 d5,[r2]          ; d5 = pi/2
             vmov.F64 d6,#0.5
             vmul.F64 d6,d5,d6        ; d6 = pi/4; halving is exact, no divide needed
             vcmp.F64 d0,d6
             vmrs apsr_nzcv,fpscr
             ble %FT10
             vsub.F64 d0,d5,d0        ; x > pi/4
             b sin_series             ; cos(x) = sin(pi/2 - x)
10           vneg.F64 d6,d6           ; d6 = -pi/4
             vcmp.F64 d0,d6
             vmrs apsr_nzcv,fpscr
             bge cos_series           ; -pi/4 <= x <= pi/4: series applies directly
             vadd.F64 d0,d5,d0        ; x < -pi/4
             b sin_series             ; cos(x) = sin(pi/2 + x)

             ; vfp_sin - sine of a double-precision angle.
             ; The p1arg macro comes from Hdr.Common; presumably it places the
             ; incoming argument in d0 - confirm against that header.
             ; After range, d0 holds the reduced angle x and r1 the multiple
             ; of pi that was removed; the series routines negate their
             ; result when r1 is odd.
             ;   x >  pi/4 : sin(x) =  cos(pi/2 - x)
             ;   x < -pi/4 : sin(x) = -cos(pi/2 + x)   (r1 is bumped to flip the sign)
             ;   otherwise : sine series on x directly
             ; Every path leaves by a plain branch, so the series routine
             ; returns straight to our caller through r14 (assumes p1arg
             ; leaves r14 untouched).
             EXPORT vfp_sin
vfp_sin      p1arg
             range d0
             adr r2,piby2
             vldr.64 d5,[r2]          ; d5 = pi/2
             vmov.F64 d6,#0.5
             vmul.F64 d6,d5,d6        ; d6 = pi/4; halving is exact, no divide needed
             vcmp.F64 d0,d6
             vmrs apsr_nzcv,fpscr
             ble %FT10
             vsub.F64 d0,d5,d0        ; x > pi/4
             b cos_series             ; sin(x) = cos(pi/2 - x)
10           vneg.F64 d6,d6           ; d6 = -pi/4
             vcmp.F64 d0,d6
             vmrs apsr_nzcv,fpscr
             bge sin_series           ; -pi/4 <= x <= pi/4: series applies directly
             vadd.F64 d0,d5,d0        ; x < -pi/4
             add r1,r1,#1             ; toggle the parity that selects negation
             b cos_series             ; sin(x) = -cos(pi/2 + x)

             ; vfp_tan - tangent of a double-precision angle, formed as
             ; sin(x)/cos(x) from the two series routines.
             ; A stack frame is built because the series routines are called
             ; with bl and so overwrite r14.
             ; p1arg and pres come from Hdr.Common; presumably they move the
             ; argument into d0 and the result out of d0 - confirm there.
             ; Register use across the calls:
             ;   d4 = angle handed to both series routines
             ;   d1 = result of the first (sin_series) call
             ;   r2 = sign selector handed back by sin_series; r1 is reloaded
             ;        from it before cos_series is called
             ;   x >  pi/4 : y = pi/2 - x, tan(x) = cos(y)/sin(y)
             ;   x < -pi/4 : y = pi/2 + x, tan(x) = -cos(y)/sin(y)
             ;   otherwise : tan(x) = sin(x)/cos(x)
             EXPORT vfp_tan
vfp_tan      mov ip,sp
             stmdb sp!,{fp,ip,lr,pc}
             sub fp,ip,#4
             p1arg
             range d0
             adr r2,piby2
             vldr.64 d5,[r2]          ; d5 = pi/2
             vmov.F64 d6,#0.5
             vmul.F64 d6,d5,d6        ; d6 = pi/4; halving is exact, no divide needed
             vcmp.F64 d0,d6
             vmrs apsr_nzcv,fpscr
             ble %FT10
             vsub.F64 d4,d5,d0        ; x > pi/4: y = pi/2 - x
             vmov.F64 d0,d4
             bl sin_series            ; d0 = sin(y) = cos(x)
             mov r1,r2                ; same sign selector for the cosine call
             b %FT30
10           vneg.F64 d6,d6           ; d6 = -pi/4
             vcmp.F64 d0,d6
             vmrs apsr_nzcv,fpscr
             bge %FT20
             vadd.F64 d4,d5,d0        ; x < -pi/4: y = pi/2 + x
             vmov.F64 d0,d4
             bl sin_series            ; d0 = sin(y) = cos(x)
             add r1,r2,#1             ; flip the sign: sin(x) = -cos(y)
30           vmov.F64 d1,d0           ; shared tail of both reflected cases: d1 = cos(x)
             vmov.F64 d0,d4
             bl cos_series            ; d0 = sin(x)
             vdiv.F64 d0,d0,d1        ; tan(x) = sin(x)/cos(x)
             pres
             ldmdb fp,{fp,sp,pc}
20           vmov.F64 d4,d0           ; |x| <= pi/4: use both series on x itself
             bl sin_series
             vmov.F64 d1,d0           ; d1 = sin(x)
             vmov.F64 d0,d4
             mov r1,r2
             bl cos_series            ; d0 = cos(x)
             vdiv.F64 d0,d1,d0        ; tan(x) = sin(x)/cos(x)
             pres
             ldmdb fp,{fp,sp,pc}

; cos_series - evaluate 1 + x^2 * P(x^2), with P taken from the coscoeff table.
; In:   d0 = x
;       r1 = sign selector; the result is negated when bit 0 is set
; Out:  d0 = result; the pres macro (Hdr.Common) then runs - presumably it
;       hands d0 back in core registers, confirm against that header.
; Uses: r2, r3, d5, d6, d7, flags.  Returns through r14.
cos_series   vmul.F64 d5,d0,d0        ; d5 = x^2
             adr r2,coscoeff
             add r3,r2,#56            ; r3 = end of table (7 doubles of 8 bytes)
             vldmia r2!,{d6}          ; sum = first coefficient; VLDR cannot post-increment
10           vldmia r2!,{d7}          ; next coefficient, r2 steps on by 8
             vmla.F64 d7,d6,d5        ; coefficient + sum*x^2
             vmov.F64 d6,d7           ; becomes the new sum
             cmp r2,r3
             blo %BT10                ; addresses compare unsigned
             vmov.F64 d0,#1.0
             vmla.F64 d0,d6,d5        ; 1 + sum*x^2
             tst r1,#1                ; odd selector means negate
             vnegne.F64 d0,d0
             pres
             mov pc,r14

; Coefficients read by cos_series, highest power first. cos_series folds them
; together as a polynomial in x^2 and then forms 1 + x^2 * (that polynomial).
; cos_series hard-codes the table size as 56 bytes (7 entries), so keep the
; two in step when adding or removing terms.
coscoeff     DCFD -1.147074559772972289E-11   ; -1/14!
             DCFD 2.087675698786809605E-9     ; +1/12!
             DCFD -2.755731922398588828E-7    ; -1/10!
             DCFD 2.480158730158729818E-5     ; +1/8!
             DCFD -1.388888888888888725E-3    ; -1/6!
             DCFD 4.166666666666666435E-2     ; +1/4!
             DCFD -5.000000000000000000E-1    ; -1/2!
; pi/2, loaded with adr/vldr by the range macro and by the vfp_* entry points.
piby2        DCFD 1.5707963267948966192

; sin_series - evaluate x * (1 + x^2 * P(x^2)), with P taken from the
; sincoeff table.
; In:   d0 = x
;       r1 = sign selector; the result is negated when bit 0 is set
; Out:  d0 = result
;       r2 = copy of the incoming r1 (vfp_tan reloads r1 from it before
;            calling cos_series)
;       The pres macro (Hdr.Common) then runs - presumably it hands d0 back
;       in core registers, confirm against that header.
; Uses: r2, r3, d5, d6, d7, flags.  Returns through r14.
sin_series   vmul.F64 d5,d0,d0        ; d5 = x^2
             adr r2,sincoeff
             add r3,r2,#56            ; r3 = end of table (7 doubles of 8 bytes)
             vldmia r2!,{d6}          ; sum = first coefficient; VLDR cannot post-increment
10           vldmia r2!,{d7}          ; next coefficient, r2 steps on by 8
             vmla.F64 d7,d6,d5        ; coefficient + sum*x^2
             vmov.F64 d6,d7           ; becomes the new sum
             cmp r2,r3
             blo %BT10                ; addresses compare unsigned
             vmov.F64 d7,#1.0
             vmla.F64 d7,d6,d5        ; 1 + sum*x^2
             vmul.F64 d0,d7,d0        ; x * (1 + sum*x^2)
             tst r1,#1                ; odd selector means negate
             vnegne.F64 d0,d0
             mov r2,r1                ; save the selector in r2 before pres runs
             pres
             mov pc,r14

; Coefficients read by sin_series, highest power first. sin_series folds them
; together as a polynomial in x^2 and then forms x * (1 + x^2 * (that polynomial)).
; sin_series hard-codes the table size as 56 bytes (7 entries), so keep the
; two in step when adding or removing terms.
sincoeff     DCFD -7.647163731819817415E-13   ; -1/15!
             DCFD 1.605904383682161593E-10    ; +1/13!
             DCFD -2.505210838544172022E-8    ; -1/11!
             DCFD 2.755731922398589251E-6     ; +1/9!
             DCFD -1.984126984126984126E-4    ; -1/7!
             DCFD 8.333333333333333333E-3     ; +1/5!
             DCFD -1.666666666666666666E-1    ; -1/3!
             END
