             AREA |C$$code|,CODE,READONLY

             GET "Hdr.Common"
             IMPORT vfp_ln
             IMPORT vfp_exp

; Evaluates gamma(x) using approximation due to Lanczos
; the following gammln function shares much of the code

             EXPORT vfp_gamma
vfp_gamma    mov ip,sp
             stmdb sp!,{fp,ip,lr,pc}
             sub fp,ip,#4
             p1arg

; First preserve x in d2 across calls to ln
; and form tmp=x+gamma+1/2 in d3
; then tmp=(x+1/2)*ln(tmp)-tmp gives ln of first two terms

             vmov.F64 d2,d0          ; x in d2 remains untouched by calls to ln
             adr r2,gamma
             vldr.64 d5,[r2]
             vadd.F64 d3,d2,d5       ; tmp=x+gamma+1/2 in d3
             vmov.F64 d0,d3
             pcall
             bl vfp_ln
             vmov.F64 d5,#0.5
             vadd.F64 d4,d2,d5       ; x+1/2
             vnmls.F64 d3,d4,d0      ; tmp=(x+1/2)*ln(tmp)-tmp

; call gamser to evaluate the series of correction terms
; and calcuate sqrt(2*pi)*series/x

             bl gamser
             adr r2,root2pi
             vldr.64 d5,[r2]
             vmul.F64 d0,d0,d5
             vdiv.F64 d2,d0,d2
             vmov.F64 d0,d3
             pcall
             bl vfp_exp

; The final result is given by multiplying exp(tmp) and series

             vmul.F64 d0,d0,d2
             pres
             ldmdb fp,{fp,sp,pc}

; Evaluates ln gamma(x) using approximation due to Lanczos
; similar to the above except using tmp and ln(series)

             EXPORT vfp_gammln
vfp_gammln   mov ip,sp
             stmdb sp!,{fp,ip,lr,pc}
             sub fp,ip,#4
             p1arg

; First preserve x in d2 across calls to ln
; and form tmp=x+gamma+1/2 in d3
; then tmp=(x+1/2)*ln(tmp)-tmp gives ln of first two terms

             vmov.F64 d2,d0          ; x in d2 remains untouched by calls to ln
             adr r2,gamma
             vldr.64 d5,[r2]
             vadd.F64 d3,d2,d5       ; tmp=x+gamma+1/2 in d3
             vmov.F64 d0,d3
             pcall
             bl vfp_ln
             vmov.F64 d5,#0.5
             vadd.F64 d4,d2,d5       ; x+1/2
             vnmls.F64 d3,d4,d0      ; tmp=(x+1/2)*ln(tmp)-tmp

; call gamser to evaluate the series of correction terms
; and calcuate sqrt(2*pi)*series/x

             bl gamser
             adr r2,root2pi
             vldr.64 d5,[r2]
             vmul.F64 d0,d0,d5
             vdiv.F64 d0,d0,d2
             pcall
             bl vfp_ln

; The final result is given by adding tmp and ln(series)

             vadd.F64 d0,d3,d0
             pres
             ldmdb fp,{fp,sp,pc}
gamma        DCFD 5.2421875
root2pi      DCFD 2.5066282743610005

; Evaluates the series c0+c1/(x+1)+... as part of gamma function
; Assumes x is in d2 and returns result in d0

gamser       vmov.F64 d5,#1.0
             vadd.F64 d4,d2,d5
             adr r2,gamcoeff
             add r3,r2,#120
             vldr.64 d0,[r2],#8
10           vldr.64 d6,[r2],#8
             vdiv.F64 d6,d6,d4
             vadd.F64 d0,d0,d6
             vadd.F64 d4,d4,d5
             cmp r2,r3
             blt %BT10
             mov pc,r14

gamcoeff     DCFD 0.999999999999997092
             DCFD 57.1562356658629235
             DCFD -59.5979603554754912
             DCFD 14.1360979747417471
             DCFD -0.491913816097620199
             DCFD 0.339946499848118887E-4
             DCFD 0.465236289270485756E-4
             DCFD -0.983744753048795646E-4
             DCFD 0.158088703224912494E-3
             DCFD -0.210264441724104883E-3
             DCFD 0.217439618115212643E-3
             DCFD -0.164318016536763890E-3
             DCFD 0.844182239838527433E-4
             DCFD -0.261908384015814087E-4
             DCFD 0.368991826595316234E-5

; main procedure for error function. The error function is
; related to the incomplete gamma function.
; erf(x)=P(1/2, x^2). Uses either igamser or igamcf directly.

             EXPORT vfp_erf
vfp_erf      mov ip,sp
             stmdb sp!,{fp,ip,lr,pc}
             sub fp,ip,#4
             p1arg
             vmul.F64 d1,d0,d0
             vmov.F64 d0,#0.5
             vmov.F64 d2,#1.5
             vcmp.F64 d1,d2
             vmrs apsr_nzcv,fpscr
             bge %FT10
             bl igamser             ; if x^2<a+1 use power series
             pres
             ldmdb fp,{fp,sp,pc}
10           bl igamcf
             vmov.F64 d1,#1.0
             vsub.F64 d0,d1,d0      ; if x^2>=a+1 use continued fraction
             pres
             ldmdb fp,{fp,sp,pc}

             EXPORT vfp_erfc
vfp_erfc     mov ip,sp
             stmdb sp!,{fp,ip,lr,pc}
             sub fp,ip,#4
             p1arg
             vmul.F64 d1,d0,d0
             vmov.F64 d0,#0.5
             vmov.F64 d2,#1.5
             vcmp.F64 d1,d2
             vmrs apsr_nzcv,fpscr
             bge %FT10
             bl igamser             ; if x^2<a+1 use power series
             vmov.F64 d1,#1.0
             vsub.F64 d0,d1,d0
             pres
             ldmdb fp,{fp,sp,pc}
10           bl igamcf              ; if x^2>=a+1 use continued fraction
             pres
             ldmdb fp,{fp,sp,pc}

; main procedure for incomplete gamma function. Uses either
; igamser or igamcf depending on value of x and a.

             EXPORT vfp_igamp
vfp_igamp    mov ip,sp
             stmdb sp!,{fp,ip,lr,pc}
             sub fp,ip,#4
             p2arg
             p1arg
             vmov.F64 d2,#1.0
             vadd.F64 d2,d0,d2
             vcmp.F64 d1,d2
             vmrs apsr_nzcv,fpscr
             bge %FT10
             bl igamser             ; if x<a+1 use power series
             pres
             ldmdb fp,{fp,sp,pc}
10           bl igamcf
             vmov.F64 d1,#1.0
             vsub.F64 d0,d1,d0      ; if x>=a+1 use continued fraction
             pres
             ldmdb fp,{fp,sp,pc}

             EXPORT vfp_igamq
vfp_igamq    mov ip,sp
             stmdb sp!,{fp,ip,lr,pc}
             sub fp,ip,#4
             p2arg
             p1arg
             vmov.F64 d2,#1.0
             vadd.F64 d2,d0,d2
             vcmp.F64 d1,d2
             vmrs apsr_nzcv,fpscr
             bge %FT10
             bl igamser             ; if x<a+1 use power series
             vmov.F64 d1,#1.0
             vsub.F64 d0,d1,d0
             pres
             ldmdb fp,{fp,sp,pc}
10           bl igamcf              ; if x>=a+1 use continued fraction
             pres
             ldmdb fp,{fp,sp,pc}


; Incomplete gamma function P(a,x) using a power series
; converges well for x < a+1, used by igamp above.
; Enter with a and x in d0 and d1 directly.

igamser      mov ip,sp
             stmdb sp!,{fp,ip,lr,pc}
             sub fp,ip,#4
             vstmdb.F64 sp!,{d8,d9}

; First preserve a in d8 and x in d9, then find gammln a

             vmov.F64 d8,d0
             vmov.F64 d9,d1
             pcall
             bl vfp_gammln

; Set up constants, d5=1 and d2=epsilon. Then sum=delta=1/a
; Keep sum in d4 and delta in d6 and ap in d7

             vmov.F64 d5,#1.0
             adr r2,epsilon
             vldr.64 d2,[r2]
             vdiv.F64 d4,d5,d8
             vmov.F64 d6,d4
             vadd.F64 d7,d8,d5

; On each iteration delta=delta*(x/ap) and sum=sum+delta
; d1 and d3 are used as temporary registers

10           vdiv.F64 d1,d9,d7
             vmul.F64 d6,d6,d1
             vadd.F64 d4,d4,d6
             vabs.F64 d1,d6
             vabs.F64 d3,d4
             vmul.F64 d3,d3,d2

; finish when delta < sum*epsilon, otherwise increment ap
; and go back for another iteration

             vcmp.F64 d1,d3
             vmrs apsr_nzcv,fpscr
             blt %FT20
             vadd.F64 d7,d7,d5
             b %BT10

; preserve gln and sum in d2 and d3 respectively
; then return sum*exp(-x + a*ln x - gln)

20           vmov.F64 d2,d0
             vmov.F64 d3,d4
             vmov.F64 d0,d9
             pcall
             bl vfp_ln
             vmul.F64 d0,d0,d8
             vsub.F64 d0,d0,d9
             vsub.F64 d0,d0,d2
             pcall
             bl vfp_exp
             vmul.F64 d0,d3,d0
             vldmia.F64 sp!,{d8,d9}
             ldmdb fp,{fp,sp,pc}

epsilon      DCFD 1E-16

; Incomplete gamma function Q(a,x) using a continued fraction
; used by igamp above, for x > a+1.
; Enter with a and x in d0 and d1 directly.

igamcf       mov ip,sp
             stmdb sp!,{fp,ip,lr,pc}
             sub fp,ip,#4
             vstmdb.F64 sp!,{d8-d11}

; First preserve a in d8 and x in d9, then find gammln a

             vmov.F64 d8,d0
             vmov.F64 d9,d1
             pcall
             bl vfp_gammln

; Then initialise some variables: b=x+1-a in d1, d=1/b in d2
; c=1/fpmin in d10 and h=d in d11. The constant fpmin is the
; smallest meaningful dp. value kept in d7

             vmov.F64 d5,#1.0
             vadd.F64 d1,d9,d5
             vsub.F64 d1,d1,d8
             vdiv.F64 d2,d5,d1
             adr r2,fpmin
             vldr.64 d7,[r2]
             vdiv.F64 d10,d5,d7
             vmov.F64 d11,d2

; For each iteration, first find the numerator as n(n-a),
; increase b by 2 and the denominator as an*d+b:

             mov r2,#0
10           add r2,r2,#1
             vmov.F64 d5,#2.0
             vadd.F64 d1,d1,d5
             vmov s6,r2
             vcvt.F64.S32 d4,s6
             vneg.F64 d6,d4
             vsub.F64 d4,d4,d8
             vmul.F64 d4,d6,d4
             vmul.F64 d2,d4,d2
             vadd.F64 d2,d2,d1
             vabs.F64 d3,d2
             vcmp.F64 d3,d7
             vmrs apsr_nzcv,fpscr
             vmovlt.F64 d2,d7         ; if abs(d) < fpmin then d=fpmin
             vdiv.F64 d3,d4,d10
             vadd.F64 d10,d1,d3       ; c=b+an/c
             vabs.F64 d3,d10
             vcmp.F64 d3,d7
             vmrs apsr_nzcv,fpscr
             vmovlt.F64 d10,d7        ; if abs(c) < fpmin then c=fpmin

; get ready to multiply by the inverse of the next term,
; set d=1/d, del=d*c. Then the fraction so far is h*del:

             vmov.F64 d5,#1.0
             vdiv.F64 d2,d5,d2        ; d=1/d
             vmul.F64 d3,d2,d10
             vmul.F64 d11,d11,d3
             vsub.F64 d3,d3,d5

; finish when del is sufficiently close to 1:

             vabs.F64 d3,d3
             adr r3,epsilon
             vldr.64 d5,[r3]
             vcmp.F64 d3,d5
             vmrs apsr_nzcv,fpscr
             bgt %BT10

; preserve gln in d2
; then return h*exp(-x + a*ln x - gln)

             vmov.F64 d2,d0
             vmov.F64 d0,d9
             pcall
             bl vfp_ln
             vmul.F64 d0,d0,d8
             vsub.F64 d0,d0,d9
             vsub.F64 d0,d0,d2
             pcall
             bl vfp_exp
             vmul.F64 d0,d0,d11
             vldmia.F64 sp!,{d8-d11}
             ldmdb fp,{fp,sp,pc}

fpmin        DCFD 1E-290

             END
