Theory Central_Limit_Theorem

(*  Title:     HOL/Probability/Central_Limit_Theorem.thy
    Authors:   Jeremy Avigad (CMU), Luke Serafin (CMU)
*)

section ‹The Central Limit Theorem›

theory Central_Limit_Theorem
  imports Levy
begin

theorem (in prob_space) central_limit_theorem_zero_mean:
  fixes X :: "nat  'a  real"
    and μ :: "real measure"
    and σ :: real
    and S :: "nat  'a  real"
  assumes X_indep: "indep_vars (λi. borel) X UNIV"
    and X_mean_0: "n. expectation (X n) = 0"
    and σ_pos: "σ > 0"
    and X_square_integrable: "n. integrable M (λx. (X n x)2)"
    and X_variance: "n. variance (X n) = σ2"
    and X_distrib: "n. distr M borel (X n) = μ"
  defines "S n  λx. i<n. X i x"
  shows "weak_conv_m (λn. distr M borel (λx. S n x / sqrt (n * σ2))) std_normal_distribution"
proof -
  let ?S' = "λn x. S n x / sqrt (real n * σ2)"
  define φ where "φ n = char (distr M borel (?S' n))" for n
  define ψ where "ψ n t = char μ (t / sqrt (σ2 * n))" for n t

  have X_rv [simp, measurable]: "random_variable borel (X n)" for n
    using X_indep unfolding indep_vars_def2 by simp
  have X_integrable [simp, intro]: "integrable M (X n)" for n
    by (rule square_integrable_imp_integrable[OF _ X_square_integrable]) simp_all
  interpret μ: real_distribution μ
    by (subst X_distrib [symmetric, of 0], rule real_distribution_distr, simp)

  (* these are equivalent to the hypotheses on X, given X_distr *)
  have μ_integrable [simp]: "integrable μ (λx. x)"
    and μ_mean_integrable [simp]: "μ.expectation (λx. x) = 0"
    and μ_square_integrable [simp]: "integrable μ (λx. x^2)"
    and μ_variance [simp]: "μ.expectation (λx. x^2) = σ2"
    using assms by (simp_all add: X_distrib [symmetric, of 0] integrable_distr_eq integral_distr)

  have main: "F n in sequentially.
      cmod (φ n t - (1 + (-(t^2) / 2) / n)^n) 
      t2 / (6 * σ2) * (LINT x|μ. min (6 * x2) (¦t / sqrt (σ2 * n)¦ * ¦x¦ ^ 3))" for t
  proof (rule eventually_sequentiallyI)
    fix n :: nat
    assume "n  nat (ceiling (t^2 / 4))"
    hence n: "n  t^2 / 4" by (subst nat_ceiling_le_eq [symmetric])
    let ?t = "t / sqrt (σ2 * n)"

    define ψ' where "ψ' n i = char (distr M borel (λx. X i x / sqrt (σ2 * n)))" for n i
    have *: "n i t. ψ' n i t = ψ n t"
      unfolding ψ_def ψ'_def char_def
      by (subst X_distrib [symmetric]) (auto simp: integral_distr)

    have "φ n t = char (distr M borel (λx. i<n. X i x / sqrt (σ2 * real n))) t"
      by (auto simp: φ_def S_def sum_divide_distrib ac_simps)
    also have " = ( i < n. ψ' n i t)"
      unfolding ψ'_def
      apply (rule char_distr_sum)
      apply (rule indep_vars_compose2[where X=X])
      apply (rule indep_vars_subset)
      apply (rule X_indep)
      apply auto
      done
    also have " = (ψ n t)^n"
      by (auto simp add: * prod_constant)
    finally have φ_eq: "φ n t =(ψ n t)^n" .

    have "norm (ψ n t - (1 - ?t^2 * σ2 / 2))  ?t2 / 6 * (LINT x|μ. min (6 * x2) (¦?t¦ * ¦x¦ ^ 3))"
      unfolding ψ_def by (rule μ.char_approx3, auto)
    also have "?t^2 * σ2 = t^2 / n"
      using σ_pos by (simp add: power_divide)
    also have "t^2 / n / 2 = (t^2 / 2) / n"
      by simp
    finally have **: "norm (ψ n t - (1 + (-(t^2) / 2) / n)) 
      ?t2 / 6 * (LINT x|μ. min (6 * x2) (¦?t¦ * ¦x¦ ^ 3))" by simp

    have "norm (φ n t - (complex_of_real (1 + (-(t^2) / 2) / n))^n) 
         n * norm (ψ n t - (complex_of_real (1 + (-(t^2) / 2) / n)))"
      using n
      by (auto intro!: norm_power_diff μ.cmod_char_le_1 abs_leI
               simp del: of_real_diff simp: of_real_diff[symmetric] divide_le_eq φ_eq ψ_def)
    also have "  n * (?t2 / 6 * (LINT x|μ. min (6 * x2) (¦?t¦ * ¦x¦ ^ 3)))"
      by (rule mult_left_mono [OF **], simp)
    also have " = (t2 / (6 * σ2) * (LINT x|μ. min (6 * x2) (¦?t¦ * ¦x¦ ^ 3)))"
      using σ_pos by (simp add: field_simps min_absorb2)
    finally show "norm (φ n t - (1 + (-(t^2) / 2) / n)^n) 
        (t2 / (6 * σ2) * (LINT x|μ. min (6 * x2) (¦?t¦ * ¦x¦ ^ 3)))"
      by simp
  qed

  show ?thesis
  proof (rule levy_continuity)
    fix t
    let ?t = "λn. t / sqrt (σ2 * n)"
    have "x. (λn. min (6 * x2) (¦t¦ * ¦x¦ ^ 3 / ¦sqrt (σ2 * real n)¦))  0"
      using σ_pos
      by (auto simp: real_sqrt_mult min_absorb2
               intro!: tendsto_min[THEN tendsto_eq_rhs] sqrt_at_top[THEN filterlim_compose]
                       filterlim_tendsto_pos_mult_at_top filterlim_at_top_imp_at_infinity
                       tendsto_divide_0 filterlim_real_sequentially)
    then have "(λn. LINT x|μ. min (6 * x2) (¦?t n¦ * ¦x¦ ^ 3))  (LINT x|μ. 0)"
      by (intro integral_dominated_convergence [where w = "λx. 6 * x^2"]) auto
    then have *: "(λn. t2 / (6 * σ2) * (LINT x|μ. min (6 * x2) (¦?t n¦ * ¦x¦ ^ 3)))  0"
      by (simp only: integral_zero tendsto_mult_right_zero)

    have "(λn. complex_of_real ((1 + (-(t^2) / 2) / n)^n))  complex_of_real (exp (-(t^2) / 2))"
      by (rule isCont_tendsto_compose [OF _ tendsto_exp_limit_sequentially]) auto
    then have "(λn. φ n t)  complex_of_real (exp (-(t^2) / 2))"
      by (rule Lim_transform) (rule Lim_null_comparison [OF main *])
    then show "(λn. char (distr M borel (?S' n)) t)  char std_normal_distribution t"
      by (simp add: φ_def char_std_normal_distribution)
  qed (auto intro!: real_dist_normal_dist simp: S_def)
qed

theorem (in prob_space) central_limit_theorem:
  fixes X :: "nat  'a  real"
    and μ :: "real measure"
    and σ :: real
    and S :: "nat  'a  real"
  assumes X_indep: "indep_vars (λi. borel) X UNIV"
    and X_mean: "n. expectation (X n) = m"
    and σ_pos: "σ > 0"
    and X_square_integrable: "n. integrable M (λx. (X n x)2)"
    and X_variance: "n. variance (X n) = σ2"
    and X_distrib: "n. distr M borel (X n) = μ"
  defines "X' i x  X i x - m"
  shows "weak_conv_m (λn. distr M borel (λx. (i<n. X' i x) / sqrt (n*σ2))) std_normal_distribution"
proof (intro central_limit_theorem_zero_mean)
  show "indep_vars (λi. borel) X' UNIV"
    unfolding X'_def[abs_def] using X_indep by (rule indep_vars_compose2) auto
  have X_rv [simp, measurable]: "random_variable borel (X n)" for n
    using X_indep unfolding indep_vars_def2 by simp
  have X_integrable [simp, intro]: "integrable M (X n)" for n
    by (rule square_integrable_imp_integrable[OF _ X_square_integrable]) simp_all
  show "expectation (X' n) = 0" for n
    using X_mean by (auto simp: X'_def[abs_def] prob_space)
  show "σ > 0" "integrable M (λx. (X' n x)2)" "variance (X' n) = σ2" for n
    using 0 < σ X_integrable X_mean X_square_integrable X_variance unfolding X'_def
    by (auto simp: prob_space power2_diff)
  show "distr M borel (X' n) = distr μ borel (λx. x - m)" for n
    unfolding X_distrib[of n, symmetric] using X_integrable
    by (subst distr_distr) (auto simp: X'_def[abs_def] comp_def)
qed

end