Theory Eratosthenes

(*  Title:      HOL/Number_Theory/Eratosthenes.thy
    Author:     Florian Haftmann, TU Muenchen
*)

section ‹The sieve of Eratosthenes›

theory Eratosthenes
  imports Main "HOL-Computational_Algebra.Primes"
begin


subsection ‹Preliminary: strict divisibility›

context dvd
begin

abbreviation dvd_strict :: "'a  'a  bool" (infixl "dvd'_strict" 50)
where
  "b dvd_strict a  b dvd a  ¬ a dvd b"

end


subsection ‹Main corpus›

text ‹The sieve is modelled as a list of booleans, where constFalse means \emph{marked out}.›

type_synonym marks = "bool list"

definition numbers_of_marks :: "nat  marks  nat set"
where
  "numbers_of_marks n bs = fst ` {x  set (enumerate n bs). snd x}"

lemma numbers_of_marks_simps [simp, code]:
  "numbers_of_marks n [] = {}"
  "numbers_of_marks n (True # bs) = insert n (numbers_of_marks (Suc n) bs)"
  "numbers_of_marks n (False # bs) = numbers_of_marks (Suc n) bs"
  by (auto simp add: numbers_of_marks_def intro!: image_eqI)

lemma numbers_of_marks_Suc:
  "numbers_of_marks (Suc n) bs = Suc ` numbers_of_marks n bs"
  by (auto simp add: numbers_of_marks_def enumerate_Suc_eq image_iff Bex_def)

lemma numbers_of_marks_replicate_False [simp]:
  "numbers_of_marks n (replicate m False) = {}"
  by (auto simp add: numbers_of_marks_def enumerate_replicate_eq)

lemma numbers_of_marks_replicate_True [simp]:
  "numbers_of_marks n (replicate m True) = {n..<n+m}"
  by (auto simp add: numbers_of_marks_def enumerate_replicate_eq image_def)

lemma in_numbers_of_marks_eq:
  "m  numbers_of_marks n bs  m  {n..<n + length bs}  bs ! (m - n)"
  by (simp add: numbers_of_marks_def in_set_enumerate_eq image_iff add.commute)

lemma sorted_list_of_set_numbers_of_marks:
  "sorted_list_of_set (numbers_of_marks n bs) = map fst (filter snd (enumerate n bs))"
  by (auto simp add: numbers_of_marks_def distinct_map
    intro!: sorted_filter distinct_filter inj_onI sorted_distinct_set_unique)


text ‹Marking out multiples in a sieve›

definition mark_out :: "nat  marks  marks"
where
  "mark_out n bs = map (λ(q, b). b  ¬ Suc n dvd Suc (Suc q)) (enumerate n bs)"

lemma mark_out_Nil [simp]: "mark_out n [] = []"
  by (simp add: mark_out_def)

lemma length_mark_out [simp]: "length (mark_out n bs) = length bs"
  by (simp add: mark_out_def)

lemma numbers_of_marks_mark_out:
    "numbers_of_marks n (mark_out m bs) = {q  numbers_of_marks n bs. ¬ Suc m dvd Suc q - n}"
  by (auto simp add: numbers_of_marks_def mark_out_def in_set_enumerate_eq image_iff
    nth_enumerate_eq less_eq_dvd_minus)


text ‹Auxiliary operation for efficient implementation›

definition mark_out_aux :: "nat  nat  marks  marks"
where
  "mark_out_aux n m bs =
    map (λ(q, b). b  (q < m + n  ¬ Suc n dvd Suc (Suc q) + (n - m mod Suc n))) (enumerate n bs)"

lemma mark_out_code [code]: "mark_out n bs = mark_out_aux n n bs"
proof -
  have aux: False
    if A: "Suc n dvd Suc (Suc a)"
    and B: "a < n + n"
    and C: "n  a"
    for a
  proof (cases "n = 0")
    case True
    with A B C show ?thesis by simp
  next
    case False
    define m where "m = Suc n"
    then have "m > 0" by simp
    from False have "n > 0" by simp
    from A obtain q where q: "Suc (Suc a) = Suc n * q" by (rule dvdE)
    have "q > 0"
    proof (rule ccontr)
      assume "¬ q > 0"
      with q show False by simp
    qed
    with n > 0 have "Suc n * q  2" by (auto simp add: gr0_conv_Suc)
    with q have a: "a = Suc n * q - 2" by simp
    with B have "q + n * q < n + n + 2" by auto
    then have "m * q < m * 2" by (simp add: m_def)
    with m > 0 have "q < 2" by simp
    with q > 0 have "q = 1" by simp
    with a have "a = n - 1" by simp
    with n > 0 C show False by simp
  qed
  show ?thesis
    by (auto simp add: mark_out_def mark_out_aux_def in_set_enumerate_eq intro: aux)
qed

lemma mark_out_aux_simps [simp, code]:
  "mark_out_aux n m [] = []"
  "mark_out_aux n 0 (b # bs) = False # mark_out_aux n n bs"
  "mark_out_aux n (Suc m) (b # bs) = b # mark_out_aux n m bs"
proof goal_cases
  case 1
  show ?case
    by (simp add: mark_out_aux_def)
next
  case 2
  show ?case
    by (auto simp add: mark_out_code [symmetric] mark_out_aux_def mark_out_def
      enumerate_Suc_eq in_set_enumerate_eq less_eq_dvd_minus)
next
  case 3
  { define v where "v = Suc m"
    define w where "w = Suc n"
    fix q
    assume "m + n  q"
    then obtain r where q: "q = m + n + r" by (auto simp add: le_iff_add)
    { fix u
      from w_def have "u mod w < w" by simp
      then have "u + (w - u mod w) = w + (u - u mod w)"
        by simp
      then have "u + (w - u mod w) = w + u div w * w"
        by (simp add: minus_mod_eq_div_mult)
    }
    then have "w dvd v + w + r + (w - v mod w)  w dvd m + w + r + (w - m mod w)"
      by (simp add: add.assoc add.left_commute [of m] add.left_commute [of v]
        dvd_add_left_iff dvd_add_right_iff)
    moreover from q have "Suc q = m + w + r" by (simp add: w_def)
    moreover from q have "Suc (Suc q) = v + w + r" by (simp add: v_def w_def)
    ultimately have "w dvd Suc (Suc (q + (w - v mod w)))  w dvd Suc (q + (w - m mod w))"
      by (simp only: add_Suc [symmetric])
    then have "Suc n dvd Suc (Suc (Suc (q + n) - Suc m mod Suc n)) 
      Suc n dvd Suc (Suc (q + n - m mod Suc n))"
      by (simp add: v_def w_def Suc_diff_le trans_le_add2)
  }
  then show ?case
    by (auto simp add: mark_out_aux_def
      enumerate_Suc_eq in_set_enumerate_eq not_less)
qed


text ‹Main entry point to sieve›

fun sieve :: "nat  marks  marks"
where
  "sieve n [] = []"
| "sieve n (False # bs) = False # sieve (Suc n) bs"
| "sieve n (True # bs) = True # sieve (Suc n) (mark_out n bs)"

text ‹
  There are the following possible optimisations here:

  \begin{itemize}

    \item constsieve can abort as soon as termn is too big to let
      constmark_out have any effect.

    \item Search for further primes can be given up as soon as the search
      position exceeds the square root of the maximum candidate.

  \end{itemize}

  This is left as an constructive exercise to the reader.
›

lemma numbers_of_marks_sieve:
  "numbers_of_marks (Suc n) (sieve n bs) =
    {q  numbers_of_marks (Suc n) bs. m  numbers_of_marks (Suc n) bs. ¬ m dvd_strict q}"
proof (induct n bs rule: sieve.induct)
  case 1
  show ?case by simp
next
  case 2
  then show ?case by simp
next
  case (3 n bs)
  have aux: "n  Suc ` M  n > 0  n - 1  M" (is "?lhs  ?rhs") for M n
  proof
    show ?rhs if ?lhs using that by auto
    show ?lhs if ?rhs
    proof -
      from that have "n > 0" and "n - 1  M" by auto
      then have "Suc (n - 1)  Suc ` M" by blast
      with n > 0 show "n  Suc ` M" by simp
    qed
  qed
  have aux1: False if "Suc (Suc n)  m" and "m dvd Suc n" for m :: nat
  proof -
    from m dvd Suc n obtain q where "Suc n = m * q" ..
    with Suc (Suc n)  m have "Suc (m * q)  m" by simp
    then have "m * q < m" by arith
    then have "q = 0" by simp
    with Suc n = m * q show ?thesis by simp
  qed
  have aux2: "m dvd q"
    if 1: "q>0. 1 < q  Suc n < q  q  Suc (n + length bs) 
      bs ! (q - Suc (Suc n))  ¬ Suc n dvd q  q dvd m  m dvd q"
    and 2: "¬ Suc n dvd m" "q dvd m"
    and 3: "Suc n < q" "q  Suc (n + length bs)" "bs ! (q - Suc (Suc n))"
    for m q :: nat
  proof -
    from 1 have *: "q. Suc n < q  q  Suc (n + length bs) 
      bs ! (q - Suc (Suc n))  ¬ Suc n dvd q  q dvd m  m dvd q"
      by auto
    from 2 have "¬ Suc n dvd q" by (auto elim: dvdE)
    moreover note 3
    moreover note q dvd m
    ultimately show ?thesis by (auto intro: *)
  qed
  from 3 show ?case
    apply (simp_all add: numbers_of_marks_mark_out numbers_of_marks_Suc Compr_image_eq
      inj_image_eq_iff in_numbers_of_marks_eq Ball_def imp_conjL aux)
    apply safe
    apply (simp_all add: less_diff_conv2 le_diff_conv2 dvd_minus_self not_less)
    apply (clarsimp dest!: aux1)
    apply (simp add: Suc_le_eq less_Suc_eq_le)
    apply (rule aux2)
    apply (clarsimp dest!: aux1)+
    done
qed


text ‹Relation of the sieve algorithm to actual primes›

definition primes_upto :: "nat  nat list"
where
  "primes_upto n = sorted_list_of_set {m. m  n  prime m}"

lemma set_primes_upto: "set (primes_upto n) = {m. m  n  prime m}"
  by (simp add: primes_upto_def)

lemma sorted_primes_upto [iff]: "sorted (primes_upto n)"
  by (simp add: primes_upto_def)

lemma distinct_primes_upto [iff]: "distinct (primes_upto n)"
  by (simp add: primes_upto_def)

lemma set_primes_upto_sieve:
  "set (primes_upto n) = numbers_of_marks 2 (sieve 1 (replicate (n - 1) True))"
proof -
  consider "n = 0  n = 1" | "n > 1" by arith
  then show ?thesis
  proof cases
    case 1
    then show ?thesis
      by (auto simp add: numbers_of_marks_sieve numeral_2_eq_2 set_primes_upto
        dest: prime_gt_Suc_0_nat)
  next
    case 2
    {
      fix m q
      assume "Suc (Suc 0)  q"
        and "q < Suc n"
        and "m dvd q"
      then have "m < Suc n" by (auto dest: dvd_imp_le)
      assume *: "m{Suc (Suc 0)..<Suc n}. m dvd q  q dvd m"
        and "m dvd q" and "m  1"
      have "m = q"
      proof (cases "m = 0")
        case True with m dvd q show ?thesis by simp
      next
        case False with m  1 have "Suc (Suc 0)  m" by arith
        with m < Suc n * m dvd q have "q dvd m" by simp
        with m dvd q show ?thesis by (simp add: dvd_antisym)
      qed
    }
    then have aux: "m q. Suc (Suc 0)  q 
      q < Suc n 
      m dvd q 
      m{Suc (Suc 0)..<Suc n}. m dvd q  q dvd m 
      m dvd q  m  q  m = 1" by auto
    from 2 show ?thesis
      apply (auto simp add: numbers_of_marks_sieve numeral_2_eq_2 set_primes_upto
        dest: prime_gt_Suc_0_nat)
      apply (metis One_nat_def Suc_le_eq less_not_refl prime_nat_iff)
      apply (metis One_nat_def Suc_le_eq aux prime_nat_iff)
      done
  qed
qed

lemma primes_upto_sieve [code]:
  "primes_upto n = map fst (filter snd (enumerate 2 (sieve 1 (replicate (n - 1) True))))"
proof -
  have "primes_upto n = sorted_list_of_set (numbers_of_marks 2 (sieve 1 (replicate (n - 1) True)))"
    apply (rule sorted_distinct_set_unique)
    apply (simp_all only: set_primes_upto_sieve numbers_of_marks_def)
    apply auto
    done
  then show ?thesis
    by (simp add: sorted_list_of_set_numbers_of_marks)
qed

lemma prime_in_primes_upto: "prime n  n  set (primes_upto n)"
  by (simp add: set_primes_upto)


subsection ‹Application: smallest prime beyond a certain number›

definition smallest_prime_beyond :: "nat  nat"
where
  "smallest_prime_beyond n = (LEAST p. prime p  p  n)"

lemma prime_smallest_prime_beyond [iff]: "prime (smallest_prime_beyond n)" (is ?P)
  and smallest_prime_beyond_le [iff]: "smallest_prime_beyond n  n" (is ?Q)
proof -
  let ?least = "LEAST p. prime p  p  n"
  from primes_infinite obtain q where "prime q  q  n"
    by (metis finite_nat_set_iff_bounded_le mem_Collect_eq nat_le_linear)
  then have "prime ?least  ?least  n"
    by (rule LeastI)
  then show ?P and ?Q
    by (simp_all add: smallest_prime_beyond_def)
qed

lemma smallest_prime_beyond_smallest: "prime p  p  n  smallest_prime_beyond n  p"
  by (simp only: smallest_prime_beyond_def) (auto intro: Least_le)

lemma smallest_prime_beyond_eq:
  "prime p  p  n  (q. prime q  q  n  q  p)  smallest_prime_beyond n = p"
  by (simp only: smallest_prime_beyond_def) (auto intro: Least_equality)

definition smallest_prime_between :: "nat  nat  nat option"
where
  "smallest_prime_between m n =
    (if (p. prime p  m  p  p  n) then Some (smallest_prime_beyond m) else None)"

lemma smallest_prime_between_None:
  "smallest_prime_between m n = None  (q. m  q  q  n  ¬ prime q)"
  by (auto simp add: smallest_prime_between_def)

lemma smallest_prime_betwen_Some:
  "smallest_prime_between m n = Some p  smallest_prime_beyond m = p  p  n"
  by (auto simp add: smallest_prime_between_def dest: smallest_prime_beyond_smallest [of _ m])

lemma [code]: "smallest_prime_between m n = List.find (λp. p  m) (primes_upto n)"
proof -
  have "List.find (λp. p  m) (primes_upto n) = Some (smallest_prime_beyond m)"
    if assms: "m  p" "prime p" "p  n" for p
  proof -
    define A where "A = {p. p  n  prime p  m  p}"
    from assms have "smallest_prime_beyond m  p"
      by (auto intro: smallest_prime_beyond_smallest)
    from this p  n have *: "smallest_prime_beyond m  n"
      by (rule order_trans)
    from assms have ex: "pn. prime p  m  p"
      by auto
    then have "finite A"
      by (auto simp add: A_def)
    with * have "Min A = smallest_prime_beyond m"
      by (auto simp add: A_def intro: Min_eqI smallest_prime_beyond_smallest)
    with ex sorted_primes_upto show ?thesis
      by (auto simp add: set_primes_upto sorted_find_Min A_def)
  qed
  then show ?thesis
    by (auto simp add: smallest_prime_between_def find_None_iff set_primes_upto
      intro!: sym [of _ None])
qed

definition smallest_prime_beyond_aux :: "nat  nat  nat"
where
  "smallest_prime_beyond_aux k n = smallest_prime_beyond n"

lemma [code]:
  "smallest_prime_beyond_aux k n =
    (case smallest_prime_between n (k * n) of
      Some p  p
    | None  smallest_prime_beyond_aux (Suc k) n)"
  by (simp add: smallest_prime_beyond_aux_def smallest_prime_betwen_Some split: option.split)

lemma [code]: "smallest_prime_beyond n = smallest_prime_beyond_aux 2 n"
  by (simp add: smallest_prime_beyond_aux_def)

end