# Theory Eratosthenes

(*  Title:      HOL/Number_Theory/Eratosthenes.thy
Author:     Florian Haftmann, TU Muenchen
*)

section ‹The sieve of Eratosthenes›

theory Eratosthenes
imports Main "HOL-Computational_Algebra.Primes"
begin

subsection ‹Preliminary: strict divisibility›

context dvd
begin

(* Strict divisibility: b divides a, but a does not divide b. *)
abbreviation dvd_strict :: "'a ⇒ 'a ⇒ bool"  (infixl "dvd'_strict" 50)
  where "b dvd_strict a ≡ b dvd a ∧ ¬ a dvd b"

end

subsection ‹Main corpus›

text ‹The sieve is modelled as a list of booleans, where \<^const>‹False› means \emph{marked out}.›

(* Sieve state: a plain list of booleans. *)
type_synonym marks = "bool list"

(* The set of positions, counted from n, at which bs holds True:
   the image under fst of those enumerated entries whose flag is set. *)
definition numbers_of_marks :: "nat ⇒ marks ⇒ nat set"
  where "numbers_of_marks n bs = fst ` {x ∈ set (enumerate n bs). snd x}"

(* Characterisation of numbers_of_marks by recursion over the list;
   registered both as simp rules and as code equations. *)
lemma numbers_of_marks_simps [simp, code]:
  "numbers_of_marks n [] = {}"
  "numbers_of_marks n (True # bs) = insert n (numbers_of_marks (Suc n) bs)"
  "numbers_of_marks n (False # bs) = numbers_of_marks (Suc n) bs"
  by (auto simp add: numbers_of_marks_def intro!: image_eqI)

(* Shifting the start index by one shifts every marked number by one. *)
lemma numbers_of_marks_Suc:
  "numbers_of_marks (Suc n) bs = Suc ` numbers_of_marks n bs"
  by (auto simp add: numbers_of_marks_def enumerate_Suc_eq image_iff Bex_def)

(* A list consisting only of False marks no number at all. *)
lemma numbers_of_marks_replicate_False [simp]:
  "numbers_of_marks n (replicate m False) = {}"
  by (auto simp add: numbers_of_marks_def enumerate_replicate_eq)

(* A list of m times True marks exactly the half-open interval from n to n + m. *)
lemma numbers_of_marks_replicate_True [simp]:
  "numbers_of_marks n (replicate m True) = {n..<n+m}"
  by (auto simp add: numbers_of_marks_def enumerate_replicate_eq image_def)

(* Membership test: m is marked iff it lies in the index range covered by bs
   and the entry at offset m - n is True. *)
lemma in_numbers_of_marks_eq:
  "m ∈ numbers_of_marks n bs ⟷ m ∈ {n..<n + length bs} ∧ bs ! (m - n)"
  by (simp add: numbers_of_marks_def in_set_enumerate_eq image_iff add.commute)

(* The sorted list of the marked numbers equals the list obtained by
   filtering the enumerated entries on their flag and projecting to the index. *)
lemma sorted_list_of_set_numbers_of_marks:
  "sorted_list_of_set (numbers_of_marks n bs) = map fst (filter snd (enumerate n bs))"
  by (auto simp add: numbers_of_marks_def distinct_map
    intro!: sorted_filter distinct_filter inj_onI sorted_distinct_set_unique)

text ‹Marking out multiples in a sieve›

(* mark_out n bs keeps an entry b at index q (indices counted from n)
   only if b holds and Suc n does not divide Suc (Suc q). *)
definition mark_out :: "nat ⇒ marks ⇒ marks"
  where "mark_out n bs = map (λ(q, b). b ∧ ¬ Suc n dvd Suc (Suc q)) (enumerate n bs)"

(* Marking out in the empty sieve yields the empty sieve. *)
lemma mark_out_Nil [simp]: "mark_out n [] = []"
  by (simp add: mark_out_def)

(* Marking out never changes the length of the sieve. *)
lemma length_mark_out [simp]: "length (mark_out n bs) = length bs"
  by (simp add: mark_out_def)

(* Effect of mark_out on the set of marked numbers: it removes exactly those q
   for which Suc m divides Suc q - n. *)
lemma numbers_of_marks_mark_out:
  "numbers_of_marks n (mark_out m bs) = {q ∈ numbers_of_marks n bs. ¬ Suc m dvd Suc q - n}"
  by (auto simp add: numbers_of_marks_def mark_out_def in_set_enumerate_eq image_iff
    nth_enumerate_eq less_eq_dvd_minus)

text ‹Auxiliary operation for efficient implementation›

(* Variant of mark_out carrying an extra counter m: entries at indices below
   m + n are left as they are; beyond that the divisibility test is applied
   to Suc (Suc q) shifted by n - m mod Suc n. *)
definition mark_out_aux :: "nat ⇒ nat ⇒ marks ⇒ marks"
  where
    "mark_out_aux n m bs =
      map (λ(q, b). b ∧ (q < m + n ∨ ¬ Suc n dvd Suc (Suc q) + (n - m mod Suc n))) (enumerate n bs)"

(* Code equation: mark_out is mark_out_aux started with counter n. *)
lemma mark_out_code [code]: "mark_out n bs = mark_out_aux n n bs"
proof -
  (* No index a with n ≤ a < n + n has Suc n dividing Suc (Suc a). *)
  have aux: False
    if A: "Suc n dvd Suc (Suc a)"
      and B: "a < n + n"
      and C: "n ≤ a"
    for a
  proof (cases "n = 0")
    case True
    with A B C show ?thesis by simp
  next
    case False
    define m where "m = Suc n"
    then have "m > 0" by simp
    from False have "n > 0" by simp
    from A obtain q where q: "Suc (Suc a) = Suc n * q" by (rule dvdE)
    have "q > 0"
    proof (rule ccontr)
      assume "¬ q > 0"
      with q show False by simp
    qed
    with ‹n > 0› have "Suc n * q ≥ 2" by (auto simp add: gr0_conv_Suc)
    with q have a: "a = Suc n * q - 2" by simp
    with B have "q + n * q < n + n + 2" by auto
    then have "m * q < m * 2" by (simp add: m_def)
    with ‹m > 0› have "q < 2" by simp
    with ‹q > 0› have "q = 1" by simp
    with a have "a = n - 1" by simp
    with ‹n > 0› C show False by simp
  qed
  show ?thesis
    by (auto simp add: mark_out_def mark_out_aux_def in_set_enumerate_eq intro: aux)
qed

(* Recursive code equations for mark_out_aux: the counter m counts down along
   the list; when it reaches 0 the current entry is struck out and the counter
   restarts at n. *)
lemma mark_out_aux_simps [simp, code]:
  "mark_out_aux n m [] = []"
  "mark_out_aux n 0 (b # bs) = False # mark_out_aux n n bs"
  "mark_out_aux n (Suc m) (b # bs) = b # mark_out_aux n m bs"
proof goal_cases
  case 1
  show ?case
    by (simp add: mark_out_aux_def)
next
  case 2
  show ?case
    by (auto simp add: mark_out_code [symmetric] mark_out_aux_def mark_out_def
      enumerate_Suc_eq in_set_enumerate_eq less_eq_dvd_minus)
next
  case 3
  (* Key arithmetic fact: for indices q beyond m + n, the divisibility test is
     invariant under the simultaneous shift of counter and index by one. *)
  { define v where "v = Suc m"
    define w where "w = Suc n"
    fix q
    assume "m + n ≤ q"
    then obtain r where q: "q = m + n + r" by (auto simp add: le_iff_add)
    { fix u
      from w_def have "u mod w < w" by simp
      then have "u + (w - u mod w) = w + (u - u mod w)"
        by simp
      then have "u + (w - u mod w) = w + u div w * w"
        by (simp add: minus_mod_eq_div_mult)
    }
    then have "w dvd v + w + r + (w - v mod w) ⟷ w dvd m + w + r + (w - m mod w)"
      by (simp add: add.assoc add.left_commute [of m] add.left_commute [of v]
        dvd_add_left_iff dvd_add_right_iff)
    moreover from q have "Suc q = m + w + r" by (simp add: w_def)
    moreover from q have "Suc (Suc q) = v + w + r" by (simp add: v_def w_def)
    ultimately have "w dvd Suc (Suc (q + (w - v mod w))) ⟷ w dvd Suc (q + (w - m mod w))"
      by (simp only: add_Suc [symmetric])
    then have "Suc n dvd Suc (Suc (Suc (q + n) - Suc m mod Suc n)) ⟷
      Suc n dvd Suc (Suc (q + n - m mod Suc n))"
      by (simp add: v_def w_def Suc_diff_le trans_le_add2)
  }
  then show ?case
    by (auto simp add: mark_out_aux_def
      enumerate_Suc_eq in_set_enumerate_eq not_less)
qed

text ‹Main entry point to sieve›

(* sieve n bs walks the list with a running index n: a False entry is passed
   through unchanged; a True entry is kept and mark_out n is applied to the
   remaining list before continuing with Suc n. *)
fun sieve :: "nat ⇒ marks ⇒ marks"
  where
    "sieve n [] = []"
  | "sieve n (False # bs) = False # sieve (Suc n) bs"
  | "sieve n (True # bs) = True # sieve (Suc n) (mark_out n bs)"

text ‹
There are the following possible optimisations here:

\begin{itemize}

\item \<^const>‹sieve› can abort as soon as \<^term>‹n› is too big to let
\<^const>‹mark_out› have any effect.

\item Search for further primes can be given up as soon as the search
position exceeds the square root of the maximum candidate.

\end{itemize}

This is left as a constructive exercise to the reader.
›

(* Correctness of sieve on the level of marked numbers: the numbers surviving
   the sieve are exactly the marked numbers that have no strict divisor among
   the marked numbers. *)
lemma numbers_of_marks_sieve:
  "numbers_of_marks (Suc n) (sieve n bs) =
    {q ∈ numbers_of_marks (Suc n) bs. ∀m ∈ numbers_of_marks (Suc n) bs. ¬ m dvd_strict q}"
proof (induct n bs rule: sieve.induct)
  case 1
  show ?case by simp
next
  case 2
  then show ?case by simp
next
  case (3 n bs)
  (* Membership in a successor image, phrased via the predecessor. *)
  have aux: "n ∈ Suc ` M ⟷ n > 0 ∧ n - 1 ∈ M" (is "?lhs ⟷ ?rhs") for M n
  proof
    show ?rhs if ?lhs using that by auto
    show ?lhs if ?rhs
    proof -
      from that have "n > 0" and "n - 1 ∈ M" by auto
      then have "Suc (n - 1) ∈ Suc ` M" by blast
      with ‹n > 0› show "n ∈ Suc ` M" by simp
    qed
  qed
  (* A number of at least Suc (Suc n) cannot divide Suc n. *)
  have aux1: False if "Suc (Suc n) ≤ m" and "m dvd Suc n" for m :: nat
  proof -
    from ‹m dvd Suc n› obtain q where "Suc n = m * q" ..
    with ‹Suc (Suc n) ≤ m› have "Suc (m * q) ≤ m" by simp
    then have "m * q < m" by arith
    then have "q = 0" by simp
    with ‹Suc n = m * q› show ?thesis by simp
  qed
  have aux2: "m dvd q"
    if 1: "∀q>0. 1 < q ⟶ Suc n < q ⟶ q ≤ Suc (n + length bs) ⟶
      bs ! (q - Suc (Suc n)) ⟶ ¬ Suc n dvd q ⟶ q dvd m ⟶ m dvd q"
    and 2: "¬ Suc n dvd m" "q dvd m"
    and 3: "Suc n < q" "q ≤ Suc (n + length bs)" "bs ! (q - Suc (Suc n))"
    for m q :: nat
  proof -
    from 1 have *: "⋀q. Suc n < q ⟹ q ≤ Suc (n + length bs) ⟹
      bs ! (q - Suc (Suc n)) ⟹ ¬ Suc n dvd q ⟹ q dvd m ⟹ m dvd q"
      by auto
    from 2 have "¬ Suc n dvd q" by (auto elim: dvdE)
    moreover note 3
    moreover note ‹q dvd m›
    ultimately show ?thesis by (auto intro: *)
  qed
  from 3 show ?case
    apply (simp_all add: numbers_of_marks_mark_out numbers_of_marks_Suc Compr_image_eq
      inj_image_eq_iff in_numbers_of_marks_eq Ball_def imp_conjL aux)
    apply safe
    apply (simp_all add: less_diff_conv2 le_diff_conv2 dvd_minus_self not_less)
    apply (clarsimp dest!: aux1)
    apply (rule aux2)
    apply (clarsimp dest!: aux1)+
    done
qed

text ‹Relation of the sieve algorithm to actual primes›

(* The primes not exceeding n, as an ascending list. *)
definition primes_upto :: "nat ⇒ nat list"
  where "primes_upto n = sorted_list_of_set {m. m ≤ n ∧ prime m}"

(* The elements of primes_upto n are exactly the primes up to n. *)
lemma set_primes_upto: "set (primes_upto n) = {m. m ≤ n ∧ prime m}"
  by (simp add: primes_upto_def)

(* primes_upto always yields a sorted list. *)
lemma sorted_primes_upto [iff]: "sorted (primes_upto n)"
  by (simp add: primes_upto_def)

(* primes_upto never lists a number twice. *)
lemma distinct_primes_upto [iff]: "distinct (primes_upto n)"
  by (simp add: primes_upto_def)

(* The primes up to n are the numbers surviving the sieve started at index 1
   on n - 1 entries that are all True, read off from position 2. *)
lemma set_primes_upto_sieve:
  "set (primes_upto n) = numbers_of_marks 2 (sieve 1 (replicate (n - 1) True))"
proof -
  consider "n = 0 ∨ n = 1" | "n > 1" by arith
  then show ?thesis
  proof cases
    case 1
    then show ?thesis
      by (auto simp add: numbers_of_marks_sieve numeral_2_eq_2 set_primes_upto
        dest: prime_gt_Suc_0_nat)
  next
    case 2
    {
      fix m q
      assume "Suc (Suc 0) ≤ q"
        and "q < Suc n"
        and "m dvd q"
      then have "m < Suc n" by (auto dest: dvd_imp_le)
      assume *: "∀m∈{Suc (Suc 0)..<Suc n}. m dvd q ⟶ q dvd m"
        and "m dvd q" and "m ≠ 1"
      have "m = q"
      proof (cases "m = 0")
        case True with ‹m dvd q› show ?thesis by simp
      next
        case False with ‹m ≠ 1› have "Suc (Suc 0) ≤ m" by arith
        with ‹m < Suc n› * ‹m dvd q› have "q dvd m" by simp
        with ‹m dvd q› show ?thesis by (simp add: dvd_antisym)
      qed
    }
    then have aux: "⋀m q. Suc (Suc 0) ≤ q ⟹
      q < Suc n ⟹
      m dvd q ⟹
      ∀m∈{Suc (Suc 0)..<Suc n}. m dvd q ⟶ q dvd m ⟹
      m dvd q ⟹ m ≠ q ⟹ m = 1" by auto
    from 2 show ?thesis
      apply (auto simp add: numbers_of_marks_sieve numeral_2_eq_2 set_primes_upto
        dest: prime_gt_Suc_0_nat)
      apply (metis One_nat_def Suc_le_eq less_not_refl prime_nat_iff)
      apply (metis One_nat_def Suc_le_eq aux prime_nat_iff)
      done
  qed
qed

(* Code equation for primes_upto: run the sieve and read off the indices of
   the entries that are still True. *)
lemma primes_upto_sieve [code]:
  "primes_upto n = map fst (filter snd (enumerate 2 (sieve 1 (replicate (n - 1) True))))"
proof -
  have "primes_upto n = sorted_list_of_set (numbers_of_marks 2 (sieve 1 (replicate (n - 1) True)))"
    apply (rule sorted_distinct_set_unique)
    apply (simp_all only: set_primes_upto_sieve numbers_of_marks_def)
    apply auto
    done
  then show ?thesis
    by (simp add: sorted_list_of_set_numbers_of_marks)
qed

(* Primality of n expressed as membership of n in primes_upto n. *)
lemma prime_in_primes_upto: "prime n ⟷ n ∈ set (primes_upto n)"
  by (simp add: set_primes_upto)

subsection ‹Application: smallest prime beyond a certain number›

(* The least prime that is greater than or equal to n. *)
definition smallest_prime_beyond :: "nat ⇒ nat"
  where "smallest_prime_beyond n = (LEAST p. prime p ∧ p ≥ n)"

(* smallest_prime_beyond n is a prime and is at least n; the witness needed
   for LEAST comes from the infinitude of primes. *)
lemma prime_smallest_prime_beyond [iff]: "prime (smallest_prime_beyond n)" (is ?P)
  and smallest_prime_beyond_le [iff]: "smallest_prime_beyond n ≥ n" (is ?Q)
proof -
  let ?least = "LEAST p. prime p ∧ p ≥ n"
  from primes_infinite obtain q where "prime q ∧ q ≥ n"
    by (metis finite_nat_set_iff_bounded_le mem_Collect_eq nat_le_linear)
  then have "prime ?least ∧ ?least ≥ n"
    by (rule LeastI)
  then show ?P and ?Q
    by (simp_all add: smallest_prime_beyond_def)
qed

(* Minimality: every prime p ≥ n bounds smallest_prime_beyond n from above. *)
lemma smallest_prime_beyond_smallest:
  "prime p ⟹ p ≥ n ⟹ smallest_prime_beyond n ≤ p"
  by (simp only: smallest_prime_beyond_def) (auto intro: Least_le)

(* Uniqueness: a prime p ≥ n that is below every prime q ≥ n
   is the value of smallest_prime_beyond n. *)
lemma smallest_prime_beyond_eq:
  "prime p ⟹ p ≥ n ⟹ (⋀q. prime q ⟹ q ≥ n ⟹ q ≥ p) ⟹ smallest_prime_beyond n = p"
  by (simp only: smallest_prime_beyond_def) (auto intro: Least_equality)

(* Some (smallest_prime_beyond m) if a prime exists between m and n inclusive,
   None otherwise. *)
definition smallest_prime_between :: "nat ⇒ nat ⇒ nat option"
  where
    "smallest_prime_between m n =
      (if (∃p. prime p ∧ m ≤ p ∧ p ≤ n) then Some (smallest_prime_beyond m) else None)"

(* The result is None exactly when no number between m and n is prime. *)
lemma smallest_prime_between_None:
  "smallest_prime_between m n = None ⟷ (∀q. m ≤ q ∧ q ≤ n ⟶ ¬ prime q)"
  by (auto simp add: smallest_prime_between_def)

(* The result is Some p exactly when p is smallest_prime_beyond m and p ≤ n. *)
lemma smallest_prime_betwen_Some:
  "smallest_prime_between m n = Some p ⟷ smallest_prime_beyond m = p ∧ p ≤ n"
  by (auto simp add: smallest_prime_between_def dest: smallest_prime_beyond_smallest [of _ m])

(* Code equation for smallest_prime_between: search the list of primes up to n
   for the first one that is at least m. *)
lemma [code]: "smallest_prime_between m n = List.find (λp. p ≥ m) (primes_upto n)"
proof -
  have "List.find (λp. p ≥ m) (primes_upto n) = Some (smallest_prime_beyond m)"
    if assms: "m ≤ p" "prime p" "p ≤ n" for p
  proof -
    (* A collects the candidate primes in range; its minimum is the result. *)
    define A where "A = {p. p ≤ n ∧ prime p ∧ m ≤ p}"
    from assms have "smallest_prime_beyond m ≤ p"
      by (auto intro: smallest_prime_beyond_smallest)
    from this ‹p ≤ n› have *: "smallest_prime_beyond m ≤ n"
      by (rule order_trans)
    from assms have ex: "∃p≤n. prime p ∧ m ≤ p"
      by auto
    then have "finite A"
      by (auto simp add: A_def)
    with * have "Min A = smallest_prime_beyond m"
      by (auto simp add: A_def intro: Min_eqI smallest_prime_beyond_smallest)
    with ex sorted_primes_upto show ?thesis
      by (auto simp add: set_primes_upto sorted_find_Min A_def)
  qed
  then show ?thesis
    by (auto simp add: smallest_prime_between_def find_None_iff set_primes_upto
      intro!: sym [of _ None])
qed

(* Same value as smallest_prime_beyond n; the first argument k does not
   influence the result. *)
definition smallest_prime_beyond_aux :: "nat ⇒ nat ⇒ nat"
  where "smallest_prime_beyond_aux k n = smallest_prime_beyond n"

(* Code equation: look for a prime between n and k * n; if there is none,
   try again with the multiplier Suc k. *)
lemma [code]:
  "smallest_prime_beyond_aux k n =
    (case smallest_prime_between n (k * n) of
      Some p ⇒ p
    | None ⇒ smallest_prime_beyond_aux (Suc k) n)"
  by (simp add: smallest_prime_beyond_aux_def smallest_prime_betwen_Some split: option.split)

(* Code equation: compute smallest_prime_beyond via the auxiliary search,
   starting with multiplier 2. *)
lemma [code]: "smallest_prime_beyond n = smallest_prime_beyond_aux 2 n"
  by (simp add: smallest_prime_beyond_aux_def)

end