# Theory Sorting_Algorithms

```(*  Title:      HOL/Library/Sorting_Algorithms.thy
Author:     Florian Haftmann, TU Muenchen
*)

theory Sorting_Algorithms
imports Main Multiset Comparator
begin

section ‹Stably sorted lists›

abbreviation (input) stable_segment :: "'a comparator ⇒ 'a ⇒ 'a list ⇒ 'a list"
where "stable_segment cmp x ≡ filter (λy. compare cmp x y = Equiv)"

fun sorted :: "'a comparator ⇒ 'a list ⇒ bool"
where sorted_Nil: "sorted cmp [] ⟷ True"
| sorted_single: "sorted cmp [x] ⟷ True"
| sorted_rec: "sorted cmp (y # x # xs) ⟷ compare cmp y x ≠ Greater ∧ sorted cmp (x # xs)"

lemma sorted_ConsI:
"sorted cmp (x # xs)" if "sorted cmp xs"
and "⋀y ys. xs = y # ys ⟹ compare cmp x y ≠ Greater"
using that by (cases xs) simp_all

lemma sorted_Cons_imp_sorted:
"sorted cmp xs" if "sorted cmp (x # xs)"
using that by (cases xs) simp_all

lemma sorted_Cons_imp_not_less:
"compare cmp y x ≠ Greater" if "sorted cmp (y # xs)"
and "x ∈ set xs"
using that by (induction xs arbitrary: y) (auto dest: compare.trans_not_greater)

lemma sorted_induct [consumes 1, case_names Nil Cons, induct pred: sorted]:
"P xs" if "sorted cmp xs" and "P []"
and *: "⋀x xs. sorted cmp xs ⟹ P xs
⟹ (⋀y. y ∈ set xs ⟹ compare cmp x y ≠ Greater) ⟹ P (x # xs)"
using ‹sorted cmp xs› proof (induction xs)
case Nil
show ?case
by (rule ‹P []›)
next
case (Cons x xs)
from ‹sorted cmp (x # xs)› have "sorted cmp xs"
by (cases xs) simp_all
moreover have "P xs" using ‹sorted cmp xs›
by (rule Cons.IH)
moreover have "compare cmp x y ≠ Greater" if "y ∈ set xs" for y
using that ‹sorted cmp (x # xs)› proof (induction xs)
case Nil
then show ?case
by simp
next
case (Cons z zs)
then show ?case
proof (cases zs)
case Nil
with Cons.prems show ?thesis
by simp
next
case (Cons w ws)
with Cons.prems have "compare cmp z w ≠ Greater" "compare cmp x z ≠ Greater"
by auto
then have "compare cmp x w ≠ Greater"
by (auto dest: compare.trans_not_greater)
with Cons show ?thesis
using Cons.prems Cons.IH by auto
qed
qed
ultimately show ?case
by (rule *)
qed

lemma sorted_induct_remove1 [consumes 1, case_names Nil minimum]:
"P xs" if "sorted cmp xs" and "P []"
and *: "⋀x xs. sorted cmp xs ⟹ P (remove1 x xs)
⟹ x ∈ set xs ⟹ hd (stable_segment cmp x xs) = x ⟹ (⋀y. y ∈ set xs ⟹ compare cmp x y ≠ Greater)
⟹ P xs"
using ‹sorted cmp xs› proof (induction xs)
case Nil
show ?case
by (rule ‹P []›)
next
case (Cons x xs)
then have "sorted cmp (x # xs)"
moreover note Cons.IH
moreover have "⋀y. compare cmp x y = Greater ⟹ y ∈ set xs ⟹ False"
using Cons.hyps by simp
ultimately show ?case
by (auto intro!: * [of "x # xs" x]) blast
qed

lemma sorted_remove1:
"sorted cmp (remove1 x xs)" if "sorted cmp xs"
proof (cases "x ∈ set xs")
case False
with that show ?thesis
next
case True
with that show ?thesis proof (induction xs)
case Nil
then show ?case
by simp
next
case (Cons y ys)
show ?case proof (cases "x = y")
case True
with Cons.hyps show ?thesis
by simp
next
case False
then have "sorted cmp (remove1 x ys)"
using Cons.IH Cons.prems by auto
then have "sorted cmp (y # remove1 x ys)"
proof (rule sorted_ConsI)
fix z zs
assume "remove1 x ys = z # zs"
with ‹x ≠ y› have "z ∈ set ys"
using notin_set_remove1 [of z ys x] by auto
then show "compare cmp y z ≠ Greater"
by (rule Cons.hyps(2))
qed
with False show ?thesis
by simp
qed
qed
qed

lemma sorted_stable_segment:
"sorted cmp (stable_segment cmp x xs)"
proof (induction xs)
case Nil
show ?case
by simp
next
case (Cons y ys)
then show ?case
by (auto intro!: sorted_ConsI simp add: filter_eq_Cons_iff compare.sym)
(auto dest: compare.trans_equiv simp add: compare.sym compare.greater_iff_sym_less)

qed

primrec insort :: "'a comparator ⇒ 'a ⇒ 'a list ⇒ 'a list"
where "insort cmp y [] = [y]"
| "insort cmp y (x # xs) = (if compare cmp y x ≠ Greater
then y # x # xs
else x # insort cmp y xs)"

lemma mset_insort [simp]:
"mset (insort cmp x xs) = add_mset x (mset xs)"
by (induction xs) simp_all

lemma length_insort [simp]:
"length (insort cmp x xs) = Suc (length xs)"
by (induction xs) simp_all

lemma sorted_insort:
"sorted cmp (insort cmp x xs)" if "sorted cmp xs"
using that proof (induction xs)
case Nil
then show ?case
by simp
next
case (Cons y ys)
then show ?case by (cases ys)
qed

lemma stable_insort_equiv:
"stable_segment cmp y (insort cmp x xs) = x # stable_segment cmp y xs"
if "compare cmp y x = Equiv"
proof (induction xs)
case Nil
from that show ?case
by simp
next
case (Cons z xs)
moreover from that have "compare cmp y z = Equiv ⟹ compare cmp z x = Equiv"
by (auto intro: compare.trans_equiv simp add: compare.sym)
ultimately show ?case
using that by (auto simp add: compare.greater_iff_sym_less)
qed

lemma stable_insort_not_equiv:
"stable_segment cmp y (insort cmp x xs) = stable_segment cmp y xs"
if "compare cmp y x ≠ Equiv"
using that by (induction xs) simp_all

lemma remove1_insort_same_eq [simp]:
"remove1 x (insort cmp x xs) = xs"
by (induction xs) simp_all

lemma insort_eq_ConsI:
"insort cmp x xs = x # xs"
if "sorted cmp xs" "⋀y. y ∈ set xs ⟹ compare cmp x y ≠ Greater"
using that by (induction xs) (simp_all add: compare.greater_iff_sym_less)

lemma remove1_insort_not_same_eq [simp]:
"remove1 y (insort cmp x xs) = insort cmp x (remove1 y xs)"
if "sorted cmp xs" "x ≠ y"
using that proof (induction xs)
case Nil
then show ?case
by simp
next
case (Cons z zs)
show ?case
proof (cases "compare cmp x z = Greater")
case True
with Cons show ?thesis
by simp
next
case False
then have "compare cmp x y ≠ Greater" if "y ∈ set zs" for y
using that Cons.hyps
by (auto dest: compare.trans_not_greater)
with Cons show ?thesis
qed
qed

lemma insort_remove1_same_eq:
"insort cmp x (remove1 x xs) = xs"
if "sorted cmp xs" and "x ∈ set xs" and "hd (stable_segment cmp x xs) = x"
using that proof (induction xs)
case Nil
then show ?case
by simp
next
case (Cons y ys)
then have "compare cmp x y ≠ Less"
then consider "compare cmp x y = Greater" | "compare cmp x y = Equiv"
by (cases "compare cmp x y") auto
then show ?case proof cases
case 1
with Cons.prems Cons.IH show ?thesis
by auto
next
case 2
with Cons.prems have "x = y"
by simp
with Cons.hyps show ?thesis
qed
qed

lemma sorted_append_iff:
"sorted cmp (xs @ ys) ⟷ sorted cmp xs ∧ sorted cmp ys
∧ (∀x ∈ set xs. ∀y ∈ set ys. compare cmp x y ≠ Greater)" (is "?P ⟷ ?R ∧ ?S ∧ ?Q")
proof
assume ?P
have ?R
using ‹?P› by (induction xs)
auto simp add: sorted_Cons_imp_sorted intro: sorted_ConsI)
moreover have ?S
using ‹?P› by (induction xs) (auto dest: sorted_Cons_imp_sorted)
moreover have ?Q
using ‹?P› by (induction xs) (auto simp add: sorted_Cons_imp_not_less,
ultimately show "?R ∧ ?S ∧ ?Q"
by simp
next
assume "?R ∧ ?S ∧ ?Q"
then have ?R ?S ?Q
by simp_all
then show ?P
by (induction xs)
(auto simp add: append_eq_Cons_conv intro!: sorted_ConsI)
qed

definition sort :: "'a comparator ⇒ 'a list ⇒ 'a list"
where "sort cmp xs = foldr (insort cmp) xs []"

lemma sort_simps [simp]:
"sort cmp [] = []"
"sort cmp (x # xs) = insort cmp x (sort cmp xs)"

lemma mset_sort [simp]:
"mset (sort cmp xs) = mset xs"
by (induction xs) simp_all

lemma length_sort [simp]:
"length (sort cmp xs) = length xs"
by (induction xs) simp_all

lemma sorted_sort [simp]:
"sorted cmp (sort cmp xs)"
by (induction xs) (simp_all add: sorted_insort)

lemma stable_sort:
"stable_segment cmp x (sort cmp xs) = stable_segment cmp x xs"
by (induction xs) (simp_all add: stable_insort_equiv stable_insort_not_equiv)

lemma sort_remove1_eq [simp]:
"sort cmp (remove1 x xs) = remove1 x (sort cmp xs)"
by (induction xs) simp_all

lemma set_insort [simp]:
"set (insort cmp x xs) = insert x (set xs)"
by (induction xs) auto

lemma set_sort [simp]:
"set (sort cmp xs) = set xs"
by (induction xs) auto

lemma sort_eqI:
"sort cmp ys = xs"
if permutation: "mset ys = mset xs"
and sorted: "sorted cmp xs"
and stable: "⋀y. y ∈ set ys ⟹
stable_segment cmp y ys = stable_segment cmp y xs"
proof -
have stable': "stable_segment cmp y ys =
stable_segment cmp y xs" for y
proof (cases "∃x∈set ys. compare cmp y x = Equiv")
case True
then obtain z where "z ∈ set ys" and "compare cmp y z = Equiv"
by auto
then have "compare cmp y x = Equiv ⟷ compare cmp z x = Equiv" for x
by (meson compare.sym compare.trans_equiv)
moreover have "stable_segment cmp z ys =
stable_segment cmp z xs"
using ‹z ∈ set ys› by (rule stable)
ultimately show ?thesis
by simp
next
case False
moreover from permutation have "set ys = set xs"
by (rule mset_eq_setD)
ultimately show ?thesis
by simp
qed
show ?thesis
using sorted permutation stable' proof (induction xs arbitrary: ys rule: sorted_induct_remove1)
case Nil
then show ?case
by simp
next
case (minimum x xs)
from ‹mset ys = mset xs› have ys: "set ys = set xs"
by (rule mset_eq_setD)
then have "compare cmp x y ≠ Greater" if "y ∈ set ys" for y
using that minimum.hyps by simp
from minimum.prems have stable: "stable_segment cmp x ys = stable_segment cmp x xs"
by simp
have "sort cmp (remove1 x ys) = remove1 x xs"
by (rule minimum.IH) (simp_all add: minimum.prems filter_remove1)
then have "remove1 x (sort cmp ys) = remove1 x xs"
by simp
then have "insort cmp x (remove1 x (sort cmp ys)) =
insort cmp x (remove1 x xs)"
by simp
also from minimum.hyps ys stable have "insort cmp x (remove1 x (sort cmp ys)) = sort cmp ys"
also from minimum.hyps have "insort cmp x (remove1 x xs) = xs"
finally show ?case .
qed
qed

lemma filter_insort:
"filter P (insort cmp x xs) = insort cmp x (filter P xs)"
if "sorted cmp xs" and "P x"
using that by (induction xs)

lemma filter_insort_triv:
"filter P (insort cmp x xs) = filter P xs"
if "¬ P x"
using that by (induction xs) simp_all

lemma filter_sort:
"filter P (sort cmp xs) = sort cmp (filter P xs)"
by (induction xs) (auto simp add: filter_insort filter_insort_triv)

section ‹Alternative sorting algorithms›

subsection ‹Quicksort›

definition quicksort :: "'a comparator ⇒ 'a list ⇒ 'a list"
where quicksort_is_sort [simp]: "quicksort = sort"

lemma sort_by_quicksort:
"sort = quicksort"
by simp

lemma sort_by_quicksort_rec:
"sort cmp xs = sort cmp [x←xs. compare cmp x (xs ! (length xs div 2)) = Less]
@ stable_segment cmp (xs ! (length xs div 2)) xs
@ sort cmp [x←xs. compare cmp x (xs ! (length xs div 2)) = Greater]" (is "_ = ?rhs")
proof (rule sort_eqI)
show "mset xs = mset ?rhs"
by (rule multiset_eqI) (auto simp add: compare.sym intro: comp.exhaust)
next
show "sorted cmp ?rhs"
by (auto simp add: sorted_append_iff sorted_stable_segment compare.equiv_subst_right dest: compare.trans_greater)
next
let ?pivot = "xs ! (length xs div 2)"
fix l
have "compare cmp x ?pivot = comp ∧ compare cmp l x = Equiv
⟷ compare cmp l ?pivot = comp ∧ compare cmp l x = Equiv" for x comp
proof -
have "compare cmp x ?pivot = comp ⟷ compare cmp l ?pivot = comp"
if "compare cmp l x = Equiv"
using that by (simp add: compare.equiv_subst_left compare.sym)
then show ?thesis by blast
qed
then show "stable_segment cmp l xs = stable_segment cmp l ?rhs"
by (simp add: stable_sort compare.sym [of _ ?pivot])
(cases "compare cmp l ?pivot", simp_all)
qed

context
begin

qualified definition partition :: "'a comparator ⇒ 'a ⇒ 'a list ⇒ 'a list × 'a list × 'a list"
where "partition cmp pivot xs =
([x ← xs. compare cmp x pivot = Less], stable_segment cmp pivot xs, [x ← xs. compare cmp x pivot = Greater])"

qualified lemma partition_code [code]:
"partition cmp pivot [] = ([], [], [])"
"partition cmp pivot (x # xs) =
(let (lts, eqs, gts) = partition cmp pivot xs
in case compare cmp x pivot of
Less ⇒ (x # lts, eqs, gts)
| Equiv ⇒ (lts, x # eqs, gts)
| Greater ⇒ (lts, eqs, x # gts))"
using comp.exhaust by (auto simp add: partition_def Let_def compare.sym [of _ pivot])

lemma quicksort_code [code]:
"quicksort cmp xs =
(case xs of
[] ⇒ []
| [x] ⇒ xs
| [x, y] ⇒ (if compare cmp x y ≠ Greater then xs else [y, x])
| _ ⇒
let (lts, eqs, gts) = partition cmp (xs ! (length xs div 2)) xs
in quicksort cmp lts @ eqs @ quicksort cmp gts)"
proof (cases "length xs ≥ 3")
case False
then have "length xs ∈ {0, 1, 2}"
by (auto simp add: not_le le_less less_antisym)
then consider "xs = []" | x where "xs = [x]" | x y where "xs = [x, y]"
by (auto simp add: length_Suc_conv numeral_2_eq_2)
then show ?thesis
by cases simp_all
next
case True
then obtain x y z zs where "xs = x # y # z # zs"
by (metis le_0_eq length_0_conv length_Cons list.exhaust not_less_eq_eq numeral_3_eq_3)
moreover have "quicksort cmp xs =
(let (lts, eqs, gts) = partition cmp (xs ! (length xs div 2)) xs
in quicksort cmp lts @ eqs @ quicksort cmp gts)"
using sort_by_quicksort_rec [of cmp xs] by (simp add: partition_def)
ultimately show ?thesis
by simp
qed

end

subsection ‹Mergesort›

definition mergesort :: "'a comparator ⇒ 'a list ⇒ 'a list"
where mergesort_is_sort [simp]: "mergesort = sort"

lemma sort_by_mergesort:
"sort = mergesort"
by simp

context
fixes cmp :: "'a comparator"
begin

qualified function merge :: "'a list ⇒ 'a list ⇒ 'a list"
where "merge [] ys = ys"
| "merge xs [] = xs"
| "merge (x # xs) (y # ys) = (if compare cmp x y = Greater
then y # merge (x # xs) ys else x # merge xs (y # ys))"
by pat_completeness auto

qualified termination by lexicographic_order

lemma mset_merge:
"mset (merge xs ys) = mset xs + mset ys"
by (induction xs ys rule: merge.induct) simp_all

lemma merge_eq_Cons_imp:
"xs ≠ [] ∧ z = hd xs ∨ ys ≠ [] ∧ z = hd ys"
if "merge xs ys = z # zs"
using that by (induction xs ys rule: merge.induct) (auto split: if_splits)

lemma filter_merge:
"filter P (merge xs ys) = merge (filter P xs) (filter P ys)"
if "sorted cmp xs" and "sorted cmp ys"
using that proof (induction xs ys rule: merge.induct)
case (1 ys)
then show ?case
by simp
next
case (2 xs)
then show ?case
by simp
next
case (3 x xs y ys)
show ?case
proof (cases "compare cmp x y = Greater")
case True
with 3 have hyp: "filter P (merge (x # xs) ys) =
merge (filter P (x # xs)) (filter P ys)"
show ?thesis
proof (cases "¬ P x ∧ P y")
case False
with ‹compare cmp x y = Greater› show ?thesis
next
case True
from ‹compare cmp x y = Greater› "3.prems"
have *: "compare cmp z y = Greater" if "z ∈ set (filter P xs)" for z
using that by (auto dest: compare.trans_not_greater sorted_Cons_imp_not_less)
from ‹compare cmp x y = Greater› show ?thesis
by (cases "filter P xs") (simp_all add: hyp *)
qed
next
case False
with 3 have hyp: "filter P (merge xs (y # ys)) =
merge (filter P xs) (filter P (y # ys))"
show ?thesis
proof (cases "P x ∧ ¬ P y")
case False
with ‹compare cmp x y ≠ Greater› show ?thesis
next
case True
from ‹compare cmp x y ≠ Greater› "3.prems"
have *: "compare cmp x z ≠ Greater" if "z ∈ set (filter P ys)" for z
using that by (auto dest: compare.trans_not_greater sorted_Cons_imp_not_less)
from ‹compare cmp x y ≠ Greater› show ?thesis
by (cases "filter P ys") (simp_all add: hyp *)
qed
qed
qed

lemma sorted_merge:
"sorted cmp (merge xs ys)" if "sorted cmp xs" and "sorted cmp ys"
using that proof (induction xs ys rule: merge.induct)
case (1 ys)
then show ?case
by simp
next
case (2 xs)
then show ?case
by simp
next
case (3 x xs y ys)
show ?case
proof (cases "compare cmp x y = Greater")
case True
with 3 have "sorted cmp (merge (x # xs) ys)"
then have "sorted cmp (y # merge (x # xs) ys)"
proof (rule sorted_ConsI)
fix z zs
assume "merge (x # xs) ys = z # zs"
with 3(4) True show "compare cmp y z ≠ Greater"
by (clarsimp simp add: sorted_Cons_imp_sorted dest!: merge_eq_Cons_imp)
qed
with True show ?thesis
by simp
next
case False
with 3 have "sorted cmp (merge xs (y # ys))"
then have "sorted cmp (x # merge xs (y # ys))"
proof (rule sorted_ConsI)
fix z zs
assume "merge xs (y # ys) = z # zs"
with 3(3) False show "compare cmp x z ≠ Greater"
by (clarsimp simp add: sorted_Cons_imp_sorted dest!: merge_eq_Cons_imp)
qed
with False show ?thesis
by simp
qed
qed

lemma merge_eq_appendI:
"merge xs ys = xs @ ys"
if "⋀x y. x ∈ set xs ⟹ y ∈ set ys ⟹ compare cmp x y ≠ Greater"
using that by (induction xs ys rule: merge.induct) simp_all

lemma merge_stable_segments:
"merge (stable_segment cmp l xs) (stable_segment cmp l ys) =
stable_segment cmp l xs @ stable_segment cmp l ys"
by (rule merge_eq_appendI) (auto dest: compare.trans_equiv_greater)

lemma sort_by_mergesort_rec:
"sort cmp xs =
merge (sort cmp (take (length xs div 2) xs))
(sort cmp (drop (length xs div 2) xs))" (is "_ = ?rhs")
proof (rule sort_eqI)
have "mset (take (length xs div 2) xs) + mset (drop (length xs div 2) xs) =
mset (take (length xs div 2) xs @ drop (length xs div 2) xs)"
by (simp only: mset_append)
then show "mset xs = mset ?rhs"
next
show "sorted cmp ?rhs"
next
fix l
have "stable_segment cmp l (take (length xs div 2) xs) @ stable_segment cmp l (drop (length xs div 2) xs)
= stable_segment cmp l xs"
by (simp only: filter_append [symmetric] append_take_drop_id)
have "merge (stable_segment cmp l (take (length xs div 2) xs))
(stable_segment cmp l (drop (length xs div 2) xs)) =
stable_segment cmp l (take (length xs div 2) xs) @ stable_segment cmp l (drop (length xs div 2) xs)"
by (rule merge_eq_appendI) (auto simp add: compare.trans_equiv_greater)
also have "… = stable_segment cmp l xs"
by (simp only: filter_append [symmetric] append_take_drop_id)
finally show "stable_segment cmp l xs = stable_segment cmp l ?rhs"
qed

lemma mergesort_code [code]:
"mergesort cmp xs =
(case xs of
[] ⇒ []
| [x] ⇒ xs
| [x, y] ⇒ (if compare cmp x y ≠ Greater then xs else [y, x])
| _ ⇒
let
half = length xs div 2;
ys = take half xs;
zs = drop half xs
in merge (mergesort cmp ys) (mergesort cmp zs))"
proof (cases "length xs ≥ 3")
case False
then have "length xs ∈ {0, 1, 2}"
by (auto simp add: not_le le_less less_antisym)
then consider "xs = []" | x where "xs = [x]" | x y where "xs = [x, y]"
by (auto simp add: length_Suc_conv numeral_2_eq_2)
then show ?thesis
by cases simp_all
next
case True
then obtain x y z zs where "xs = x # y # z # zs"
by (metis le_0_eq length_0_conv length_Cons list.exhaust not_less_eq_eq numeral_3_eq_3)
moreover have "mergesort cmp xs =
(let
half = length xs div 2;
ys = take half xs;
zs = drop half xs
in merge (mergesort cmp ys) (mergesort cmp zs))"
using sort_by_mergesort_rec [of xs] by (simp add: Let_def)
ultimately show ?thesis
by simp
qed

end

end
```