(* Title: HOL/Library/Sorting_Algorithms.thy Author: Florian Haftmann, TU Muenchen *) theory Sorting_Algorithms imports Main Multiset Comparator begin section ‹Stably sorted lists› abbreviation (input) stable_segment :: "'a comparator ⇒ 'a ⇒ 'a list ⇒ 'a list" where "stable_segment cmp x ≡ filter (λy. compare cmp x y = Equiv)" fun sorted :: "'a comparator ⇒ 'a list ⇒ bool" where sorted_Nil: "sorted cmp [] ⟷ True" | sorted_single: "sorted cmp [x] ⟷ True" | sorted_rec: "sorted cmp (y # x # xs) ⟷ compare cmp y x ≠ Greater ∧ sorted cmp (x # xs)" lemma sorted_ConsI: "sorted cmp (x # xs)" if "sorted cmp xs" and "⋀y ys. xs = y # ys ⟹ compare cmp x y ≠ Greater" using that by (cases xs) simp_all lemma sorted_Cons_imp_sorted: "sorted cmp xs" if "sorted cmp (x # xs)" using that by (cases xs) simp_all lemma sorted_Cons_imp_not_less: "compare cmp y x ≠ Greater" if "sorted cmp (y # xs)" and "x ∈ set xs" using that by (induction xs arbitrary: y) (auto dest: compare.trans_not_greater) lemma sorted_induct [consumes 1, case_names Nil Cons, induct pred: sorted]: "P xs" if "sorted cmp xs" and "P []" and *: "⋀x xs. sorted cmp xs ⟹ P xs ⟹ (⋀y. y ∈ set xs ⟹ compare cmp x y ≠ Greater) ⟹ P (x # xs)" using ‹sorted cmp xs› proof (induction xs) case Nil show ?case by (rule ‹P []›) next case (Cons x xs) from ‹sorted cmp (x # xs)› have "sorted cmp xs" by (cases xs) simp_all moreover have "P xs" using ‹sorted cmp xs› by (rule Cons.IH) moreover have "compare cmp x y ≠ Greater" if "y ∈ set xs" for y using that ‹sorted cmp (x # xs)› proof (induction xs) case Nil then show ?case by simp next case (Cons z zs) then show ?case proof (cases zs) case Nil with Cons.prems show ?thesis by simp next case (Cons w ws) with Cons.prems have "compare cmp z w ≠ Greater" "compare cmp x z ≠ Greater" by auto then have "compare cmp x w ≠ Greater" by (auto dest: compare.trans_not_greater) with Cons show ?thesis using Cons.prems Cons.IH by auto qed qed ultimately show ?case by (rule *) qed lemma sorted_induct_remove1 [consumes 1, case_names Nil minimum]: "P xs" if "sorted cmp xs" and "P []" and *: "⋀x xs. sorted cmp xs ⟹ P (remove1 x xs) ⟹ x ∈ set xs ⟹ hd (stable_segment cmp x xs) = x ⟹ (⋀y. y ∈ set xs ⟹ compare cmp x y ≠ Greater) ⟹ P xs" using ‹sorted cmp xs› proof (induction xs) case Nil show ?case by (rule ‹P []›) next case (Cons x xs) then have "sorted cmp (x # xs)" by (simp add: sorted_ConsI) moreover note Cons.IH moreover have "⋀y. compare cmp x y = Greater ⟹ y ∈ set xs ⟹ False" using Cons.hyps by simp ultimately show ?case by (auto intro!: * [of "x # xs" x]) blast qed lemma sorted_remove1: "sorted cmp (remove1 x xs)" if "sorted cmp xs" proof (cases "x ∈ set xs") case False with that show ?thesis by (simp add: remove1_idem) next case True with that show ?thesis proof (induction xs) case Nil then show ?case by simp next case (Cons y ys) show ?case proof (cases "x = y") case True with Cons.hyps show ?thesis by simp next case False then have "sorted cmp (remove1 x ys)" using Cons.IH Cons.prems by auto then have "sorted cmp (y # remove1 x ys)" proof (rule sorted_ConsI) fix z zs assume "remove1 x ys = z # zs" with ‹x ≠ y› have "z ∈ set ys" using notin_set_remove1 [of z ys x] by auto then show "compare cmp y z ≠ Greater" by (rule Cons.hyps(2)) qed with False show ?thesis by simp qed qed qed lemma sorted_stable_segment: "sorted cmp (stable_segment cmp x xs)" proof (induction xs) case Nil show ?case by simp next case (Cons y ys) then show ?case by (auto intro!: sorted_ConsI simp add: filter_eq_Cons_iff compare.sym) (auto dest: compare.trans_equiv simp add: compare.sym compare.greater_iff_sym_less) qed primrec insort :: "'a comparator ⇒ 'a ⇒ 'a list ⇒ 'a list" where "insort cmp y [] = [y]" | "insort cmp y (x # xs) = (if compare cmp y x ≠ Greater then y # x # xs else x # insort cmp y xs)" lemma mset_insort [simp]: "mset (insort cmp x xs) = add_mset x (mset xs)" by (induction xs) simp_all lemma length_insort [simp]: "length (insort cmp x xs) = Suc (length xs)" by (induction xs) simp_all lemma sorted_insort: "sorted cmp (insort cmp x xs)" if "sorted cmp xs" using that proof (induction xs) case Nil then show ?case by simp next case (Cons y ys) then show ?case by (cases ys) (auto, simp_all add: compare.greater_iff_sym_less) qed lemma stable_insort_equiv: "stable_segment cmp y (insort cmp x xs) = x # stable_segment cmp y xs" if "compare cmp y x = Equiv" proof (induction xs) case Nil from that show ?case by simp next case (Cons z xs) moreover from that have "compare cmp y z = Equiv ⟹ compare cmp z x = Equiv" by (auto intro: compare.trans_equiv simp add: compare.sym) ultimately show ?case using that by (auto simp add: compare.greater_iff_sym_less) qed lemma stable_insort_not_equiv: "stable_segment cmp y (insort cmp x xs) = stable_segment cmp y xs" if "compare cmp y x ≠ Equiv" using that by (induction xs) simp_all lemma remove1_insort_same_eq [simp]: "remove1 x (insort cmp x xs) = xs" by (induction xs) simp_all lemma insort_eq_ConsI: "insort cmp x xs = x # xs" if "sorted cmp xs" "⋀y. y ∈ set xs ⟹ compare cmp x y ≠ Greater" using that by (induction xs) (simp_all add: compare.greater_iff_sym_less) lemma remove1_insort_not_same_eq [simp]: "remove1 y (insort cmp x xs) = insort cmp x (remove1 y xs)" if "sorted cmp xs" "x ≠ y" using that proof (induction xs) case Nil then show ?case by simp next case (Cons z zs) show ?case proof (cases "compare cmp x z = Greater") case True with Cons show ?thesis by simp next case False then have "compare cmp x y ≠ Greater" if "y ∈ set zs" for y using that Cons.hyps by (auto dest: compare.trans_not_greater) with Cons show ?thesis by (simp add: insort_eq_ConsI) qed qed lemma insort_remove1_same_eq: "insort cmp x (remove1 x xs) = xs" if "sorted cmp xs" and "x ∈ set xs" and "hd (stable_segment cmp x xs) = x" using that proof (induction xs) case Nil then show ?case by simp next case (Cons y ys) then have "compare cmp x y ≠ Less" by (auto simp add: compare.greater_iff_sym_less) then consider "compare cmp x y = Greater" | "compare cmp x y = Equiv" by (cases "compare cmp x y") auto then show ?case proof cases case 1 with Cons.prems Cons.IH show ?thesis by auto next case 2 with Cons.prems have "x = y" by simp with Cons.hyps show ?thesis by (simp add: insort_eq_ConsI) qed qed lemma sorted_append_iff: "sorted cmp (xs @ ys) ⟷ sorted cmp xs ∧ sorted cmp ys ∧ (∀x ∈ set xs. ∀y ∈ set ys. compare cmp x y ≠ Greater)" (is "?P ⟷ ?R ∧ ?S ∧ ?Q") proof assume ?P have ?R using ‹?P› by (induction xs) (auto simp add: sorted_Cons_imp_not_less, auto simp add: sorted_Cons_imp_sorted intro: sorted_ConsI) moreover have ?S using ‹?P› by (induction xs) (auto dest: sorted_Cons_imp_sorted) moreover have ?Q using ‹?P› by (induction xs) (auto simp add: sorted_Cons_imp_not_less, simp add: sorted_Cons_imp_sorted) ultimately show "?R ∧ ?S ∧ ?Q" by simp next assume "?R ∧ ?S ∧ ?Q" then have ?R ?S ?Q by simp_all then show ?P by (induction xs) (auto simp add: append_eq_Cons_conv intro!: sorted_ConsI) qed definition sort :: "'a comparator ⇒ 'a list ⇒ 'a list" where "sort cmp xs = foldr (insort cmp) xs []" lemma sort_simps [simp]: "sort cmp [] = []" "sort cmp (x # xs) = insort cmp x (sort cmp xs)" by (simp_all add: sort_def) lemma mset_sort [simp]: "mset (sort cmp xs) = mset xs" by (induction xs) simp_all lemma length_sort [simp]: "length (sort cmp xs) = length xs" by (induction xs) simp_all lemma sorted_sort [simp]: "sorted cmp (sort cmp xs)" by (induction xs) (simp_all add: sorted_insort) lemma stable_sort: "stable_segment cmp x (sort cmp xs) = stable_segment cmp x xs" by (induction xs) (simp_all add: stable_insort_equiv stable_insort_not_equiv) lemma sort_remove1_eq [simp]: "sort cmp (remove1 x xs) = remove1 x (sort cmp xs)" by (induction xs) simp_all lemma set_insort [simp]: "set (insort cmp x xs) = insert x (set xs)" by (induction xs) auto lemma set_sort [simp]: "set (sort cmp xs) = set xs" by (induction xs) auto lemma sort_eqI: "sort cmp ys = xs" if permutation: "mset ys = mset xs" and sorted: "sorted cmp xs" and stable: "⋀y. y ∈ set ys ⟹ stable_segment cmp y ys = stable_segment cmp y xs" proof - have stable': "stable_segment cmp y ys = stable_segment cmp y xs" for y proof (cases "∃x∈set ys. compare cmp y x = Equiv") case True then obtain z where "z ∈ set ys" and "compare cmp y z = Equiv" by auto then have "compare cmp y x = Equiv ⟷ compare cmp z x = Equiv" for x by (meson compare.sym compare.trans_equiv) moreover have "stable_segment cmp z ys = stable_segment cmp z xs" using ‹z ∈ set ys› by (rule stable) ultimately show ?thesis by simp next case False moreover from permutation have "set ys = set xs" by (rule mset_eq_setD) ultimately show ?thesis by simp qed show ?thesis using sorted permutation stable' proof (induction xs arbitrary: ys rule: sorted_induct_remove1) case Nil then show ?case by simp next case (minimum x xs) from ‹mset ys = mset xs› have ys: "set ys = set xs" by (rule mset_eq_setD) then have "compare cmp x y ≠ Greater" if "y ∈ set ys" for y using that minimum.hyps by simp from minimum.prems have stable: "stable_segment cmp x ys = stable_segment cmp x xs" by simp have "sort cmp (remove1 x ys) = remove1 x xs" by (rule minimum.IH) (simp_all add: minimum.prems filter_remove1) then have "remove1 x (sort cmp ys) = remove1 x xs" by simp then have "insort cmp x (remove1 x (sort cmp ys)) = insort cmp x (remove1 x xs)" by simp also from minimum.hyps ys stable have "insort cmp x (remove1 x (sort cmp ys)) = sort cmp ys" by (simp add: stable_sort insort_remove1_same_eq) also from minimum.hyps have "insort cmp x (remove1 x xs) = xs" by (simp add: insort_remove1_same_eq) finally show ?case . qed qed lemma filter_insort: "filter P (insort cmp x xs) = insort cmp x (filter P xs)" if "sorted cmp xs" and "P x" using that by (induction xs) (auto simp add: compare.trans_not_greater insort_eq_ConsI) lemma filter_insort_triv: "filter P (insort cmp x xs) = filter P xs" if "¬ P x" using that by (induction xs) simp_all lemma filter_sort: "filter P (sort cmp xs) = sort cmp (filter P xs)" by (induction xs) (auto simp add: filter_insort filter_insort_triv) section ‹Alternative sorting algorithms› subsection ‹Quicksort› definition quicksort :: "'a comparator ⇒ 'a list ⇒ 'a list" where quicksort_is_sort [simp]: "quicksort = sort" lemma sort_by_quicksort: "sort = quicksort" by simp lemma sort_by_quicksort_rec: "sort cmp xs = sort cmp [x←xs. compare cmp x (xs ! (length xs div 2)) = Less] @ stable_segment cmp (xs ! (length xs div 2)) xs @ sort cmp [x←xs. compare cmp x (xs ! (length xs div 2)) = Greater]" (is "_ = ?rhs") proof (rule sort_eqI) show "mset xs = mset ?rhs" by (rule multiset_eqI) (auto simp add: compare.sym intro: comp.exhaust) next show "sorted cmp ?rhs" by (auto simp add: sorted_append_iff sorted_stable_segment compare.equiv_subst_right dest: compare.trans_greater) next let ?pivot = "xs ! (length xs div 2)" fix l have "compare cmp x ?pivot = comp ∧ compare cmp l x = Equiv ⟷ compare cmp l ?pivot = comp ∧ compare cmp l x = Equiv" for x comp proof - have "compare cmp x ?pivot = comp ⟷ compare cmp l ?pivot = comp" if "compare cmp l x = Equiv" using that by (simp add: compare.equiv_subst_left compare.sym) then show ?thesis by blast qed then show "stable_segment cmp l xs = stable_segment cmp l ?rhs" by (simp add: stable_sort compare.sym [of _ ?pivot]) (cases "compare cmp l ?pivot", simp_all) qed context begin qualified definition partition :: "'a comparator ⇒ 'a ⇒ 'a list ⇒ 'a list × 'a list × 'a list" where "partition cmp pivot xs = ([x ← xs. compare cmp x pivot = Less], stable_segment cmp pivot xs, [x ← xs. compare cmp x pivot = Greater])" qualified lemma partition_code [code]: "partition cmp pivot [] = ([], [], [])" "partition cmp pivot (x # xs) = (let (lts, eqs, gts) = partition cmp pivot xs in case compare cmp x pivot of Less ⇒ (x # lts, eqs, gts) | Equiv ⇒ (lts, x # eqs, gts) | Greater ⇒ (lts, eqs, x # gts))" using comp.exhaust by (auto simp add: partition_def Let_def compare.sym [of _ pivot]) lemma quicksort_code [code]: "quicksort cmp xs = (case xs of [] ⇒ [] | [x] ⇒ xs | [x, y] ⇒ (if compare cmp x y ≠ Greater then xs else [y, x]) | _ ⇒ let (lts, eqs, gts) = partition cmp (xs ! (length xs div 2)) xs in quicksort cmp lts @ eqs @ quicksort cmp gts)" proof (cases "length xs ≥ 3") case False then have "length xs ∈ {0, 1, 2}" by (auto simp add: not_le le_less less_antisym) then consider "xs = []" | x where "xs = [x]" | x y where "xs = [x, y]" by (auto simp add: length_Suc_conv numeral_2_eq_2) then show ?thesis by cases simp_all next case True then obtain x y z zs where "xs = x # y # z # zs" by (metis le_0_eq length_0_conv length_Cons list.exhaust not_less_eq_eq numeral_3_eq_3) moreover have "quicksort cmp xs = (let (lts, eqs, gts) = partition cmp (xs ! (length xs div 2)) xs in quicksort cmp lts @ eqs @ quicksort cmp gts)" using sort_by_quicksort_rec [of cmp xs] by (simp add: partition_def) ultimately show ?thesis by simp qed end subsection ‹Mergesort› definition mergesort :: "'a comparator ⇒ 'a list ⇒ 'a list" where mergesort_is_sort [simp]: "mergesort = sort" lemma sort_by_mergesort: "sort = mergesort" by simp context fixes cmp :: "'a comparator" begin qualified function merge :: "'a list ⇒ 'a list ⇒ 'a list" where "merge [] ys = ys" | "merge xs [] = xs" | "merge (x # xs) (y # ys) = (if compare cmp x y = Greater then y # merge (x # xs) ys else x # merge xs (y # ys))" by pat_completeness auto qualified termination by lexicographic_order lemma mset_merge: "mset (merge xs ys) = mset xs + mset ys" by (induction xs ys rule: merge.induct) simp_all lemma merge_eq_Cons_imp: "xs ≠ [] ∧ z = hd xs ∨ ys ≠ [] ∧ z = hd ys" if "merge xs ys = z # zs" using that by (induction xs ys rule: merge.induct) (auto split: if_splits) lemma filter_merge: "filter P (merge xs ys) = merge (filter P xs) (filter P ys)" if "sorted cmp xs" and "sorted cmp ys" using that proof (induction xs ys rule: merge.induct) case (1 ys) then show ?case by simp next case (2 xs) then show ?case by simp next case (3 x xs y ys) show ?case proof (cases "compare cmp x y = Greater") case True with 3 have hyp: "filter P (merge (x # xs) ys) = merge (filter P (x # xs)) (filter P ys)" by (simp add: sorted_Cons_imp_sorted) show ?thesis proof (cases "¬ P x ∧ P y") case False with ‹compare cmp x y = Greater› show ?thesis by (auto simp add: hyp) next case True from ‹compare cmp x y = Greater› "3.prems" have *: "compare cmp z y = Greater" if "z ∈ set (filter P xs)" for z using that by (auto dest: compare.trans_not_greater sorted_Cons_imp_not_less) from ‹compare cmp x y = Greater› show ?thesis by (cases "filter P xs") (simp_all add: hyp *) qed next case False with 3 have hyp: "filter P (merge xs (y # ys)) = merge (filter P xs) (filter P (y # ys))" by (simp add: sorted_Cons_imp_sorted) show ?thesis proof (cases "P x ∧ ¬ P y") case False with ‹compare cmp x y ≠ Greater› show ?thesis by (auto simp add: hyp) next case True from ‹compare cmp x y ≠ Greater› "3.prems" have *: "compare cmp x z ≠ Greater" if "z ∈ set (filter P ys)" for z using that by (auto dest: compare.trans_not_greater sorted_Cons_imp_not_less) from ‹compare cmp x y ≠ Greater› show ?thesis by (cases "filter P ys") (simp_all add: hyp *) qed qed qed lemma sorted_merge: "sorted cmp (merge xs ys)" if "sorted cmp xs" and "sorted cmp ys" using that proof (induction xs ys rule: merge.induct) case (1 ys) then show ?case by simp next case (2 xs) then show ?case by simp next case (3 x xs y ys) show ?case proof (cases "compare cmp x y = Greater") case True with 3 have "sorted cmp (merge (x # xs) ys)" by (simp add: sorted_Cons_imp_sorted) then have "sorted cmp (y # merge (x # xs) ys)" proof (rule sorted_ConsI) fix z zs assume "merge (x # xs) ys = z # zs" with 3(4) True show "compare cmp y z ≠ Greater" by (clarsimp simp add: sorted_Cons_imp_sorted dest!: merge_eq_Cons_imp) (auto simp add: compare.asym_greater sorted_Cons_imp_not_less) qed with True show ?thesis by simp next case False with 3 have "sorted cmp (merge xs (y # ys))" by (simp add: sorted_Cons_imp_sorted) then have "sorted cmp (x # merge xs (y # ys))" proof (rule sorted_ConsI) fix z zs assume "merge xs (y # ys) = z # zs" with 3(3) False show "compare cmp x z ≠ Greater" by (clarsimp simp add: sorted_Cons_imp_sorted dest!: merge_eq_Cons_imp) (auto simp add: compare.asym_greater sorted_Cons_imp_not_less) qed with False show ?thesis by simp qed qed lemma merge_eq_appendI: "merge xs ys = xs @ ys" if "⋀x y. x ∈ set xs ⟹ y ∈ set ys ⟹ compare cmp x y ≠ Greater" using that by (induction xs ys rule: merge.induct) simp_all lemma merge_stable_segments: "merge (stable_segment cmp l xs) (stable_segment cmp l ys) = stable_segment cmp l xs @ stable_segment cmp l ys" by (rule merge_eq_appendI) (auto dest: compare.trans_equiv_greater) lemma sort_by_mergesort_rec: "sort cmp xs = merge (sort cmp (take (length xs div 2) xs)) (sort cmp (drop (length xs div 2) xs))" (is "_ = ?rhs") proof (rule sort_eqI) have "mset (take (length xs div 2) xs) + mset (drop (length xs div 2) xs) = mset (take (length xs div 2) xs @ drop (length xs div 2) xs)" by (simp only: mset_append) then show "mset xs = mset ?rhs" by (simp add: mset_merge) next show "sorted cmp ?rhs" by (simp add: sorted_merge) next fix l have "stable_segment cmp l (take (length xs div 2) xs) @ stable_segment cmp l (drop (length xs div 2) xs) = stable_segment cmp l xs" by (simp only: filter_append [symmetric] append_take_drop_id) have "merge (stable_segment cmp l (take (length xs div 2) xs)) (stable_segment cmp l (drop (length xs div 2) xs)) = stable_segment cmp l (take (length xs div 2) xs) @ stable_segment cmp l (drop (length xs div 2) xs)" by (rule merge_eq_appendI) (auto simp add: compare.trans_equiv_greater) also have "… = stable_segment cmp l xs" by (simp only: filter_append [symmetric] append_take_drop_id) finally show "stable_segment cmp l xs = stable_segment cmp l ?rhs" by (simp add: stable_sort filter_merge) qed lemma mergesort_code [code]: "mergesort cmp xs = (case xs of [] ⇒ [] | [x] ⇒ xs | [x, y] ⇒ (if compare cmp x y ≠ Greater then xs else [y, x]) | _ ⇒ let half = length xs div 2; ys = take half xs; zs = drop half xs in merge (mergesort cmp ys) (mergesort cmp zs))" proof (cases "length xs ≥ 3") case False then have "length xs ∈ {0, 1, 2}" by (auto simp add: not_le le_less less_antisym) then consider "xs = []" | x where "xs = [x]" | x y where "xs = [x, y]" by (auto simp add: length_Suc_conv numeral_2_eq_2) then show ?thesis by cases simp_all next case True then obtain x y z zs where "xs = x # y # z # zs" by (metis le_0_eq length_0_conv length_Cons list.exhaust not_less_eq_eq numeral_3_eq_3) moreover have "mergesort cmp xs = (let half = length xs div 2; ys = take half xs; zs = drop half xs in merge (mergesort cmp ys) (mergesort cmp zs))" using sort_by_mergesort_rec [of xs] by (simp add: Let_def) ultimately show ?thesis by simp qed end end