# Theory Koepf_Duermuth_Countermeasure

```(* Author: Johannes Hölzl, TU München *)

section ‹Formalization of a Countermeasure by Koepf \& Duermuth 2009›

theory Koepf_Duermuth_Countermeasure
imports "HOL-Probability.Information"
begin

lemma SIGMA_image_vimage:
"snd ` (SIGMA x:f`X. f -` {x} ∩ X) = X"
by (auto simp: image_iff)

declare inj_split_Cons[simp]

definition extensionalD :: "'b ⇒ 'a set ⇒ ('a ⇒ 'b) set" where
"extensionalD d A = {f. ∀x. x ∉ A ⟶ f x = d}"

lemma extensionalD_empty[simp]: "extensionalD d {} = {λx. d}"
unfolding extensionalD_def by (simp add: set_eq_iff fun_eq_iff)

lemma funset_eq_UN_fun_upd_I:
assumes *: "⋀f. f ∈ F (insert a A) ⟹ f(a := d) ∈ F A"
and **: "⋀f. f ∈ F (insert a A) ⟹ f a ∈ G (f(a:=d))"
and ***: "⋀f x. ⟦ f ∈ F A ; x ∈ G f ⟧ ⟹ f(a:=x) ∈ F (insert a A)"
shows "F (insert a A) = (⋃f∈F A. fun_upd f a ` (G f))"
proof safe
fix f assume f: "f ∈ F (insert a A)"
show "f ∈ (⋃f∈F A. fun_upd f a ` G f)"
proof (rule UN_I[of "f(a := d)"])
show "f(a := d) ∈ F A" using *[OF f] .
show "f ∈ fun_upd (f(a:=d)) a ` G (f(a:=d))"
proof (rule image_eqI[of _ _ "f a"])
show "f a ∈ G (f(a := d))" using **[OF f] .
qed simp
qed
next
fix f x assume "f ∈ F A" "x ∈ G f"
from ***[OF this] show "f(a := x) ∈ F (insert a A)" .
qed

lemma extensionalD_insert[simp]:
assumes "a ∉ A"
shows "extensionalD d (insert a A) ∩ (insert a A → B) = (⋃f ∈ extensionalD d A ∩ (A → B). (λb. f(a := b)) ` B)"
apply (rule funset_eq_UN_fun_upd_I)
using assms
by (auto intro!: inj_onI dest: inj_onD split: if_split_asm simp: extensionalD_def)

lemma finite_extensionalD_funcset[simp, intro]:
assumes "finite A" "finite B"
shows "finite (extensionalD d A ∩ (A → B))"
using assms by induct auto

lemma fun_upd_eq_iff: "f(a := b) = g(a := b') ⟷ b = b' ∧ f(a := d) = g(a := d)"
by (auto simp: fun_eq_iff)

lemma card_funcset:
assumes "finite A" "finite B"
shows "card (extensionalD d A ∩ (A → B)) = card B ^ card A"
using ‹finite A› proof induct
case (insert a A) thus ?case unfolding extensionalD_insert[OF ‹a ∉ A›]
proof (subst card_UN_disjoint, safe, simp_all)
show "finite (extensionalD d A ∩ (A → B))" "⋀f. finite (fun_upd f a ` B)"
using ‹finite B› ‹finite A› by simp_all
next
fix f g b b' assume "f ≠ g" and eq: "f(a := b) = g(a := b')" and
ext: "f ∈ extensionalD d A" "g ∈ extensionalD d A"
have "f a = d" "g a = d"
using ext ‹a ∉ A› by (auto simp add: extensionalD_def)
with ‹f ≠ g› eq show False unfolding fun_upd_eq_iff[of _ _ b _ _ d]
by (auto simp: fun_upd_idem fun_upd_eq_iff)
next
{ fix f assume "f ∈ extensionalD d A ∩ (A → B)"
have "card (fun_upd f a ` B) = card B"
proof (auto intro!: card_image inj_onI)
fix b b' assume "f(a := b) = f(a := b')"
from fun_upd_eq_iff[THEN iffD1, OF this] show "b = b'" by simp
qed }
then show "(∑i∈extensionalD d A ∩ (A → B). card (fun_upd i a ` B)) = card B * card B ^ card A"
using insert by simp
qed
qed simp

lemma prod_sum_distrib_lists:
fixes P and S :: "'a set" and f :: "'a ⇒ _::semiring_0" assumes "finite S"
shows "(∑ms∈{ms. set ms ⊆ S ∧ length ms = n ∧ (∀i<n. P i (ms!i))}. ∏x<n. f (ms ! x)) =
(∏i<n. ∑m∈{m∈S. P i m}. f m)"
proof (induct n arbitrary: P)
case 0 then show ?case by (simp cong: conj_cong)
next
case (Suc n)
have *: "{ms. set ms ⊆ S ∧ length ms = Suc n ∧ (∀i<Suc n. P i (ms ! i))} =
(λ(xs, x). x#xs) ` ({ms. set ms ⊆ S ∧ length ms = n ∧ (∀i<n. P (Suc i) (ms ! i))} × {m∈S. P 0 m})"
apply (auto simp: image_iff length_Suc_conv)
apply force+
apply (case_tac i)
by force+
show ?case unfolding *
using Suc[of "λi. P (Suc i)"]
lessThan_Suc_eq_insert_0 prod.reindex sum_distrib_right[symmetric] sum_distrib_left[symmetric])
qed

declare space_point_measure[simp]

declare sets_point_measure[simp]

lemma measure_point_measure:
"finite Ω ⟹ A ⊆ Ω ⟹ (⋀x. x ∈ Ω ⟹ 0 ≤ p x) ⟹
measure (point_measure Ω (λx. ennreal (p x))) A = (∑i∈A. p i)"
unfolding measure_def
by (subst emeasure_point_measure_finite) (auto simp: subset_eq sum_nonneg)

locale finite_information =
fixes Ω :: "'a set"
and p :: "'a ⇒ real"
and b :: real
assumes finite_space[simp, intro]: "finite Ω"
and p_sums_1[simp]: "(∑ω∈Ω. p ω) = 1"
and positive_p[simp, intro]: "⋀x. 0 ≤ p x"
and b_gt_1[simp, intro]: "1 < b"

lemma (in finite_information) positive_p_sum[simp]: "0 ≤ sum p X"
by (auto intro!: sum_nonneg)

sublocale finite_information ⊆ prob_space "point_measure Ω p"
by standard (simp add: one_ereal_def emeasure_point_measure_finite)

sublocale finite_information ⊆ information_space "point_measure Ω p" b
by standard simp

lemma (in finite_information) μ'_eq: "A ⊆ Ω ⟹ prob A = sum p A"
by (auto simp: measure_point_measure)

locale koepf_duermuth = K: finite_information keys K b + M: finite_information messages M b
for b :: real
and keys :: "'key set" and K :: "'key ⇒ real"
and messages :: "'message set" and M :: "'message ⇒ real" +
fixes observe :: "'key ⇒ 'message ⇒ 'observation"
and n :: nat
begin

definition msgs :: "('key × 'message list) set" where
"msgs = keys × {ms. set ms ⊆ messages ∧ length ms = n}"

definition P :: "('key × 'message list) ⇒ real" where
"P = (λ(k, ms). K k * (∏i<n. M (ms ! i)))"

end

sublocale koepf_duermuth ⊆ finite_information msgs P b
proof
show "finite msgs" unfolding msgs_def
using finite_lists_length_eq[OF M.finite_space, of n]
by auto

have [simp]: "⋀A. inj_on (λ(xs, n). n # xs) A" by (force intro!: inj_onI)

note sum_distrib_left[symmetric, simp]
note sum_distrib_right[symmetric, simp]
note sum.cartesian_product'[simp]

have "(∑ms | set ms ⊆ messages ∧ length ms = n. ∏x<length ms. M (ms ! x)) = 1"
proof (induct n)
case 0 then show ?case by (simp cong: conj_cong)
next
case (Suc n)
then show ?case
sum.reindex prod.reindex)
qed
then show "sum P msgs = 1"
unfolding msgs_def P_def by simp
fix x
have "⋀ A f. 0 ≤ (∏x∈A. M (f x))" by (auto simp: prod_nonneg)
then show "0 ≤ P x"
unfolding P_def by (auto split: prod.split simp: zero_le_mult_iff)
qed auto

context koepf_duermuth
begin

definition observations :: "'observation set" where
"observations = (⋃f∈observe ` keys. f ` messages)"

lemma finite_observations[simp, intro]: "finite observations"
unfolding observations_def by auto

definition OB :: "'key × 'message list ⇒ 'observation list" where
"OB = (λ(k, ms). map (observe k) ms)"

definition t :: "'observation list ⇒ 'observation ⇒ nat" where
t_def2: "t seq obs = card { i. i < length seq ∧ seq ! i = obs}"

lemma t_def: "t seq obs = length (filter ((=) obs) seq)"
unfolding t_def2 length_filter_conv_card by (subst eq_commute) simp

lemma card_T_bound: "card ((t∘OB)`msgs) ≤ (n+1)^card observations"
proof -
have "(t∘OB)`msgs ⊆ extensionalD 0 observations ∩ (observations → {.. n})"
unfolding observations_def extensionalD_def OB_def msgs_def
by (auto simp add: t_def comp_def image_iff subset_eq)
with finite_extensionalD_funcset
have "card ((t∘OB)`msgs) ≤ card (extensionalD 0 observations ∩ (observations → {.. n}))"
by (rule card_mono) auto
also have "… = (n + 1) ^ card observations"
by (subst card_funcset) auto
finally show ?thesis .
qed

abbreviation
"p A ≡ sum P A"

abbreviation
"μ ≡ point_measure msgs P"

abbreviation probability ("𝒫'(_') _") where
"𝒫(X) x ≡ measure μ (X -` x ∩ msgs)"

abbreviation joint_probability ("𝒫'(_ ; _') _") where
"𝒫(X ; Y) x ≡ 𝒫(λx. (X x, Y x)) x"

no_notation disj (infixr "|" 30)

abbreviation conditional_probability ("𝒫'(_ | _') _") where
"𝒫(X | Y) x ≡ (𝒫(X ; Y) x) / 𝒫(Y) (snd`x)"

notation
entropy_Pow ("ℋ'( _ ')")

notation
conditional_entropy_Pow ("ℋ'( _ | _ ')")

notation
mutual_information_Pow ("ℐ'( _ ; _ ')")

lemma t_eq_imp_bij_func:
assumes "t xs = t ys"
shows "∃f. bij_betw f {..<length xs} {..<length ys} ∧ (∀i<length xs. xs ! i = ys ! (f i))"
proof -
from assms have ‹mset xs = mset ys›
using assms by (simp add: fun_eq_iff t_def multiset_eq_iff flip: count_mset)
then obtain p where ‹p permutes {..<length ys}› ‹permute_list p ys = xs›
by (rule mset_eq_permutation)
then have ‹bij_betw p {..<length xs} {..<length ys}› ‹∀i<length xs. xs ! i = ys ! p i›
by (auto dest: permutes_imp_bij simp add: permute_list_nth)
then show ?thesis
by blast
qed

lemma 𝒫_k: assumes "k ∈ keys" shows "𝒫(fst) {k} = K k"
proof -
from assms have *:
"fst -` {k} ∩ msgs = {k}×{ms. set ms ⊆ messages ∧ length ms = n}"
unfolding msgs_def by auto
show "(𝒫(fst) {k}) = K k"
using prod_sum_distrib_lists[OF M.finite_space, of M n "λx x. True"] ‹k ∈ keys›
by (auto simp add: sum_distrib_left[symmetric] subset_eq prod.neutral_const)
qed

lemma fst_image_msgs[simp]: "fst`msgs = keys"
proof -
from M.not_empty obtain m where "m ∈ messages" by auto
then have *: "{m. set m ⊆ messages ∧ length m = n} ≠ {}"
by (auto intro!: exI[of _ "replicate n m"])
then show ?thesis
unfolding msgs_def fst_image_times if_not_P[OF *] by simp
qed

lemma sum_distribution_cut:
"𝒫(X) {x} = (∑y ∈ Y`space μ. 𝒫(X ; Y) {(x, y)})"
by (subst finite_measure_finite_Union[symmetric])
intro!: arg_cong[where f=prob])

lemma prob_conj_imp1:
"prob ({x. Q x} ∩ msgs) = 0 ⟹ prob ({x. Pr x ∧ Q x} ∩ msgs) = 0"
using finite_measure_mono[of "{x. Pr x ∧ Q x} ∩ msgs" "{x. Q x} ∩ msgs"]
using measure_nonneg[of μ "{x. Pr x ∧ Q x} ∩ msgs"]

lemma prob_conj_imp2:
"prob ({x. Pr x} ∩ msgs) = 0 ⟹ prob ({x. Pr x ∧ Q x} ∩ msgs) = 0"
using finite_measure_mono[of "{x. Pr x ∧ Q x} ∩ msgs" "{x. Pr x} ∩ msgs"]
using measure_nonneg[of μ "{x. Pr x ∧ Q x} ∩ msgs"]

lemma simple_function_finite: "simple_function μ f"

lemma entropy_commute: "ℋ(λx. (X x, Y x)) = ℋ(λx. (Y x, X x))"
apply (subst (1 2) entropy_simple_distributed[OF simple_distributedI[OF simple_function_finite _ refl]])
unfolding space_point_measure
proof -
have eq: "(λx. (X x, Y x)) ` msgs = (λ(x, y). (y, x)) ` (λx. (Y x, X x)) ` msgs"
by auto
show "- (∑x∈(λx. (X x, Y x)) ` msgs. (𝒫(X ; Y) {x}) * log b (𝒫(X ; Y) {x})) =
- (∑x∈(λx. (Y x, X x)) ` msgs. (𝒫(Y ; X) {x}) * log b (𝒫(Y ; X) {x}))"
unfolding eq
apply (subst sum.reindex)
apply (auto simp: vimage_def inj_on_def intro!: sum.cong arg_cong[where f="λx. prob x * log b (prob x)"])
done
qed simp_all

lemma (in -) measure_eq_0I: "A = {} ⟹ measure M A = 0" by simp

lemma conditional_entropy_eq_ce_with_hypothesis:
"ℋ(X | Y) = -(∑y∈Y`msgs. (𝒫(Y) {y}) * (∑x∈X`msgs. (𝒫(X ; Y) {(x,y)}) / (𝒫(Y) {y}) *
log b ((𝒫(X ; Y) {(x,y)}) / (𝒫(Y) {y}))))"
apply (subst conditional_entropy_eq[OF
simple_distributedI[OF simple_function_finite _ refl]
simple_distributedI[OF simple_function_finite _ refl]])
unfolding space_point_measure
proof -
have "- (∑(x, y)∈(λx. (X x, Y x)) ` msgs. (𝒫(X ; Y) {(x, y)}) * log b ((𝒫(X ; Y) {(x, y)}) / (𝒫(Y) {y}))) =
- (∑x∈X`msgs. (∑y∈Y`msgs. (𝒫(X ; Y) {(x, y)}) * log b ((𝒫(X ; Y) {(x, y)}) / (𝒫(Y) {y}))))"
unfolding sum.cartesian_product
apply (intro arg_cong[where f=uminus] sum.mono_neutral_left)
apply (auto simp: vimage_def image_iff intro!: measure_eq_0I)
apply metis
done
also have "… = - (∑y∈Y`msgs. (∑x∈X`msgs. (𝒫(X ; Y) {(x, y)}) * log b ((𝒫(X ; Y) {(x, y)}) / (𝒫(Y) {y}))))"
by (subst sum.swap) rule
also have "… = -(∑y∈Y`msgs. (𝒫(Y) {y}) * (∑x∈X`msgs. (𝒫(X ; Y) {(x,y)}) / (𝒫(Y) {y}) * log b ((𝒫(X ; Y) {(x,y)}) / (𝒫(Y) {y}))))"
by (auto simp add: sum_distrib_left vimage_def intro!: sum.cong prob_conj_imp1)
finally show "- (∑(x, y)∈(λx. (X x, Y x)) ` msgs. (𝒫(X ; Y) {(x, y)}) * log b ((𝒫(X ; Y) {(x, y)}) / (𝒫(Y) {y}))) =
-(∑y∈Y`msgs. (𝒫(Y) {y}) * (∑x∈X`msgs. (𝒫(X ; Y) {(x,y)}) / (𝒫(Y) {y}) * log b ((𝒫(X ; Y) {(x,y)}) / (𝒫(Y) {y}))))" .
qed simp_all

lemma ce_OB_eq_ce_t: "ℐ(fst ; OB) = ℐ(fst ; t∘OB)"
proof -
txt ‹Lemma 2›

{ fix k obs obs'
assume "k ∈ keys" "K k ≠ 0" and obs': "obs' ∈ OB ` msgs" and obs: "obs ∈ OB ` msgs"
assume "t obs = t obs'"
from t_eq_imp_bij_func[OF this]
obtain t_f where "bij_betw t_f {..<n} {..<n}" and
obs_t_f: "⋀i. i<n ⟹ obs!i = obs' ! t_f i"
using obs obs' unfolding OB_def msgs_def by auto
then have t_f: "inj_on t_f {..<n}" "{..<n} = t_f`{..<n}" unfolding bij_betw_def by auto

{ fix obs assume "obs ∈ OB`msgs"
then have **: "⋀ms. length ms = n ⟹ OB (k, ms) = obs ⟷ (∀i<n. observe k (ms!i) = obs ! i)"
unfolding OB_def msgs_def by (simp add: image_iff list_eq_iff_nth_eq)

have "(𝒫(OB ; fst) {(obs, k)}) / K k =
p ({k}×{ms. (k,ms) ∈ msgs ∧ OB (k,ms) = obs}) / K k"
apply (simp add: μ'_eq) by (auto intro!: arg_cong[where f=p])
also have "… =
(∏i<n. ∑m∈{m∈messages. observe k m = obs ! i}. M m)"
unfolding P_def using ‹K k ≠ 0› ‹k ∈ keys›
apply (simp add: sum.cartesian_product' sum_divide_distrib msgs_def ** cong: conj_cong)
apply (subst prod_sum_distrib_lists[OF M.finite_space]) ..
finally have "(𝒫(OB ; fst) {(obs, k)}) / K k =
(∏i<n. ∑m∈{m∈messages. observe k m = obs ! i}. M m)" . }
note * = this

have "(𝒫(OB ; fst) {(obs, k)}) / K k = (𝒫(OB ; fst) {(obs', k)}) / K k"
unfolding *[OF obs] *[OF obs']
using t_f(1) obs_t_f by (subst (2) t_f(2)) (simp add: prod.reindex)
then have "(𝒫(OB ; fst) {(obs, k)}) = (𝒫(OB ; fst) {(obs', k)})"
using ‹K k ≠ 0› by auto }
note t_eq_imp = this

let ?S = "λobs. t -`{t obs} ∩ OB`msgs"
{ fix k obs assume "k ∈ keys" "K k ≠ 0" and obs: "obs ∈ OB`msgs"
have *: "((λx. (t (OB x), fst x)) -` {(t obs, k)} ∩ msgs) =
(⋃obs'∈?S obs. ((λx. (OB x, fst x)) -` {(obs', k)} ∩ msgs))" by auto
have df: "disjoint_family_on (λobs'. (λx. (OB x, fst x)) -` {(obs', k)} ∩ msgs) (?S obs)"
unfolding disjoint_family_on_def by auto
have "𝒫(t∘OB ; fst) {(t obs, k)} = (∑obs'∈?S obs. 𝒫(OB ; fst) {(obs', k)})"
unfolding comp_def
using finite_measure_finite_Union[OF _ _ df]
by (auto simp add: * intro!: sum_nonneg)
also have "(∑obs'∈?S obs. 𝒫(OB ; fst) {(obs', k)}) = real (card (?S obs)) * 𝒫(OB ; fst) {(obs, k)}"
by (simp add: t_eq_imp[OF ‹k ∈ keys› ‹K k ≠ 0› obs])
finally have "𝒫(t∘OB ; fst) {(t obs, k)} = real (card (?S obs)) * 𝒫(OB ; fst) {(obs, k)}" .}
note P_t_eq_P_OB = this

{ fix k obs assume "k ∈ keys" and obs: "obs ∈ OB`msgs"
have "𝒫(t∘OB | fst) {(t obs, k)} =
real (card (t -` {t obs} ∩ OB ` msgs)) * 𝒫(OB | fst) {(obs, k)}"
using 𝒫_k[OF ‹k ∈ keys›] P_t_eq_P_OB[OF ‹k ∈ keys› _ obs] by auto }
note CP_t_K = this

{ fix k obs assume "k ∈ keys" and obs: "obs ∈ OB`msgs"
then have "t -`{t obs} ∩ OB`msgs ≠ {}" (is "?S ≠ {}") by auto
then have "real (card ?S) ≠ 0" by auto

have "𝒫(fst | t∘OB) {(k, t obs)} = 𝒫(t∘OB | fst) {(t obs, k)} * 𝒫(fst) {k} / 𝒫(t∘OB) {t obs}"
using finite_measure_mono[of "{x. fst x = k ∧ t (OB x) = t obs} ∩ msgs" "{x. fst x = k} ∩ msgs"]
using measure_nonneg[of μ "{x. fst x = k ∧ t (OB x) = t obs} ∩ msgs"]
by (auto simp add: vimage_def conj_commute subset_eq simp del: measure_nonneg)
also have "(𝒫(t∘OB) {t obs}) = (∑k'∈keys. (𝒫(t∘OB|fst) {(t obs, k')}) * (𝒫(fst) {k'}))"
using finite_measure_mono[of "{x. t (OB x) = t obs ∧ fst x = k} ∩ msgs" "{x. fst x = k} ∩ msgs"]
using measure_nonneg[of μ "{x. fst x = k ∧ t (OB x) = t obs} ∩ msgs"]
apply (simp add: sum_distribution_cut[of "t∘OB" "t obs" fst])
apply (auto intro!: sum.cong simp: subset_eq vimage_def prob_conj_imp1)
done
also have "𝒫(t∘OB | fst) {(t obs, k)} * 𝒫(fst) {k} / (∑k'∈keys. 𝒫(t∘OB|fst) {(t obs, k')} * 𝒫(fst) {k'}) =
𝒫(OB | fst) {(obs, k)} * 𝒫(fst) {k} / (∑k'∈keys. 𝒫(OB|fst) {(obs, k')} * 𝒫(fst) {k'})"
using CP_t_K[OF ‹k∈keys› obs] CP_t_K[OF _ obs] ‹real (card ?S) ≠ 0›
by (simp only: sum_distrib_left[symmetric] ac_simps
mult_divide_mult_cancel_left[OF ‹real (card ?S) ≠ 0›]
cong: sum.cong)
also have "(∑k'∈keys. 𝒫(OB|fst) {(obs, k')} * 𝒫(fst) {k'}) = 𝒫(OB) {obs}"
using sum_distribution_cut[of OB obs fst]
by (auto intro!: sum.cong simp: prob_conj_imp1 vimage_def)
also have "𝒫(OB | fst) {(obs, k)} * 𝒫(fst) {k} / 𝒫(OB) {obs} = 𝒫(fst | OB) {(k, obs)}"
by (auto simp: vimage_def conj_commute prob_conj_imp2)
finally have "𝒫(fst | t∘OB) {(k, t obs)} = 𝒫(fst | OB) {(k, obs)}" . }
note CP_T_eq_CP_O = this

let ?H = "λobs. (∑k∈keys. 𝒫(fst|OB) {(k, obs)} * log b (𝒫(fst|OB) {(k, obs)})) :: real"
let ?Ht = "λobs. (∑k∈keys. 𝒫(fst|t∘OB) {(k, obs)} * log b (𝒫(fst|t∘OB) {(k, obs)})) :: real"

{ fix obs assume obs: "obs ∈ OB`msgs"
have "?H obs = (∑k∈keys. 𝒫(fst|t∘OB) {(k, t obs)} * log b (𝒫(fst|t∘OB) {(k, t obs)}))"
using CP_T_eq_CP_O[OF _ obs]
by simp
then have "?H obs = ?Ht (t obs)" . }
note * = this

have **: "⋀x f A. (∑y∈t-`{x}∩A. f y (t y)) = (∑y∈t-`{x}∩A. f y x)" by auto

{ fix x
have *: "(λx. t (OB x)) -` {t (OB x)} ∩ msgs =
(⋃obs∈?S (OB x). OB -` {obs} ∩ msgs)" by auto
have df: "disjoint_family_on (λobs. OB -` {obs} ∩ msgs) (?S (OB x))"
unfolding disjoint_family_on_def by auto
have "𝒫(t∘OB) {t (OB x)} = (∑obs∈?S (OB x). 𝒫(OB) {obs})"
unfolding comp_def
using finite_measure_finite_Union[OF _ _ df]
by (force simp add: * intro!: sum_nonneg) }
note P_t_sum_P_O = this

txt ‹Lemma 3›
have "ℋ(fst | OB) = -(∑obs∈OB`msgs. 𝒫(OB) {obs} * ?Ht (t obs))"
unfolding conditional_entropy_eq_ce_with_hypothesis using * by simp
also have "… = -(∑obs∈t`OB`msgs. 𝒫(t∘OB) {obs} * ?Ht obs)"
apply (subst SIGMA_image_vimage[symmetric, of "OB`msgs" t])
apply (subst sum.reindex)
apply (fastforce intro!: inj_onI)
apply simp
apply (subst sum.Sigma[symmetric, unfolded split_def])
using finite_space apply fastforce
using finite_space apply fastforce
apply (safe intro!: sum.cong)
using P_t_sum_P_O
by (simp add: sum_divide_distrib[symmetric] field_simps **
sum_distrib_left[symmetric] sum_distrib_right[symmetric])
also have "… = ℋ(fst | t∘OB)"
unfolding conditional_entropy_eq_ce_with_hypothesis
finally show ?thesis
by (subst (1 2) mutual_information_eq_entropy_conditional_entropy) simp_all
qed

theorem "ℐ(fst ; OB) ≤ real (card observations) * log b (real n + 1)"
proof -
have "ℐ(fst ; OB) = ℋ(fst) - ℋ(fst | t∘OB)"
unfolding ce_OB_eq_ce_t
by (rule mutual_information_eq_entropy_conditional_entropy simple_function_finite)+
also have "… = ℋ(t∘OB) - ℋ(t∘OB | fst)"
unfolding entropy_chain_rule[symmetric, OF simple_function_finite simple_function_finite] algebra_simps
by (subst entropy_commute) simp
also have "… ≤ ℋ(t∘OB)"
using conditional_entropy_nonneg[of "t∘OB" fst] by simp
also have "… ≤ log b (real (card ((t∘OB)`msgs)))"
using entropy_le_card[of "t∘OB", OF simple_distributedI[OF simple_function_finite _ refl]] by simp
also have "… ≤ log b (real (n + 1)^card observations)"
using card_T_bound not_empty
by (auto intro!: log_mono simp: card_gt_0_iff of_nat_power [symmetric] simp del: of_nat_power of_nat_Suc)
also have "… = real (card observations) * log b (real n + 1)"