Correctness proof for common subexpression elimination.
Require Import Coqlib.
Require Import Maps.
Require Import Ast.
Require Import Integers.
Require Import Pointers.
Require Import Floats.
Require Import Values.
Require Import Mem.
Require Import Events.
Require Import Globalenvs.
Require Import Op.
Require Import Registers.
Require Import RTL.
Require Import Kildall.
Require Import CSE.
Require Import Memcomp Traces.
Require Import Simulations MCsimulation.
Require Import Libtactics.
Semantic properties of value numberings
Well-formedness of numberings
A numbering is well-formed if all registers mentioned in equations
are less than the ``next'' register number given in the numbering.
This guarantees that the next register is fresh with respect to
the equations.
Definition wf_rhs (
next:
valnum) (
rh:
rhs) :
Prop :=
match rh with
|
Op op vl =>
forall v,
In v vl ->
Plt v next
end.
Definition wf_equation (
next:
valnum) (
vr:
valnum) (
rh:
rhs) :
Prop :=
Plt vr next /\
wf_rhs next rh.
Definition wf_numbering (
n:
numbering) :
Prop :=
(
forall v rh,
In (
v,
rh)
n.(
num_eqs) ->
wf_equation n.(
num_next)
v rh)
/\
(
forall r v,
PTree.get r n.(
num_reg) =
Some v ->
Plt v n.(
num_next)).
Lemma wf_empty_numbering:
wf_numbering empty_numbering.
Proof.
unfold empty_numbering;
split;
simpl;
intros.
elim H.
rewrite PTree.gempty in H.
congruence.
Qed.
Lemma wf_rhs_increasing:
forall next1 next2 rh,
Ple next1 next2 ->
wf_rhs next1 rh ->
wf_rhs next2 rh.
Proof.
intros;
destruct rh;
simpl;
intros;
apply Plt_Ple_trans with next1;
auto.
Qed.
Lemma wf_equation_increasing:
forall next1 next2 vr rh,
Ple next1 next2 ->
wf_equation next1 vr rh ->
wf_equation next2 vr rh.
Proof.
We now show that all operations over numberings
preserve well-formedness.
Lemma wf_valnum_reg:
forall n r n'
v,
wf_numbering n ->
valnum_reg n r = (
n',
v) ->
wf_numbering n' /\
Plt v n'.(
num_next) /\
Ple n.(
num_next)
n'.(
num_next).
Proof.
Lemma wf_valnum_regs:
forall rl n n'
vl,
wf_numbering n ->
valnum_regs n rl = (
n',
vl) ->
wf_numbering n' /\
(
forall v,
In v vl ->
Plt v n'.(
num_next)) /\
Ple n.(
num_next)
n'.(
num_next).
Proof.
induction rl;
intros.
simpl in H0.
inversion H0.
subst n';
subst vl.
simpl.
intuition.
simpl in H0.
caseEq (
valnum_reg n a).
intros n1 v1 EQ1.
caseEq (
valnum_regs n1 rl).
intros ns vs EQS.
rewrite EQ1 in H0;
rewrite EQS in H0;
simpl in H0.
inversion H0.
subst n';
subst vl.
generalize (
wf_valnum_reg _ _ _ _ H EQ1);
intros [
A1 [
B1 C1]].
generalize (
IHrl _ _ _ A1 EQS);
intros [
As [
Bs Cs]].
split.
auto.
split.
simpl;
intros.
elim H1;
intro.
subst v.
eapply Plt_Ple_trans;
eauto.
auto.
eapply Ple_trans;
eauto.
Qed.
Lemma find_valnum_rhs_correct:
forall rh vn eqs,
find_valnum_rhs rh eqs =
Some vn ->
In (
vn,
rh)
eqs.
Proof.
induction eqs;
simpl.
congruence.
case a;
intros v r'.
case (
eq_rhs rh r');
intro.
intro.
left.
congruence.
intro.
right.
auto.
Qed.
Lemma wf_add_rhs:
forall n rd rh,
wf_numbering n ->
wf_rhs n.(
num_next)
rh ->
wf_numbering (
add_rhs n rd rh).
Proof.
Lemma wf_add_op:
forall n rd op rs,
wf_numbering n ->
wf_numbering (
add_op n rd op rs).
Proof.
Lemma wf_add_unknown:
forall n rd,
wf_numbering n ->
wf_numbering (
add_unknown n rd).
Proof.
Lemma wf_transfer:
forall f pc n,
wf_numbering n ->
wf_numbering (
transfer f pc n).
Proof.
As a consequence, the numberings computed by the static analysis
are well formed.
Theorem wf_analyze:
forall f pc,
wf_numbering (
analyze f)!!
pc.
Proof.
Properties of satisfiability of numberings
Module ValnumEq.
Definition t :=
valnum.
Definition eq :=
peq.
End ValnumEq.
Module VMap :=
EMap(
ValnumEq).
Section SATISFIABILITY.
Variable ge:
genv.
Variable sp:
option pointer.
Agremment between two mappings from value numbers to values
up to a given value number.
Definition valu_agree (
valu1 valu2:
valnum ->
val) (
upto:
valnum) :
Prop :=
forall v,
Plt v upto ->
valu2 v =
valu1 v.
Lemma valu_agree_refl:
forall valu upto,
valu_agree valu valu upto.
Proof.
intros; red; auto.
Qed.
Lemma valu_agree_trans:
forall valu1 valu2 valu3 upto12 upto23,
valu_agree valu1 valu2 upto12 ->
valu_agree valu2 valu3 upto23 ->
Ple upto12 upto23 ->
valu_agree valu1 valu3 upto12.
Proof.
intros;
red;
intros.
transitivity (
valu2 v).
apply H0.
apply Plt_Ple_trans with upto12;
auto.
apply H;
auto.
Qed.
Lemma valu_agree_list:
forall valu1 valu2 upto vl,
valu_agree valu1 valu2 upto ->
(
forall v,
In v vl ->
Plt v upto) ->
map valu2 vl =
map valu1 vl.
Proof.
The numbering_holds predicate (defined in file CSE) is
extensional with respect to valu_agree.
Lemma numbering_holds_exten:
forall valu1 valu2 n rs,
valu_agree valu1 valu2 n.(
num_next) ->
wf_numbering n ->
numbering_holds valu1 ge sp rs n ->
numbering_holds valu2 ge sp rs n.
Proof.
intros.
inversion H0.
inversion H1.
split;
intros.
generalize (
H2 _ _ H6).
intro WFEQ.
generalize (
H4 _ _ H6).
unfold equation_holds;
destruct rh.
elim WFEQ;
intros.
rewrite (
valu_agree_list valu1 valu2 n.(
num_next)).
rewrite H.
auto.
auto.
auto.
exact H8.
rewrite H.
auto.
eauto.
Qed.
valnum_reg and valnum_regs preserve the numbering_holds predicate.
Moreover, it is always the case that the returned value number has
the value of the given register in the final assignment of values to
value numbers.
Lemma valnum_reg_holds:
forall valu1 rs n r n'
v,
wf_numbering n ->
numbering_holds valu1 ge sp rs n ->
valnum_reg n r = (
n',
v) ->
exists valu2,
numbering_holds valu2 ge sp rs n' /\
valu2 v =
rs#
r /\
valu_agree valu1 valu2 n.(
num_next).
Proof.
intros until v.
unfold valnum_reg.
caseEq (
n.(
num_reg)!
r).
intros.
inversion H2.
subst n';
subst v0.
inversion H1.
exists valu1.
split.
auto.
split.
symmetry.
auto.
apply valu_agree_refl.
intros.
inversion H2.
subst n'.
subst v.
inversion H1.
set (
valu2 :=
VMap.set n.(
num_next)
rs#
r valu1).
assert (
AG:
valu_agree valu1 valu2 n.(
num_next)).
red;
intros.
unfold valu2.
apply VMap.gso.
auto with coqlib.
elim (
numbering_holds_exten _ _ _ _ AG H0 H1);
intros.
exists valu2.
split.
split;
simpl;
intros.
auto.
unfold valu2,
VMap.set,
ValnumEq.eq.
rewrite PTree.gsspec in H7.
destruct (
peq r0 r).
inversion H7.
rewrite peq_true.
congruence.
case (
peq vn (
num_next n));
intro.
inversion H0.
generalize (
H9 _ _ H7).
rewrite e.
intro.
elim (
Plt_strict _ H10).
auto.
split.
unfold valu2.
apply VMap.gss.
auto.
Qed.
Lemma valnum_regs_holds:
forall rs rl valu1 n n'
vl,
wf_numbering n ->
numbering_holds valu1 ge sp rs n ->
valnum_regs n rl = (
n',
vl) ->
exists valu2,
numbering_holds valu2 ge sp rs n' /\
List.map valu2 vl =
rs##
rl /\
valu_agree valu1 valu2 n.(
num_next).
Proof.
induction rl;
simpl;
intros.
inversion H1;
subst n';
subst vl.
exists valu1.
split.
auto.
split.
reflexivity.
apply valu_agree_refl.
caseEq (
valnum_reg n a);
intros n1 v1 EQ1.
caseEq (
valnum_regs n1 rl);
intros ns vs EQs.
rewrite EQ1 in H1;
rewrite EQs in H1.
inversion H1.
subst vl;
subst n'.
generalize (
valnum_reg_holds _ _ _ _ _ _ H H0 EQ1).
intros [
valu2 [
A [
B C]]].
generalize (
wf_valnum_reg _ _ _ _ H EQ1).
intros [
D [
E F]].
generalize (
IHrl _ _ _ _ D A EQs).
intros [
valu3 [
P [
Q R]]].
exists valu3.
split.
auto.
split.
simpl.
rewrite R.
congruence.
auto.
eapply valu_agree_trans;
eauto.
Qed.
A reformulation of the equation_holds predicate in terms
of the value to which a right-hand side of an equation evaluates.
Definition rhs_evals_to
(
valu:
valnum ->
val) (
rh:
rhs) (
v:
val) :
Prop :=
match rh with
|
Op op vl =>
eval_operation ge sp op (
List.map valu vl) =
Some v
end.
Lemma equation_evals_to_holds_1:
forall valu rh v vres,
rhs_evals_to valu rh v ->
equation_holds valu ge sp vres rh ->
valu vres =
v.
Proof.
intros until vres. unfold rhs_evals_to, equation_holds.
destruct rh. congruence.
Qed.
Lemma equation_evals_to_holds_2:
forall valu rh v vres,
wf_rhs vres rh ->
rhs_evals_to valu rh v ->
equation_holds (
VMap.set vres v valu)
ge sp vres rh.
Proof.
The numbering obtained by adding an equation rd = rhs is satisfiable
in a concrete register set where rd is set to the value of rhs.
Lemma add_rhs_satisfiable:
forall n rh valu1 rs rd v,
wf_numbering n ->
wf_rhs n.(
num_next)
rh ->
numbering_holds valu1 ge sp rs n ->
rhs_evals_to valu1 rh v ->
numbering_satisfiable ge sp (
rs#
rd <-
v) (
add_rhs n rd rh).
Proof.
add_op returns a numbering that is satisfiable in the register
set after execution of the corresponding Iop instruction.
Lemma add_op_satisfiable:
forall n rs op args dst v,
wf_numbering n ->
numbering_satisfiable ge sp rs n ->
eval_operation ge sp op rs##
args =
Some v ->
numbering_satisfiable ge sp (
rs#
dst <-
v) (
add_op n dst op args).
Proof.
add_unknown returns a numbering that is satisfiable in the
register set after setting the target register to any value.
Lemma add_unknown_satisfiable:
forall n rs dst v,
wf_numbering n ->
numbering_satisfiable ge sp rs n ->
numbering_satisfiable ge sp
(
rs#
dst <-
v) (
add_unknown n dst).
Proof.
Correctness of reg_valnum: if it returns a register r,
that register correctly maps back to the given value number.
Lemma reg_valnum_correct:
forall n v r,
reg_valnum n v =
Some r ->
n.(
num_reg)!
r =
Some v.
Proof.
intros until r.
unfold reg_valnum.
rewrite PTree.fold_spec.
assert(
forall l acc0,
List.fold_left
(
fun (
acc:
option reg) (
p:
reg *
valnum) =>
if peq (
snd p)
v then Some (
fst p)
else acc)
l acc0 =
Some r ->
In (
r,
v)
l \/
acc0 =
Some r).
induction l;
simpl.
intros.
tauto.
case a;
simpl;
intros r1 v1 acc0 FL.
generalize (
IHl _ FL).
case (
peq v1 v);
intro.
subst v1.
intros [
A|
B].
tauto.
inversion B;
subst r1.
tauto.
tauto.
intro.
elim (
H _ _ H0);
intro.
apply PTree.elements_complete;
auto.
discriminate.
Qed.
Correctness of find_op and find_load: if successful and in a
satisfiable numbering, the returned register does contain the
result value of the operation or memory load.
Lemma find_rhs_correct:
forall valu rs n rh r,
numbering_holds valu ge sp rs n ->
find_rhs n rh =
Some r ->
rhs_evals_to valu rh rs#
r.
Proof.
Lemma find_op_correct:
forall rs n op args r,
wf_numbering n ->
numbering_satisfiable ge sp rs n ->
find_op n op args =
Some r ->
eval_operation ge sp op rs##
args =
Some rs#
r.
Proof.
End SATISFIABILITY.
The numberings associated to each instruction by the static analysis
are inductively satisfiable, in the following sense: the numbering
at the function entry point is satisfiable, and for any RTL execution
from pc to pc', satisfiability at pc implies
satisfiability at pc'.
Theorem analysis_correct_1:
forall ge sp rs f pc pc'
i,
f.(
fn_code)!
pc =
Some i ->
In pc' (
successors_instr i) ->
numbering_satisfiable ge sp rs (
transfer f pc (
analyze f)!!
pc) ->
numbering_satisfiable ge sp rs (
analyze f)!!
pc'.
Proof.
Theorem analysis_correct_entry:
forall ge sp rs f,
numbering_satisfiable ge sp rs (
analyze f)!!(
f.(
fn_entrypoint)).
Proof.
Semantic preservation
Definition genv_rel :
genv ->
genv ->
Prop :=
(
fun x y =>
Genv.genv_match (
fun a b =>
transf_fundef a =
b)
y x).
Section PRESERVATION.
Variables (
ge tge :
genv).
Hypothesis TRANSF:
genv_rel ge tge.
Let lts := (
mklts thread_labels (
rtl_step ge)).
Let tlts := (
mklts thread_labels (
rtl_step tge)).
Lemma function_ptr_translated:
forall v f,
Genv.find_funct_ptr ge v =
Some f ->
Genv.find_funct_ptr tge v =
Some (
transf_fundef f).
Proof.
Lemma functions_translated:
forall v f,
Genv.find_funct ge v =
Some f ->
Genv.find_funct tge v =
Some (
transf_fundef f).
Proof.
Lemma symbols_preserved:
forall id,
Genv.find_symbol tge id =
Genv.find_symbol ge id.
Proof.
by intros;
destruct TRANSF. Qed.
Lemma sig_preserved:
forall f,
funsig (
transf_fundef f) =
funsig f.
Proof.
by intros [|]. Qed.
Lemma find_function_translated:
forall ros ls f,
find_function ge ros ls =
Some f ->
find_function tge ros ls =
Some (
transf_fundef f).
Proof.
The proof of semantic preservation is a simulation argument using
diagrams of the following form:
st1 --------------- st2
| |
t| |t
| |
v v
st1'--------------- st2'
Left: RTL execution in the original program. Right: RTL execution in
the optimized program. Precondition (top) and postcondition (bottom):
agreement between the states, including the fact that
the numbering at
pc (returned by the static analysis) is satisfiable.
Inductive match_stackframes:
stackframe ->
stackframe ->
Prop :=
match_stackframes_intro:
forall res c sp pc rs f,
c =
f.(
RTL.fn_code) ->
(
forall v,
numbering_satisfiable ge sp (
rs#
res <-
v) (
analyze f)!!
pc) ->
match_stackframes
(
Stackframe res c sp pc rs)
(
Stackframe res (
transf_code (
analyze f)
c)
sp pc rs).
Inductive match_states:
state ->
state ->
Prop :=
|
match_states_intro:
forall s c sp pc rs s'
f
(
CF:
c =
f.(
RTL.fn_code))
(
SAT:
numbering_satisfiable ge sp rs (
analyze f)!!
pc)
(
STACKS:
list_forall2 match_stackframes s s'),
match_states (
State s c sp pc rs)
(
State s' (
transf_code (
analyze f)
c)
sp pc rs)
|
match_states_call:
forall s f args s',
list_forall2 match_stackframes s s' ->
match_states (
Callstate s f args)
(
Callstate s' (
transf_fundef f)
args)
|
match_states_return:
forall s s'
v,
list_forall2 match_stackframes s s' ->
match_states (
Returnstate s v)
(
Returnstate s'
v)
|
match_states_blocked:
forall s s'
v,
list_forall2 match_stackframes s s' ->
match_states (
Blockedstate s v)
(
Blockedstate s'
v).
Ltac TransfInstr :=
match goal with
|
H1: (
PTree.get ?
pc ?
c =
Some ?
instr),
f:
function |-
_ =>
cut ((
transf_code (
analyze f)
c)!
pc =
Some(
transf_instr (
analyze f)!!
pc instr));
[
simpl
|
unfold transf_code;
rewrite PTree.gmap;
unfold option_map;
rewrite H1;
reflexivity ]
end.
The proof of simulation is a case analysis over the transition
in the source code.
Lemma transf_step_correct:
forall s1 t s2,
rtl_step ge s1 t s2 ->
forall s1' (
MS:
match_states s1 s1'),
exists s2',
rtl_step tge s1'
t s2' /\
match_states s2 s2'.
Proof.
Lemma init_sim_succ:
forall {
p vals tinit},
rtl_init tge p vals =
Some tinit ->
exists sinit,
rtl_init ge p vals =
Some sinit /\
match_states sinit tinit.
Proof.
intros p vals tinit INIT.
unfold rtl_init in *.
pose proof TRANSF as (
MG &
MF).
specialize (
MF p).
destruct (
Genv.find_funct_ptr tge p)
as [[[]|]|];
try done;
destruct (
Genv.find_funct_ptr ge p)
as [[[]|]|];
try done; [];
simpl in *.
inv MF.
destruct beq_nat;
try done; [].
eexists;
split;
try done;
inv INIT;
vauto.
Qed.
Lemma init_sim_fail:
forall {
p vals},
rtl_init tge p vals =
None ->
rtl_init ge p vals =
None.
Proof.
Definition bsim_rel := @
bsr _ lts tlts match_states.
Definition bsim_order := @
bsc _ tlts.
End PRESERVATION.
Definition cse_match_prg (
p :
rtl_sem.(
SEM_PRG))
(
p' :
rtl_sem.(
SEM_PRG)) :
Prop :=
transf_program p =
p'.
Theorem cse_sim Mm :
Sim.sim Mm Mm rtl_sem rtl_sem cse_match_prg.
Proof.