(* ------------------ It is always possible to build a good SC order ---------------------- *)

open Atomic
open MachineDefTypes
open MachineDefInstructionSemantics
open MachineDefThreadSubsystem
open Intermediate_actions
open Statement
open Power_thread_lemmas

(* ----- Preliminary: Defining the events and relations ----- *)
val t:trace
val actions:c_action set

val sc_actions : action set
let sc_actions = { action_to_i_action a | forall (a IN actions) | is_seq_cst a }

val fr_sc : action reln
let fr_sc = 
    { (r,w) | forall (r IN sc_actions) (w IN sc_actions) 
            | exist w'. (w',r) IN trace_to_rf t && (w',w) IN trace_to_co t }

val erf_sc : action reln
let erf_sc =
    { (w,r) | forall (r IN sc_actions) (w IN sc_actions) 
            | exist w'. (w',r) IN trace_to_rf t && (w,w') IN rc (trace_to_co t) sc_actions && 
                (forall (w'' IN sc_actions). 
                   not ((w,w'') IN trace_to_co t && (w'',w') IN trace_to_co t)) }

val po_sc : action reln
let po_sc = rrestrict (trace_to_po t) sc_actions

val co_sc : action reln
let co_sc = rrestrict (trace_to_co t) sc_actions


(* -------------- Then we have that always, tc (fr_sc union erf_sc union po_sc union co_sc) is acyclic ---------- *)
let acyclic_sc_relns_lemma =
   is_acyclic (tc (fr_sc union erf_sc union po_sc union co_sc))

(*Proof:

Suppose not. Then there is a cycle in tc (fr_sc union erf_sc union po_sc union co_sc).

Case analyze on whether there is any po_sc edges in the resulting cycle:
*)
let case_I = not (is_acyclic (tc (fr_sc union erf_sc union co_sc)))

(*Case I Proof:

Then there are two writes W1 and W2 such that they are related both
ways by tc (co_sc union fr_sc union erf_sc) 
(because if there is a read R involved in a cycle, then by typing, 
there is a W1 --ERF--> R --FR--> W2).

Then W1 and W2 are related both ways by co_sc (since any two writes in
(co_sc union fr_sc union erf_sc) are same location, and by totality of co_sc).

Then the POWER coherence order has a cycle. Then there is a smallest
prefix of the machine trace with the cycle still present, such that
the last transition in the trace created a cycle. Case analyze on that
transition to get the contradiction.

QED for Case I
*)

(* Case II: tc (fr_sc union erf_sc union co_sc) is acyclic, but 
 there is a cycle in tc (fr_sc union erf_sc union po_sc union co_sc) 

This means there is at least one po_sc edge in the cycle.
*)
let case_II = is_acyclic (tc (fr_sc union erf_sc union co_sc))

(*Proof for Case II:

In this case, we have at least one po_sc edge in the cycle.

Pick any such po_sc edge as a distinguished edge 
ia1 --po_sc--> ia2.

Because ia2 is a sc action, from the chart mapping, there must be a 
(hw)sync between ia1 and ia2, and ia1 and ia2 are on the same thread. Thus we have,
in this situation:
*)
val ia1: action
val ia2: action
let _ = (ia1,ia2) IN trace_to_xsync t Sync

(*
We have an easy lemma about the POWER machine trace, that if two same-thread
events are separated by a sync, then there is, in machine trace, a sync-ack between the
commit events of the two actions, and by that sync-ack, if the first event is a write, 
it or a coherence successor has propagated to all threads.
*)
let sync_separated_lemma =
   forall ia1 ia2 t.
      (ia1,ia2) IN trace_to_xsync t Sync -->
      (* 1. *)
      (exist l b. (l = Acknowledge_sync b) &&
          (commit_transition_of t ia1,l) IN trace_to_order t &&
          (l,commit_transition_of t ia2) IN trace_to_order t &&
      (* 2. *)
          (is_write ia1 -->
             (forall tid. exist lprop w'.
                    (lprop,l) IN trace_to_order t &&
                    lprop = Write_propagate_to_thread w' tid &&
                    (ia1, Write (w_aid_comp w'.w_eiid) w'.w_thread w'.w_addr w'.w_value) IN rc (trace_to_co t) (write_actions_of_trace t))))
(*Proof of sync_separated_lemma:

For 1. Since ia1 and ia2 are separated by at least one sync, call such a sync b. 
       By CommitPrevBarrLS premise of the commit transition, b must be acknowledged before ia2 is committed,
       and by CommitMemoryAccessBeforeBarrier premise, ia1 must be committed before b is committed (and b must
       be committed before being acknowledged.

For 2. By PropAllThreads premise of acknowledge sync, b must be propagated to all threads before b can be acknowledged
       By PropGroupA premise of propagating barrier, before b is propagated to a thread, all group A writes (including ia1 in particular),
        or coherence successors thereof, must be propagated to that thread.

QED for sync_separated_lemma
*)

(*Thus we have: *)
val s:barrier 
let _ = s.b_barrier_type = Sync
let _ = (commit_transition_of t ia1,Acknowledge_sync s) IN trace_to_order t
let premise1 = (Acknowledge_sync s,commit_transition_of t ia2) IN trace_to_order t

(*Also, we are in the case*)
let _ = (ia2,ia1) IN tc (fr_sc union erf_sc union po_sc union co_sc) 
(*From which, we show below*)
let premise2 = 
    if is_read ia2 
    then forall (l IN last_satisfy_transition_of t ia2). (l,Acknowledge_sync s) IN trace_to_order t
    else (commit_transition_of t ia2,Acknowledge_sync s) IN trace_to_order t

(* premise1 and premise2 give the required contradiction (since for reads, the last 
satisfy transition is after the sync ack)

QED for Case II (with one goal remaining)
*)

(*Goal remaining to prove -- *)
let goal_remaining_for_sc =
   forall ian. (ian,ia1) IN tc (fr_sc union erf_sc union po_sc union co_sc) -->
                 (if is_read ian then 
                 forall (l IN last_satisfy_transition_of t ian). (l,Acknowledge_sync s) IN trace_to_order t
                 else
                 (commit_transition_of t ian,Acknowledge_sync s) IN trace_to_order t)
(*Proof:
To prove the goal, we will induct on the length of the chain between ian and ia1.
However, we will have to strengthen the inductive hypothesis in two ways.

Thus, the statement we actually prove is the key lemma below, which is easily 
seen to be stronger than (imply) the goal above.

QED for goal (with key lemma remaining)
*)
let key_lemma_for_sc = 
   forall ian. (ian,ia1) IN tc (fr_sc union erf_sc union po_sc union co_sc) -->
               (exist sn ia_intn.
                 (* 1.1 *)
                 (if is_read ian then 
                 forall (l IN last_satisfy_transition_of t ian). (l,Acknowledge_sync sn) IN trace_to_order t
                 else
                 (commit_transition_of t ian,Acknowledge_sync sn) IN trace_to_order t) &&
                 (* 1.2 *)
                 (Acknowledge_sync sn,Acknowledge_sync s) IN rc (trace_to_order t) {nth_label n t | forall n | true} &&
                 (* 2 *)
                 (is_write ian --> 
                       (forall tid. exist lprop w'.
                           (lprop,Acknowledge_sync sn) IN trace_to_order t &&
                           lprop = Write_propagate_to_thread w' tid &&
                           (ia_intn, Write (w_aid_comp w'.w_eiid) w'.w_thread w'.w_addr w'.w_value) IN rc (trace_to_co t) (write_actions_of_trace t))) &&
                 (* 3 *)
                 (commit_transition_of t ia_intn,Acknowledge_sync sn) IN trace_to_order t &&
                 (* 4 *)
                 (get_thread ia_intn = sn.b_thread) &&
                 (* 5 *)
                 (ian,ia_intn) IN rc (tc (co_sc union fr_sc union erf_sc)) (trace_to_actions t))
(*Proof of key lemma:

By induction on the length of (tc (fr_sc union erf_sc union po_sc union co_sc)) edges between (ian,ia1)

Base case:  length 0 (ian = ia1)
Take ia_intn = ian, sn = s

1.1 and 3 is by CommitMemoryAccessBeforeBarrier precondition of commit rule
1.2, 4, and 5 are trivially true
2: By PropAllThreads premise of acknowledge sync, s must be propagated to all threads before s can be acknowledged
   By PropGroupA premise of propagating barrier, before s is propagated to a thread, all group A writes (including ian in particular),
        or coherence successors thereof, must be propagated to that thread.

Inductive case: length m=n+1

Inductive hypotheses:

exist sn ia_intn.

IND1.1: (if is_read ian then 
        forall (l IN last_satisfy_transition_of t ian). (l,Acknowledge_sync sn) IN trace_to_order t
        else
        (commit_transition_of t ian,Acknowledge_sync sn) IN trace_to_order t) &&
IND1.2: (Acknowledge_sync sn,Acknowledge_sync s) IN rc (trace_to_order t) {nth_label n t | forall n | true} &&
IND2:   (is_write ian --> 
              (forall tid. exist lprop w'.
                  (lprop,Acknowledge_sync sn) IN trace_to_order t &&
                  lprop = Write_propagate_to_thread w' tid &&
                  (ia_intn, Write (w_aid_comp w'.w_eiid) w'.w_thread w'.w_addr w'.w_value) IN rc (trace_to_co t) (write_actions_of_trace t))) &&
IND3:   (commit_transition_of t ia_intn,Acknowledge_sync sn) IN trace_to_order t &&
IND4:   (get_thread ia_intn = sn.b_thread) &&
IND5:   (ian,ia_intn) IN rc (tc (co_sc union fr_sc union erf_sc)) (trace_to_actions t))


Case analyze the edge from iam to ian:

Case po_sc:
        Then iam and ian are on the same thread, and there is a sync in between in program order.
        Let sm be that sync, ia_intm = iam.
        1.1 and 3 is by CommitMemoryAccessBeforeBarrier premise of the commit rule
        1.2 is by transitivity of trace order using IND1.1 and IND1.2
        2 is by sync_separated_lemma above
        4 and 5 are by construction

Case co_sc:
        Then iam and ian are two writes to the same location.
        Take sm = sn, ia_intm = ia_intn
        1.2, 2,3,4 are by IND1.2, IND2, IND3, and IND4 respectively
        1.1 is because commit transition is before write propagate transition, using 1.2
        5 is using the new co_sc edge and IND5

Case fr_sc:
        Then iam is a read and ian a write to the same location.
        Take sm = sn, ia_intm = ia_intn
        For 1.1, note that iam is satisfied before ian (or a coherence-successor) is 
          propagated to iam's thread, which by IND2 is before the Acknowledge_sync sn 
          transition.
        1.2, 3, 4 are by IND1.2, IND3, and IND4 respectively
        2 is vacuously true
        5 is using the new co_sc edge and IND5

Case erf_sc:
        Then iam is a write, and ian a write to the same location.
        Take sm = sn, ia_intm = ia_intn

        Then 1.2, 3, 4 is by IND1.2, IND3, and IND4 respectively
        5 is using the new erf_sc edge and IND5

        Now case analyze on ia_intn.

        Subcase is_read ia_intn:

        Then (iam,ia_intn) IN rc (tc (co_sc union fr_sc union erf_sc)) (trace_to_actions t)

        Lemma: 
*)
let lemma1 =
    forall iaw iar.
       is_write iaw && is_read iar && (iaw,iar) IN rc (tc (co_sc union fr_sc union erf_sc)) (trace_to_actions t) -->
       (exist iaw'. (iaw',iar) IN erf_sc && (iaw,iaw') IN rc co_sc (write_actions_of_trace t)) 
        
(*
        Then we have 
        exist ia_intnw. (ia_intnw,ia_intn) IN erf_sc && (iam,ia_intnw) IN rc co_sc (write_actions_of_trace t) 

        2 is now using that ia_intnw, 
        1.1 is because writes must be committed before being propagated. 

        Now, proof of lemma1: 
          Since (iaw,iar) IN rc (tc (co_sc union fr_sc union erf_sc)) (trace_to_actions t),
          and iar is a read and iaw is a write, from analyzing the edges in the relation,
          we must have a (iaw,iaw') IN rc (tc (co_sc union fr_sc union erf_sc)) (trace_to_actions t)
          and (iaw',iar) IN erf_sc with iaw' a write.
          From the first, since iaw and iaw' are two writes to the same location, and 
          co_sc is total per location, the result follows if we can show 
          not ((iaw',iaw) IN co_sc).
          But from (iaw',iaw) IN co_sc, we get a cycle in co_sc, which implies a
          cycle in the POWER coherence order, which leads to a contradiction.
          Hence the required condition.

        QED for subcase is_read ia_intn
*)
(*
        Subcase is_write ia_intn:

        Then (iam,ia_intn) IN rc (tc (co_sc union fr_sc union erf_sc)) (trace_to_actions t)

        Lemma:
*)
let lemma2 =
    forall iaw1 iaw2.
         is_write iaw1 && is_write iaw2 && (iaw1,iaw2) IN rc (tc (co_sc union fr_sc union erf_sc)) (trace_to_actions t) -->
         (iaw1,iaw2) IN rc co_sc (trace_to_actions t)
(*
        Thus we have (iam,ia_intn) IN rc co_sc (trace_to_actions t)

        2 is now using that ia_intn, 
        1.1 is because writes must be committed before being propagated. 

        Now, proof of lemma2: 
         By the premises, iaw1 and iaw2 are writes to the same location.
         Recall that co_sc is total per location. 
         We have that not ((iaw2,iaw1) IN co_sc), since
            from (iaw2,iaw1) IN co_sc and the premises, we have two
            writes related both ways by co_sc (i.e. a cycle in co_sc),
            which means the POWER coherence order has a cycle, which 
            leads to a contradiction.
         Thus the result.

        QED for subcase is_write ia_intn
*)

(* This concludes all the cases. QED *)

(*

Now to add SC fences (by the mapping, sync):

Take R = tc (fr_sc union erf_sc union po_sc union co_sc), which we have
shown is acyclic above.

Now look at all F_sc (i.e. sync not associated with atomic SC actions)
events, and add them one by one to the partial order R according to the
following rules:

- If F_sc is po-before some action ia IN R, then R' = R union
  (F_sc,ia)

- If F_sc is po-after some action ia IN R, then R' = R union (ia,F_sc)

- If F_sc and ia IN R are po-unrelated, 

  Call F_sc-before all write actions which are either po-before F_sc, 
  or read by a read po-before F_sc

  Call F_sc-after writes all write actions which are either
  po-after F_sc or read by a read po-after F_sc

  Call ia-before all write actions which are either po-before-or-equal
  ia, or read by a read po-before-or-equal ia

  Call ia-after writes all write actions which are either
  po-after-or-equal ia or read by a read po-after-or-equal ia

  if there exist (w2 IN F_sc-before,w1 IN ia_after). (w1,w2) IN co,
  then R' = R union (ia,F_sc) 

  if there exist (w2 IN ia-before,w1 IN F_sc_after). (w1,w2) IN co,
  then R' = R union (F_sc,ia) 


Claim: at each stage, R' is acyclic
Therefore: if we add all SC fence actions, the complete relation R is acyclic
(and we will use as SC order any linearization of that R)


To prove claim: 

We call ia is done if:
 - for a write, when it is committed
 - for a read, when it is last satisfied before a commit
 - for a rmw, when it's stwcx is successfully committed
 - for a sc fence (sync), when it is acknowledged

We call ia's effect is prop-before-simple to a thread t if:
 - if ia is a write, it or a coherence-successor is propagated to t
 - if ia is a read, the write it read from or a coherence-successor is propagated to t
 - if ia is a rmw, when it's stwcx is propagated to t
 - if ia is a sc fence (sync), when it is propagated to t

We maintain inductively the invariant about our relation R that:
if (ia1,ia2) IN R, and
  if ia2 is a fence, then ia1's effect is prop-before-simple to ia2's thread before ia2 is committed
  if ia1 is a fence, then ia1's effect is prop-before-simple to ia2's thread before ia2

Then, the claim follows by seeing that the initial R is acyclic (by lemma proved already), and 
at each stage, assuming inductively that R is acyclic,
  a sync F is added to R to get R'
  If R' were to become cyclic, then there must be an edge from F and to F in R',
   in other words, F's effect must be prop-before-simple to F's thread before F is committed, a contradiction.


Establishing the invariant for the initial R:

   trivial, since there is no fence in the initial R

Maintaining the invariant from R to R':
  by induction on the length of R' edges from ia to the new F_sc (or the new F_sc to ia)

    - base cases are the new edges to and from the added sync F_sc, for all of which we get invariant by acyclicity of po or co

    - inductive case: (ia1,ia2) one-step AND (ia2,ia3) already established invariant
            subcase ia2 a sync (fence_sc): then ia1 is in group A, and by induction
            subcase ia2 a write -- subsubcase ia1 is a fence - by induction
                                -- subsubcase (ia1,ia2) IN po -- by using the sync in between
                                -- subsubcase (ia1,ia2) IN (co+fr) -- direct
                                -- subsubcase (ia1,ia2) IN rf -- impossible by typing
            subcase ia2 a read  -- subsubcase ia1 is a fence - by induction
                                -- subsubcase (ia1,ia2) IN po -- by using the sync in between
                                -- subsubcase (ia1,ia2) IN rf -- direct
                                -- subsubcase (ia1,ia2) IN co+fr -- impossible by typing


*)