(* ------------------ It is always possible to build a good SC order ---------------------- *) open Atomic open MachineDefTypes open MachineDefInstructionSemantics open MachineDefThreadSubsystem open Intermediate_actions open Statement open Power_thread_lemmas (* ----- Preliminary: Defining the events and relations ----- *) val t:trace val actions:c_action set val sc_actions : action set let sc_actions = { action_to_i_action a | forall (a IN actions) | is_seq_cst a } val fr_sc : action reln let fr_sc = { (r,w) | forall (r IN sc_actions) (w IN sc_actions) | exist w'. (w',r) IN trace_to_rf t && (w',w) IN trace_to_co t } val erf_sc : action reln let erf_sc = { (w,r) | forall (r IN sc_actions) (w IN sc_actions) | exist w'. (w',r) IN trace_to_rf t && (w,w') IN rc (trace_to_co t) sc_actions && (forall (w'' IN sc_actions). not ((w,w'') IN trace_to_co t && (w'',w') IN trace_to_co t)) } val po_sc : action reln let po_sc = rrestrict (trace_to_po t) sc_actions val co_sc : action reln let co_sc = rrestrict (trace_to_co t) sc_actions (* -------------- Then we have that always, tc (fr_sc union erf_sc union po_sc union co_sc) is acyclic ---------- *) let acyclic_sc_relns_lemma = is_acyclic (tc (fr_sc union erf_sc union po_sc union co_sc)) (*Proof: Suppose not. Then there is a cycle in tc (fr_sc union erf_sc union po_sc union co_sc). Case analyze on whether there is any po_sc edges in the resulting cycle: *) let case_I = not (is_acyclic (tc (fr_sc union erf_sc union co_sc))) (*Case I Proof: Then there are two writes W1 and W2 such that they are related both ways by tc (co_sc union fr_sc union erf_sc) (because if there is a read R involved in a cycle, then by typing, there is a W1 --ERF--> R --FR--> W2). Then W1 and W2 are related both ways by co_sc (since any two writes in (co_sc union fr_sc union erf_sc) are same location, and by totality of co_sc). Then the POWER coherence order has a cycle. Then there is a smallest prefix of the machine trace with the cycle still present, such that the last transition in the trace created a cycle. Case analyze on that transition to get the contradiction. QED for Case I *) (* Case II: tc (fr_sc union erf_sc union co_sc) is acyclic, but there is a cycle in tc (fr_sc union erf_sc union po_sc union co_sc) This means there is at least one po_sc edge in the cycle. *) let case_II = is_acyclic (tc (fr_sc union erf_sc union co_sc)) (*Proof for Case II: In this case, we have at least one po_sc edge in the cycle. Pick any such po_sc edge as a distinguished edge ia1 --po_sc--> ia2. Because ia2 is a sc action, from the chart mapping, there must be a (hw)sync between ia1 and ia2, and ia1 and ia2 are on the same thread. Thus we have, in this situation: *) val ia1: action val ia2: action let _ = (ia1,ia2) IN trace_to_xsync t Sync (* We have an easy lemma about the POWER machine trace, that if two same-thread events are separated by a sync, then there is, in machine trace, a sync-ack between the commit events of the two actions, and by that sync-ack, if the first event is a write, it or a coherence successor has propagated to all threads. *) let sync_separated_lemma = forall ia1 ia2 t. (ia1,ia2) IN trace_to_xsync t Sync --> (* 1. *) (exist l b. (l = Acknowledge_sync b) && (commit_transition_of t ia1,l) IN trace_to_order t && (l,commit_transition_of t ia2) IN trace_to_order t && (* 2. *) (is_write ia1 --> (forall tid. exist lprop w'. (lprop,l) IN trace_to_order t && lprop = Write_propagate_to_thread w' tid && (ia1, Write (w_aid_comp w'.w_eiid) w'.w_thread w'.w_addr w'.w_value) IN rc (trace_to_co t) (write_actions_of_trace t)))) (*Proof of sync_separated_lemma: For 1. Since ia1 and ia2 are separated by at least one sync, call such a sync b. By CommitPrevBarrLS premise of the commit transition, b must be acknowledged before ia2 is committed, and by CommitMemoryAccessBeforeBarrier premise, ia1 must be committed before b is committed (and b must be committed before being acknowledged. For 2. By PropAllThreads premise of acknowledge sync, b must be propagated to all threads before b can be acknowledged By PropGroupA premise of propagating barrier, before b is propagated to a thread, all group A writes (including ia1 in particular), or coherence successors thereof, must be propagated to that thread. QED for sync_separated_lemma *) (*Thus we have: *) val s:barrier let _ = s.b_barrier_type = Sync let _ = (commit_transition_of t ia1,Acknowledge_sync s) IN trace_to_order t let premise1 = (Acknowledge_sync s,commit_transition_of t ia2) IN trace_to_order t (*Also, we are in the case*) let _ = (ia2,ia1) IN tc (fr_sc union erf_sc union po_sc union co_sc) (*From which, we show below*) let premise2 = if is_read ia2 then forall (l IN last_satisfy_transition_of t ia2). (l,Acknowledge_sync s) IN trace_to_order t else (commit_transition_of t ia2,Acknowledge_sync s) IN trace_to_order t (* premise1 and premise2 give the required contradiction (since for reads, the last satisfy transition is after the sync ack) QED for Case II (with one goal remaining) *) (*Goal remaining to prove -- *) let goal_remaining_for_sc = forall ian. (ian,ia1) IN tc (fr_sc union erf_sc union po_sc union co_sc) --> (if is_read ian then forall (l IN last_satisfy_transition_of t ian). (l,Acknowledge_sync s) IN trace_to_order t else (commit_transition_of t ian,Acknowledge_sync s) IN trace_to_order t) (*Proof: To prove the goal, we will induct on the length of the chain between ian and ia1. However, we will have to strengthen the inductive hypothesis in two ways. Thus, the statement we actually prove is the key lemma below, which is easily seen to be stronger than (imply) the goal above. QED for goal (with key lemma remaining) *) let key_lemma_for_sc = forall ian. (ian,ia1) IN tc (fr_sc union erf_sc union po_sc union co_sc) --> (exist sn ia_intn. (* 1.1 *) (if is_read ian then forall (l IN last_satisfy_transition_of t ian). (l,Acknowledge_sync sn) IN trace_to_order t else (commit_transition_of t ian,Acknowledge_sync sn) IN trace_to_order t) && (* 1.2 *) (Acknowledge_sync sn,Acknowledge_sync s) IN rc (trace_to_order t) {nth_label n t | forall n | true} && (* 2 *) (is_write ian --> (forall tid. exist lprop w'. (lprop,Acknowledge_sync sn) IN trace_to_order t && lprop = Write_propagate_to_thread w' tid && (ia_intn, Write (w_aid_comp w'.w_eiid) w'.w_thread w'.w_addr w'.w_value) IN rc (trace_to_co t) (write_actions_of_trace t))) && (* 3 *) (commit_transition_of t ia_intn,Acknowledge_sync sn) IN trace_to_order t && (* 4 *) (get_thread ia_intn = sn.b_thread) && (* 5 *) (ian,ia_intn) IN rc (tc (co_sc union fr_sc union erf_sc)) (trace_to_actions t)) (*Proof of key lemma: By induction on the length of (tc (fr_sc union erf_sc union po_sc union co_sc)) edges between (ian,ia1) Base case: length 0 (ian = ia1) Take ia_intn = ian, sn = s 1.1 and 3 is by CommitMemoryAccessBeforeBarrier precondition of commit rule 1.2, 4, and 5 are trivially true 2: By PropAllThreads premise of acknowledge sync, s must be propagated to all threads before s can be acknowledged By PropGroupA premise of propagating barrier, before s is propagated to a thread, all group A writes (including ian in particular), or coherence successors thereof, must be propagated to that thread. Inductive case: length m=n+1 Inductive hypotheses: exist sn ia_intn. IND1.1: (if is_read ian then forall (l IN last_satisfy_transition_of t ian). (l,Acknowledge_sync sn) IN trace_to_order t else (commit_transition_of t ian,Acknowledge_sync sn) IN trace_to_order t) && IND1.2: (Acknowledge_sync sn,Acknowledge_sync s) IN rc (trace_to_order t) {nth_label n t | forall n | true} && IND2: (is_write ian --> (forall tid. exist lprop w'. (lprop,Acknowledge_sync sn) IN trace_to_order t && lprop = Write_propagate_to_thread w' tid && (ia_intn, Write (w_aid_comp w'.w_eiid) w'.w_thread w'.w_addr w'.w_value) IN rc (trace_to_co t) (write_actions_of_trace t))) && IND3: (commit_transition_of t ia_intn,Acknowledge_sync sn) IN trace_to_order t && IND4: (get_thread ia_intn = sn.b_thread) && IND5: (ian,ia_intn) IN rc (tc (co_sc union fr_sc union erf_sc)) (trace_to_actions t)) Case analyze the edge from iam to ian: Case po_sc: Then iam and ian are on the same thread, and there is a sync in between in program order. Let sm be that sync, ia_intm = iam. 1.1 and 3 is by CommitMemoryAccessBeforeBarrier premise of the commit rule 1.2 is by transitivity of trace order using IND1.1 and IND1.2 2 is by sync_separated_lemma above 4 and 5 are by construction Case co_sc: Then iam and ian are two writes to the same location. Take sm = sn, ia_intm = ia_intn 1.2, 2,3,4 are by IND1.2, IND2, IND3, and IND4 respectively 1.1 is because commit transition is before write propagate transition, using 1.2 5 is using the new co_sc edge and IND5 Case fr_sc: Then iam is a read and ian a write to the same location. Take sm = sn, ia_intm = ia_intn For 1.1, note that iam is satisfied before ian (or a coherence-successor) is propagated to iam's thread, which by IND2 is before the Acknowledge_sync sn transition. 1.2, 3, 4 are by IND1.2, IND3, and IND4 respectively 2 is vacuously true 5 is using the new co_sc edge and IND5 Case erf_sc: Then iam is a write, and ian a write to the same location. Take sm = sn, ia_intm = ia_intn Then 1.2, 3, 4 is by IND1.2, IND3, and IND4 respectively 5 is using the new erf_sc edge and IND5 Now case analyze on ia_intn. Subcase is_read ia_intn: Then (iam,ia_intn) IN rc (tc (co_sc union fr_sc union erf_sc)) (trace_to_actions t) Lemma: *) let lemma1 = forall iaw iar. is_write iaw && is_read iar && (iaw,iar) IN rc (tc (co_sc union fr_sc union erf_sc)) (trace_to_actions t) --> (exist iaw'. (iaw',iar) IN erf_sc && (iaw,iaw') IN rc co_sc (write_actions_of_trace t)) (* Then we have exist ia_intnw. (ia_intnw,ia_intn) IN erf_sc && (iam,ia_intnw) IN rc co_sc (write_actions_of_trace t) 2 is now using that ia_intnw, 1.1 is because writes must be committed before being propagated. Now, proof of lemma1: Since (iaw,iar) IN rc (tc (co_sc union fr_sc union erf_sc)) (trace_to_actions t), and iar is a read and iaw is a write, from analyzing the edges in the relation, we must have a (iaw,iaw') IN rc (tc (co_sc union fr_sc union erf_sc)) (trace_to_actions t) and (iaw',iar) IN erf_sc with iaw' a write. From the first, since iaw and iaw' are two writes to the same location, and co_sc is total per location, the result follows if we can show not ((iaw',iaw) IN co_sc). But from (iaw',iaw) IN co_sc, we get a cycle in co_sc, which implies a cycle in the POWER coherence order, which leads to a contradiction. Hence the required condition. QED for subcase is_read ia_intn *) (* Subcase is_write ia_intn: Then (iam,ia_intn) IN rc (tc (co_sc union fr_sc union erf_sc)) (trace_to_actions t) Lemma: *) let lemma2 = forall iaw1 iaw2. is_write iaw1 && is_write iaw2 && (iaw1,iaw2) IN rc (tc (co_sc union fr_sc union erf_sc)) (trace_to_actions t) --> (iaw1,iaw2) IN rc co_sc (trace_to_actions t) (* Thus we have (iam,ia_intn) IN rc co_sc (trace_to_actions t) 2 is now using that ia_intn, 1.1 is because writes must be committed before being propagated. Now, proof of lemma2: By the premises, iaw1 and iaw2 are writes to the same location. Recall that co_sc is total per location. We have that not ((iaw2,iaw1) IN co_sc), since from (iaw2,iaw1) IN co_sc and the premises, we have two writes related both ways by co_sc (i.e. a cycle in co_sc), which means the POWER coherence order has a cycle, which leads to a contradiction. Thus the result. QED for subcase is_write ia_intn *) (* This concludes all the cases. QED *) (* Now to add SC fences (by the mapping, sync): Take R = tc (fr_sc union erf_sc union po_sc union co_sc), which we have shown is acyclic above. Now look at all F_sc (i.e. sync not associated with atomic SC actions) events, and add them one by one to the partial order R according to the following rules: - If F_sc is po-before some action ia IN R, then R' = R union (F_sc,ia) - If F_sc is po-after some action ia IN R, then R' = R union (ia,F_sc) - If F_sc and ia IN R are po-unrelated, Call F_sc-before all write actions which are either po-before F_sc, or read by a read po-before F_sc Call F_sc-after writes all write actions which are either po-after F_sc or read by a read po-after F_sc Call ia-before all write actions which are either po-before-or-equal ia, or read by a read po-before-or-equal ia Call ia-after writes all write actions which are either po-after-or-equal ia or read by a read po-after-or-equal ia if there exist (w2 IN F_sc-before,w1 IN ia_after). (w1,w2) IN co, then R' = R union (ia,F_sc) if there exist (w2 IN ia-before,w1 IN F_sc_after). (w1,w2) IN co, then R' = R union (F_sc,ia) Claim: at each stage, R' is acyclic Therefore: if we add all SC fence actions, the complete relation R is acyclic (and we will use as SC order any linearization of that R) To prove claim: We call ia is done if: - for a write, when it is committed - for a read, when it is last satisfied before a commit - for a rmw, when it's stwcx is successfully committed - for a sc fence (sync), when it is acknowledged We call ia's effect is prop-before-simple to a thread t if: - if ia is a write, it or a coherence-successor is propagated to t - if ia is a read, the write it read from or a coherence-successor is propagated to t - if ia is a rmw, when it's stwcx is propagated to t - if ia is a sc fence (sync), when it is propagated to t We maintain inductively the invariant about our relation R that: if (ia1,ia2) IN R, and if ia2 is a fence, then ia1's effect is prop-before-simple to ia2's thread before ia2 is committed if ia1 is a fence, then ia1's effect is prop-before-simple to ia2's thread before ia2 Then, the claim follows by seeing that the initial R is acyclic (by lemma proved already), and at each stage, assuming inductively that R is acyclic, a sync F is added to R to get R' If R' were to become cyclic, then there must be an edge from F and to F in R', in other words, F's effect must be prop-before-simple to F's thread before F is committed, a contradiction. Establishing the invariant for the initial R: trivial, since there is no fence in the initial R Maintaining the invariant from R to R': by induction on the length of R' edges from ia to the new F_sc (or the new F_sc to ia) - base cases are the new edges to and from the added sync F_sc, for all of which we get invariant by acyclicity of po or co - inductive case: (ia1,ia2) one-step AND (ia2,ia3) already established invariant subcase ia2 a sync (fence_sc): then ia1 is in group A, and by induction subcase ia2 a write -- subsubcase ia1 is a fence - by induction -- subsubcase (ia1,ia2) IN po -- by using the sync in between -- subsubcase (ia1,ia2) IN (co+fr) -- direct -- subsubcase (ia1,ia2) IN rf -- impossible by typing subcase ia2 a read -- subsubcase ia1 is a fence - by induction -- subsubcase (ia1,ia2) IN po -- by using the sync in between -- subsubcase (ia1,ia2) IN rf -- direct -- subsubcase (ia1,ia2) IN co+fr -- impossible by typing *)