topic.Power_and_ARM.bib

@comment{{This file has been generated by bib2bib 1.99}}

@comment{{Command line: bibtex2html-1.99-with-magiclink/bib2bib -c topic:"Power_and_ARM" -ob topic.Power_and_ARM.bib sewellbib2.bib}}

@inproceedings{damp09,
  optkey = {},
  author = {Jade Alglave and Anthony Fox and Samin Ishtiaq and Magnus O. Myreen and Susmit Sarkar and Peter Sewell and Zappa Nardelli, Francesco},
  title = {The Semantics of {Power} and {ARM} Multiprocessor Machine Code},
  year = {2009},
  optpages = {},
  optcrossref = {},
  optkey = {},
  conf = {DAMP 2009},
  booktitle = {Proceedings of the 4th Workshop on Declarative Aspects of Multicore Programming},
  isbn = {978-1-60558-417-1},
  location = {Savannah, GA, USA},
  optnote = {553091},
  publisher = {ACM},
  address = {New York, NY, USA},
  optpages = {},
  opteditor = {},
  optvolume = {},
  optnumber = {},
  optseries = {},
  optaddress = {},
  month = jan,
  optorganization = {},
  optpublisher = {},
  optnote = {To appear},
  optannote = {},
  opturl = {},
  pdf = {http://www.cl.cam.ac.uk/~pes20/weakmemory/draft-ppc-arm.pdf},
  doi = {10.1145/1481839.1481842},
  abstract = {We develop a rigorous semantics for Power and ARM multiprocessor programs, including their relaxed memory model and the behaviour of reasonable fragments of their instruction sets. The semantics is mechanised in the HOL proof assistant. This should provide a good basis for informal reasoning and formal verification of low-level code for these weakly consistent architectures, and, together with our x86 semantics, for the design and compilation of high-level concurrent languages.},
  topic = {Power_and_ARM}
}

@inproceedings{cav2010,
  author = {Jade Alglave and
               Luc Maranget and
               Susmit Sarkar and
               Peter Sewell},
  title = {Fences in Weak Memory Models},
  conf = {CAV 2010},
  booktitle = {Proceedings of  the 22nd International Conference on Computer Aided Verification, LNCS 6174},
  year = {2010},
  pages = {258-272},
  doi = {http://dx.doi.org/10.1007/978-3-642-14295-6_25},
  optcrossref = {DBLP:conf/cav/2010},
  bibsource = {DBLP, http://dblp.uni-trier.de},
  pdf = {http://www.cl.cam.ac.uk/~pes20/weakmemory/cav2010.pdf},
  abstract = {We present a class of relaxed memory models, defined in Coq, parameterised by the chosen permitted local reorderings of reads and writes, and the visibility of inter- and intra-processor communications through memory (e.g. store atomicity relaxation). We prove results on the required behaviour and placement of memory fences to restore a given model (such as Sequential Consistency) from a weaker one. Based on this class of models we develop a tool, diy, that systematically and automatically generates and runs litmus tests to determine properties of processor implementations. We detail the results of our experiments on Power and the model we base on them. This work identified a rare implementation error in Power 5 memory barriers (for which IBM is providing a workaround); our results also suggest that Power 6 does not suffer from this problem.},
  topic = {Power_and_ARM}
}

@inproceedings{pldi105,
  author = {Susmit Sarkar and Peter Sewell and Jade Alglave and Luc Maranget and Derek Williams},
  title = {Understanding {POWER} Multiprocessors},
  optcrossref = {},
  optkey = {},
  conf = {PLDI 2011},
  booktitle = {Proceedings of  the 32nd ACM SIGPLAN conference on Programming Language Design and Implementation},
  optbooktitle = {Proc.~PLDI},
  optpages = {},
  year = {2011},
  pages = {175--186},
  numpages = {12},
  opturl = {http://doi.acm.org/10.1145/1993498.1993520},
  doi = {10.1145/1993498.1993520},
  opteditor = {},
  optvolume = {},
  optnumber = {},
  optseries = {},
  optaddress = {},
  optmonth = {},
  optorganization = {},
  optpublisher = {},
  optnote = {},
  optannote = {},
  pdf = {http://www.cl.cam.ac.uk/~pes20/ppc-supplemental/pldi105-sarkar.pdf},
  project = {http://www.cl.cam.ac.uk/~pes20/ppc-supplemental/index.html},
  abstract = {Exploiting today's multiprocessors requires high-performance and correct concurrent systems code (optimising compilers, language runtimes, OS kernels, etc.), which in turn requires a good understanding of the observable processor behaviour that can be relied on. Unfortunately this critical hardware/software interface is not at all clear for several current multiprocessors.

In this paper we characterise the behaviour of IBM POWER multiprocessors, which have a subtle and highly relaxed memory model (ARM multiprocessors have a very similar architecture in this respect). We have conducted extensive experiments on several generations of processors: POWER G5, 5, 6, and 7. Based on these, on published details of the microarchitectures, and on discussions with IBM staff, we give an abstract-machine semantics that abstracts from most of the implementation detail but explains the behaviour of a range of subtle examples. Our semantics is explained in prose but defined in rigorous machine-processed mathematics; we also confirm that it captures the observable processor behaviour, or the architectural intent, for our examples with an executable checker. While not officially sanctioned by the vendor, we believe that this model gives a reasonable basis for reasoning about current POWER multiprocessors.

Our work should bring new clarity to concurrent systems programming for these architectures, and is a necessary precondition for any analysis or verification. It should also inform the design of languages such as C and C++, where the language memory model is constrained by what can be efficiently compiled to such multiprocessors.},
  topic = {Power_and_ARM}
}

@inproceedings{Alglave:2011:LRT:1987389.1987395,
  author = {Alglave, Jade and Maranget, Luc and Sarkar, Susmit and Sewell, Peter},
  title = {Litmus: running tests against hardware},
  conf = {TACAS 2011},
  booktitle = {Proceedings of  the 17th international conference on Tools and Algorithms for the Construction and Analysis of Systems},
  optseries = {TACAS'11/ETAPS'11},
  year = {2011},
  isbn = {978-3-642-19834-2},
  location = {Saarbr\&\#252;cken, Germany},
  pages = {41--44},
  numpages = {4},
  url = {https://doi.org/10.1007/978-3-642-19835-9_5},
  doi = {10.1007/978-3-642-19835-9_5},
  acmid = {1987395},
  publisher = {Springer-Verlag},
  address = {Berlin, Heidelberg},
  pdf = {https://www.cl.cam.ac.uk/~pes20/weakmemory/tacas11.pdf},
  abstract = {Shared memory multiprocessors typically expose subtle, poorly understood and poorly specified relaxed-memory semantics to programmers. To understand them, and to develop formal models to use in program verification, we find it essential to take an empirical approach, testing what results parallel programs can actually produce when executed on the hardware. We describe a key ingredient of our approach, our litmus tool, which takes small 'litmus test' programs and runs them for many iterations to find interesting behaviour. It embodies various techniques for making such interesting behaviour appear more frequently.},
  topic = {Power_and_ARM}
}

@inproceedings{DBLP:conf/cav/Mador-HaimMSMAOAMSW12,
  author = {Sela Mador-Haim and
               Luc Maranget and
               Susmit Sarkar and
               Kayvan Memarian and
               Jade Alglave and
               Scott Owens and
               Rajeev Alur and
               Milo M. K. Martin and
               Peter Sewell and
               Derek Williams},
  title = {An Axiomatic Memory Model for {POWER} Multiprocessors},
  booktitle = {Proceedings of the 24th International Conference on Computer Aided Verification},
  conf = {CAV 2012},
  year = {2012},
  pages = {495-512},
  opturl = {http://dx.doi.org/10.1007/978-3-642-31424-7_36},
  doi = {10.1007/978-3-642-31424-7_36},
  ee = {http://dx.doi.org/10.1007/978-3-642-31424-7_36},
  optcrossref = {DBLP:conf/cav/2012},
  bibsource = {DBLP, http://dblp.uni-trier.de},
  pdf = {https://www.cl.cam.ac.uk/~pes20/weakmemory/CAV2012paper-final.pdf},
  abstract = {The growing complexity of hardware optimizations employed by multiprocessors leads to subtle distinctions among allowed and disallowed behaviors, posing challenges in specifying their memory models formally and accurately, and in understanding and analyzing the behavior of concurrent software. This complexity is particularly evident in the IBM Power Architecture, for which a faithful specification was published only in 2011 using an operational style. In this paper we present an equivalent axiomatic specification, which is more abstract and concise. Although not officially sanctioned by the vendor, our results indicate that this axiomatic specification provides a reasonable basis for reasoning about current IBM POWER multiprocessors. We establish the equivalence of the axiomatic and operational specifications using both manual proof and extensive testing. To demonstrate that the constraint-based style of axiomatic specification is more amenable to computer-aided verification, we develop a SAT-based tool for evaluating possible outcomes of multi-threaded test programs, and we show that this tool is significantly more efficient than a tool based on an operational specification.},
  topic = {Power_and_ARM}
}

@inproceedings{pldi2012,
  author = {Susmit Sarkar and Kayvan Memarian and Scott Owens and Mark Batty and Peter Sewell and Luc Maranget and Jade Alglave and Derek Williams},
  title = {Synchronising {C/C++} and {POWER}},
  optcrossref = {},
  optkey = {},
  conf = {PLDI 2012},
  booktitle = {Proceedings of  the 33rd {ACM SIGPLAN conference on Programming Language Design and Implementation (Beijing)}},
  pages = {311--322},
  numpages = {12},
  opturl = {http://doi.acm.org/10.1145/2254064.2254102},
  doi = {10.1145/2254064.2254102},
  year = {2012},
  opteditor = {},
  optvolume = {},
  optnumber = {},
  optseries = {},
  optaddress = {},
  optmonth = {},
  optorganization = {},
  optpublisher = {},
  optnote = {},
  optannote = {},
  pdf = {http://www.cl.cam.ac.uk/~pes20/cppppc-supplemental/pldi010-sarkar.pdf},
  project = {http://www.cl.cam.ac.uk/~pes20/cppppc-supplemental/},
  abstract = {Shared memory concurrency relies on synchronisation primitives: compare-and-swap, load-reserve/store-conditional (aka LL/SC), language-level mutexes, and so on. In a sequentially consistent setting, or even in the TSO setting of x86 and Sparc, these have well-understood semantics. But in the very relaxed settings of IBM, POWER, ARM, or C/C++, it remains surprisingly unclear exactly what the programmer can depend on.

This paper studies relaxed-memory synchronisation. On the hardware side, we give a clear semantic characterisation of the load-reserve/store-conditional primitives as provided by POWER multiprocessors, for the first time since they were introduced 20 years ago; we cover their interaction with relaxed loads, stores, barriers, and dependencies. Our model, while not officially sanctioned by the vendor, is validated by extensive testing, comparing actual implementation behaviour against an oracle generated from the model, and by detailed discussion with IBM staff. We believe the ARM semantics to be similar.

On the software side, we prove sound a proposed compilation scheme of the C/C++ synchronisation constructs to POWER, including C/C++ spinlock mutexes, fences, and read-modify-write operations, together with the simpler atomic operations for which soundness is already known from our previous work; this is a first step in verifying concurrent algorithms that use load-reserve/store-conditional with respect to a realistic semantics. We also build confidence in the C/C++ model in its own terms, fixing some omissions and contributing to the C standards committee adoption of the C++11 concurrency model.},
  topic = {Power_and_ARM},
  errata = {http://www.cl.cam.ac.uk/~pes20/cppppc/errata.html}
}

@inproceedings{popl2012,
  author = {Mark Batty and Kayvan Memarian and Scott Owens and Susmit Sarkar and Peter Sewell},
  title = {{Clarifying and Compiling C/C++ Concurrency: from C++11 to POWER}},
  optcrossref = {},
  optkey = {},
  conf = {POPL 2012},
  booktitle = {Proceedings of the 39th ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages (Philadelphia)},
  pages = {509--520},
  numpages = {12},
  opturl = {http://doi.acm.org/10.1145/2103656.2103717},
  doi = {10.1145/2103656.2103717},
  year = {2012},
  opteditor = {},
  optvolume = {},
  optnumber = {},
  optseries = {},
  optaddress = {},
  optmonth = {},
  optorganization = {},
  optpublisher = {},
  optnote = {12 pp},
  optannote = {},
  project = {http://www.cl.cam.ac.uk/~pes20/cppppc},
  pdf = {http://www.cl.cam.ac.uk/~pes20/cppppc/popl079-batty.pdf},
  abstract = {The upcoming C and C++ revised standards add concurrency to the languages, for the first time, in the form of a subtle *relaxed memory model* (the *C++11 model*). This aims to permit compiler optimisation and to accommodate the differing relaxed-memory behaviours of mainstream multiprocessors, combining simple semantics for most code with high-performance *low-level atomics* for concurrency libraries. In this paper, we first establish two simpler but provably equivalent models for C++11, one for the full language and another for the subset without consume operations. Subsetting further to the fragment without low-level atomics, we identify a subtlety arising from atomic initialisation and prove that, under an additional condition, the model is equivalent to sequential consistency for race-free programs.

We then prove our main result, the correctness of two proposed compilation schemes for the C++11 load and store concurrency primitives to Power assembly, having noted that an earlier proposal was flawed. (The main ideas apply also to ARM, which has a similar relaxed memory architecture.)

This should inform the ongoing development of production compilers for C++11 and C1x, clarifies what properties of the machine architecture are required, and builds confidence in the C++11 and Power semantics.},
  topic = {Power_and_ARM},
  errata = {http://www.cl.cam.ac.uk/~pes20/cppppc/errata.html}
}

@article{Alglave:2012:FWM:2205506.2205523,
  author = {Alglave, Jade and Maranget, Luc and Sarkar, Susmit and Sewell, Peter},
  title = {Fences in Weak Memory Models (Extended Version)},
  journal = {Formal Methods in System Design},
  optissue_date = {April     2012},
  volume = {40},
  number = {2},
  month = apr,
  year = {2012},
  issn = {0925-9856},
  pages = {170--205},
  numpages = {36},
  opturl = {http://dx.doi.org/10.1007/s10703-011-0135-z},
  doi = {10.1007/s10703-011-0135-z},
  acmid = {2205523},
  publisher = {Kluwer Academic Publishers},
  address = {Hingham, MA, USA},
  keywords = {Fences, Formal proofs, Generic framework, PowerPC, Testing tool, Weak memory models},
  pdf = {https://www.cl.cam.ac.uk/~pes20/weakmemory/fmsd12-40-2.pdf},
  abstract = {We present a class of relaxed memory models, defined in Coq, parameterised by the chosen permitted local reorderings of reads and writes, and by the visibility of inter- and intra-processor communications through memory (e.g. store atomicity relaxation). We prove results on the required behaviour and placement of memory fences to restore a given model (such as Sequential Consistency) from a weaker one. Based on this class of models we develop a tool, diy, that systematically and automatically generates and runs litmus tests. These tests can be used to explore the behaviour of processor implementations and the behaviour of models, and hence to compare the two against each other. We detail the results of experiments on Power and a model we base on them.},
  topic = {Power_and_ARM}
}

@inproceedings{DBLP:conf/micro/GrayKMPSS15,
  author = {Kathryn E. Gray and
               Gabriel Kerneis and
               Dominic P. Mulligan and
               Christopher Pulte and
               Susmit Sarkar and
               Peter Sewell},
  title = {An integrated concurrency and core-{ISA} architectural envelope definition,
               and test oracle, for {IBM} {POWER} multiprocessors},
  abstract = {Weakly consistent multiprocessors such as ARM and IBM POWER have been with us for decades, but their subtle programmer-visible concurrency behaviour remains challenging, both to implement and to use; the traditional architecture documentation, with its mix of prose and pseudocode, leaves much unclear.

In this paper we show how a precise architectural envelope model for such architectures can be defined, taking IBM POWER as our example.  Our model specifies, for an arbitrary test program, the set of all its allowable executions, not just those of some particular implementation.  The model integrates an operational concurrency model with an ISA model for the fixed-point non-vector user-mode instruction set (largely automatically derived from the vendor pseudocode, and expressed in a new ISA description language).  The key question is the interface between these two: allowing all the required concurrency behaviour, without overcommitting to some particular microarchitectural implementation, requires a novel abstract structure.

Our model is expressed in a mathematically rigorous language that can be automatically translated to an executable test-oracle tool; this lets one either interactively explore or exhaustively compute the set of all allowed behaviours of intricate test cases, to provide a reference for hardware and software development.
},
  conf = {MICRO 2015},
  booktitle = {Proceedings of the 48th International Symposium on Microarchitecture (Waikiki)},
  pages = {635--646},
  year = {2015},
  month = dec,
  optcrossref = {DBLP:conf/micro/2015},
  opturl = {http://doi.acm.org/10.1145/2830772.2830775},
  pdf = {http://www.cl.cam.ac.uk/~pes20/micro-48-2015.pdf},
  doi = {10.1145/2830772.2830775},
  timestamp = {Sun, 10 Jan 2016 12:42:56 +0100},
  biburl = {http://dblp.uni-trier.de/rec/bib/conf/micro/GrayKMPSS15},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  topic = {Power_and_ARM},
  topictwo = {ISA_semantics}
}

@inproceedings{DBLP:conf/popl/FlurGPSSMDS16,
  author = {Shaked Flur and
               Kathryn E. Gray and
               Christopher Pulte and
               Susmit Sarkar and
               Ali Sezgin and
               Luc Maranget and
               Will Deacon and
               Peter Sewell},
  title = {Modelling the {ARMv8} architecture, operationally: concurrency and {ISA}},
  abstract = {In this paper we develop semantics for key aspects of the ARMv8 multiprocessor architecture: the concurrency model and much of the 64-bit application-level instruction set (ISA).  Our goal is to clarify what the range of architecturally allowable behaviour is, and thereby to support future work on formal verification, analysis, and testing of concurrent ARM software and hardware.

Establishing such models with high confidence is intrinsically difficult: it involves capturing the vendor's architectural intent, aspects of which (especially for concurrency) have not previously been precisely defined.  We therefore first develop a concurrency model with a microarchitectural flavour, abstracting from many hardware implementation concerns but still close to hardware-designer intuition.  This means it can be discussed in detail with ARM architects.  We then develop a more abstract model, better suited for use as an architectural specification, which we prove sound w.r.t.~the first.

The instruction semantics involves further difficulties, handling the mass of detail and the subtle intensional information required to interface to the concurrency model.  We have a novel ISA description language, with a lightweight dependent type system, letting us do both with a rather direct representation of the ARM reference manual instruction descriptions.

We build a tool from the combined semantics that lets one explore, either interactively or exhaustively, the full range of architecturally allowed behaviour, for litmus tests and (small) ELF executables.  We prove correctness of some optimisations needed for tool performance.

We validate the models by discussion with ARM staff, and by comparison against ARM hardware behaviour, for ISA single- instruction tests and concurrent litmus tests.},
  conf = {POPL 2016},
  booktitle = {Proceedings of the 43rd {ACM} {SIGPLAN-SIGACT} Symposium on
               Principles of Programming Languages (St. Petersburg,
               FL, USA)},
  pages = {608--621},
  year = {2016},
  month = jan,
  optcrossref = {DBLP:conf/popl/2016},
  opturl = {http://doi.acm.org/10.1145/2837614.2837615},
  pdf = {http://www.cl.cam.ac.uk/~pes20/popl16-armv8/top.pdf},
  project = {http://www.cl.cam.ac.uk/~sf502/popl16/index.html},
  optproject = {http://www.cl.cam.ac.uk/~pes20/popl16-armv8/},
  doi = {10.1145/2837614.2837615},
  timestamp = {Fri, 08 Jan 2016 18:18:25 +0100},
  biburl = {http://dblp.uni-trier.de/rec/bib/conf/popl/FlurGPSSMDS16},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  topic = {Power_and_ARM},
  topictwo = {ISA_semantics}
}

@inproceedings{mixed17,
  author = {Shaked Flur and Susmit Sarkar and Christopher Pulte and Kyndylan Nienhuis and Luc Maranget and Kathryn E. Gray and Ali Sezgin and Mark Batty and Peter Sewell},
  title = {Mixed-size Concurrency: {ARM}, {POWER}, {C/C++11}, and {SC}},
  conf = {POPL 2017},
  booktitle = {Proceedings of the 44th {ACM} {SIGPLAN-SIGACT} Symposium on Principles of
               Programming Languages (Paris)},
  year = {2017},
  month = jan,
  pages = {429--442},
  numpages = {14},
  opturl = {http://doi.acm.org/10.1145/3009837.3009839},
  doi = {10.1145/3009837.3009839},
  project = {http://www.cl.cam.ac.uk/users/pes20/popl17/},
  abstract = {Previous work on the semantics of relaxed shared-memory concurrency has only considered the case in which each load reads the data of exactly one store.  In practice, however, multiprocessors support mixed-size accesses, and these are used by systems software and (to some degree) exposed at the C/C++ language level.  A semantic foundation for software, therefore, has to address them.


We investigate the mixed-size behaviour of ARMv8 and IBM POWER architectures and implementations: by experiment, by developing semantic models, by testing the correspondence between these, and by discussion with ARM and IBM staff.  This turns out to be surprisingly subtle, and on the way we have to revisit the fundamental concepts of coherence and sequential consistency, which change in this setting.  In particular, we show that adding a memory barrier between each instruction does not restore sequential consistency.  We go on to extend the C/C++11 model to support non-atomic mixed-size memory accesses.


This is a necessary step towards semantics for real-world shared-memory concurrent code, beyond litmus tests.
},
  pdf = {http://www.cl.cam.ac.uk/~pes20/popl17/mixed-size.pdf},
  topic = {Power_and_ARM},
  topictwo = {ISA_semantics},
  recent = {false}
}

@misc{relaxedtutorial2012,
  optkey = {},
  author = {Luc Maranget and Susmit Sarkar and Peter Sewell.},
  title = {A Tutorial Introduction to the ARM and POWER Relaxed Memory Models},
  opthowpublished = {},
  month = oct,
  year = {2012},
  note = {Draft},
  optannote = {},
  pdf = {http://www.cl.cam.ac.uk/~pes20/ppc-supplemental/test7.pdf},
  abstract = {ARM and IBM POWER multiprocessors have highly
\emph{relaxed} memory models: they make use of a range of hardware
optimisations that do not affect the observable behaviour of
sequential code but which are exposed to concurrent programmers, and
concurrent code may not execute in the way one intends unless
sufficient synchronisation, in the form of barriers, dependencies, and
load-reserve/store-conditional pairs, is present.  In this tutorial we
explain some of the main issues that programmers should be aware of,
by example.  The material is based on extensive experimental testing,
discussion with some of the designers, and formal models that aim to
capture the architectural intent (though we do not speak for the vendors). To keep this tutorial as accessible
as possible, we refer to our previous work for those details.},
  topic = {Power_and_ARM}
}

@inproceedings{armv8-mca,
  author = {Christopher Pulte and Shaked Flur and Will Deacon and Jon French and Susmit Sarkar and Peter Sewell},
  title = {{Simplifying ARM Concurrency: Multicopy-atomic Axiomatic and Operational Models for ARMv8}},
  optcrossref = {},
  optkey = {},
  conf = {POPL 2018},
  booktitle = {Proceedings of the 45th ACM SIGPLAN Symposium on Principles of Programming Languages},
  optpages = {},
  year = {2018},
  opteditor = {},
  optvolume = {},
  optnumber = {},
  optseries = {},
  optaddress = {},
  doi = {10.1145/3158107},
  month = jan,
  optorganization = {},
  optpublisher = {},
  optnote = {Draft},
  optannote = {},
  pdf = {http://www.cl.cam.ac.uk/~pes20/armv8-mca/armv8-mca-draft.pdf},
  project = {http://www.cl.cam.ac.uk/~pes20/armv8-mca/},
  errata = {https://www.cl.cam.ac.uk/~pes20/armv8-mca/errata.html},
  abstract = {ARM has a relaxed memory model, previously specified in informal prose for ARMv7 and ARMv8.  Over time, and partly due to work building formal semantics for ARM concurrency, it has become clear that some of the complexity of the model is not justified by the potential benefits.  In particular, the model was originally \emph{non-multicopy-atomic}: writes could become visible to some other threads before becoming visible to all --- but this has not been exploited in production implementations, the corresponding potential hardware optimisations are thought to have insufficient benefits in the ARM context, and it gives rise to subtle complications when combined with other ARMv8 features.  The ARMv8 architecture has therefore been revised: it now has a multicopy-atomic model.  It has also been simplified in other respects, including more straightforward notions of dependency, and the architecture now includes a formal concurrency model.

In this paper we detail these changes and discuss their motivation. We define two formal concurrency models: an operational one, simplifying the Flowing model of Flur et al., and the axiomatic model of the revised ARMv8 specification. The models were developed by an academic group and by ARM staff, respectively, and this extended collaboration partly motivated the above changes. We prove the equivalence of the two models. The operational model is integrated into an executable exploration tool with new web interface, demonstrated by exhaustively checking the possible behaviours of a loop-unrolled version of a Linux kernel lock implementation, a previously known bug due to unprevented speculation, and a fixed version.},
  topic = {Power_and_ARM},
  topictwo = {ISA_semantics},
  optrecent = {true}
}

@inproceedings{iflat-esop2020-extended,
  author = {Ben Simner and Shaked Flur and Christopher Pulte and Alasdair Armstrong and Jean Pichon-Pharabod and Luc Maranget and Peter Sewell},
  title = {{ARMv8-A} system semantics: instruction fetch in relaxed architectures},
  optcrossref = {},
  optkey = {},
  conf = {ESOP 2020},
  booktitle = {Proceedings of the 29th European Symposium on Programming},
  year = {2020},
  opteditor = {},
  optvolume = {},
  optnumber = {},
  optseries = {},
  optpages = {},
  month = apr,
  optaddress = {},
  optorganization = {},
  optpublisher = {},
  optnote = {This is an extended version of the ESOP 2020 paper, with appendices giving additional details},
  optannote = {},
  abstract = {Computing relies on \emph{architecture specifications} to decouple hardware and software development.  Historically these have been prose documents, with all the problems that entails, but research over the last ten years has developed rigorous and executable-as-test-oracle specifications of mainstream architecture instruction sets and ``user-mode'' concurrency, clarifying architectures and bringing them into the scope of programming-language semantics and verification.  However, the \emph{system semantics}, of instruction-fetch and cache maintenance, exceptions and interrupts, and address translation, remains obscure, leaving us without a solid foundation for verification of security-critical systems software.

In this paper we establish a robust model for one aspect of system semantics: instruction fetch and cache maintenance for ARMv8-A.  Systems code relies on executing instructions that were written by data writes, e.g.~in program loading, dynamic linking, JIT compilation, debugging, and OS configuration, but hardware implementations are often highly optimised, e.g.~with instruction caches, linefill buffers, out-of-order fetching, branch prediction, and instruction prefetching, which can affect programmer-observable behaviour.  It is essential, both for programming and verification, to abstract from such microarchitectural details as much as possible, but no more.  We explore the key architecture design questions with a series of examples, discussed in detail with senior Arm staff; capture the architectural intent in operational and axiomatic semantic models, extending previous work on ``user-mode'' concurrency; make these models executable as test oracles for small examples; and experimentally validate them against hardware behaviour (finding a bug in one hardware device).  We thereby bring these subtle issues into the mathematical domain, clarifying the architecture and enabling future work on system software verification.
},
  topic = {Power_and_ARM},
  elver = {true},
  project = {http://www.cl.cam.ac.uk/~pes20/iflat},
  pdf = {http://www.cl.cam.ac.uk/~pes20/iflat/top-extended.pdf},
  errata = {https://www.cl.cam.ac.uk/~pes20/iflat/},
  pkvm = {true}
}

@inproceedings{isla-cav,
  optkey = {},
  author = {Alasdair Armstrong and Brian Campbell and Ben Simner and Christopher Pulte and Peter Sewell},
  title = {Isla: Integrating full-scale {ISA} semantics and axiomatic concurrency models},
  conf = {CAV 2021},
  booktitle = {Proc. 33rd International Conference on Computer-Aided Verification},
  month = jul,
  year = {2021},
  opteditor = {Alexandra Silva and K. Rustan M. Leino},
  opttitle = {Isla: Integrating Full-Scale {ISA} Semantics and Axiomatic Concurrency
               Models},
  optbooktitle = {Computer Aided Verification - 33rd International Conference, {CAV}
               2021, Virtual Event, July 20-23, 2021, Proceedings, Part {I}},
  series = {Lecture Notes in Computer Science},
  volume = {12759},
  pages = {303--316},
  publisher = {Springer},
  optyear = {2021},
  url = {https://doi.org/10.1007/978-3-030-81685-8\_14},
  doi = {10.1007/978-3-030-81685-8\_14},
  timestamp = {Fri, 23 Jul 2021 16:41:31 +0200},
  biburl = {https://dblp.org/rec/conf/cav/ArmstrongCSPS21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  optpdf = {https://www.cl.cam.ac.uk/~pes20/isla/isla-cav2021.pdf},
  optnote = {Extended version available at \url{https://www.cl.cam.ac.uk/~pes20/isla/isla-cav2021-extended.pdf}},
  pdf = {https://www.cl.cam.ac.uk/~pes20/isla/isla-cav2021-extended.pdf},
  elver = {true},
  abstract = {
Architecture specifications such as Armv8-A and RISC-V are the ultimate foundation for software verification and the correctness criteria for hardware verification. They should define the allowed sequential and relaxed-memory concurrency behaviour of programs, but hitherto there has been no integration of full-scale instruction-set architecture (ISA) semantics with axiomatic concurrency models, either in mathematics or in tools.  These ISA semantics can be surprisingly large and intricate, e.g.~100k+ lines for Armv8-A.

In this paper we present a tool, Isla, for computing the allowed behaviours of concurrent litmus tests with respect to full-scale ISA definitions, in Sail, and arbitrary axiomatic relaxed-memory concurrency models, in the Cat language.  It is based on a generic symbolic engine for Sail ISA specifications, which should be valuable also for other verification tasks.  We equip the tool with a web interface to make it widely accessible, and illustrate and evaluate it for Armv8-A and RISC-V.

By using full-scale and authoritative ISA semantics, this lets one evaluate litmus tests using arbitrary user instructions with high confidence.  Moreover, because these ISA specifications give detailed and validated definitions of the sequential aspects of \emph{systems} functionality, as used by hypervisors and operating systems, e.g.~instruction fetch, exceptions, and address translation, our tool provides a basis for developing concurrency semantics for these. We demonstrate this for the Armv8-A instruction-fetch model and self-modifying code examples of Simner et al.
},
  optannote = {},
  topic = {Power_and_ARM},
  topictwo = {ISA_semantics},
  pkvm = {true}
}

@article{isla-cav-fmsd,
  optkey = {},
  author = {Alasdair Armstrong and Brian Campbell and Ben Simner and Christopher Pulte and Peter Sewell},
  title = {Isla: Integrating full-scale {ISA} semantics and axiomatic concurrency models (extended version)},
  journal = {Formal Methods in System Design},
  month = may,
  year = {2023},
  optpages = {303--316},
  publisher = {Springer},
  optissn = {1572-8102},
  optyear = {2021},
  url = {https://doi.org/10.1007/s10703-023-00409-y},
  doi = {10.1007/s10703-023-00409-y},
  pdf = {https://www.cl.cam.ac.uk/~pes20/isla/isla-cav2021-extended.pdf},
  optpdf = {https://link.springer.com/content/pdf/10.1007/s10703-023-00409-y.pdf},
  elver = {true},
  abstract = {
Architecture specifications such as Armv8-A and RISC-V are the ultimate foundation for software verification and the correctness criteria for hardware verification. They should define the allowed sequential and relaxed-memory concurrency behaviour of programs, but hitherto there has been no integration of full-scale instruction-set architecture (ISA) semantics with axiomatic concurrency models, either in mathematics or in tools. These ISA semantics can be surprisingly large and intricate, e.g. 100k+ lines for Armv8-A. In this paper we present a tool, Isla, for computing the allowed behaviours of concurrent litmus tests with respect to full-scale ISA definitions, in the Sail language, and arbitrary axiomatic relaxed-memory concurrency models, in the Cat language. It is based on a generic symbolic engine for Sail ISA specifications. We equip the tool with a web interface to make it widely accessible, and illustrate and evaluate it for Armv8-A and RISC-V. The symbolic execution engine is valuable also for other verification tasks: it has been used in automated ISA test generation for the Arm Morello prototype architecture, extending Armv8-A with CHERI capabilities, and for Iris program-logic reasoning about binary code above the Armv8-A and RISC-V ISA specifications. By using full-scale and authoritative ISA semantics, Isla lets one evaluate litmus tests using arbitrary user instructions with high confidence. Moreover, because these ISA specifications give detailed and validated definitions of the sequential aspects of systems functionality, as used by hypervisors and operating systems, e.g. instruction fetch, exceptions, and address translation, our tool provides a basis for developing concurrency semantics for these. We demonstrate this for the Armv8-A instruction-fetch and virtual-memory models and examples of Simner et al.
},
  optannote = {},
  topic = {Power_and_ARM},
  topictwo = {ISA_semantics},
  pkvm = {true}
}

@misc{acs-2020,
  optkey = {},
  author = {Peter Sewell and Christopher Pulte and Shaked Flur and Mark Batty and Luc Maranget and Alasdair Armstrong},
  title = {Multicore Semantics: Making Sense of Relaxed Memory (MPhil slides)},
  opthowpublished = {},
  month = oct,
  year = 2022,
  optnote = {\url{https://www.cl.cam.ac.uk/~pes20/slides-acs-2022.pdf}},
  url = {https://www.cl.cam.ac.uk/~pes20/slides-acs-2022.pdf},
  optannote = {},
  topic = {Power_and_ARM},
  topictwo = {ISA_semantics}
}

@inproceedings{relaxedVM-esop2022,
  author = {
  Ben Simner and 
  Alasdair Armstrong and
  Jean Pichon-Pharabod and
  Christopher Pulte and
  Richard Grisenthwaite and
  Peter Sewell
},
  title = {Relaxed virtual memory in {Armv8-A}},
  optcrossref = {},
  optkey = {},
  conf = {ESOP 2022},
  booktitle = {Proceedings of the 31st European Symposium on Programming},
  year = {2022},
  optbooktitle = {},
  optyear = {},
  opteditor = {},
  optvolume = {},
  optnumber = {},
  optseries = {},
  optpages = {},
  month = apr,
  optaddress = {},
  optorganization = {},
  optpublisher = {},
  optnote = {},
  optannote = {},
  optbooktitle = {Programming Languages and Systems - 31st European Symposium on Programming,
               {ESOP} 2022, Held as Part of the European Joint Conferences on Theory
               and Practice of Software, {ETAPS} 2022, Munich, Germany, April 2-7,
               2022, Proceedings},
  series = {Lecture Notes in Computer Science},
  volume = {13240},
  pages = {143--173},
  publisher = {Springer},
  optyear = {2022},
  url = {https://doi.org/10.1007/978-3-030-99336-8\_6},
  doi = {10.1007/978-3-030-99336-8\_6},
  timestamp = {Fri, 01 Apr 2022 15:49:28 +0200},
  biburl = {https://dblp.org/rec/conf/esop/SimnerAPPGS22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  abstract = {
Virtual memory is an essential mechanism for enforcing security boundaries,
but its relaxed-memory concurrency semantics has not previously been investigated in detail.  The concurrent systems code managing virtual memory has been left on an entirely informal basis, and 
OS and hypervisor verification has had to make major simplifying assumptions.
  
We explore the design space for relaxed virtual memory semantics in the Armv8-A architecture, to support future system-software verification. We identify many design questions, in discussion with Arm; develop a test suite, including use cases from the pKVM production hypervisor under development by Google; delimit the design space with axiomatic-style concurrency models; prove that under simple stable configurations our architectural model collapses to previous ``user'' models; develop tooling to compute allowed behaviours in the model integrated with the full Armv8-A ISA semantics; and develop a hardware test harness.
  
This lays out some of the main issues in relaxed virtual memory  bringing these security-critical systems phenomena into the domain of programming-language semantics and verification with foundational architecture semantics.
},
  pdf = {http://www.cl.cam.ac.uk/users/pes20/RelaxedVM-Arm/RelaxedVM-Arm-esop2022.pdf},
  topic = {Power_and_ARM},
  elver = {true},
  project = {http://www.cl.cam.ac.uk/~pes20/RelaxedVM-Arm},
  pkvm = {true}
}

@misc{relaxedVM-arxiv,
  author = {
  Ben Simner and 
  Alasdair Armstrong and
  Jean Pichon-Pharabod and
  Christopher Pulte and
  Richard Grisenthwaite and
  Peter Sewell
},
  title = {Relaxed virtual memory in {Armv8-A} (extended version)},
  optcrossref = {},
  optkey = {},
  optconf = {ESOP 2022},
  optbooktitle = {Proceedings of the 31st European Symposium on Programming},
  year = {2022},
  optbooktitle = {},
  optyear = {},
  opteditor = {},
  optvolume = {},
  optnumber = {},
  optseries = {},
  optpages = {},
  month = mar,
  optaddress = {},
  optorganization = {},
  optpublisher = {},
  note = {\url{http://arxiv.org/abs/2203.00642}},
  optannote = {},
  abstract = {
  Virtual memory is an essential mechanism for enforcing security boundaries,
but its relaxed-memory concurrency semantics has not previously been investigated in detail.  The concurrent systems code managing virtual memory has been left on an entirely informal basis, and 
OS and hypervisor verification has had to make major simplifying assumptions.
  
We explore the design space for relaxed virtual memory semantics in the \mbox{Armv8-A} architecture, to support future system-software verification. We identify many design questions, in discussion with Arm; develop a test suite, including use cases from the pKVM production hypervisor under development by Google; delimit the design space with axiomatic-style concurrency models; prove that under simple stable configurations our architectural model collapses to previous ``user'' models; develop tooling to compute allowed behaviours in the model integrated with the full Armv8-A ISA semantics; and develop a hardware test harness.
  
This lays out some of the main issues in relaxed virtual memory  bringing these security-critical systems phenomena into the domain of programming-language semantics and verification with foundational architecture semantics.

This document is an extended version of a paper in ESOP 2022, with additional explanation and examples in the main body, and appendices detailing our litmus tests, models, proofs, and test results. 
},
  pdf = {https://arxiv.org/pdf/2203.00642},
  topic = {Power_and_ARM},
  project = {http://www.cl.cam.ac.uk/~pes20/RelaxedVM-Arm},
  pkvm = {true}
}

@inproceedings{AxSL-popl2024,
  author = {Angus Hammond{$^1$} and Zongyuan Liu{$^1$} and Thibaut P{\'{e}}rami and Peter Sewell and Lars Birkedal and Jean Pichon-Pharabod},
  title = {An axiomatic basis for computer programming on the relaxed {Arm-A} architecture: the {AxSL} logic},
  conf = {POPL 2024},
  optcrossref = {},
  optkey = {},
  booktitle = {Proceedings of the 51st ACM SIGPLAN Symposium on Principles of Programming Languages},
  optbooktitle = {},
  year = {2024},
  opteditor = {},
  optvolume = {},
  optnumber = {},
  optseries = {},
  optpages = {},
  month = jan,
  optaddress = {},
  optorganization = {},
  optpublisher = {},
  note = {${}^1$These authors contributed equally. Proc. ACM Program. Lang. 8, POPL, Article 21},
  optannote = {},
  abstract = {Very relaxed concurrency memory models, like those of the Arm-A, RISC-V, and IBM Power hardware architectures, underpin much of computing but  break a fundamental intuition about programs, namely that syntactic program order and the reads-from relation always both induce order in the execution. Instead, out-of-order execution is allowed except where prevented by certain pairwise dependencies, barriers, or other synchronisation.  This means that there is no notion of the `current' state of the program, making it challenging to design (and prove sound) syntax-directed, modular reasoning methods like Hoare logics, as usable resources cannot implicitly flow from one program point to the next.

We present AxSL, a separation logic for the relaxed memory model of Arm-A, that captures the fine-grained reasoning underpinning the low-overhead synchronisation mechanisms used by high-performance systems code. In particular, AxSL allows transferring arbitrary resources using relaxed reads and writes when they induce inter-thread ordering. We mechanise AxSL in the Iris separation logic framework, illustrate it on key examples, and prove it sound with respect to the axiomatic memory model of Arm-A. Our approach is largely generic in the axiomatic model and in the instruction-set semantics, offering a potential way forward for compositional reasoning for other similar models, and for the combination of production concurrency models and full-scale ISAs.
},
  doi = {10.1145/3632863},
  pdf = {https://www.cl.cam.ac.uk/~pes20/axsl-popl-2024.pdf},
  topic = {Power_and_ARM},
  optproject = {},
  elver = {true},
  pkvm = {true}
}

@misc{simner2024relaxedexceptionsemanticsarma,
  title = {Relaxed exception semantics for {Arm-A} (extended version)},
  author = {Ben Simner and Alasdair Armstrong and Thomas Bauereiss and Brian Campbell and Ohad Kammar and Jean Pichon-Pharabod and Peter Sewell},
  year = {2024},
  eprint = {2412.15140},
  archiveprefix = {arXiv},
  primaryclass = {cs.AR},
  url = {https://arxiv.org/abs/2412.15140},
  pdf = {https://arxiv.org/pdf/2412.15140},
  abstract = {
    To manage exceptions, software relies on a key architectural guarantee, precision: that exceptions appear to execute between instructions. However, this definition, dating back over 60 years, fundamentally assumes a sequential programmers model. Modern architectures such as Arm-A with programmer-observable relaxed behaviour make such a naive definition inadequate, and it is unclear exactly what guarantees programmers have on exception entry and exit.
    In this paper, we clarify the concepts needed to discuss exceptions in the relaxed-memory setting -- a key aspect of precisely specifying the architectural interface between hardware and software. We explore the basic relaxed behaviour across exception boundaries, and the semantics of external aborts, using Arm-A as a representative modern architecture. We identify an important problem, present yet unexplored for decades: pinning down what it means for exceptions to be precise in a relaxed setting. We describe key phenomena that any definition should account for. We develop an axiomatic model for Arm-A precise exceptions, tooling for axiomatic model execution, and a library of tests. Finally we explore the relaxed semantics of software-generated interrupts, as used in sophisticated programming patterns, and sketch how they too could be modelled. },
  elver = {true},
  topic = {Power_and_ARM}
}

@inproceedings{simner-isca2025,
  title = {Precise exceptions in relaxed architectures},
  author = {Ben Simner and Alasdair Armstrong and Thomas Bauereiss and Brian Campbell and Ohad Kammar and Jean Pichon-Pharabod and Peter Sewell},
  optcrossref = {},
  optkey = {},
  conf = {ISCA 2025},
  booktitle = {International Symposium on Computer Architecture},
  year = {2025},
  pdf = {http://www.cl.cam.ac.uk/users/pes20/isca25-113.pdf},
  errata = {http://www.cl.cam.ac.uk/users/pes20/isca25-113-errata.md},
  doi = {https://doi.org/10.1145/3695053.3731102},
  opteditor = {},
  optvolume = {},
  optnumber = {},
  optseries = {},
  optpages = {},
  month = jun,
  optaddress = {},
  optorganization = {},
  optpublisher = {},
  note = {Best paper award.},
  optannote = {},
  abstract = {
To manage exceptions, software relies on a key architectural guarantee, \emph{precision}: that exceptions appear to execute between instructions.  However, this definition, dating back over 60 years, fundamentally assumes a sequential programmers model.  Modern architectures such as Arm-A with programmer-observable relaxed behaviour make such a naive definition inadequate, and it is unclear exactly what guarantees programmers have on exception entry and exit.

In this paper, we clarify the concepts needed to discuss exceptions in the relaxed-memory setting -- a key aspect of precisely specifying the architectural interface between hardware and software.  We explore the basic relaxed behaviour across exception boundaries, and the semantics of external aborts, using Arm-A as a representative modern architecture.  We identify an important problem, present yet unexplored for decades: pinning down what it means for exceptions to be precise in a relaxed setting.  We describe key phenomena that any definition should account for.  We develop an axiomatic model for Arm-A precise exceptions, tooling for axiomatic model execution, and a library of tests. Finally we explore the relaxed semantics of software-generated interrupts, as used in sophisticated programming patterns, and sketch how they too could be modelled.
 },
  recent = {true},
  elver = {true},
  topic = {Power_and_ARM},
  pkvm = {true}
}

@inproceedings{archsem-popl2026,
  author = {Thibaut P{\'e}rami and Thomas Bauereiss and Brian Campbell and Zongyuan Liu and Nils Lauermann and Alasdair Armstrong and Peter Sewell},
  github = {https://github.com/rems-project/archsem},
  title = {{ArchSem}: Reusable rigorous semantics of relaxed architectures},
  optcrossref = {},
  optkey = {},
  conf = {POPL 2026},
  booktitle = {POPL 2026},
  booktitle = {Proceedings of the 53rd ACM SIGPLAN Symposium on Principles of Programming Languages (PACM PL Vol.~10, No.~POPL, Article~8},
  year = 2026,
  opteditor = {},
  optvolume = {},
  optnumber = {},
  optseries = {},
  optpages = {},
  month = jan,
  optaddress = {},
  optorganization = {},
  optpublisher = {},
  optnote = {},
  optannote = {},
  doi = {https://doi.org/10.1145/3776650},
  pdf = {https://www.cl.cam.ac.uk/~pes20/Stuff/2025-archsem-paper.pdf},
  abstract = {
The specifications of mainstream processor architectures, such as Arm, x86, and RISC-V, underlie modern computing, as the targets of compilers, operating systems, and hypervisors.  However, despite extensive research and tooling for instruction-set architecture (ISA) and relaxed-memory semantics, recently including systems features, there still do not exist integrated mathematical models that suffice for foundational formal verification, of concurrent architecture properties or of systems software.  Previous proof-assistant work has had to substantially simplify the ISA semantics, the concurrency model, or both.

We present ArchSem, an architecture-generic framework for architecture semantics, modularly combining ISA and concurrency models along a tractable interface of instruction-semantics effects, that covers a range of systems aspects.  To do so, one has to handle many issues that were previously unclear, about the architectures themselves, the interface, the proper definition of reusable models, and the Rocq and Isabelle idioms required to make it usable.  We instantiate it to the Arm-A and RISC-V instruction-set architectures and multiple concurrency models.

We demonstrate usability for proof, despite the scale, by establishing that the Arm architecture (in a particular configuration) provides a provable virtual memory abstraction, with a combination of Rocq, Isabelle, and paper proof.  Previous work provides further confirmation of usability: the AxSL program logic for Arm relaxed concurrency was proved sound above an earlier version of ArchSem.

This establishes a basis for future proofs of architecture properties and systems software, above production architecture specifications.
  },
  recent = {true},
  elver = {true},
  topic = {Power_and_ARM}
}