topic.x86.bib

@comment{{This file has been generated by bib2bib 1.96}}
@comment{{Command line: /usr/bin/bib2bib -c topic:"x86" -ob topic.x86.bib sewellbib2.bib}}
@inproceedings{tphols09,
  author = {Scott Owens and Susmit Sarkar and Peter Sewell},
  title = {A better x86 memory model: {x86-TSO}},
  conf = {TPHOLs 2009},
  booktitle = {Proceedings of  Theorem Proving in Higher Order Logics, LNCS 5674},
  pages = {391--407},
  year = {2009},
  url = {https://doi.org/10.1007/978-3-642-03359-9_27},
  doi = {10.1007/978-3-642-03359-9_27},
  pdf = {http://www.cl.cam.ac.uk/~pes20/weakmemory/x86tso-paper.tphols.pdf},
  abstract = {Real multiprocessors do not provide the sequentially consistent memory that is assumed by most work on semantics and verification. Instead, they have relaxed memory models, typically described in
ambiguous prose, which lead to widespread confusion. These are prime
targets for mechanized formalization. In previous work we produced a rigorous x86-CC model, formalizing the Intel and AMD architecture specifications of the time, but those turned out to be unsound with respect
to actual hardware, as well as arguably too weak to program above.
We discuss these issues and present a new x86-TSO model that suffers
from neither problem, formalized in HOL4. We believe it is sound with
respect to real processors, reflects better the vendor’s intentions, and is
also better suited for programming. We give two equivalent definitions of
x86-TSO: an intuitive operational model based on local write buffers, and
an axiomatic total store ordering model, similar to that of the SPARCv8.
Both are adapted to handle x86-specific features. We have implemented
the axiomatic model in our memevents tool, which calculates the set of all
valid executions of test programs, and, for greater confidence, verify the
witnesses of such executions directly, with code extracted from a third,
more algorithmic, equivalent version of the definition.
},
  topic = {x86}
}
@inproceedings{x86popl,
  optkey = {},
  optauthor = {S. Sarkar and P. Sewell and Zappa Nardelli, F. and S. Owens and T. Ridge and T. Braibant and M. Myreen and J. Alglave},
  author = {Susmit Sarkar and Peter Sewell and Zappa Nardelli, Francesco and Scott Owens and Tom Ridge and Thomas Braibant and Magnus Myreen and Jade Alglave},
  title = {The Semantics of {x86-CC} Multiprocessor Machine Code},
  year = {2009},
  optpages = {},
  optcrossref = {},
  optkey = {},
  conf = {POPL 2009},
  booktitle = {Proceedings of  the 36th annual ACM SIGPLAN-SIGACT symposium on Principles of Programming Languages},
  optbooktitle = {Proc.~POPL 2009},
  pages = {379--391},
  numpages = {13},
  opturl = {http://doi.acm.org/10.1145/1594834.1480929},
  doi = {10.1145/1594834.1480929},
  optpages = {},
  optyear = {},
  opteditor = {},
  optvolume = {},
  optnumber = {},
  optseries = {},
  optaddress = {},
  month = jan,
  optorganization = {},
  optpublisher = {},
  optnote = {To appear},
  optannote = {},
  opturl = {},
  pdf = {https://www.cl.cam.ac.uk/~pes20/weakmemory/popl09.pdf},
  abstract = {Multiprocessors are now dominant, but real multiprocessors do not provide the sequentially consistent memory that is assumed by most work on semantics and verification. Instead, they have subtle relaxed (or weak) memory models, usually described only in ambiguous prose, leading to widespread confusion.

We develop a rigorous and accurate semantics for x86 multiprocessor programs, from instruction decoding to relaxed memory model, mechanised in HOL. We test the semantics against actual processors and the vendor litmus-test examples, and give an equivalent abstract-machine characterisation of our axiomatic memory model. For programs that are (in some precise sense) data-race free, we prove in HOL that their behaviour is sequentially consistent. We also contrast the x86 model with some aspects of Power and ARM behaviour.

This provides a solid intuition for low-level programming, and a sound foundation for future work on verification, static analysis, and compilation of low-level concurrent code.},
  topic = {x86}
}
@techreport{UCAM-CL-TR-745,
  author = {Owens, Scott and Sarkar, Susmit and Sewell, Peter},
  title = {{A better x86 memory model: x86-TSO (extended version)}},
  year = 2009,
  month = mar,
  pdf = {http://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-745.pdf},
  institution = {University of Cambridge, Computer Laboratory},
  number = {UCAM-CL-TR-745},
  note = {52pp},
  pdf = {http://www.cl.cam.ac.uk/~pes20/weakmemory/x86tso-paper.pdf},
  abstract = {Real multiprocessors do not provide the sequentially consistent memory that is assumed by most work on semantics and verification. Instead, they have relaxed memory models, typically described in ambiguous prose, which lead to widespread confusion. These are prime targets for mechanized formalization. In previous work we produced a rigorous x86-CC model, formalizing the Intel and AMD architecture specifications of the time, but those turned out to be unsound with respect to actual hardware, as well as arguably too weak to program above. We discuss these issues and present a new x86-TSO model that suffers from neither problem, formalized in HOL4. We believe it is sound with respect to real processors, reflects better the vendor's intentions, and is also better suited for programming. We give two equivalent definitions of x86-TSO: an intuitive operational model based on local write buffers, and an axiomatic total store ordering model, similar to that of the SPARCv8. Both are adapted to handle x86-specific features. We have implemented the axiomatic model in our memevents tool, which calculates the set of all valid executions of test programs, and, for greater confidence, verify the witnesses of such executions directly, with code extracted from a third, more algorithmic, equivalent version of the definition.
},
  topic = {x86}
}
@article{cacm,
  author = {Peter Sewell and Susmit Sarkar and Scott Owens and Zappa Nardelli, Francesco  and Magnus O. Myreen},
  title = {{x86-TSO}: A Rigorous and Usable Programmer's Model for x86 Multiprocessors},
  journal = {Communications of the ACM},
  year = {2010},
  optkey = {},
  volume = {53},
  number = {7},
  pages = {89--97},
  month = jul,
  url = {http://doi.acm.org/10.1145/1785414.1785443},
  doi = {10.1145/1785414.1785443},
  note = {(Research Highlights)},
  optannote = {},
  pdf = {http://www.cl.cam.ac.uk/~pes20/weakmemory/cacm.pdf},
  abstract = {Exploiting the multiprocessors that have recently become
ubiquitous requires high-performance and reliable concurrent systems code, for concurrent data structures, operating system kernels, synchronisation libraries, compilers, and
so on. However, concurrent programming, which is always
challenging, is made much more so by two problems. First,
real multiprocessors typically do not provide the sequentially
consistent memory that is assumed by most work on semantics and verification. Instead, they have relaxed memory
models, varying in subtle ways between processor families,
in which different hardware threads may have only loosely
consistent views of a shared memory. Second, the public
vendor architectures, supposedly specifying what programmers can rely on, are often in ambiguous informal prose (a
particularly poor medium for loose specifications), leading
to widespread confusion.

In this paper we focus on x86 processors. We review several recent Intel and AMD specifications, showing that all
contain serious ambiguities, some are arguably too weak to
program above, and some are simply unsound with respect
to actual hardware. We present a new x86-TSO programmer's model that, to the best of our knowledge, suffers from
none of these problems. It is mathematically precise (rigorously defined in HOL4) but can be presented as an intuitive abstract machine which should be widely accessible to
working programmers. We illustrate how this can be used to
reason about the correctness of a Linux spinlock implementation and describe a general theory of data-race-freedom for
x86-TSO. This should put x86 multiprocessor system building on a more solid foundation; it should also provide a basis
for future work on verification of such systems.
},
  topic = {x86}
}