rems-task3.bib

@inproceedings{ChisnallCPDP11,
  author = {Chisnall, David and Rothwell, Colin and Davis, Brooks and Watson, Robert N.M. and Woodruff, Jonathan and Vadera, Munraj and Moore, Simon W. and Neumann, Peter G. and Roe, Michael},
  title = {Beyond the {PDP-11}: Processor support for a memory-safe {C} abstract machine},
  booktitle = {\textbf{ASPLOS 2015}: Proceedings of the Fifteenth Edition of ASPLOS on Architectural Support for Programming Languages and Operating Systems},
  conf = {ASPLOS 2015},
  series = {ASPLOS XX},
  year = {2015},
  month = mar,
  location = {Istanbul, Turkey},
  numpages = {15},
  publisher = {ACM},
  address = {New York, NY, USA},
  optkeywords = {C, memory safety, memory models, code generation},
  doi = {10.1145/2694344.2694367},
  optpdf = {papers/asplos15-memory-safe-c.pdf},
  abstract = {We propose a new memory-safe interpretation of the C abstract machine that provides stronger protection to benefit
security and debugging. Despite ambiguities in the specification intended to provide implementation flexibility, contemporary implementations of C have converged on a memory model similar to the PDP-11, the original target for C.
This model lacks support for \emph{memory safety} despite well-documented impacts on security and reliability.

Attempts to change this model are often hampered by assumptions embedded in a large body of existing C code, dating back to the memory model exposed by the original C
compiler for the PDP-11. Our experience with attempting to
implement a memory-safe variant of C on the CHERI experimental microprocessor led us to identify a number of
problematic idioms. We describe these as well as their interaction with existing memory safety schemes and the assumptions that they make beyond the requirements of the C
specification. Finally, we refine the CHERI ISA and abstract
model for C, by combining elements of the CHERI capability model and fat pointers, and present a softcore CPU that
implements a C abstract machine that can run legacy C code
with strong memory protection guarantees.
}
}
@inproceedings{BMNPS2015,
  author = {Mark Batty and
               Kayvan Memarian and
               Kyndylan Nienhuis and
               Jean Pichon{-}Pharabod and
               Peter Sewell},
  title = {The Problem of Programming Language Concurrency Semantics},
  abstract = {Despite decades of research, we do not have a satisfactory concurrency semantics for any general-purpose programming language that aims to support concurrent systems code. The Java Memory Model has been shown to be unsound with respect to standard compiler optimisations, while the C/C++11 model is too weak, admitting undesirable thin-air executions. Our goal in this paper is to articulate this major open problem as clearly as is currently possible, showing how it arises from the combination of multiprocessor relaxed-memory behaviour and the desire to accommodate current compiler optimisations. We make several novel contributions that each shed some light on the problem, constraining the possible solutions and identifying new difficulties.},
  booktitle = {\textbf{ESOP 2015}: Programming Languages and Systems - 24th European Symposium on Programming,
               Held as Part of the European Joint Conferences on Theory
               and Practice of Software, {ETAPS} 2015, London, UK, April 11-18, 2015.
              },
  conf = {ESOP 2015},
  pages = {283--307},
  year = {2015},
  month = apr,
  optcrossref = {DBLP:conf/esop/2015},
  url = {http://dx.doi.org/10.1007/978-3-662-46669-8_12},
  pdf = {http://www.cl.cam.ac.uk/~pes20/cpp/c_concurrency_challenges.pdf},
  doi = {10.1007/978-3-662-46669-8_12},
  timestamp = {Tue, 07 Apr 2015 15:19:02 +0200},
  biburl = {http://dblp.uni-trier.de/rec/bib/conf/esop/BattyMNPS15},
  bibsource = {dblp computer science bibliography, http://dblp.org}
}
@misc{EBC2014,
  optkey = {},
  author = {Reinoud Elhorst and Mark Batty and David Chisnall},
  title = {Efficient code generation for weakly ordered architectures},
  howpublished = {Presentation at the 4th European LLVM conference  (EuroLLVM)},
  month = apr,
  year = 2014,
  note = {Slides and report available at \url{http://llvm.org/devmtg/2014-04/}},
  optannote = {},
  abstract = {This report explores the way LLVM generates the
memory barriers needed to support the C11/C++11 atomics
for ARM. I measure the influence of memory barriers on
performance, and I show that in some cases LLVM generates
too many barriers. By leaving these barriers out, performance
increases significantly. I introduce two LLVM passes, which will
remove these extra barriers, improving performance in my test
by 40\%. I believe one of these passes is ready to be upstreamed
to LLVM, while the other will need more testing.
}
}
@misc{Elhorst2014,
  optkey = {},
  author = {Reinoud Elhorst},
  title = {Lowering {C11} Atomics for {ARM} in {LLVM}},
  opthowpublished = {},
  optmonth = {},
  year = {2014},
  month = mar,
  optnote = {},
  optannote = {},
  pdf = {http://llvm.org/devmtg/2014-04/PDFs/Talks/Reinoud-report.pdf},
  abstract = {This report explores the way LLVM generates the
memory barriers needed to support the C11/C++11 atomics
for ARM. I measure the influence of memory barriers on
performance, and I show that in some cases LLVM generates
too many barriers. By leaving these barriers out, performance
increases significantly. I introduce two LLVM passes, which will
remove these extra barriers, improving performance in my test
by 40\%. I believe one of these passes is ready to be upstreamed
to LLVM, while the other will need more testing.
}
}
@misc{HCSS2015-Memarian,
  optkey = {},
  author = {Kayvan Memarian and Kyndylan Nienhuis and Justus Matthiesen and James Lingard and Peter Sewell},
  title = {Cerberus: towards an Executable Semantics for Sequential and Concurrent {C11}},
  opthowpublished = {},
  month = may,
  year = 2015,
  note = {Presentation at HCSS 2015: the Fifteenth Annual
High Confidence Software and Systems Conference},
  optmonth = {},
  optyear = {},
  optannote = {},
  abstract = {C remains central to our computing infrastructure but still lacks a
clear and complete semantics. Programmers lack tools to explore the
range of behaviours they should expect; compiler development lacks
test oracles; and formal verification and analysis must make
(explicitly or implicitly) many choices about the specific C they
target.

We describe Cerberus, a semantics for a substantial fragment of C11.
Its thread-local semantics is factored via an elaboration into a
simpler Core language, to make it conceptually and mathematically
tractable.  This is integrated with an operational model for C11
concurrency, with a mechanised proof of equivalence to the axiomatic
C11 model of Batty et al.  The front-end includes a parser that
closely follows the C11 standard grammar and a typechecker.  Cerberus
is executable, to explore all behaviours or single paths of test
programs, and it supports proof, as shown by a preliminary experiment
in translation validation for the front-end of Clang, for very simple
programs.  This is a step towards a clear, consistent, and unambiguous
semantics for C.
}
}
@inproceedings{Kell:2014:ST:2661136.2661154,
  author = {Kell, Stephen},
  title = {In Search of Types},
  booktitle = {\textbf{Onward! 2014}: Proceedings of the 2014 ACM International Symposium on New
Ideas, New Paradigms, and Reflections on Programming \& Software},
  conf = {Onward! 2014},
  series = {Onward! 2014},
  year = {2014},
  month = oct,
  isbn = {978-1-4503-3210-1},
  location = {Portland, Oregon, USA},
  pages = {227--241},
  numpages = {15},
  url = {http://doi.acm.org/10.1145/2661136.2661154},
  doi = {10.1145/2661136.2661154},
  acmid = {2661154},
  publisher = {ACM},
  address = {New York, NY, USA},
  optkeywords = {abstraction, data types, information hiding, type
discipline, type system},
  abstract = {The concept of "type" has been used without a consistent, 
 precise definition in discussions about programming languages for 60 years. 
 In this essay I explore various concepts lurking behind distinct uses of this word, 
 highlighting two traditions in which the word came into use largely independently: 
 engineering traditions on the one hand, and those of symbolic logic on the other. 
 These traditions are founded on differing attitudes to the nature and purpose of abstraction, 
 but their distinct uses of "type" have never been explicitly unified. 
 One result is that discourse across these traditions often finds itself at cross purposes, 
 such as overapplying one sense of "type" where another is appropriate, and occasionally 
 proceeding to draw wrong conclusions. I illustrate this with examples from well-known 
 and justly well-regarded literature, and argue that ongoing developments in both the 
 theory and practice of programming make now a good time to resolve these problems. }
}
@inproceedings{DBLP:conf/pepm/KerneisSH14,
  author = {Gabriel Kerneis and
               Charlie Shepherd and
               Stefan Hajnoczi},
  title = {{QEMU/CPC:} static analysis and {CPS} conversion for safe, portable,
               and efficient coroutines},
  booktitle = {\textbf{PEPM 2014}: Proceedings of the {ACM} {SIGPLAN} 2014 workshop on Partial evaluation
               and program manipulation, San Diego,
               California, {USA}},
  conf = {PEPM 2014},
  pages = {83--94},
  year = {2014},
  month = jan,
  optcrossref = {DBLP:conf/pepm/2014},
  url = {http://doi.acm.org/10.1145/2543728.2543733},
  doi = {10.1145/2543728.2543733},
  timestamp = {Thu, 09 Jan 2014 10:44:12 +0100},
  biburl = {http://dblp.uni-trier.de/rec/bib/conf/pepm/KerneisSH14},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  abstract = {

Coroutines and events are two common abstractions for writing concurrent programs. Because coroutines are often more convenient, but events more portable and efficient, it is natural to want to translate the former into the latter. CPC is such a source-to-source translator for C programs, based on a partial conversion into continuation-passing style (CPS conversion) of functions annotated as cooperative.

In this article, we study the application of the CPC translator to QEMU, an open-source machine emulator which also uses annotated coroutine functions for concurrency. We first propose a new type of annotations to identify functions which never cooperate, and we introduce CoroCheck, a tool for the static analysis and inference of cooperation annotations. Then, we improve the CPC translator, defining CPS conversion as a calling convention for the C language, with support for indirect calls to CPS-converted function through function pointers. Finally, we apply CoroCheck and CPC to QEMU (750 000 lines of C code), fixing hundreds of missing annotations and comparing performance of the translated code with existing implementations of coroutines in QEMU.

Our work shows the importance of static annotation checking to prevent actual concurrency bugs, and demonstrates that CPS conversion is a flexible, portable, and efficient compilation technique, even for very large programs written in an imperative language.
}
}
@inproceedings{DBLP:conf/popl/Pichon-Pharabod16,
  author = {Jean Pichon{-}Pharabod and
               Peter Sewell},
  title = {A concurrency semantics for relaxed atomics that permits optimisation
               and avoids thin-air executions},
  abstract = {Despite much research on concurrent programming languages, especially for Java and C/C++, we still do not have a satisfactory definition of their semantics, one that admits all common optimisations without also admitting undesired behaviour. Especially problematic are the ``thin-air'' examples involving high-performance concurrent accesses, such as C/C++11 relaxed atomics.  The C/C++11 model is in a per-candidate-execution style, and previous work has identified a tension between that and the fact that compiler optimisations do not operate over single candidate executions in isolation; rather, they operate over syntactic representations that represent all executions.

In this paper we propose a novel approach that circumvents this difficulty.  We define a concurrency semantics for a core calculus, including relaxed-atomic and non-atomic accesses, and locks, that admits a wide range of optimisation while still forbidding the classic thin-air examples.  It also addresses other problems relating to undefined behaviour.

The basic idea is to use an event-structure representation of the current state of each thread, capturing all of its potential executions, and to permit interleaving of execution and transformation steps over that to reflect optimisation (possibly dynamic) of the code. These are combined with a non-multi-copy-atomic storage subsystem, to reflect common hardware behaviour.

The semantics is defined in a mechanised and executable form, and designed to be implementable above current relaxed hardware and strong enough to support the programming idioms that C/C++11 does for this fragment.  It offers a potential way forward for concurrent programming language semantics, beyond the current C/C++11 and Java models.
},
  booktitle = {\textbf{POPL 2016}: Proceedings of the 43rd Annual {ACM} {SIGPLAN-SIGACT} Symposium on
               Principles of Programming Languages,  St. Petersburg,
               FL, USA, January 20 - 22, 2016},
  conf = {POPL 2016},
  pages = {622--633},
  year = {2016},
  month = jan,
  optcrossref = {DBLP:conf/popl/2016},
  url = {http://doi.acm.org/10.1145/2837614.2837616},
  pdf = {http://www.cl.cam.ac.uk/~jp622/popl16-thinair/a_concurrency_semantics.pdf},
  doi = {10.1145/2837614.2837616},
  timestamp = {Fri, 08 Jan 2016 18:18:25 +0100},
  biburl = {http://dblp.uni-trier.de/rec/bib/conf/popl/Pichon-Pharabod16},
  bibsource = {dblp computer science bibliography, http://dblp.org}
}
@inproceedings{Cerberus-PLDI16,
  author = {
Kayvan Memarian and 
Justus Matthiesen and 
James Lingard and
Kyndylan Nienhuis and
David Chisnall and
Robert N.M. Watson and
Peter Sewell
},
  title = {Into the depths of {C}: elaborating the de facto standards},
  abstract = {
C remains central to our computing infrastructure.  It is notionally defined by ISO standards, but in reality the properties of C assumed by systems code and those implemented by compilers have diverged, both from the ISO standards and from each other, and none of these are clearly understood.

We make two contributions to help improve this error-prone situation.  First, we describe an in-depth analysis of the design space for the semantics of pointers and memory in C as it is used in practice.  We articulate many specific questions, build a suite of semantic test cases, gather experimental data from multiple implementations, and survey what C experts believe about the de facto standards.  We identify questions where there is a consensus (either following ISO or differing) and where there are conflicts. We apply all this to an experimental C implemented above capability hardware.  Second, we describe a formal model, Cerberus, for large parts of C.  Cerberus is parameterised on its memory model; it is linkable either with a candidate de facto memory object model, under construction, or with an operational C11 concurrency model; it is defined by elaboration to a much simpler Core language for accessibility, and it is executable as a test oracle on small examples.

This should provide a solid basis for discussion of what mainstream C is now: what programmers and analysis tools can assume and what compilers aim to implement. Ultimately we hope it will be a step towards clear, consistent, and accepted semantics for the various use-cases of C.
},
  optcrossref = {},
  optkey = {},
  booktitle = {\textbf{PLDI 2016}},
  conf = {PLDI 2016},
  optpages = {},
  year = {2016},
  opteditor = {},
  optvolume = {},
  optnumber = {},
  optseries = {},
  optaddress = {},
  month = jun,
  optorganization = {},
  optpublisher = {},
  note = {PLDI 2016 Distinguished Paper award},
  pdf = {http://www.cl.cam.ac.uk/users/pes20/cerberus/pldi16.pdf},
  optannote = {}
}
@inproceedings{gudka15:soaap,
  author = {Khilan Gudka and Robert N. M. Watson and Jonathan Anderson and David Chisnall and Brooks Davis and Ben Laurie and Ilias Marinos and Peter G. Neumann and Alex Richardson},
  title = {{Clean Application Compartmentalization with SOAAP}},
  booktitle = {\textbf{CCS 2015}: {Proceedings of the 22nd ACM Conference on Computer and Communications Security}},
  conf = {CCS 2015},
  month = oct,
  year = {2015},
  pdf = {https://www.cl.cam.ac.uk/research/security/ctsrd/pdfs/2015ccs-soaap.pdf},
  abstract = {Application compartmentalization, a vulnerability mitigation technique
  employed in programs such as OpenSSH and the Chromium web browser,
  decomposes software into isolated components to limit privileges leaked
  or otherwise available to attackers.
  However, compartmentalizing applications -- and maintaining that
  compartmentalization -- is hindered by ad hoc
  methodologies and significantly increased programming effort.
  In practice, programmers stumble through (rather than overtly reason
  about) compartmentalization spaces of possible decompositions,
  unknowingly trading off correctness, security, complexity, and
  performance.
  We present a new conceptual framework embodied in an LLVM-based tool: the
  Security-Oriented Analysis of Application Programs (SOAAP) that allows
  programmers to reason about co mpartmentalization using source-code
  annotations (compartmentalization hypotheses).
  We demonstrate considerable benefit when creating new compartmentalizations
  for complex applications, and analyze existing compartmentalized
  applications to discover design faults and maintenance issues arising
  from application evolution.}
}
@misc{N2012,
  optkey = {},
  author = {Kayvan Memarian and Peter Sewell},
  title = {Clarifying the {C} memory object model},
  howpublished = {ISO SC22 WG14 N2012},
  html = {http://www.cl.cam.ac.uk/~pes20/cerberus/notes64-wg14.html},
  month = mar,
  year = 2016,
  optnote = {},
  optannote = {}
}
@misc{N2013,
  optkey = {},
  author = {David Chisnall and Justus Matthiesen and Kayvan Memarian and Kyndylan Nienhuis and Peter Sewell and Robert N. M. Watson},
  title = {C memory object and value semantics: the space of de facto and {ISO} standards},
  howpublished = {ISO SC22 WG14 N2013},
  pdf = {http://www.cl.cam.ac.uk/~pes20/cerberus/notes30.pdf},
  month = mar,
  year = 2016,
  optnote = {},
  optannote = {}
}
@misc{N2014,
  optkey = {},
  author = {Kayvan Memarian and Peter Sewell},
  title = {What is {C} in practice? ({Cerberus} survey v2): Analysis of Responses},
  howpublished = {ISO SC22 WG14 N2014},
  html = {http://www.cl.cam.ac.uk/~pes20/cerberus/notes50-survey-discussion.html},
  month = mar,
  year = 2016,
  optnote = {},
  optannote = {}
}
@misc{N2015,
  optkey = {},
  author = {Kayvan Memarian and Peter Sewell},
  title = {What is {C} in practice? ({Cerberus} survey v2): Analysis of Responses -- with Comments},
  howpublished = {ISO SC22 WG14 N2015},
  url = {http://www.cl.cam.ac.uk/~pes20/cerberus/analysis-2016-02-05-anon.txt},
  month = mar,
  year = 2016,
  optnote = {},
  optannote = {}
}
@phdthesis{battyPhD,
  author = {Mark John Batty},
  title = {The C11 and C++11 Concurrency Model},
  school = {University of Cambridge Computer Laboratory},
  year = {2014},
  month = nov,
  optkey = {},
  opttype = {},
  optaddress = {},
  optmonth = {},
  note = {Winner of 2015 ACM SIGPLAN John C. Reynolds
Doctoral Dissertation Award and 2015 CPHC/BCS Distinguished
Dissertation competition},
  optannote = {}
}
@inproceedings{kell_dynamically_2016,
  author = {Kell, Stephen},
  title = {Dynamically diagnosing type errors in unsafe code},
  booktitle = {\textbf{OOPSLA 2016}: Proceedings of the ACM SIGPLAN International Conference on Object-Oriented Programming, Systems, Languages, and Applications},
  conf = {OOPSLA 2016},
  series = {OOPSLA 2016},
  year = {2016},
  month = nov,
  location = {Amsterdam, The Netherlands},
  numpages = {20},
  publisher = {ACM},
  address = {New York, NY, USA},
  keywords = {C, unsafe code, pointers, type information},
  abstract = {Existing approaches for detecting type errors in unsafe languages are limited.
Static analysis methods are imprecise, 
and often require source-level changes, while
most dynamic methods check only memory properties
(bounds, liveness, etc.), owing to a lack of run-time type information. 
This paper describes \textsf{libcrunch}, 
a system for binary-compatible \emph{run-time} type checking of unmodified
unsafe code, currently focusing on C.
Practical experience shows that our prototype implementation 
is easily applicable to many real codebases without source-level modification,
correctly flags programmer errors with a very low rate of false positives,
offers a very low run-time overhead, 
and covers classes of error caught by no previously existing tool.},
  pdf = {http://www.cl.cam.ac.uk/~pes20/rems/papers/typebugs.pdf}
}
@inproceedings{kell_missing_2016,
  author = {Stephen Kell and Dominic P. Mulligan and Peter Sewell},
  title = {The missing link: explaining {ELF} static linking, semantically},
  booktitle = {\textbf{OOPSLA 2016}: Proceedings of the ACM SIGPLAN International Conference on Object-Oriented Programming, Systems, Languages, and Applications},
  conf = {OOPSLA 2016},
  series = {OOPSLA 2016},
  year = {2016},
  month = nov,
  location = {Amsterdam, The Netherlands},
  numpages = {17},
  publisher = {ACM},
  address = {New York, NY, USA},
  keywords = {Linking, formal specification, Executable and Linkable Format (ELF), theorem-proving},
  abstract = {Beneath the surface, software usually depends on complex \emph{linker
  behaviour} to work as intended.  Even linking
\texttt{hello\_world.c} is surprisingly involved, and systems software
such as \texttt{libc} and operating system kernels rely on a host of
linker features.  But linking is poorly understood by working
programmers and has largely been neglected by language researchers.

In this paper we survey the many use-cases that linkers support and
the poorly specified \emph{linker speak} by which they are controlled:
metadata in object files, command-line options, and linker-script
language.  We provide the first validated formalisation of a realistic
executable and linkable format (ELF), and capture aspects of the
Application Binary Interfaces for four mainstream platforms (AArch64,
AMD64, Power64, and IA32).  Using these, we develop an executable
specification of static linking, covering (among other things) enough to link
small C programs (we use the example of \textsf{bzip2}) into a correctly
running executable.  We
provide our specification in Lem and Isabelle/HOL forms.  This is the
first formal specification of mainstream linking.  We have used the
Isabelle/HOL version to prove a sample
correctness property for one case of AMD64 ABI relocation,
demonstrating that the specification supports formal proof, and as a
first step towards the much more ambitious goal of verified linking.
Our work should enable several novel strands of research, including
linker-aware verified compilation and program analysis, and better
languages for controlling linking.
},
  pdf = {http://www.cl.cam.ac.uk/~pes20/rems/papers/oopsla-elf-linking-2016.pdf}
}
@inproceedings{operational-semantics-c11-2016,
  author = {Kyndylan Nienhuis and Kayvan Memarian and Peter Sewell},
  title = {An operational semantics for {C/C++11} concurrency},
  booktitle = {\textbf{OOPSLA 2016}: Proceedings of the ACM SIGPLAN International Conference on Object-Oriented Programming, Systems, Languages, and Applications},
  conf = {OOPSLA 2016},
  location = {Amsterdam, The Netherlands},
  numpages = {18},
  publisher = {ACM},
  address = {New York, NY, USA},
  month = nov,
  year = {2016},
  abstract = {The C/C++11 concurrency model balances two goals: it is relaxed enough to be efficiently implementable and (leaving aside the ``thin-air'' problem) it is strong enough to give useful guarantees to programmers. It is mathematically precise and has been used in verification research and compiler testing. However, the model is expressed in an axiomatic style, as predicates on complete candidate executions. This suffices for computing the set of allowed executions of a small litmus test, but it does not directly support the incremental construction of executions of larger programs. It is also at odds with conventional operational semantics, as used implicitly in the rest of the C/C++ standards.

Our main contribution is the development of an operational model for C/C++11 concurrency. This covers all the features of the previous formalised axiomatic model, and we have a mechanised proof that the two are equivalent, in Isabelle/HOL. We also integrate this semantics with an operational semantics for sequential C (described elsewhere); the combined semantics can incrementally execute programs in a small fragment of C.

Doing this uncovered several new aspects of the C/C++11 model: we show that one cannot build an equivalent operational model that simply follows program order, sequential consistent order, or the synchronises-with order. The first negative result is forced by hardware-observable behaviour, but the latter two are not, and so might be ameliorated by changing C/C++11. More generally, we hope that this work, with its focus on incremental construction of executions, will inform the future design of new concurrency models.},
  pdf = {http://www.cl.cam.ac.uk/~pes20/rems/papers/nienhuis-oopsla-2016.pdf}
}
@misc{N2089,
  optkey = {},
  author = {Kayvan Memarian and Peter Sewell},
  title = {Clarifying Unspecified Values (Draft Defect Report or Proposal for C2x)},
  howpublished = {ISO SC22 WG14 N2089},
  html = {http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2089.htm},
  month = sep,
  year = 2016,
  optnote = {},
  optannote = {}
}
@misc{N2090,
  optkey = {},
  author = {Kayvan Memarian and Peter Sewell},
  title = {Clarifying Pointer Provenance (Draft Defect Report or Proposal for C2x)},
  howpublished = {ISO SC22 WG14 N2090},
  html = {http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2090.htm},
  month = sep,
  year = 2016,
  optnote = {},
  optannote = {}
}
@misc{N2091,
  optkey = {},
  author = {Kayvan Memarian and Peter Sewell},
  title = {Clarifying Trap Representations (Draft Defect Report or Proposal for C2x)},
  howpublished = {ISO SC22 WG14 N2091},
  html = {http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2091.htm},
  month = sep,
  year = 2016,
  optnote = {},
  optannote = {}
}
@inproceedings{Chisnall2017,
  author = {Chisnall, David and Davis, Brooks and Gudka, Khilan and Brazdil, David and Joannou, Alexandre and Woodruff, Jonathan and Markettos, A. Theodore and Maste, J. Edward and Norton, Robert and Son, Stacey and Roe, Michael and Moore, Simon W. and Neumann, Peter G. and Laurie, Ben and Watson, Robert N.M.},
  title = {{CHERI JNI: Sinking the Java Security Model into the C}},
  booktitle = {\textbf{ASPLOS 2017}: Proceedings of the Twenty-Second International Conference on Architectural Support for Programming Languages and Operating Systems},
  series = {ASPLOS '17},
  conf = {ASPLOS 2017},
  year = {2017},
  month = apr,
  isbn = {978-1-4503-4465-4},
  location = {Xi'an, China},
  pages = {569--583},
  numpages = {15},
  url = {http://doi.acm.org/10.1145/3037697.3037725},
  doi = {10.1145/3037697.3037725},
  acmid = {3037725},
  publisher = {ACM},
  address = {New York, NY, USA},
  keywords = {architecture, capability systems, cheri, compartmentalization, compilers, hardware security, java, jni, language security, memory protection, sandboxing},
  abstract = {Java provides security and robustness by building a high-level security model atop the foundation of memory protection. Unfortunately, any native code linked into a Java program -- including the million lines used to implement the standard library -- is able to bypass both the memory protection and the higher-level policies. We present a hardware-assisted implementation of the Java native code interface, which extends the guarantees required for Java's security model to native code.

Our design supports safe direct access to buffers owned by the JVM, including hardware-enforced read-only access where appropriate. We also present Java language syntax to declaratively describe isolated compartments for native code.

We show that it is possible to preserve the memory safety and isolation requirements of the Java security model in C code, allowing native code to run in the same process as Java code with the same impact on security as running equivalent Java code. Our approach has a negligible impact on performance, compared with the existing unsafe native code interface. We demonstrate a prototype implementation running on the CHERI microprocessor synthesized in FPGA.}
}
@inproceedings{DBLP:conf/popl/KumarMNO14,
  author = {Ramana Kumar and
               Magnus O. Myreen and
               Michael Norrish and
               Scott Owens},
  title = {{CakeML}: a verified implementation of {ML}},
  booktitle = {\textbf{POPL 2014}: The 41st Annual {ACM} {SIGPLAN-SIGACT} Symposium on Principles of
               Programming Languages,  San Diego, CA, USA},
  conf = {POPL 2014},
  pages = {179--192},
  year = {2014},
  month = jan,
  optcrossref = {DBLP:conf/popl/2014},
  url = {http://doi.acm.org/10.1145/2535838.2535841},
  doi = {10.1145/2535838.2535841},
  timestamp = {Thu, 09 Jan 2014 08:32:32 +0100},
  biburl = {http://dblp.uni-trier.de/rec/bib/conf/popl/KumarMNO14},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  abstract = {
      We have developed and mechanically verified an ML system called CakeML, which supports a substantial subset of Standard ML. CakeML is implemented as an interactive read-eval-print loop (REPL) in x86-64 machine code. Our correctness theorem ensures that this REPL implementation prints only those results permitted by the semantics of CakeML. Our verification effort touches on a breadth of topics including lexing, parsing, type checking, incremental and dynamic compilation, garbage collection, arbitrary-precision arithmetic, and compiler bootstrapping.

      Our contributions are twofold. The first is simply in building a system that is end-to-end verified, demonstrating that each piece of such a verification effort can in practice be composed with the others, and ensuring that none of the pieces rely on any over-simplifying assumptions. The second is developing novel approaches to some of the more challenging aspects of the verification. In particular, our formally verified compiler can bootstrap itself: we apply the verified compiler to itself to produce a verified machine-code implementation of the compiler. Additionally, our compiler proof handles diverging input programs with a lightweight approach based on logical timeout exceptions. The entire development was carried out in the HOL4 theorem prover.}
}
@article{DBLP:journals/jfp/MyreenO14,
  author = {Magnus O. Myreen and
               Scott Owens},
  title = {Proof-producing translation of higher-order logic into pure and stateful
               {ML}},
  journal = {\textbf{J. Funct. Program.}},
  volume = {24},
  number = {2-3},
  pages = {284--315},
  year = {2014},
  month = jan,
  url = {http://dx.doi.org/10.1017/S0956796813000282},
  doi = {10.1017/S0956796813000282},
  timestamp = {Mon, 26 May 2014 10:31:52 +0200},
  biburl = {http://dblp.uni-trier.de/rec/bib/journals/jfp/MyreenO14},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  abstract = {
    The higher-order logic found in proof assistants such as Coq and various HOL systems provides a convenient setting for the development and verification of functional programs. However, to efficiently run these programs, they must be converted (or `extracted') to functional programs in a programming language such as ML or Haskell. With current techniques, this step, which must be trusted, relates similar looking objects that have very different semantic definitions, such as the set-theoretic model of a logic and the operational semantics of a programming language. In this paper, we show how to increase the trustworthiness of this step with an automated technique. Given a functional program expressed in higher-order logic, our technique provides the corresponding program for a functional language defined with an operational semantics, and it provides a mechanically checked theorem relating the two. This theorem can then be used to transfer verified properties of the logical function to the program. We have implemented our technique in the HOL4 theorem prover, translating functions to a subset of Standard ML, and have applied the implementation to examples including functional data structures, a parser generator, cryptographic algorithms, a garbage collector and the 500-line kernel of the HOL light theorem prover. This paper extends our previous conference publication with new material that shows how functions defined in terms of a state-and-exception monad can be translated, with proofs, into stateful ML code. The HOL light example is also new.}
}
@inproceedings{YongKiamMKFON16,
  author = {Tan, Yong Kiam and Magnus O. Myreen and Ramana Kumar and
Anthony Fox and Scott Owens and Michael Norrish},
  title = {A New Verified Compiler Backend for {CakeML}},
  booktitle = {Proc. \textbf{ICFP 2016}: International Conference on Functional Programming},
  conf = {ICFP 2016},
  year = 2016,
  month = sep,
  doi = {10.1145/2951913.2951924},
  pdf = {https://cakeml.org/icfp16.pdf},
  publisher = {ACM Press},
  abstract = {
    We have developed and mechanically verified a new compiler backend
    for CakeML.  Our new compiler features a sequence of intermediate
    languages that allows it to incrementally compile away high-level
    features and enables verification at the right levels of semantic
    detail.  In this way, it resembles mainstream (unverified)
    compilers for strict functional languages.  The compiler supports
    efficient curried multi-argument functions, configurable data
    representations, exceptions that unwind the call stack, register
    allocation, and more.  The compiler targets several architectures:
    x86-64, ARMv6, ARMv8, MIPS-64, and RISC-V.

    In this paper, we present the overall structure of the compiler,
    including its 12 intermediate languages, and explain how
    everything fits together.  We focus particularly on the
    interaction between the verification of the register allocator and
    the garbage collector, and memory representations.  The entire
    development has been carried out within the HOL4 theorem prover.
  }
}
@inproceedings{FoxMTK17,
  author = {Anthony C. J. Fox and
               Magnus O. Myreen and
               Yong Kiam Tan and
               Ramana Kumar},
  title = {Verified compilation of {CakeML} to multiple
machine-code targets},
  booktitle = {Proc. \textbf{CPP 2017}: Certified Programs and Proofs},
  conf = {CPP 2017},
  year = {2017},
  month = jan,
  doi = {10.1145/3018610.3018621},
  editor = {Yves Bertot and Viktor Vafeiadis},
  pdf = {https://cakeml.org/cpp17.pdf},
  publisher = {{ACM}},
  abstract = {
    This paper describes how the latest CakeML compiler supports
    verified compilation down to multiple realistically modelled
    target architectures.  In particular, we describe how the compiler
    definition, the various language semantics, and the correctness
    proofs were organised to minimize target-specific overhead.  With
    our setup we have incorporated compilation to four 64-bit
    architectures, ARMv8, x86-64, MIPS-62, RISC-V, and one 32-bit
    architecture, ARMv6.  Our correctness theorem allows interference
    from the environment: the top-level correctness statement takes
    into account execution of foreign code and per-instruction
    interference from external processes, such as interrupt handlers
    in operating systems.  The entire CakeML development is formalised
    in the HOL4 theorem prover.

  }
}
@inproceedings{kell_towards_20XX,
  author = {Stephen Kell},
  title = {Towards a dynamic object model within {Unix} processes},
  booktitle = {\textbf{Onward! 2015}: 2015 {ACM} International Symposium on New Ideas, New Paradigms, and
               Reflections on Programming and Software,  Pittsburgh,
               PA, USA},
  conf = {Onward! 2015},
  pages = {224--239},
  year = {2015},
  month = oct,
  xcrossref = {DBLP:conf/oopsla/2015onward},
  url = {http://doi.acm.org/10.1145/2814228.2814238},
  doi = {10.1145/2814228.2814238},
  timestamp = {Wed, 28 Oct 2015 16:51:55 +0100},
  biburl = {http://dblp.uni-trier.de/rec/bib/conf/oopsla/Kell15},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  abstract = {Programmers face much complexity from the co-existence of 
``native'' (Unix-like) and virtual machine (VM) ``managed'' run-time environments.
Rather than having VMs replace Unix processes, we investigate whether it makes
sense for the latter to ``become VMs'', by evolving Unix's 
user-level services to subsume those of VMs.
We survey the (little-understood) VM-like features in modern Unix, noting 
common shortcomings: a lack of semantic metadata (``type information'')
and the inability to bind from objects ``back'' to their metadata. 
We describe the design and implementation of 
a system, \textsf{liballocs}, which adds these capabilities in a highly compatible way,
and explore its consequences.},
  pdf = {http://www.cl.cam.ac.uk/~srk31/research/papers/kell15towards-preprint.pdf}
}
@inproceedings{kell:some:2017,
  author = {Kell, Stephen},
  title = {Some were meant for {C}: the endurance of an unmanageable language},
  booktitle = {\textbf{Onward! 2017}: Proceedings of the 2017 ACM International Symposium on New Ideas, New Paradigms, and Reflections on Programming \& Software},
  conf = {Onward! 2017},
  series = {Onward! 2017},
  year = {2017},
  month = oct,
  location = {Vancouver, British Columbia, Canada},
  numpages = {18},
  pdf = {http://www.cl.cam.ac.uk/~srk31/research/papers/kell17some-preprint.pdf},
  doi = {10.1145/3133850.3133867},
  publisher = {ACM},
  address = {New York, NY, USA},
  keywords = {systems programming, virtual machine, managed languages, safety, undefined behavior},
  abstract = {The C language leads a double life: as an application
programming language of yesteryear, perpetuated by
circumstance, and as a systems programming language
which remains a weapon of choice decades after its creation. This essay is a C programmer's reaction to the call
to abandon ship. It questions several properties commonly held to define the experience of using C; these
include unsafety, undefined behaviour, and the motivation of performance. It argues all these are in fact
inessential; rather, it traces C's ultimate strength to a
communicative design which does not fit easily within
the usual conception of ``a programming language'', but
can be seen as a counterpoint to so-called ``managed
languages''. This communicativity is what facilitates the
essential aspect of system-building: creating parts which
interact with other, remote parts---being ``alongside'' not
``within''. }
}
@article{YALLOP2017,
  title = {A modular foreign function interface},
  journal = {\textbf{Science of Computer Programming}},
  year = {2017},
  month = apr,
  issn = {0167-6423},
  doi = {https://doi.org/10.1016/j.scico.2017.04.002},
  url = {http://www.sciencedirect.com/science/article/pii/S0167642317300709},
  pdf = {https://www.cl.cam.ac.uk/~jdy22/papers/a-modular-foreign-function-interface.pdf},
  author = {Jeremy Yallop and David Sheets and Anil Madhavapeddy},
  keywords = {Foreign functions},
  keywords = {Functional programming},
  keywords = {Modularity},
  abstract = {Abstract foreign function interfaces are typically organised monolithically, tying together the specification of each foreign function with the mechanism used to make the function available in the host language. This leads to inflexible systems, where switching from one binding mechanism to another (say from dynamic binding to static code generation) often requires changing tools and rewriting large portions of code. We show that ML-style module systems support exactly the kind of abstraction needed to separate these two aspects of a foreign function binding, leading to declarative foreign function bindings that support switching between a wide variety of binding mechanisms -- static and dynamic, synchronous and asynchronous, etc. -- with no changes to the function specifications. Note. This is a revised and expanded version of an earlier paper, Declarative Foreign Function Binding Through Generic Programming. This paper brings a greater focus on modularity, and adds new sections on error handling, and on the practicality of the approach we describe.}
}
@inproceedings{Watt:2018:MVW:3176245.3167082,
  author = {Watt, Conrad},
  title = {Mechanising and Verifying the {WebAssembly} Specification},
  conf = {CPP 2018},
  booktitle = {\textbf{CPP 2018}: Proceedings of the 7th ACM SIGPLAN International Conference on Certified Programs and Proofs},
  series = {CPP 2018},
  year = {2018},
  isbn = {978-1-4503-5586-5},
  location = {Los Angeles, CA, USA},
  pages = {53--65},
  numpages = {13},
  url = {http://doi.acm.org/10.1145/3167082},
  doi = {10.1145/3167082},
  acmid = {3167082},
  publisher = {ACM},
  address = {New York, NY, USA},
  keywords = {bytecode, reduction, soundness, stack machine},
  pdf = {http://www.cl.cam.ac.uk/users/pes20/mechanising-and-verifying-the-webassembly-specification.pdf},
  abstract = {WebAssembly is a new low-level language currently being implemented in all major web browsers. It is designed to become the universal compilation target for the web, obsoleting existing solutions in this area, such as asm.js and Native Client. The WebAssembly working group has incorporated formal techniques into the development of the language, but their efforts so far have focussed on pen and paper formal specification.

We present a mechanised Isabelle specification for the WebAssembly language, together with a verified executable interpreter and type checker. Moreover, we present a fully mechanised proof of the soundness of the WebAssembly type system, and detail how our work on this proof has exposed several issues with the official WebAssembly specification, influencing its development. Finally, we give a brief account of our efforts in performing differential fuzzing of our interpreter against industry implementations.}
}
@inproceedings{Chargueraud2018JSExplain,
  author = {Chargu{\'e}raud, Arthur and Schmitt, Alan and Wood, Thomas},
  title = {{JSExplain}: a Double Debugger for {JavaScript}},
  booktitle = {\textbf{WWW '18 Companion}: The 2018 Web Conference Companion, April 23--27, 2018, Lyon, France},
  conf = {WWW},
  year = {2018},
  month = apr,
  publisher = {ACM},
  abstract = {We present JSExplain, a reference interpreter for JavaScript that closely
  follows the specification and that produces execution traces.
  These traces may be interactively investigated in a browser, with an
  interface that displays not only the code and the state of the interpreter,
  but also the code and the state of the interpreted program.
  Conditional breakpoints may be expressed with respect to both the interpreter
  and the interpreted program. In that respect, JSExplain is a double-debugger
  for the specification of JavaScript.},
  doi = {10.1145/3184558.3185969},
  pdf = {https://psvg.doc.ic.ac.uk/publications/Chargueraud2018JSExplain.pdf}
}
@inproceedings{FragosoSantos2018JaVerT,
  author = {{Fragoso Santos}, Jos{\'{e}} and Maksimovi\'{c}, Petar and Naud\v{z}i\={u}nien\.{e}, Daiva and Wood, Thomas and Gardner, Philippa},
  title = {{JaVerT: JavaScript verification toolchain}},
  booktitle = {\textbf{POPL 2018}},
  conf = {POPL 2018},
  optjournal = {{PACMPL}},
  optvolume = {2},
  optnumber = {\textbf{POPL}},
  optconf = {{POPL}},
  optpages = {50:1--50:33},
  year = {2018},
  month = jan,
  abstract = {The dynamic nature of JavaScript and its complex semantics make it a difficult target for logic-based verification. 
    We introduce JaVerT, a semi-automatic JavaScript Verification Toolchain, based on separation logic and aimed at the specialist developer wanting rich, 
    mechanically verified specifications of critical JavaScript code. To specify JavaScript programs, 
    we design abstractions that capture its key heap structures (for example, prototype chains and function closures), 
    allowing the developer to write clear and succinct specifications with minimal knowledge of the JavaScript internals. 
    To verify JavaScript programs, we develop JaVerT, a verification pipeline consisting of: JS-2-JSIL, 
    a well-tested compiler from JavaScript to JSIL, an intermediate goto language capturing the fundamental dynamic features of JavaScript; 
  JSIL Verify, a semi-automatic verification tool based on a sound JSIL separation logic; 
  and verified axiomatic specifications of the JavaScript internal functions. 
  Using JaVerT, we verify functional correctness properties of: 
  data-structure libraries (key-value map, priority queue) written in an object-oriented style; 
  operations on data structures such as binary search trees (BSTs) and lists; examples illustrating function closures; 
  and test cases from the official ECMAScript test suite. The verification times suggest that reasoning about larger, 
  more complex code using JaVerT is feasible. 
      },
  doi = {10.1145/3158138},
  pdf = {https://psvg.doc.ic.ac.uk/publications/FragosoSantos2018JaVerT.pdf}
}
@inproceedings{cerberus-popl2019,
  author = {Kayvan Memarian and Victor B. F. Gomes and Brooks Davis and Stephen Kell and Alexander Richardson and Robert N. M. Watson and Peter Sewell},
  title = {Exploring {C} Semantics and Pointer Provenance},
  optcrossref = {},
  optkey = {},
  conf = {POPL 2019},
  booktitle = {\textbf{POPL 2019}: Proc. 46th ACM SIGPLAN Symposium on Principles of Programming Languages},
  optbooktitle = {},
  year = {2019},
  opteditor = {},
  optvolume = {},
  optnumber = {},
  optseries = {},
  optpages = {},
  month = jan,
  optaddress = {},
  optorganization = {},
  optpublisher = {},
  note = {Proc. ACM Program. Lang. 3, POPL, Article 67},
  optannote = {},
  doi = {10.1145/3290380},
  pdf = {http://www.cl.cam.ac.uk/users/pes20/cerberus/cerberus-popl2019.pdf},
  supplementarymaterial = {http://www.cl.cam.ac.uk/users/pes20/cerberus/supplementary-material-popl2019},
  topic = {Cerberus},
  project = {http://www.cl.cam.ac.uk/~pes20/cerberus},
  abstract = {The semantics of pointers and memory objects in C has been a vexed question for many years.  C values cannot be treated as either purely abstract or purely concrete entities: the language exposes their representations, but compiler optimisations rely on analyses that reason about provenance and initialisation status, not just runtime representations. The ISO WG14 standard leaves much of this unclear, and in some respects differs with de facto standard usage --- which itself is difficult to investigate.

In this paper we explore the possible source-language semantics for memory objects and pointers, in ISO C and in C as it is used and implemented in practice, focussing especially on pointer provenance.  We aim to, as far as possible, reconcile the ISO C standard, mainstream compiler behaviour, and the semantics relied on by the corpus of existing C code.  We present two coherent proposals, tracking provenance via integers and not; both address many design questions. We highlight some pros and cons and open questions, and illustrate the discussion with a library of test cases.  We make our semantics executable as a test oracle, integrating it with the Cerberus semantics for much of the rest of C, which we have made substantially more complete and robust, and equipped with a web-interface GUI.  This allows us to experimentally assess our proposals on those test cases.  To assess their viability with respect to larger bodies of C code, we analyse the changes required and the resulting behaviour for a port of FreeBSD to CHERI, a research architecture supporting hardware capabilities, which (roughly speaking) traps on the memory safety violations which our proposals deem undefined behaviour. We also develop a new runtime instrumentation tool to detect possible provenance violations in normal C code, and apply it to some of the SPEC benchmarks.  We compare our proposal with a source-language variant of the twin-allocation LLVM semantics proposal of Lee et al.  Finally, we describe ongoing interactions with WG14, exploring how our proposals could be incorporated into the ISO standard.
},
  recent = {true}
}
@inproceedings{ct-wasm,
  author = {Conrad Watt and John Renner and Natalie Popescu and Sunjay Cauligi and Deian Stefan},
  title = {{CT-Wasm}: Type-Driven Secure Cryptography for the Web Ecosystem},
  optnote = {Under submission.},
  conf = {POPL 2019},
  booktitle = {\textbf{POPL 2019}: Proc. 46th ACM SIGPLAN Symposium on Principles of Programming Languages},
  year = {2019},
  month = jan,
  abstract = {A significant amount of both client and server-side cryptography is implemented in JavaScript. Despite widespread concerns about its security, no other language has been able to match the convenience that comes from its ubiquitous support on the “web ecosystem” - the wide variety of technologies that collectively underpins the modern World Wide Web. With the introduction of the new WebAssembly bytecode language (Wasm) into the web ecosystem, we have a unique opportunity to advance a principled alternative to existing JavaScript cryptography use cases which does not compromise this convenience.

We present Constant-Time WebAssembly (CT-Wasm), a type-driven, strict extension to WebAssembly which facilitates the verifiably secure implementation of cryptographic algorithms. CT-Wasm’s type system ensures that code written in CT-Wasm is both information flow secure and resistant to timing side channel attacks; like base Wasm, these guarantees are verifiable in linear time. Building on an existing Wasm mechanization, we mechanize the full CT-Wasm specification, prove soundness of the extended type system, implement a verified type checker, and give several proofs of the language’s security properties.

We provide two implementations of CT-Wasm: an OCaml reference interpreter and a native implementation for Node.js and Chromium that extends Google’s V8 engine. We also implement a CT-Wasm to Wasm rewrite tool that allows developers to reap the benefits of CT-Wasm’s type system today, while developing cryptographic algorithms for base Wasm environments. We evaluate the language, our implementations, and supporting tools by porting several cryptographic primitives - Salsa20, SHA-256, and TEA - and the full TweetNaCl library. We find that CT-Wasm is fast, expressive, and generates code that we experimentally measure to be constant-time.},
  pdf = {https://arxiv.org/pdf/1808.01348.pdf}
}
@inproceedings{CSEJS,
  author = {{Fragoso Santos}, Jos{\'{e}} and Maksimovi\'{c}, Petar and Sampaio, Gabriela and Gardner, Philippa},
  title = {{JaVerT 2.0}: Compositional Symbolic Execution for {JavaScript}},
  conf = {POPL 2019},
  booktitle = {\textbf{POPL 2019}: Proc. 46th ACM SIGPLAN Symposium on Principles of Programming Languages},
  year = {2019},
  month = jan,
  optnote = {Submitted for publication},
  abstract = {
We propose a novel, unified approach to the development of compositional symbolic execution tools, bridging the gap between classical symbolic execution and compositional program reasoning based on separation logic. Using this approach, we build a new analysis tool for JavaScript that follows the semantics of the language without any simplifications, with support for whole-program symbolic testing, verification, and, for the first time, automatic compositional testing based on bi-abduction. The meta-theory underpinning the tool is developed in a modular way, streamlining the proofs and informing the implementation. Our explicit treatment of symbolic execution errors allows us to give meaningful feedback to the developer during whole-program symbolic testing and guides the inference of resource of the bi-abductive execution. We evaluate the performance of our tool on a number of JavaScript data-structure libraries, demonstrating the scalability of whole-program symbolic testing, an improvement over our previous work on JavaScript verification, and automatic creation of useful specifications using bi-abduction.},
  pdf = {http://vtss.doc.ic.ac.uk/publications/FragosoSantos2019JaVerT.pdf}
}
@inproceedings{cheriABIsub,
  optkey = {},
  author = {Brooks Davis and
              Robert N. M. Watson and
              Alexander Richardson and
              Peter G. Neumann and
              Simon W. Moore and
              John Baldwin and
              David Chisnall and
              James Clarke and
              Nathaniel Wesley Filardo and 
Khilan Gudka and
              Alexandre Joannou and
              Ben Laurie and
              A. Theodore Markettos and
              J. Edward Maste and
              Alfredo Mazzinghi and
           Edward Tomasz Napierala and
              Robert M. Norton and
              Michael Roe and
              Peter Sewell and
              Stacey Son and
              Jonathan Woodruff},
  title = {{CheriABI}: Enforcing Valid Pointer Provenance and Minimizing
              Pointer Privilege in the {POSIX C} Run-time Environment},
  opthowpublished = {Submitted for publication},
  conf = {ASPLOS 2019},
  booktitle = {\textbf{ASPLOS 2019}: the 24th ACM International Conference on Architectural Support for Programming Languages and Operating Systems},
  month = apr,
  year = {2019},
  note = {Best paper award},
  url = {http://doi.acm.org/10.1145/3297858.3304042},
  doi = {10.1145/3297858.3304042},
  acmid = {3304042},
  publisher = {ACM},
  optnote = {To appear},
  optannote = {},
  abstract = {The CHERI architecture allows pointers to be implemented as
capabilities (rather than integer virtual addresses) in a manner that
is compatible with, and strengthens, the
semantics of the C language.
In addition to the spatial protections offered by conventional fat pointers,
CHERI capabilities offer strong integrity, enforced provenance validity, and
access monotonicity.
The stronger guarantees of these architectural capabilities must be reconciled with the real-world behavior of operating systems, run-time environments, and applications.
When the process model, user-kernel interactions, dynamic linking, and memory
management are all considered, we observe that simple derivation of architectural capabilities is insufficient to describe appropriate access to memory.
We bridge this conceptual gap with a notional \emph{abstract capability} that
describes the accesses that should be allowed at a given point in execution,
whether in the kernel or userspace.
To investigate this notion at scale, we describe the first adaptation of a
full C-language operating system (FreeBSD) with an enterprise database
(PostgreSQL) for complete spatial and referential memory safety.
We show that awareness of abstract capabilities, coupled with CHERI
architectural capabilities, can provide more complete protection, strong
compatibility, and acceptable performance overhead compared with the pre-CHERI
baseline and software-only approaches.
Our observations also have potentially significant implications for other
mitigation techniques.},
  pdf = {https://www.cl.cam.ac.uk/research/security/ctsrd/pdfs/201904-asplos-cheriabi.pdf}
}
@inproceedings{Dolan:2018:BDR:3192366.3192421,
  author = {Dolan, Stephen and Sivaramakrishnan, KC and Madhavapeddy, Anil},
  title = {Bounding Data Races in Space and Time},
  booktitle = {\textbf{PLDI 2018}: Proceedings of the 39th ACM SIGPLAN Conference on Programming Language Design and Implementation},
  series = {PLDI 2018},
  conf = {PLDI 2018},
  year = {2018},
  month = jun,
  isbn = {978-1-4503-5698-5},
  location = {Philadelphia, PA, USA},
  pages = {242--255},
  numpages = {14},
  url = {http://doi.acm.org/10.1145/3192366.3192421},
  doi = {10.1145/3192366.3192421},
  acmid = {3192421},
  publisher = {ACM},
  address = {New York, NY, USA},
  keywords = {operational semantics, weak memory models},
  pdf = {http://kcsrk.info/papers/pldi18-memory.pdf},
  abstract = {We propose a new semantics for shared-memory parallel programs that gives strong guarantees even in the presence of data races. Our \emph{local data race freedom} property guarantees that all data-race-free portions of programs exhibit sequential semantics. We provide a straightforward operational semantics and an equivalent axiomatic model, and evaluate an implementation for the OCaml programming language. Our evaluation demonstrates that it is possible to balance a comprehensible memory model with a reasonable (no overhead on x86, $\sim$0.6\% on ARM) sequential performance trade-off in a mainstream programming language.}
}
@phdthesis{PichonPhD,
  author = {Pichon-Pharabod, Jean},
  title = {A no-thin-air memory model for programming languages},
  school = {University of Cambridge},
  year = 2018,
  abstract = {Many hardware and compiler optimisations introduced to speed up single-threaded programs also introduce additional, sometimes surprising, behaviours for concurrent programs with shared mutable state. How many of these extra behaviours occur in practice depends on the combination of the hardware, compiler, runtime, etc. that make up the platform. A memory model, which prescribes what values each read of a concurrent program can read, allows programmers to determine whether a program behaves as expected without having to worry about the details of the platform. However, capturing these behaviours in a memory model without also including undesirable ``out-of-thin-air'' behaviours that do not occur in practice has proved elusive. The memory model of C and C++ allows out-of-thin-air behaviour, while the Java memory model fails to capture some behaviours that are introduced in practice by compiler optimisations.
In this thesis, we propose a memory model that forbids out-of-thin-air behaviour, yet allows the behaviours that do occur. Our memory model follows operational intuitions of how the hardware and compilers operate. We illustrate that it behaves as desired on a series of litmus tests. We show that it captures at least some of the expected behaviours, that it forms an envelope around some common compiler optimisations, and that it is implementable on common hardware using the expected compilation schemes. We also show that it supports some established programming idioms.}
}
@misc{N2219,
  optkey = {},
  author = {Kayvan Memarian and Victor Gomes and Peter Sewell},
  title = {N2219: Clarifying Pointer Provenance (Q1-Q20) v3},
  howpublished = {ISO SC22 WG14 N2219},
  html = {http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2219.htm},
  month = mar,
  year = 2018,
  project = {http://www.cl.cam.ac.uk/~pes20/cerberus},
  optnote = {},
  optannote = {},
  topic = {Cerberus}
}
@misc{N2220,
  optkey = {},
  author = {Kayvan Memarian and Victor Gomes and Peter Sewell},
  title = {N2220: Clarifying Trap Representations (Q47) v3},
  howpublished = {ISO SC22 WG14 N2220},
  month = mar,
  year = 2018,
  html = {http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2220.htm},
  project = {http://www.cl.cam.ac.uk/~pes20/cerberus},
  optnote = {},
  optannote = {},
  topic = {Cerberus}
}
@misc{N2221,
  optkey = {},
  author = {Kayvan Memarian and Victor Gomes and Peter Sewell},
  title = {N2221: Clarifying Unspecified Values (Q48-Q59) v3},
  howpublished = {ISO SC22 WG14 N2221},
  month = mar,
  year = 2018,
  html = {http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2221.htm},
  project = {http://www.cl.cam.ac.uk/~pes20/cerberus},
  optnote = {},
  optannote = {},
  topic = {Cerberus}
}
@misc{N2222,
  optkey = {},
  author = {Kayvan Memarian and Victor Gomes and Peter Sewell},
  title = {N2222: Further Pointer Issues (Q21-Q46)},
  howpublished = {ISO SC22 WG14 N2222},
  month = mar,
  year = 2018,
  html = {http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2222.htm},
  project = {http://www.cl.cam.ac.uk/~pes20/cerberus},
  optnote = {},
  optannote = {},
  topic = {Cerberus}
}
@misc{N2223,
  optkey = {},
  author = {Kayvan Memarian and Victor Gomes and Peter Sewell},
  title = {N2223: Clarifying the C Memory Object Model: Introduction to N2219 -- N2222},
  howpublished = {ISO SC22 WG14 N2223},
  month = mar,
  year = 2018,
  html = {http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2223.htm},
  project = {http://www.cl.cam.ac.uk/~pes20/cerberus},
  optnote = {},
  optannote = {},
  topic = {Cerberus}
}
@misc{N2263,
  optkey = {},
  author = {Kayvan Memarian and Victor Gomes and Peter Sewell},
  title = {N2263 Sewell,Clarifying Pointer Provenance (Q1-Q20) v4},
  howpublished = {ISO SC22 WG14 N2263},
  month = mar,
  year = 2018,
  html = {http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2263.htm},
  project = {http://www.cl.cam.ac.uk/~pes20/cerberus},
  optnote = {},
  optannote = {},
  topic = {Cerberus}
}
@inproceedings{cerberusBMC,
  author = {Stella Lau and Victor B. F. Gomes and Kayvan Memarian and Jean Pichon-Pharabod and Peter Sewell},
  title = {{Cerberus-BMC}: a Principled Reference Semantics and Exploration
  Tool for Concurrent and Sequential {C}},
  optcrossref = {},
  optkey = {},
  conf = {CAV 2019},
  booktitle = {\textbf{CAV 2019}: Proc. 31st International Conference on Computer-Aided Verification},
  optpages = {},
  year = {2019},
  opteditor = {},
  optvolume = {},
  optnumber = {},
  optseries = {},
  optaddress = {},
  month = jul,
  optorganization = {},
  optpublisher = {},
  note = {(tool paper)},
  optannote = {},
  pdf = {http://www.cl.cam.ac.uk/users/pes20/cerberus/bmc-cerberus.pdf},
  topic = {Cerberus},
  project = {http://www.cl.cam.ac.uk/~pes20/cerberus},
  abstract = {C remains central to our infrastructure, making verification of C code
an essential and much-researched topic, but the semantics of C is
remarkably complex, and important aspects of it are still unsettled,
leaving programmers and verification tool builders on shaky
ground.

This paper describes a tool, Cerberus-BMC, that for the first time
provides a principled reference semantics that simultaneously supports
(1) a choice of concurrency memory model (including substantial
fragments of the C11, RC11, and Linux kernel memory models), (2) a modern memory object model,
and (3) a well-validated thread-local semantics for a large fragment
of the language.
The tool should be useful for C programmers, compiler writers,
verification tool builders, and members of the C/C++ standards committees.
},
  recent = {true}
}
@inproceedings{weakening-webassembly-2019,
  author = {Conrad Watt and Andreas Rossberg and Jean Pichon-Pharabod},
  title = {Weakening {WebAssembly}},
  conf = {OOPSLA 2019},
  booktitle = {\textbf{OOPSLA 2019}: Proceedings of the ACM SIGPLAN International Conference on Object-Oriented Programming, Systems, Languages, and Applications},
  optbooktitle = {},
  year = {2019},
  opteditor = {},
  optvolume = {},
  optnumber = {},
  optseries = {},
  optpages = {},
  month = oct,
  optaddress = {},
  optorganization = {},
  optpublisher = {},
  note = {Proc. ACM Program. Lang. 3, OOPSLA, Article 133},
  optnote = {},
  optannote = {},
  doi = {10.1145/3360559},
  abstract = {WebAssembly (Wasm) is a safe, portable virtual instruction set that can be hosted in a wide range of environments, such as a Web browser. It is a low-level language whose instructions are intended to compile directly to bare hardware. While the initial version of Wasm focussed on single-threaded computation, a recent proposal extends it with low-level support for multiple threads and atomic instructions for synchronised access to shared memory. To support the correct compilation of concurrent programs, it is necessary to give a suitable specification of its memory model.

Wasm's language definition is based on a fully formalised specification that carefully avoids undefined behaviour. We present a substantial extension to this semantics, incorporating a relaxed memory model, along with a few proposed extensions. Wasm's memory model is unique in that its linear address space can be dynamically grown during execution, while all accesses are bounds-checked. This leads to the novel problem of specifying how observations about the size of the memory can propagate between threads. We argue that, considering desirable compilation schemes, we cannot give a sequentially consistent semantics to memory growth.

We show that our model provides sequential consistency for data-race-free executions (SC-DRF). However, because Wasm is to run on the Web, we must also consider interoperability of its model with that of JavaScript. We show, by counter-example, that JavaScript's memory model is not SC-DRF, in contrast to what is claimed in its specification. We propose two axiomatic conditions that should be added to the JavaScript model to correct this difference.

We also describe a prototype SMT-based litmus tool which acts as an oracle for our axiomatic model, visualising its behaviours, including memory resizing.
},
  pdf = {https://dl.acm.org/ft_gateway.cfm?id=3360559}
}
@inproceedings{Watt2019Program,
  author = {Watt, Conrad and Maksimovic, Petar and Krishnaswami, Neelakantan R. and Gardner, Philippa},
  title = {A Program Logic for First-Order Encapsulated {WebAssembly}},
  conf = {ECOOP 2019},
  booktitle = {\textbf{ECOOP 2019}: Proceedings of the 33 \textsuperscript{rd} European Conference on Object-Oriented Programming). },
  pages = {10:1–10:30 },
  month = jul,
  year = {2019},
  doi = { 10.4230/LIPIcs.ECOOP.2019.10},
  abstract = { We introduce Wasm Logic, a sound program logic for first-order, encapsulated WebAssembly. We
  design a novel assertion syntax, tailored to WebAssembly’s stack-based semantics and the strong
  guarantees given by WebAssembly’s type system, and show how to adapt the standard separation
  logic triple and proof rules in a principled way to capture WebAssembly’s uncommon structured
  control flow. Using Wasm Logic, we specify and verify a simple WebAssembly B-tree library, giving
  abstract specifications independent of the underlying implementation. We mechanise Wasm Logic
  and its soundness proof in full in Isabelle/HOL. As part of the soundness proof, we formalise
  and fully mechanise a novel, big-step semantics of WebAssembly, which we prove equivalent, up
  to transitive closure, to the original WebAssembly small-step semantics. Wasm Logic is the first
  program logic for WebAssembly, and represents a first step towards the creation of static analysis
  tools for WebAssembly.}
}
@techreport{UCAM-CL-TR-947,
  author = {Watson, Robert N. M. and Richardson, Alexander and Davis,
          	  Brooks and Baldwin, John and Chisnall, David and Clarke,
          	  Jessica and Filardo, Nathaniel and Moore, Simon W. and
          	  Napierala, Edward and Sewell, Peter and Neumann, Peter G.},
  title = {{CHERI C/C++ Programming Guide}},
  year = 2020,
  month = jun,
  url = {https://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-947.pdf},
  institution = {University of Cambridge, Computer Laboratory},
  number = {UCAM-CL-TR-947},
  abstract = {This document is a brief introduction to the CHERI C/C++ programming languages. We explain the principles underlying these language variants, and their grounding in CHERI’s multiple architectural instantiations: CHERI-MIPS, CHERI-RISC-V, and Arm’s Morello. We describe the most commonly encountered differences between these dialects and C/C++ on conventional architectures, and where existing software may require minor changes. We document new compiler warnings and errors that may be experienced compiling code with the CHERI Clang/LLVM compiler, and suggest how they may be addressed through typically minor source-code changes. We explain how modest language extensions allow selected software, such as memory allocators, to further refine permissions and bounds on pointers. This guidance is based on our experience adapting the FreeBSD operating-system userspace, and applications such as PostgreSQL and WebKit, to run in a CHERI C/C++ capability-based programming environment. We conclude by recommending further reading.},
  project = {https://www.cl.cam.ac.uk/research/security/ctsrd/cheri/},
  topic = {cheri},
  recent = {true}
}
@inproceedings{Xiong2020Data,
  author = {Shale Xiong and
            Andrea Cerone and
            Azalea Raad and
            Philippa Gardner},
  title = {Data Consistency in Transactional Storage Systems: a Centralised Approach},
  conf = {ECOOP 2020},
  booktitle = {\textbf{ECOOP 2020}: 34th European Conference on Object-Oriented Programming},
  project = {concurrency},
  year = {2020},
  abstract = {We introduce an interleaving operational semantics for describing the
client-observable behaviour of atomic transactions on distributed key-value stores.
Our semantics builds on abstract states comprising centralised, global key-value
stores and partial client views. We provide operational definitions of consistency
models for our abstract states which we show to be equivalent to the well known
declarative definitions of consistency model on abstract executions. We explore
two applications, verifying that the COPS replicated database and the Clock-SI
partitioned database satisfy their consistency models using trace refinement,
and proving invariant properties of client programs.}
}
@inproceedings{FragosoSantos2020Gillian,
  author = {Jos{\'{e}} {Fragoso Santos} and Petar Maksimovic and Sacha{-}{\'{E}}lie Ayoun and Philippa Gardner},
  title = { Gillian, Part I: A Multi-language Platform for Symbolic Execution },
  conf = {PLDI 2020},
  booktitle = {\textbf{PLDI 2020}: Proceedings of the 41st ACM SIGPLAN
International Conference on Programming Language Design and Implementation},
  year = {2020},
  doi = {10.1145/3385412.3386014},
  project = { gillian },
  abstract = { We introduce Gillian, a platform for developing symbolic
analysis tools for programming languages. Here, we focus on
the symbolic execution engine at the heart of Gillian, which
is parametric on the memory model of the target language.
We give a formal description of the symbolic analysis and
a modular implementation that closely follows this description.
We prove a parametric soundness result, introducing
restriction on abstract states, which generalises path conditions
used in classical symbolic execution. We instantiate
Gillian to obtain trusted symbolic testing tools for JavaScript
and C, and use these tools to find bugs in real-world code,
thus demonstrating the viability of our parametric approach.}
}
@inproceedings{Sampaio2020Trusted,
  author = {Gabriela Sampaio and
            Jos{\'{e}} {Fragoso Santos} and
            Petar Maksimovic
            and Philippa Gardner},
  title = {A Trusted Infrastructure for Symbolic Analysis of Event-Driven Web Applications},
  conf = {ECOOP 2020},
  booktitle = {\textbf{ECOOP 2020}: 34th European Conference on Object-Oriented Programming},
  project = { web },
  year = {2020},
  abstract = {We introduce a trusted infrastructure for symbolic analysis of modern
event-driven Web applications. This infrastructure consists of reference
implementations of the DOM Core Level 1 and UI Events, JavaScript Promises,
and the JavaScript async/await APIs, all underpinned by a simple Core Event
Semantics that is sufficiently expressive to describe the event models underlying
all these APIs. Our reference implementations are trustworthy in that they follow
the API respective standards line-by-line and they are thoroughly tested against
the appropriate official test-suites, passing all the applicable tests. Using the
Core Events Semantics and the reference implementations, we develop JaVerT.Click,
a symbolic execution tool for JavaScript that, for the first time, supports reasoning
about JavaScript programs that use some (possibly all) these APIs. Using JaVerT.Click,
we perform comprehensive symbolic testing of the events module of Cash, a widely-used jQuery
alternative, creating a symbolic test suite with 100 per cent line coverage, establishing bounded
correctness of several essential properties of the module, and discovering two subtle,
previously unknown bugs.}
}
@phdthesis{Xiong2020Parametric,
  author = {Shale Xiong},
  title = {Parametric Operational Semantics for Consistency Models},
  school = {Imperial College London},
  year = {2020},
  project = {concurrency},
  phdthesis = {Y},
  abstract = {The cloud has become popular for its low cost, high availability and
high fault-tolerance-queue or de-queue, for example Amazon Web Service (AWS) and
Google Cloud Platform (GCP. Those cloud infrastructures provide fixed interface,
to hide the complex internal implementation that consists of hundreds of thousands
of machines globally that work together as a whole system, known as a distributed
system. Clients of those systems only need to work with the abstract interfaces.
Transactions are the de facto interfaces in modern distributed databases.
Because of the CAP theorem, a distributed system must sacrifice strong consistency
to achieve high availability and high fault-tolerance. Engineers and researchers
have proposed many reference implementations in specific setting for various weak
consistency models. However, there have been little work on formalising the interfaces.
We introduce an interleaving operational semantics for describing such interfaces,
with the focus on the client-observable behaviour of atomic transactions on
distributed key-value stores. Our semantics builds on abstract states comprising
centralised, global key-value stores and partial client views. We provide
operational definitions of consistency models for our key-value stores which
are shown to be equivalent to the well-known declarative definitions of consistency
models for execution graphs. We explore two immediate applications of our semantics:
specific protocols of databases for a specific consistency can be verified in
our centralised semantics; programs can be directly shown to have invariant
properties such as robustness results against a weak consistency model.}
}
@inproceedings{cornucopia,
  author = {Nathaniel Wesley Filardo and Brett F. Gutstein and Jonathan Woodruff and Sam Ainsworth and Lucian Paul-Trifu and Brooks Davis and Hongyan Xia and Edward Tomasz Napierala and Alexander Richardson and John Baldwin and David Chisnall and Jessica Clarke and Khilan Gudka and Alexandre Joannou and A. Theodore Markettos and Alfredo Mazzinghi and Robert M. Norton and Michael Roe and Peter Sewell and Stacey Son and Timothy M. Jones and Simon W. Moore and Peter G. Neumann and and Robert N. M. Watson},
  conf = {Security and Privacy 2020},
  booktitle = {\textbf{Security and Privacy 2020}: Proceedings of the 41st IEEE Symposium on Security and Privacy (SP)},
  title = {Cornucopia: Temporal Safety for {CHERI} Heaps},
  year = {2020},
  volume = {},
  issn = {2375-1207},
  pages = {1507-1524},
  keywords = {},
  doi = {10.1109/SP40000.2020.00098},
  pdf = {https://www.cl.cam.ac.uk/research/security/ctsrd/pdfs/2020oakland-cornucopia.pdf},
  apollourl = {https://www.repository.cam.ac.uk/handle/1810/304040},
  publisherurl = {https://doi.ieeecomputersociety.org/10.1109/SP40000.2020.00098},
  publisher = {IEEE Computer Society},
  address = {Los Alamitos, CA, USA},
  month = {may},
  abstract = {Use-after-free violations of temporal memory safety continue to plague software systems, underpinning many high-impact exploits. The CHERI capability system shows great promise in achieving C and C++language spatial memory safety, preventing out-of-bounds accesses. Enforcing language-level temporal safety on CHERI requires capability revocation, traditionally achieved either via table lookups (avoided for performance in the CHERI design) or by identifying capabilities in memory to revoke them (similar to a garbage-collector sweep). CHERIvoke,a prior feasibility study, suggested that CHERI’s tagged capabilities could make this latter strategy viable, but modeled only architectural limits and did not consider the full implementation or evaluation of the approach.

Cornucopia is a lightweight capability revocation system for CHERI that implements non-probabilistic C/C++temporal memory safety for standard heap allocations. It extends the CheriBSD virtual-memory subsystem to track capability flow through memory and provides a concurrent kernel-resident revocation service that is amenable to multi-processor and hardware acceleration. We demonstrate an average overhead of less than 2\% and a worst-case of 8.9\% for concurrent revocation on compatible SPECCPU2006 benchmarks on a multi-core CHERI CPU on FPGA, and we validate Cornucopia against the Juliet test suite’s corpus of temporally unsafe programs. We test its compatibility with a large corpus of C programs by using a revoking allocator as the system allocator while booting multi-user CheriBSD. Cornucopia is a viable strategy for always-on temporal heap memory safety, suitable for production environments.
},
  project = {https://www.cl.cam.ac.uk/research/security/ctsrd/cheri/},
  topic = {cheri},
  recent = {true}
}

This file was generated by bibtex2html 1.99.