rems-task2.bib

@inproceedings{rems-fmics14,
  title = {Randomised Testing of a Microprocessor Model Using {SMT}-Solver State Generation},
  author = {Brian Campbell and Ian Stark},
  booktitle = {\textbf{FMICS 2014}: Formal Methods for Industrial Critical Systems},
  conf = {FMICS 2014},
  year = 2014,
  month = sep,
  editor = {Fr{\'{e}}d{\'{e}}ric Lang and Francesco Flammini},
  volume = 8718,
  series = {Lecture Notes in Computer Science},
  pages = {185--199},
  publisher = {Springer},
  doi = {10.1007/978-3-319-10702-8_13},
  slides = {rems/fmics-m0-talk.pdf},
  preprint = {rems/fmics2014.pdf},
  pdf = {http://homepages.inf.ed.ac.uk/bcampbe2/rems/fmics2014.pdf},
  preprintnote = {The final publication is available at link.springer.com},
  abstract = {We validate a HOL4 model of the ARM Cortex-M0 microcontroller core by testing the model's behaviour on randomly chosen
instructions against a real chip.
The model and our intended application involve precise timing information about instruction execution, but the implementations are pipelined,
so checking the behaviour of single instructions would not give us sufficient confidence in the model. Thus we test the model using sequences
of randomly chosen instructions.
The main challenge is to meet the constraints on the initial and intermediate execution states: we must ensure that memory accesses are in
range and that we respect restrictions on the instructions. By careful
transformation of these constraints an off-the-shelf SMT solver can be
used to find suitable states for executing test sequences.
}
}

@article{rems-scico15,
  author = {Brian Campbell and Ian Stark},
  title = {Randomised Testing of a Microprocessor Model Using {SMT}-Solver State Generation},
  journal = {\textbf{Sci. Comput. Program.}},
  volume = 118,
  pages = {60--76},
  year = 2016,
  month = mar,
  issn = {0167-6423},
  doi = {10.1016/j.scico.2015.10.012},
  pdf = {http://homepages.inf.ed.ac.uk/bcampbe2/rems/scico2015.pdf},
  abstract = {We validate a HOL4 model of the ARM Cortex-M0 microcontroller core by testing the model’s behaviour on randomly chosen instructions against real chips from several manufacturers.
The model and our intended application involve precise timing information about instruction execution, but the implementations are pipelined, so checking the behaviour of single instructions would not give us sufficient confidence in the model. Thus we test the model using sequences of randomly chosen instructions.
The main challenge is to meet the constraints on the initial and intermediate execution states: we must ensure that memory accesses are in range and that we respect restrictions on the instructions. By careful transformation of these constraints an off-the-shelf SMT solver can be used to find suitable states for executing test sequences. We also use additional constraints to test our hypotheses about the timing anomalies encountered.}
}

@misc{HCSS2015-Flur,
  optkey = {},
  author = {S. Flur and K. Gray and G. Kerneis and D. Mulligan and C. Pulte and S. Sarkar and P. Sewell},
  title = {Rigorous Architectural Modelling for Production Multiprocessors},
  opthowpublished = {},
  month = may,
  year = 2015,
  note = {Presentation at HCSS 2015: the Fifteenth Annual
High Confidence Software and Systems Conference},
  optannote = {},
  abstract = {
Processor architectures are critical interfaces in computing, but they
are typically defined only by prose and pseudocode documentation.
This is especially problematic for the subtle concurrency behaviour of
weakly consistent multiprocessors such as ARM and IBM POWER: the
traditional documentation does not define precisely what
programmer-observable behaviour is (and is not) allowed for concurrent
code; it is not executable as a test oracle for pre-Silicon or
post-Silicon hardware testing; it is not executable as an emulator for
software testing; and it is not mathematically rigorous enough to
serve as a foundation for software verification.

We present a rigorous architectural envelope model for IBM POWER
multiprocessors, and similar work in progress for ARM, that aims to
support all of these for small-but-intricate test cases, integrating
an operational concurrency model with an ISA model for the sequential
behaviour of a substantial fragment of the user-mode instruction set
(largely automatically derived from the vendor pseudocode, and
expressed in a new ISA description language).  The key question is the
interface between these two: we have to allow all the required
relaxed-memory behaviour, without overcommitting to some particular
microarchitectural model.  Our models can be automatically translated
into executable code, which, combined with front-ends for concurrency
litmus tests and ELF executables, can interactively or exhaustively
explore all the allowed behaviours of small test cases.
}
}

@inproceedings{DBLP:conf/asplos/AlglaveBDGKPSW15,
  author = {Jade Alglave and
               Mark Batty and
               Alastair F. Donaldson and
               Ganesh Gopalakrishnan and
               Jeroen Ketema and
               Daniel Poetzl and
               Tyler Sorensen and
               John Wickerson},
  title = {{GPU} Concurrency: Weak Behaviours and Programming Assumptions},
  booktitle = {\textbf{ASPLOS 2015}: Proceedings of the Twentieth International Conference on Architectural
               Support for Programming Languages and Operating Systems, Istanbul, Turkey},
  conf = {ASPLOS 2015},
  pages = {577--591},
  year = {2015},
  month = mar,
  optcrossref = {DBLP:conf/asplos/2015},
  url = {http://doi.acm.org/10.1145/2694344.2694391},
  doi = {10.1145/2694344.2694391},
  timestamp = {Thu, 26 Feb 2015 17:21:18 +0100},
  biburl = {http://dblp.uni-trier.de/rec/bib/conf/asplos/AlglaveBDGKPSW15},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  abstract = {
Concurrency is pervasive and perplexing, particularly on graphics processing units (GPUs). Current specifications of languages and hardware are inconclusive; thus programmers often rely on folklore assumptions when writing software.

To remedy this state of affairs, we conducted a large empirical study of the concurrent behaviour of deployed GPUs. Armed with litmus tests (i.e. short concurrent programs), we questioned the assumptions in programming guides and vendor documentation about the guarantees provided by hardware. We developed a tool to generate thousands of litmus tests and run them under stressful workloads. We observed a litany of previously elusive weak behaviours, and exposed folklore beliefs about GPU programming---often supported by official tutorials---as false.

As a way forward, we propose a model of Nvidia GPU hardware, which correctly models every behaviour witnessed in our experiments. The model is a variant of SPARC Relaxed Memory Order (RMO), structured following the GPU concurrency hierarchy.
}
}

@inproceedings{CHERI-Oakland,
  author = {Robert N. M. Watson and Jonathan Woodruff and Peter G. Neumann and Simon W. Moore and Jonathan Anderson and David Chisnall and Nirav Dave and Brooks Davis and Khilan Gudka and Ben Laurie and Steven J. Murdoch and Robert Norton and Michael Roe and Stacey Son and Munraj Vadera},
  title = {{CHERI}: A Hybrid Capability-System Architecture for Scalable Software Compartmentalization},
  optcrossref = {},
  optkey = {},
  booktitle = {\textbf{Security and Privacy 2015}: {Proceedings of the 36th IEEE Symposium on Security and Privacy (``Oakland'')}},
  conf = {Security and Privacy 2015},
  optpages = {},
  year = {2015},
  opteditor = {},
  optvolume = {},
  optnumber = {},
  optseries = {},
  optaddress = {},
  month = may,
  optorganization = {},
  optpublisher = {},
  optnote = {},
  optannote = {},
  doi = {http://dx.doi.org/10.1109/SP.2015.9},
  pdf = {http://www.cl.cam.ac.uk/research/security/ctsrd/pdfs/201505-oakland2015-cheri-compartmentalization.pdf},
  abstract = {CHERI extends a conventional RISC Instruction-Set Architecture, compiler, and operating system to support
fine-grained, capability-based memory protection to mitigate
memory-related vulnerabilities in C-language TCBs. We describe
how CHERI capabilities can also underpin a hardware-software
object-capability model for application compartmentalization
that can mitigate broader classes of attack. Prototyped as an
extension to the open-source 64-bit BERI RISC FPGA softcore processor, FreeBSD operating system, and LLVM compiler,
we demonstrate multiple orders-of-magnitude improvement in
scalability, simplified programmability, and resulting tangible
security benefits as compared to compartmentalization based on
pure Memory-Management Unit (MMU) designs. We evaluate
incrementally deployable CHERI-based compartmentalization
using several real-world UNIX libraries and applications.
}
}

@inproceedings{DBLP:conf/micro/GrayKMPSS15,
  author = {Kathryn E. Gray and
               Gabriel Kerneis and
               Dominic P. Mulligan and
               Christopher Pulte and
               Susmit Sarkar and
               Peter Sewell},
  title = {An integrated concurrency and core-{ISA} architectural envelope definition,
               and test oracle, for {IBM} {POWER} multiprocessors},
  abstract = {Weakly consistent multiprocessors such as ARM and IBM POWER have been with us for decades, but their subtle programmer-visible concurrency behaviour remains challenging, both to implement and to use; the traditional architecture documentation, with its mix of prose and pseudocode, leaves much unclear.

In this paper we show how a precise architectural envelope model for such architectures can be defined, taking IBM POWER as our example.  Our model specifies, for an arbitrary test program, the set of all its allowable executions, not just those of some particular implementation.  The model integrates an operational concurrency model with an ISA model for the fixed-point non-vector user-mode instruction set (largely automatically derived from the vendor pseudocode, and expressed in a new ISA description language).  The key question is the interface between these two: allowing all the required concurrency behaviour, without overcommitting to some particular microarchitectural implementation, requires a novel abstract structure.

Our model is expressed in a mathematically rigorous language that can be automatically translated to an executable test-oracle tool; this lets one either interactively explore or exhaustively compute the set of all allowed behaviours of intricate test cases, to provide a reference for hardware and software development.
},
  booktitle = {\textbf{MICRO 2015}: Proceedings of the 48th International Symposium on Microarchitecture,
            , Waikiki, HI, USA},
  conf = {MICRO 2015},
  pages = {635--646},
  year = {2015},
  month = dec,
  optcrossref = {DBLP:conf/micro/2015},
  url = {http://doi.acm.org/10.1145/2830772.2830775},
  pdf = {http://www.cl.cam.ac.uk/~pes20/micro-48-2015.pdf},
  doi = {10.1145/2830772.2830775},
  timestamp = {Sun, 10 Jan 2016 12:42:56 +0100},
  biburl = {http://dblp.uni-trier.de/rec/bib/conf/micro/GrayKMPSS15},
  bibsource = {dblp computer science bibliography, http://dblp.org}
}

@inproceedings{DBLP:conf/popl/FlurGPSSMDS16,
  author = {Shaked Flur and
               Kathryn E. Gray and
               Christopher Pulte and
               Susmit Sarkar and
               Ali Sezgin and
               Luc Maranget and
               Will Deacon and
               Peter Sewell},
  title = {Modelling the {ARMv8} architecture, operationally: concurrency and {ISA}},
  abstract = {In this paper we develop semantics for key aspects of the ARMv8 multiprocessor architecture: the concurrency model and much of the 64-bit application-level instruction set (ISA).  Our goal is to clarify what the range of architecturally allowable behaviour is, and thereby to support future work on formal verification, analysis, and testing of concurrent ARM software and hardware.

Establishing such models with high confidence is intrinsically difficult: it involves capturing the vendor's architectural intent, aspects of which (especially for concurrency) have not previously been precisely defined.  We therefore first develop a concurrency model with a microarchitectural flavour, abstracting from many hardware implementation concerns but still close to hardware-designer intuition.  This means it can be discussed in detail with ARM architects.  We then develop a more abstract model, better suited for use as an architectural specification, which we prove sound w.r.t.~the first.

The instruction semantics involves further difficulties, handling the mass of detail and the subtle intensional information required to interface to the concurrency model.  We have a novel ISA description language, with a lightweight dependent type system, letting us do both with a rather direct representation of the ARM reference manual instruction descriptions.

We build a tool from the combined semantics that lets one explore, either interactively or exhaustively, the full range of architecturally allowed behaviour, for litmus tests and (small) ELF executables.  We prove correctness of some optimisations needed for tool performance.

We validate the models by discussion with ARM staff, and by comparison against ARM hardware behaviour, for ISA single- instruction tests and concurrent litmus tests.},
  booktitle = {\textbf{POPL 2016}: Proceedings of the 43rd Annual {ACM} {SIGPLAN-SIGACT} Symposium on
               Principles of Programming Languages, St. Petersburg,
               FL, USA },
  conf = {POPL 2016},
  pages = {608--621},
  year = {2016},
  month = jan,
  optcrossref = {DBLP:conf/popl/2016},
  url = {http://doi.acm.org/10.1145/2837614.2837615},
  pdf = {http://www.cl.cam.ac.uk/~pes20/popl16-armv8/top.pdf},
  doi = {10.1145/2837614.2837615},
  timestamp = {Fri, 08 Jan 2016 18:18:25 +0100},
  biburl = {http://dblp.uni-trier.de/rec/bib/conf/popl/FlurGPSSMDS16},
  bibsource = {dblp computer science bibliography, http://dblp.org}
}

@inproceedings{DBLP:conf/memocode/NaylorM15,
  author = {Matthew Naylor and
       Simon W. Moore},
  title = {A generic synthesisable test bench},
  booktitle = {\textbf{MEMOCODE 2015}: {ACM/IEEE} International Conference on Formal
               Methods and Models
       for Codesign,  Austin, TX, USA},
  conf = {MEMOCODE 2015},
  pages = {128--137},
  year = {2015},
  month = sep,
  url = {http://dx.doi.org/10.1109/MEMCOD.2015.7340479},
  doi = {10.1109/MEMCOD.2015.7340479},
  timestamp = {Tue, 08 Dec 2015 17:08:30 +0100},
  biburl = {http://dblp.uni-trier.de/rec/bib/conf/memocode/NaylorM15},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  pdf = {http://www.cl.cam.ac.uk/research/security/ctsrd/pdfs/201509-memocode2015-bluecheck.pdf},
  abstract = {
Writing test benches is one of the most frequently-performed tasks in
the hardware development process.  The ability to reuse common
test bench features is therefore key to productivity.  In this paper,
we present a generic test bench, parameterised by a
specification of correctness, which can be used to test any
design.  Our test bench provides several important features, including
automatic test-sequence generation and shrinking of counter-examples,
and is fully synthesisable, allowing rigorous testing on FPGA as well
as in simulation.  The approach is easy to use, cheap to implement,
and encourages the formal specification of hardware components through
the reward of automatic testing and simple failure cases.
  }
}

@techreport{UCAM-CL-TR-876,
  author = {Watson, Robert N. M. and Neumann, Peter G. and Woodruff,
          	  Jonathan and Roe, Michael and Anderson, Jonathan and
          	  Chisnall, David and Davis, Brooks and Joannou, Alexandre
          	  and Laurie, Ben and Moore, Simon W. and Murdoch, Steven J.
          	  and Norton, Robert and Son, Stacey},
  title = {{Capability Hardware Enhanced RISC Instructions: CHERI
         	   Instruction-Set Architecture}},
  year = 2015,
  month = sep,
  pdf = {http://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-876.pdf},
  institution = {University of Cambridge, Computer Laboratory},
  number = {UCAM-CL-TR-876},
  abstract = {This technical report describes CHERI ISAv4, the fourth version of the Capability Hardware Enhanced RISC Instructions (CHERI) Instruction-Set Architecture (ISA). CHERI is being developed by SRI International and the University of Cambridge. This design captures four years of research, development, refinement, formal analysis, and testing, and is a substantial enhancement to the ISA version described in UCAM-CL-TR-850. Key improvements lie in tighter C-language integration, and more mature support for software object-capability models; these changes result from experience gained in adapting substantial software stacks to run on prototype hardware.

The CHERI instruction set is based on a hybrid capability-system architecture that adds new capability-system primitives to a commodity 64-bit RISC ISA enabling software to efficiently implement fine-grained memory protection and a hardware-software object-capability security model. These extensions support incrementally adoptable, high-performance, formally based, programmer-friendly underpinnings for fine-grained software decomposition and compartmentalization, motivated by and capable of enforcing the principle of least privilege.

The CHERI system architecture purposefully addresses known performance and robustness gaps in commodity ISAs that hinder the adoption of more secure programming models centered around the principle of least privilege. To this end, CHERI blends traditional paged virtual memory with a per-address-space capability model that includes capability registers, capability instructions, and tagged memory that have been added to the 64-bit MIPS ISA via a new capability coprocessor. CHERI also learns from the C-language fat-pointer literature: CHERI capabilities can describe not only regions of memory, but can also capture C pointer semantics allowing capabilities to be substituted for pointers in generated code.

CHERI’s hybrid system approach, inspired by the Capsicum security model, allows incremental adoption of capability-oriented software design: software implementations that are more robust and resilient can be deployed where they are most needed, while leaving less critical software largely unmodified, but nevertheless suitably constrained to be incapable of having adverse effects. For example, we are focusing conversion efforts on low-level TCB components of the system: separation kernels, hypervisors, operating system kernels, language runtimes, and userspace TCBs such as web browsers. Likewise, we see early-use scenarios (such as data compression, protocol parsing, image processing, and video processing) that relate to particularly high-risk software libraries, which are concentrations of both complex and historically vulnerability-prone code combined with untrustworthy data sources, while leaving containing applications unchanged.

This report describes the CHERI Instruction-Set Architecture (ISA) and design, and provides reference documentation and potential memory models, along with their requirements. It also briefly addresses the CHERI system hardware-software architecture, documenting our current thinking on integrating programming languages and operating systems with the CHERI hardware.}
}

@techreport{UCAM-CL-TR-877,
  author = {Watson, Robert N. M. and Chisnall, David and Davis, Brooks
                  and Koszek, Wojciech and Moore, Simon W. and Murdoch,
                  Steven J. and Neumann, Peter G. and Woodruff, Jonathan},
  title = {{Capability Hardware Enhanced RISC Instructions: CHERI
                   Programmer's Guide}},
  year = 2015,
  month = sep,
  pdf = {http://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-877.pdf},
  institution = {University of Cambridge, Computer Laboratory},
  number = {UCAM-CL-TR-877},
  abstract = {The CHERI Programmer's Guide documents the software environment for the Capability Hardware Enhanced RISC Instructions (CHERI) prototype developed by SRI International and the
University of Cambridge. The Guide is targeted at hardware and software developers working
with capability-enhanced software. It describes how to use the CHERI Clang/LLVM compiler
suite and CheriBSD operating system -- versions of the off-the-shelf LLVM suite and FreeBSD
operating system adapted to use CHERI’s protection features -- as well as implementation details of both.}
}

@inproceedings{NaylorFMCAD2016,
  author = {M. Naylor and S. W. Moore and A. Mujumdar},
  booktitle = {\textbf{FMCAD 2016}: Formal Methods in Computer-Aided Design},
  conf = {FMCAD 2016},
  title = {A Consistency Checker for Memory Subsystem Traces},
  year = {2016},
  month = sep,
  abstract = {Verifying the memory subsystem in a modern
shared-memory multiprocessor is a big challenge. Optimized
implementations are highly sophisticated, yet must provide subtle
consistency and liveness guarantees for the correct execution of
concurrent programs. We present a tool that supports efficient
specification-based testing of the memory subsystem against a
range of formally specified consistency models. Our tool operates
directly on the memory subsystem interface, promoting a compositional
approach to system-on-chip verification, and can be
used to search for simple failure cases – assisting rapid debug. It
has recently been incorporated into the development flows of two
open-source implementations -- Berkeley’s Rocket Chip (RISCV)
and Cambridge’s BERI (MIPS) -- where it has uncovered a
number of serious bugs.},
  pdf = {http://www.cl.cam.ac.uk/~pes20/rems/papers/fmcad2016-camera-ready.pdf},
  doi = {http://dx.doi.org/10.17863/CAM.4451}
}

@inproceedings{mixed17,
  author = {Shaked Flur and Susmit Sarkar and Christopher Pulte and Kyndylan Nienhuis and Luc Maranget and Kathryn E. Gray and Ali Sezgin and Mark Batty and Peter Sewell},
  title = {Mixed-size Concurrency: {ARM}, {POWER}, {C/C++11}, and {SC}},
  booktitle = {\textbf{POPL 2017}: The 44st Annual {ACM} {SIGPLAN-SIGACT} Symposium on Principles of
               Programming Languages,  Paris, France},
  conf = {POPL 2017},
  year = {2017},
  month = jan,
  pages = {429--442},
  numpages = {14},
  url = {http://doi.acm.org/10.1145/3009837.3009839},
  doi = {10.1145/3009837.3009839},
  abstract = {Previous work on the semantics of relaxed shared-memory concurrency has only considered the case in which each load reads the data of exactly one store.  In practice, however, multiprocessors support mixed-size accesses, and these are used by systems software and (to some degree) exposed at the C/C++ language level.  A semantic foundation for software, therefore, has to address them.


We investigate the mixed-size behaviour of ARMv8 and IBM POWER architectures and implementations: by experiment, by developing semantic models, by testing the correspondence between these, and by discussion with ARM and IBM staff.  This turns out to be surprisingly subtle, and on the way we have to revisit the fundamental concepts of coherence and sequential consistency, which change in this setting.  In particular, we show that adding a memory barrier between each instruction does not restore sequential consistency.  We go on to extend the C/C++11 model to support non-atomic mixed-size memory accesses.


This is a necessary step towards semantics for real-world shared-memory concurrent code, beyond litmus tests.
},
  pdf = {http://www.cl.cam.ac.uk/~pes20/popl17/mixed-size.pdf}
}

@article{Watson2015,
  author = {Robert N.M. Watson and Robert M. Norton and Jonathan Woodruff
  and Simon W. Moore and Peter G. Neumann and Jonathan Anderson and
  David Chisnall and Brooks Davis and Ben Laurie and Michael Roe and
  Nirav H. Dave and Khilan Gudka and Alexandre Joannou and
  A. Theodore Markettos and Ed Maste and Steven J. Murdoch and Colin Rothwell
  and Stacey D. Son and Munraj Vadera},
  title = {Fast Protection-Domain Crossing in the {CHERI} Capability-System
  Architecture},
  journal = {IEEE Micro},
  volume = {36},
  number = {5},
  pages = {38--49},
  month = sep,
  year = 2016,
  doi = {10.1109/MM.2016.84},
  pdf = {http://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=7723791},
  abstract = {Capability Hardware Enhanced RISC Instructions (CHERI) supplement
the conventional memory management unit (MMU) with instruction-set architecture
(ISA) extensions that implement a capability system model in the address space.
CHERI can also underpin a hardware-software object-capability model for
scalable application compartmentalization that can mitigate broader classes of
attack. This article describes ISA additions to CHERI that support fast
protection-domain switching, not only in terms of low cycle count, but also
efficient memory sharing with mutual distrust. The authors propose ISA support
for sealed capabilities, hardware-assisted checking during protection-domain
switching, a lightweight capability flow-control model, and fast register
clearing, while retaining the flexibility of a software-defined
protection-domain transition model. They validate this approach through a
full-system experimental design, including ISA extensions, a field-programmable
gate array prototype (implemented in Bluespec SystemVerilog), and a software
stack including an OS (based on FreeBSD), compiler (based on LLVM), software
compartmentalization model, and open-source applications.}
}

@inproceedings{rems-fmcad16,
  title = {Extracting Behaviour from an Executable Instruction Set Model},
  author = {Brian Campbell and Ian Stark},
  booktitle = {\textbf{FMCAD 2016}: Formal Methods in Computer-Aided Design},
  conf = {FMCAD 2016},
  year = 2016,
  month = oct,
  editor = {Ruzica Piskac and Muralidhar Talupur},
  pages = {33--40},
  isbn = {978-0-9835678-6-8},
  pdf = {http://homepages.inf.ed.ac.uk/bcampbe2/rems/fmcad16.pdf},
  note = {Full proceedings \url{http://www.cs.utexas.edu/users/hunt/FMCAD/FMCAD16/proceedings/fmcad-2016-proceedings.pdf}},
  abstract = {Presenting large formal instruction set models as executable functions makes them accessible to engineers and useful for less formal purposes such as simulation.  However, it is more difficult to extract information about the behaviour of individual instructions for reasoning.  We present a method which combines symbolic evaluation and symbolic execution techniques to provide a rule-based view of instruction behaviour, with particular application to automatic test generation for large MIPS-like models.}
}

@techreport{UCAM-CL-TR-907,
  author = {Watson, Robert N. M. and Neumann, Peter G. and Woodruff,
          	 Jonathan and Roe, Michael and Anderson, Jonathan and
          	 Baldwin, John and Chisnall, David and Davis, Brooks and
          	 Joannou, Alexandre and Laurie, Ben and Moore, Simon W. and
          	 Murdoch, Steven J. and Norton, Robert and Son, Stacey and
          	 Xia, Hongyan},
  title = {{Capability Hardware Enhanced RISC Instructions: CHERI
         	  Instruction-Set Architecture (Version 6)}},
  year = 2017,
  month = apr,
  pdf = {http://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-907.pdf},
  institution = {University of Cambridge, Computer Laboratory},
  number = {UCAM-CL-TR-907},
  abstract = {This technical report describes CHERI ISAv6, the sixth version of the Capability Hardware Enhanced RISC Instructions (CHERI) Instruction-Set Architecture (ISA) being developed by SRI International and the University of Cambridge. This design captures seven years of research, development, experimentation, refinement, formal analysis, and validation through hardware and software implementation. CHERI ISAv6 is a substantial enhancement to prior ISA versions: it introduces support for kernel-mode compartmentalization, jump-based rather than exception-based domain transition, architecture-abstracted and efficient tag restoration, and more efficient generated code. A new chapter addresses potential applications of the CHERI model to the RISC-V and x86-64 ISAs, previously described relative only to the 64-bit MIPS ISA. CHERI ISAv6 better explains our design rationale and research methodology.

CHERI is a hybrid capability-system architecture that adds new capability-system primitives to a commodity 64-bit RISC ISA enabling software to efficiently implement fine-grained memory protection and scalable software compartmentalization. Design goals have included incremental adoptability within current ISAs and software stacks, low performance overhead for memory protection, significant performance improvements for software compartmentalization, formal grounding, and programmer-friendly underpinnings. Throughout, we have focused on providing strong and efficient architectural foundations for the principles of least privilege and intentional use in the execution of software at multiple levels of abstraction, preventing and mitigating vulnerabilities.

The CHERI system architecture purposefully addresses known performance and robustness gaps in commodity ISAs that hinder the adoption of more secure programming models centered around the principle of least privilege. To this end, CHERI blends traditional paged virtual memory with an in-address-space capability model that includes capability registers, capability instructions, and tagged memory. CHERI builds on C-language fat-pointer literature: its capabilities describe fine-grained regions of memory and can be substituted for data or code pointers in generated code, protecting data and also improving control-flow robustness. Strong capability integrity and monotonicity properties allow the CHERI model to express a variety of protection properties, from enforcing valid C-language pointer provenance and bounds checking to implementing the isolation and controlled communication structures required for software compartmentalization.

CHERI’s hybrid capability-system approach, inspired by the Capsicum security model, allows incremental adoption of capability-oriented design: software implementations that are more robust and resilient can be deployed where they are most needed, while leaving less critical software largely unmodified, but nevertheless suitably constrained to be incapable of having adverse effects. Potential deployment scenarios include low-level software Trusted Computing Bases (TCBs) such as separation kernels, hypervisors, and operating-system kernels, as well as userspace TCBs such as language runtimes and web browsers. Likewise, we see early-use scenarios (such as data compression, protocol parsing, and image processing) that relate to particularly high-risk software libraries, which are concentrations of both complex and historically vulnerability-prone code exposed to untrustworthy data sources, while leaving containing applications unchanged.}
}

@inproceedings{Joannou2017Tags,
author = {Alexandre Joannou and Jonathan Woodruff and Robert Kovacsics and Simon W. Moore and Alex Bradbury and Hongyan Xia and Robert N. M. Watson and David Chisnall and Michael Roe and Brooks Davis and Edward Napierala and John Baldwin and Khilan Gudka and Peter G. Neumann and Alfredo Mazzinghi and Alex Richardson and Stacey Son and A. Theodore Markettos},
title = {Efficient Tagged Memory},
booktitle = {\textbf{ICCD 2017}: IEEE 35th International Conference on Computer Design},
conf = {ICCD 2017},
year = {2017},
month = nov,
url = {},
doi = {},
pdf = {http://www.cl.cam.ac.uk/research/security/ctsrd/pdfs/201711-iccd2017-efficient-tags.pdf},
abstract = {We characterize the cache behavior of an in-memory tag table and demonstrate that an optimized implementation can typically achieve a near-zero memory traffic overhead. Both industry and academia have repeatedly demonstrated tagged memory as a key mechanism to enable enforcement of power- ful security invariants, including capabilities, pointer integrity, watchpoints, and information-flow tracking. A single-bit tag shadowspace is the most commonly proposed requirement, as one bit is the minimum metadata needed to distinguish between an untyped data word and any number of new hardware- enforced types. We survey various tag shadowspace approaches and identify their common requirements and positive features of their implementations. To avoid non-standard memory widths, we identify the most practical implementation for tag storage to be an in-memory table managed next to the DRAM controller. We characterize the caching performance of such a tag table and demonstrate a DRAM traffic overhead below 5\% for the vast majority of applications. We identify spatial locality on a page scale as the primary factor that enables surprisingly high table cache-ability. We then demonstrate tag-table compression for a set of common applications. A hierarchical structure with elegantly simple optimizations reduces DRAM traffic overhead to below 1% for most applications. These insights and optimizations pave the way for commercial applications making use of single-bit tags stored in commodity memory.}
}

@inproceedings{armv8-mca,
  author = {Christopher Pulte and Shaked Flur and Will Deacon and Jon French and Susmit Sarkar and Peter Sewell},
  title = {{Simplifying ARM Concurrency: Multicopy-atomic Axiomatic and Operational Models for ARMv8}},
  booktitle = {\textbf{POPL 2018}},
  conf = {POPL 2018},
  optcrossref = {},
  optkey = {},
  optbooktitle = {},
  optpages = {},
  year = {2018},
  opteditor = {},
  optvolume = {},
  optnumber = {},
  optseries = {},
  optaddress = {},
  month = jan,
  issn = {2475-1421},
  pages = {19:1--19:29},
  articleno = {19},
  numpages = {29},
  url = {http://doi.acm.org/10.1145/3158107},
  doi = {10.1145/3158107},
  optorganization = {},
  optpublisher = {},
  optnote = {Draft},
  optannote = {},
  pdf = {http://www.cl.cam.ac.uk/~pes20/armv8-mca/armv8-mca-draft.pdf},
  project = {http://www.cl.cam.ac.uk/~pes20/armv8-mca/},
  abstract = {ARM has a relaxed memory model, previously specified in informal prose for ARMv7 and ARMv8.  Over time, and partly due to work building formal semantics for ARM concurrency, it has become clear that some of the complexity of the model is not justified by the potential benefits.  In particular, the model was originally \emph{non-multicopy-atomic}: writes could become visible to some other threads before becoming visible to all --- but this has not been exploited in production implementations, the corresponding potential hardware optimisations are thought to have insufficient benefits in the ARM context, and it gives rise to subtle complications when combined with other ARMv8 features.  The ARMv8 architecture has therefore been revised: it now has a multicopy-atomic model.  It has also been simplified in other respects, including more straightforward notions of dependency, and the architecture now includes a formal concurrency model.

In this paper we detail these changes and discuss their motivation. We define two formal concurrency models: an operational one, simplifying the Flowing model of Flur et al., and the axiomatic model of the revised ARMv8 specification. The models were developed by an academic group and by ARM staff, respectively, and this extended collaboration partly motivated the above changes. We prove the equivalence of the two models. The operational model is integrated into an executable exploration tool with new web interface, demonstrated by exhaustively checking the possible behaviours of a loop-unrolled version of a Linux kernel lock implementation, a previously known bug due to unprevented speculation, and a fixed version.
},
  topic = {Power_and_ARM},
  recent = {true}
}

@inproceedings{sail-popl2019,
  author = {Alasdair Armstrong and Thomas Bauereiss and Brian Campbell and Alastair Reid and Kathryn E. Gray and Robert M. Norton and Prashanth Mundkur and Mark Wassell and Jon French and Christopher Pulte and Shaked Flur and Ian Stark and Neel Krishnaswami and Peter Sewell},
  title = {{ISA} Semantics for {ARMv8-A, RISC-V, and CHERI-MIPS}},
  optcrossref = {},
  optkey = {},
  conf = {POPL 2019},
  booktitle = {\textbf{POPL 2019}: Proc. 46th ACM SIGPLAN Symposium on Principles of Programming Languages},
  optbooktitle = {},
  year = {2019},
  opteditor = {},
  optvolume = {},
  optnumber = {},
  optseries = {},
  optpages = {},
  month = jan,
  optaddress = {},
  optorganization = {},
  optpublisher = {},
  note = {Proc. ACM Program. Lang. 3, POPL, Article 71},
  optnote = {},
  optannote = {},
  doi = {10.1145/3290384},
  abstract = {Architecture specifications notionally define the fundamental interface between hardware and software: the envelope of allowed behaviour for processor implementations, and the basic assumptions for software development and verification.  But in practice, they are typically prose and pseudocode documents, not rigorous or executable artifacts, leaving software and verification on shaky ground.

In this paper, we present rigorous semantic models for the sequential behaviour of large parts of the mainstream ARMv8-A, RISC-V, and MIPS architectures, and the research CHERI-MIPS architecture, that are complete enough to boot operating systems, variously Linux, FreeBSD, or seL4.  Our ARMv8-A models are automatically translated from authoritative ARM-internal definitions, and (in one variant) tested against the ARM Architecture Validation Suite.

We do this using a custom language for ISA semantics, Sail, with a lightweight dependent type system, that supports automatic generation of emulator code in C and OCaml, and automatic generation of proof-assistant definitions for Isabelle, HOL4, and (currently only for MIPS) Coq.  We use the former for validation, and to assess specification coverage.  To demonstrate the usability of the latter, we prove (in Isabelle) correctness of a purely functional characterisation of ARMv8-A address translation.  We moreover integrate the RISC-V model into the RMEM tool for (user-mode) relaxed-memory concurrency exploration.  We prove (on paper) the soundness of the core Sail type system.

We thereby take a big step towards making the architectural abstraction actually well-defined, establishing foundations for verification and reasoning.
},
  pdf = {http://www.cl.cam.ac.uk/users/pes20/sail/sail-popl2019.pdf},
  topic = {ISA_semantics},
  project = {http://www.cl.cam.ac.uk/~pes20/sail/},
  recent = {true}
}

@book{riscv-20181221-Public-Review-draft,
  optkey = {},
  editor = {Andrew Waterman and Krste Asanovi{\'{c}}},
  optauthor = {},
  title = {The {RISC-V} Instruction Set Manual  {Volume I: Unprivileged ISA}},
  howpublished = {\url{https://github.com/riscv/riscv-isa-manual/releases/download/draft-20181227-c6741cb/riscv-spec.pdf}},
  month = dec,
  year = 2018,
  note = {Document Version {20181221-Public-Review-draft. Contributors: Arvind, Krste Asanovi\'{c}, Rimas Avi\v{z}ienis, Jacob Bachmeyer, Christopher
F. Batten, Allen J. Baum, Alex Bradbury, Scott Beamer, Preston Briggs,
Christopher Celio, Chuanhua Chang, David Chisnall, Paul Clayton, Palmer
Dabbelt, Roger Espasa, Shaked Flur, Stefan Freudenberger, Jan Gray, Michael
Hamburg, John Hauser, David Horner, Bruce Hoult, Alexandre Joannou,
Olof Johansson, Ben Keller, Yunsup Lee,
Paul Loewenstein, Daniel Lustig, Yatin Manerkar, Luc Maranget, Margaret
Martonosi, Joseph Myers, Vijayanand Nagarajan, Rishiyur Nikhil, Jonas
Oberhauser, Stefan O'Rear, Albert Ou, John Ousterhout, David Patterson,
Christopher Pulte, Jose Renau, Colin Schmidt, Peter Sewell, Susmit Sarkar,
Michael Taylor, Wesley Terpstra, Matt Thomas, Tommy Thorn, Caroline Trippel,
Ray VanDeWalker, Muralidaran Vijayaraghavan, Megan Wachs, Andrew Waterman,
Robert Watson, Derek Williams, Andrew Wright, Reinoud Zandijk, and Sizhuo Zhang}},
  pdf = {https://github.com/riscv/riscv-isa-manual/releases/download/draft-20181227-c6741cb/riscv-spec.pdf},
  optannote = {}
}

@inproceedings{promisingARM,
  author = {Christopher Pulte and
               Jean Pichon-Pharabod and 
               Jeehoon Kang and 
               Sung-Hwan Lee and 
               Chung-Kil Hur},
  abstract = {For ARMv8 and RISC-V, there are concurrency models in two styles,
               extensionally equivalent: axiomatic models, expressing the concurrency
               semantics in terms of global properties of complete executions; and
               operational models, that compute incrementally. The latter are in an
               abstract microarchitectural style: they execute each instruction in
               multiple steps, out-of-order and with explicit branch speculation.
               This similarity to hardware implementations has been important in
               developing the models and in establishing confidence, but involves
               complexity that, for programming and model-checking, one would prefer
               to avoid.

               We present new more abstract operational models for ARMv8 and RISC-V,
               and an exploration tool based on them. The models compute the allowed
               concurrency behaviours incrementally based on thread-local conditions
               and are significantly simpler than the existing operational models:
               executing instructions in a single step and (with the exception of
               early writes) in program order, and without branch speculation.  We
               prove the models equivalent to the existing ARMv8 and RISC-V axiomatic
               models in Coq. The exploration tool is the first such tool for ARMv8
               and RISC-V fast enough for exhaustively checking the concurrency
               behaviour of a number of interesting examples. We demonstrate using
               the tool for checking several standard concurrent datastructure and
               lock implementations, and for interactively stepping through
               model-allowed executions for debugging.},
  url = {https://www.cl.cam.ac.uk/~cp526/promising_camera_ready.pdf},
  title = {Promising-{ARM/RISC-V}: A Simpler and Faster Operational Concurrency Model},
  conf = {PLDI 2019},
  booktitle = {\textbf{PLDI 2019:} Proceedings of the 40th ACM SIGPLAN Conference on
               Programming Language Design and Implementation},
  year = {2019},
  month = jun,
  doi = {10.1145/3314221.3314624}
}

@techreport{UCAM-CL-TR-927,
  author = {Watson, Robert N. M. and Neumann, Peter G. and Woodruff,
          	  Jonathan and Roe, Michael and Almatary, Hesham and
          	  Anderson, Jonathan and Baldwin, John and Chisnall, David
          	  and Davis, Brooks and Filardo, Nathaniel Wesley and
          	  Joannou, Alexandre and Laurie, Ben and Markettos, A.
          	  Theodore and Moore, Simon W. and Murdoch, Steven J. and
          	  Nienhuis, Kyndylan and Norton, Robert and Richardson, Alex
          	  and Rugg, Peter and Sewell, Peter and Son, Stacey and Xia,
          	  Hongyan},
  title = {{Capability Hardware Enhanced RISC Instructions: CHERI
         	   Instruction-Set Architecture (Version 7)}},
  year = 2019,
  month = jun,
  url = {https://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-927.pdf},
  institution = {University of Cambridge, Computer Laboratory},
  number = {UCAM-CL-TR-927},
  abstract = {This technical report describes CHERI ISAv7, the seventh version of the
Capability Hardware Enhanced RISC Instructions (CHERI) Instruction-Set
Architecture (ISA)
being developed by SRI International and the University of Cambridge.
This design captures nine years of research, development, experimentation,
refinement, formal analysis, and validation through hardware and software
implementation.
CHERI ISAv7 is a substantial enhancement to prior ISA versions.
We differentiate an architecture-neutral protection model vs.
architecture-specific instantiations in 64-bit MIPS, 64-bit RISC-V, and
x86-64.
We have defined a new CHERI Concentrate compression model.
CHERI-RISC-V is more substantially elaborated.
A new compartment-ID register assists in resisting microarchitectural
side-channel attacks.
Experimental features include linear capabilities, capability coloring,
temporal memory safety, and 64-bit capabilities for 32-bit architectures.

CHERI is a {\em hybrid capability-system architecture} that adds new
capability-system primitives to commodity 64-bit RISC ISAs, enabling software
to efficiently implement {\em fine-grained memory protection} and {\em
scalable software compartmentalization}.
Design goals include incremental adoptability within current ISAs and
software stacks, low performance overhead for memory protection, significant
performance improvements for software compartmentalization, formal grounding,
and programmer-friendly underpinnings.
We have focused on providing strong, non-probabilistic, efficient
architectural foundations for the principles of \textit{least privilege} and
\textit{intentional use} in the execution of software at multiple levels of
abstraction, preventing and mitigating vulnerabilities.

The CHERI system architecture purposefully addresses known performance and
robustness gaps in commodity ISAs that hinder the adoption of more secure
programming models centered around the principle of least privilege.
To this end, CHERI blends traditional paged virtual memory with an
in-address-space capability model that includes capability registers,
capability instructions, and tagged memory.
CHERI builds on the C-language fat-pointer literature: its capabilities can
describe fine-grained regions of memory, and can be substituted for data or
code pointers in generated code, protecting data and also improving
control-flow robustness.
Strong capability integrity and monotonicity properties allow the CHERI model
to express a variety of protection properties, from enforcing valid C-language
pointer provenance and bounds checking to implementing the isolation and
controlled communication structures required for software
compartmentalization.

CHERI's hybrid capability-system approach, inspired by the Capsicum security
model, allows incremental adoption of capability-oriented design: software
implementations that are more robust and resilient can be deployed where they
are most needed, while leaving less critical software largely unmodified, but
nevertheless suitably constrained to be incapable of having adverse effects.
Potential deployment scenarios include low-level software Trusted Computing
Bases (TCBs) such as separation kernels, hypervisors, and operating-system
kernels, as well as userspace TCBs such as language runtimes and web browsers.
We also see potential early-use scenarios around particularly high-risk
software libraries (such as data compression, protocol parsing, and image
processing), which are concentrations of both complex and historically
vulnerability-prone code exposed to untrustworthy data sources, while leaving
containing applications unchanged.}
}

@techreport{cheri-formal-tr,
  optkey = {},
  author = {
  Kyndylan Nienhuis
  and Alexandre Joannou
  and Anthony Fox
  and Michael Roe
  and Thomas Bauereiss
  and Brian Campbell
  and Matthew Naylor
  and Robert M. Norton
  and Simon W. Moore
  and Peter G. Neumann
  and Ian Stark
  and Robert N. M. Watson
  and Peter Sewell
  },
  title = {Rigorous engineering for hardware security: Formal modelling and proof in the {CHERI} design and implementation process},
  institution = {University of Cambridge, Computer Laboratory},
  number = {UCAM-CL-TR-940},
  opthowpublished = {Draft},
  month = sep,
  year = {2019},
  optnote = {},
  optannote = {},
  abstract = {The root causes of many security vulnerabilities include a pernicious combination of two problems, often regarded as inescapable aspects of computing.  First, the protection mechanisms provided by the mainstream processor architecture and C/C++ language abstractions, dating back to the 1970s and before, provide only coarse-grain virtual-memory-based protection.  Second, mainstream system engineering relies almost exclusively on test-and-debug methods, with (at best) prose specifications.  These methods have historically sufficed commercially for much of the computer industry, but they fail to prevent large numbers of exploitable bugs, and the security problems that this causes are becoming ever more acute.

In this paper we show how more rigorous engineering methods can be applied to the development of a new security-enhanced processor architecture, with its accompanying hardware implementation and software stack.  We use formal models of the complete instruction-set architecture (ISA) at the heart of the design and engineering process, both in lightweight ways that support and improve normal engineering practice -- as documentation, in emulators used as a test oracle for hardware and for running software, and for test generation -- and for formal verification.  We formalise key intended security properties of the design, and establish that these hold with mechanised proof.  This is for the same complete ISA models (complete enough to boot operating systems), without idealisation.

We do this for CHERI, an architecture with \emph{hardware capabilities} that supports fine-grained memory protection and scalable secure compartmentalisation, while offering a smooth adoption path for existing software.  CHERI is a maturing research architecture, developed since 2010, with work now underway to explore its possible adoption in mass-market commercial processors.  The rigorous engineering work described here has been an integral part of its development to date, enabling more rapid and confident experimentation, and boosting confidence in the design.
},
  optproject = {https://www.cl.cam.ac.uk/research/security/ctsrd/cheri/},
  opttopic = {cheri},
  cheriformal = {true},
  recent = {true},
  pdf = {https://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-940.pdf}
}

@techreport{UCAM-CL-TR-932,
  author = {Davis, Brooks and Watson, Robert N. M. and Richardson,
          	  Alexander and Neumann, Peter G. and Moore, Simon W. and
          	  Baldwin, John and Chisnall, David and Clarke, James and
          	  Filardo, Nathaniel Wesley and Gudka, Khilan and Joannou,
          	  Alexandre and Laurie, Ben and Markettos, A. Theodore and
          	  Maste, J. Edward and Mazzinghi, Alfredo and Napierala,
          	  Edward Tomasz and Norton, Robert M. and Roe, Michael and
          	  Sewell, Peter and Son, Stacey and Woodruff, Jonathan},
  title = {{CheriABI: Enforcing valid pointer provenance and
         	   minimizing pointer privilege in the POSIX C run-time
         	   environment}},
  year = 2019,
  month = jan,
  pdf = {https://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-932.pdf},
  institution = {University of Cambridge, Computer Laboratory},
  number = {UCAM-CL-TR-932},
  abstract = {The CHERI architecture allows pointers to be implemented as capabilities (rather than integer virtual addresses) in a manner that is compatible with, and strengthens, the semantics of the C language. In addition to the spatial protections offered by conventional fat pointers, CHERI capabilities offer strong integrity, enforced provenance validity, and access monotonicity.

The stronger guarantees of these architectural capabilities must be reconciled with the real-world behavior of operating systems, run-time environments, and applications. When the process model, user-kernel interactions, dynamic linking, and memory management are all considered, we observe that simple derivation of architectural capabilities is insufficient to describe appropriate access to memory. We bridge this conceptual gap with a notional abstract capability that describes the accesses that should be allowed at a given point in execution, whether in the kernel or userspace.

To investigate this notion at scale, we describe the first adaptation of a full C-language operating system (FreeBSD) with an enterprise database (PostgreSQL) for complete spatial and referential memory safety. We show that awareness of abstract capabilities, coupled with CHERI architectural capabilities, can provide more complete protection, strong compatibility, and acceptable performance overhead compared with the pre-CHERI baseline and software-only approaches. Our observations also have potentially significant implications for other mitigation techniques.},
  project = {https://www.cl.cam.ac.uk/research/security/ctsrd/cheri/},
  topic = {cheri},
  cheriformal = {true},
  recent = {false}
}

@techreport{UCAM-CL-TR-941,
  author = {Watson, Robert N. M. and  Moore, Simon W. and
          	  Sewell, Peter and Neumann, Peter},
  title = {An Introduction to {CHERI}},
  year = 2019,
  month = sep,
  pdf = {https://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-941.pdf},
  institution = {University of Cambridge, Computer Laboratory},
  number = {UCAM-CL-TR-941},
  abstract = {},
  project = {https://www.cl.cam.ac.uk/research/security/ctsrd/cheri/},
  topic = {cheri},
  cheriformal = {true},
  recent = {false}
}

@article{8703061,
  author = {J. {Woodruff} and A. {Joannou} and H. {Xia} and A. {Fox} and R. M. {Norton} and D. {Chisnall} and B. {Davis} and K. {Gudka} and N. W. {Filardo} and A. T. {Markettos} and M. {Roe} and P. G. {Neumann} and R. N. M. {Watson} and S. W. {Moore}},
  journal = {IEEE Transactions on Computers},
  title = {CHERI Concentrate: Practical Compressed Capabilities},
  year = {2019},
  month = oct,
  volume = {68},
  number = {10},
  pages = {1455-1469},
  abstract = {We present CHERI Concentrate, a new fat-pointer compression scheme applied to CHERI, the most developed capability-pointer system at present. Capability fat pointers are a primary candidate to enforce fine-grained and non-bypassable security properties in future computer systems, although increased pointer size can severely affect performance. Thus, several proposals for capability compression have been suggested elsewhere that do not support legacy instruction sets, ignore features critical to the existing software base, and also introduce design inefficiencies to RISC-style processor pipelines. CHERI Concentrate improves on the state-of-the-art region-encoding efficiency, solves important pipeline problems, and eases semantic restrictions of compressed encoding, allowing it to protect a full legacy software stack. We present the first quantitative analysis of compiled capability code, which we use to guide the design of the encoding format. We analyze and extend logic from the open-source CHERI prototype processor design on FPGA to demonstrate encoding efficiency, minimize delay of pointer arithmetic, and eliminate additional load-to-use delay. To verify correctness of our proposed high-performance logic, we present a HOL4 machine-checked proof of the decode and pointer-modify operations. Finally, we measure a 50 to 75 percent reduction in L2 misses for many compiled C-language benchmarks running under a commodity operating system using compressed 128-bit and 64-bit formats, demonstrating both compatibility with and increased performance over the uncompressed, 256-bit format.},
  keywords = {data compression;decoding;encoding;field programmable gate arrays;microprocessor chips;pipeline processing;program compilers;reduced instruction set computing;storage management;theorem proving;tree data structures;practical compressed capabilities;fat-pointer compression scheme;developed capability-pointer system;capability fat pointers;nonbypassable security properties;future computer systems;increased pointer size;capability compression;legacy instruction sets;existing software base;design inefficiencies;RISC-style processor pipelines;state-of-the-art region-encoding efficiency;compressed encoding;legacy software stack;compiled capability code;encoding format;open-source CHERI prototype processor design;pointer arithmetic;decode;pointer-modify operations;commodity operating system;CHERI concentrate;pipeline problems;efficiency 75.0 percent;Encoding;Pipelines;Delays;Safety;Software;Semantics;Registers;Capabilities;fat pointers;compression;memory safety;computer architecture},
  doi = {10.1109/TC.2019.2914037},
  pdf = {https://www.cl.cam.ac.uk/research/security/ctsrd/pdfs/2019tc-cheri-concentrate.pdf},
  issn = {},
  month = {Oct}
}

@inproceedings{cheri-formal-SP2020,
  author = {Kyndylan Nienhuis and Alexandre Joannou and Thomas Bauereiss and Anthony Fox and Michael Roe and Brian Campbell and Matthew Naylor and Robert M. Norton and Moore, Simon W.  and Neumann, Peter G.  and Ian Stark and Watson, Robert N. M.  and Peter Sewell},
  title = {Rigorous engineering for hardware security: Formal modelling and proof in the {CHERI} design and implementation process},
  optcrossref = {},
  optkey = {},
  conf = {Security and Privacy 2020},
  booktitle = {\textbf{Security and Privacy 2020}: Proceedings of the 41st IEEE Symposium on Security and Privacy (SP)},
  year = {2020},
  opteditor = {},
  optvolume = {},
  optnumber = {},
  optseries = {},
  optpages = {},
  month = may,
  optaddress = {},
  optorganization = {},
  optpublisher = {},
  optnote = {},
  optannote = {},
  abstract = {The root causes of many security vulnerabilities include a pernicious combination of two problems, often regarded as inescapable aspects of computing.  First, the protection mechanisms provided by the mainstream processor architecture and C/C++ language abstractions, dating back to the 1970s and before, provide only coarse-grain virtual-memory-based protection.  Second, mainstream system engineering relies almost exclusively on test-and-debug methods, with (at best) prose specifications.  These methods have historically sufficed commercially for much of the computer industry, but they fail to prevent large numbers of exploitable bugs, and the security problems that this causes are becoming ever more acute.

In this paper we show how more rigorous engineering methods can be applied to the development of a new security-enhanced processor architecture, with its accompanying hardware implementation and software stack.  We use formal models of the complete instruction-set architecture (ISA) at the heart of the design and engineering process, both in lightweight ways that support and improve normal engineering practice -- as documentation, in emulators used as a test oracle for hardware and for running software, and for test generation -- and for formal verification.  We formalise key intended security properties of the design, and establish that these hold with mechanised proof.  This is for the same complete ISA models (complete enough to boot operating systems), without idealisation.

We do this for CHERI, an architecture with \emph{hardware capabilities} that supports fine-grained memory protection and scalable secure compartmentalisation, while offering a smooth adoption path for existing software.  CHERI is a maturing research architecture, developed since 2010, with work now underway on an Arm industrial prototype to explore its possible adoption in mass-market commercial processors.  The rigorous engineering work described here has been an integral part of its development to date, enabling more rapid and confident experimentation, and boosting confidence in the design.
},
  pdf = {https://www.cl.cam.ac.uk/users/pes20/cheri-formal.pdf},
  project = {https://www.cl.cam.ac.uk/research/security/ctsrd/cheri/},
  topic = {cheri},
  cheriformal = {true},
  recent = {true}
}

@inproceedings{iflat-esop2020-extended,
  author = {Ben Simner and Shaked Flur and Christopher Pulte and Alasdair Armstrong and Jean Pichon-Pharabod and Luc Maranget and Peter Sewell},
  title = {{ARMv8-A} system semantics: instruction fetch in relaxed architectures (extended version)},
  optcrossref = {},
  optkey = {},
  conf = {ESOP 2020},
  booktitle = {\textbf{ESOP 2020:} Proceedings of the 29th European Symposium on Programming},
  year = {2020},
  opteditor = {},
  optvolume = {},
  optnumber = {},
  optseries = {},
  optpages = {},
  month = apr,
  optaddress = {},
  optorganization = {},
  optpublisher = {},
  note = {This is an extended version of the ESOP 2020 paper, with appendices giving additional details},
  optannote = {},
  abstract = {Computing relies on \emph{architecture specifications} to decouple hardware and software development.  Historically these have been prose documents, with all the problems that entails, but research over the last ten years has developed rigorous and executable-as-test-oracle specifications of mainstream architecture instruction sets and ``user-mode'' concurrency, clarifying architectures and bringing them into the scope of programming-language semantics and verification.  However, the \emph{system semantics}, of instruction-fetch and cache maintenance, exceptions and interrupts, and address translation, remains obscure, leaving us without a solid foundation for verification of security-critical systems software.

In this paper we establish a robust model for one aspect of system semantics: instruction fetch and cache maintenance for ARMv8-A.  Systems code relies on executing instructions that were written by data writes, e.g.~in program loading, dynamic linking, JIT compilation, debugging, and OS configuration, but hardware implementations are often highly optimised, e.g.~with instruction caches, linefill buffers, out-of-order fetching, branch prediction, and instruction prefetching, which can affect programmer-observable behaviour.  It is essential, both for programming and verification, to abstract from such microarchitectural details as much as possible, but no more.  We explore the key architecture design questions with a series of examples, discussed in detail with senior Arm staff; capture the architectural intent in operational and axiomatic semantic models, extending previous work on ``user-mode'' concurrency; make these models executable as test oracles for small examples; and experimentally validate them against hardware behaviour (finding a bug in one hardware device).  We thereby bring these subtle issues into the mathematical domain, clarifying the architecture and enabling future work on system software verification.
},
  topic = {Power_and_ARM},
  recent = {true},
  project = {http://www.cl.cam.ac.uk/~pes20/iflat},
  pdf = {http://www.cl.cam.ac.uk/~pes20/iflat/top-extended.pdf}
}

This file was generated by bibtex2html 1.99.