cheriformal.bib

@comment{{This file has been generated by bib2bib 1.99}}

@comment{{Command line: bibtex2html-1.99-with-magiclink/bib2bib -c cheriformal:"true" -ob cheriformal.bib sewellbib2.bib cheriformal-extra.bib}}

@comment{{Cerberus, PLDI 2016: de facto C semantics survey and the Cerberus formal model.
  Author initials are written with spaces (N. M.) so BibTeX sees each initial as its own name token.}}
@inproceedings{Cerberus-PLDI16,
  author = {Kayvan Memarian and
            Justus Matthiesen and
            James Lingard and
            Kyndylan Nienhuis and
            David Chisnall and
            Robert N. M. Watson and
            Peter Sewell},
  title = {Into the depths of {C}: elaborating the de facto standards},
  abstract = {
C remains central to our computing infrastructure.  It is notionally defined by ISO standards, but in reality the properties of C assumed by systems code and those implemented by compilers have diverged, both from the ISO standards and from each other, and none of these are clearly understood.

We make two contributions to help improve this error-prone situation.  First, we describe an in-depth analysis of the design space for the semantics of pointers and memory in C as it is used in practice.  We articulate many specific questions, build a suite of semantic test cases, gather experimental data from multiple implementations, and survey what C experts believe about the de facto standards.  We identify questions where there is a consensus (either following ISO or differing) and where there are conflicts. We apply all this to an experimental C implemented above capability hardware.  Second, we describe a formal model, Cerberus, for large parts of C.  Cerberus is parameterised on its memory model; it is linkable either with a candidate de facto memory object model, under construction, or with an operational C11 concurrency model; it is defined by elaboration to a much simpler Core language for accessibility, and it is executable as a test oracle on small examples.

This should provide a solid basis for discussion of what mainstream C is now: what programmers and analysis tools can assume and what compilers aim to implement. Ultimately we hope it will be a step towards clear, consistent, and accepted semantics for the various use-cases of C.
},
  optcrossref = {},
  optkey = {},
  conf = {PLDI 2016},
  booktitle = {Proceedings of the 37th ACM SIGPLAN conference on Programming Language Design and Implementation},
  optpages = {},
  year = {2016},
  opteditor = {},
  optvolume = {},
  optnumber = {},
  optseries = {},
  optaddress = {},
  month = jun,
  optorganization = {},
  optpublisher = {},
  note = {PLDI 2016 Distinguished Paper award},
  project = {http://www.cl.cam.ac.uk/~pes20/cerberus},
  url = {http://doi.acm.org/10.1145/2908080.2908081},
  doi = {10.1145/2908080.2908081},
  pdf = {http://www.cl.cam.ac.uk/users/pes20/cerberus/pldi16.pdf},
  optannote = {},
  topic = {Cerberus},
  topictwo = {cheri},
  topicthree = {WG14},
  cheriformal = {true},
  recent = {false}
}

@comment{{POPL 2019 paper on C semantics and pointer provenance (Cerberus project).}}
@inproceedings{cerberus-popl2019,
  author                = {Kayvan Memarian and
                           Victor B. F. Gomes and
                           Brooks Davis and
                           Stephen Kell and
                           Alexander Richardson and
                           Robert N. M. Watson and
                           Peter Sewell},
  title                 = {Exploring {C} Semantics and Pointer Provenance},
  optcrossref           = {},
  optkey                = {},
  conf                  = {POPL 2019},
  booktitle             = {Proceedings of the 46th ACM SIGPLAN Symposium on Principles of Programming Languages},
  optbooktitle          = {},
  year                  = {2019},
  opteditor             = {},
  optvolume             = {},
  optnumber             = {},
  optseries             = {},
  optpages              = {},
  month                 = jan,
  optaddress            = {},
  optorganization       = {},
  optpublisher          = {},
  note                  = {Proc. ACM Program. Lang. 3, POPL, Article 67. Also available as ISO/IEC JTC1/SC22/WG14 N2311 },
  optannote             = {},
  doi                   = {10.1145/3290380},
  apollourl             = {https://www.repository.cam.ac.uk/handle/1810/288833},
  pdf                   = {http://www.cl.cam.ac.uk/users/pes20/cerberus/cerberus-popl2019.pdf},
  supplementarymaterial = {http://www.cl.cam.ac.uk/users/pes20/cerberus/supplementary-material-popl2019},
  topic                 = {Cerberus},
  topictwo              = {cheri},
  topicthree            = {WG14},
  cheriformal           = {true},
  project               = {http://www.cl.cam.ac.uk/~pes20/cerberus},
  abstract              = {The semantics of pointers and memory objects in C has been a vexed question for many years.  C values cannot be treated as either purely abstract or purely concrete entities: the language exposes their representations, but compiler optimisations rely on analyses that reason about provenance and initialisation status, not just runtime representations. The ISO WG14 standard leaves much of this unclear, and in some respects differs with de facto standard usage --- which itself is difficult to investigate.

In this paper we explore the possible source-language semantics for memory objects and pointers, in ISO C and in C as it is used and implemented in practice, focussing especially on pointer provenance.  We aim to, as far as possible, reconcile the ISO C standard, mainstream compiler behaviour, and the semantics relied on by the corpus of existing C code.  We present two coherent proposals, tracking provenance via integers and not; both address many design questions. We highlight some pros and cons and open questions, and illustrate the discussion with a library of test cases.  We make our semantics executable as a test oracle, integrating it with the Cerberus semantics for much of the rest of C, which we have made substantially more complete and robust, and equipped with a web-interface GUI.  This allows us to experimentally assess our proposals on those test cases.  To assess their viability with respect to larger bodies of C code, we analyse the changes required and the resulting behaviour for a port of FreeBSD to CHERI, a research architecture supporting hardware capabilities, which (roughly speaking) traps on the memory safety violations which our proposals deem undefined behaviour. We also develop a new runtime instrumentation tool to detect possible provenance violations in normal C code, and apply it to some of the SPEC benchmarks.  We compare our proposal with a source-language variant of the twin-allocation LLVM semantics proposal of Lee et al.  Finally, we describe ongoing interactions with WG14, exploring how our proposals could be incorporated into the ISO standard.
},
  elver                 = {true},
  optrecent             = {true}
}

@comment{{POPL 2019 paper on Sail ISA semantics for ARMv8-A, RISC-V and CHERI-MIPS.}}
@inproceedings{sail-popl2019,
  author                = {Alasdair Armstrong and
                           Thomas Bauereiss and
                           Brian Campbell and
                           Alastair Reid and
                           Kathryn E. Gray and
                           Robert M. Norton and
                           Prashanth Mundkur and
                           Mark Wassell and
                           Jon French and
                           Christopher Pulte and
                           Shaked Flur and
                           Ian Stark and
                           Neel Krishnaswami and
                           Peter Sewell},
  title                 = {{ISA} Semantics for {ARMv8-A, RISC-V, and CHERI-MIPS}},
  optcrossref           = {},
  optkey                = {},
  conf                  = {POPL 2019},
  booktitle             = {Proceedings of the 46th ACM SIGPLAN Symposium on Principles of Programming Languages},
  optbooktitle          = {},
  year                  = {2019},
  opteditor             = {},
  optvolume             = {},
  optnumber             = {},
  optseries             = {},
  optpages              = {},
  month                 = jan,
  optaddress            = {},
  optorganization       = {},
  optpublisher          = {},
  note                  = {Proc. ACM Program. Lang. 3, POPL, Article 71},
  optnote               = {},
  optannote             = {},
  doi                   = {10.1145/3290384},
  abstract              = {Architecture specifications notionally define the fundamental interface between hardware and software: the envelope of allowed behaviour for processor implementations, and the basic assumptions for software development and verification.  But in practice, they are typically prose and pseudocode documents, not rigorous or executable artifacts, leaving software and verification on shaky ground.

In this paper, we present rigorous semantic models for the sequential behaviour of large parts of the mainstream ARMv8-A, RISC-V, and MIPS architectures, and the research CHERI-MIPS architecture, that are complete enough to boot operating systems, variously Linux, FreeBSD, or seL4.  Our ARMv8-A models are automatically translated from authoritative ARM-internal definitions, and (in one variant) tested against the ARM Architecture Validation Suite.

We do this using a custom language for ISA semantics, Sail, with a lightweight dependent type system, that supports automatic generation of emulator code in C and OCaml, and automatic generation of proof-assistant definitions for Isabelle, HOL4, and (currently only for MIPS) Coq.  We use the former for validation, and to assess specification coverage.  To demonstrate the usability of the latter, we prove (in Isabelle) correctness of a purely functional characterisation of ARMv8-A address translation.  We moreover integrate the RISC-V model into the RMEM tool for (user-mode) relaxed-memory concurrency exploration.  We prove (on paper) the soundness of the core Sail type system.

We thereby take a big step towards making the architectural abstraction actually well-defined, establishing foundations for verification and reasoning.
},
  apollourl             = {https://www.repository.cam.ac.uk/handle/1810/288588},
  pdf                   = {http://www.cl.cam.ac.uk/users/pes20/sail/sail-popl2019.pdf},
  supplementarymaterial = {http://www.cl.cam.ac.uk/users/pes20/sail/popl2019.html},
  topic                 = {ISA_semantics},
  topictwo              = {cheri},
  cheriformal           = {true},
  project               = {http://www.cl.cam.ac.uk/~pes20/sail/},
  elver                 = {true},
  optrecent             = {true}
}

@comment{{ARW 2018 two-page abstract on Sail ISA models.
  The secondary topic uses the field name topictwo, as in the other entries of this file.}}
@inproceedings{sail-arw18,
  author = {Alasdair Armstrong and
            Thomas Bauereiss and
            Brian Campbell and
            Shaked Flur and
            Kathryn E. Gray and
            Prashanth Mundkur and
            Robert M. Norton and
            Christopher Pulte and
            Alastair Reid and
            Peter Sewell and
            Ian Stark and
            Mark Wassell},
  title = {Detailed Models of Instruction Set Architectures: From
  Pseudocode to Formal Semantics},
  optcrossref = {},
  optkey = {},
  conf = {Automated Reasoning Workshop (ARW) 2018},
  booktitle = {Proc. Automated Reasoning Workshop},
  year = {2018},
  opteditor = {},
  optvolume = {},
  optnumber = {},
  optseries = {},
  pages = {23--24},
  month = apr,
  optaddress = {},
  optorganization = {},
  optpublisher = {},
  note = {Two-page abstract},
  optnote = {Proceedings available at \url{https://www.cl.cam.ac.uk/events/arw2018/arw2018-proc.pdf}},
  optannote = {},
  project = {http://www.cl.cam.ac.uk/~pes20/sail/},
  pdf = {http://www.cl.cam.ac.uk/~pes20/sail/2018-04-12-arw-paper.pdf},
  abstract = {Processor instruction set architectures (ISAs) are typically specified using a mixture of prose and
pseudocode. We present ongoing work on expressing such specifications rigorously and automatically
translating them to interactive theorem prover definitions, making them amenable to mechanised proof. Our ISA
descriptions are written in Sail—a custom ISA specification language designed to support idioms from
various processor vendor’s pseudocode, with lightweight dependent typing for bitvectors, targeting a variety of
use cases including sequential and concurrent ISA semantics. From Sail we aim to portably generate usable
theorem prover definitions for multiple provers, including Isabelle, HOL4, and Coq. We are focusing on the
full ARMv8.3-A specification, CHERI-MIPS, and RISC-V, together with fragments of IBM POWER and x86.},
  topic = {ISA_semantics},
  topictwo = {cheri},
  cheriformal = {true}
}

@comment{{CheriABI, ASPLOS 2019.
  booktitle carries the Proceedings-of prefix used by the other conference entries in this file.}}
@inproceedings{cheriABIsub,
  optkey = {},
  author = {Brooks Davis and
            Robert N. M. Watson and
            Alexander Richardson and
            Peter G. Neumann and
            Simon W. Moore and
            John Baldwin and
            David Chisnall and
            James Clarke and
            Nathaniel Wesley Filardo and
            Khilan Gudka and
            Alexandre Joannou and
            Ben Laurie and
            A. Theodore Markettos and
            J. Edward Maste and
            Alfredo Mazzinghi and
            Edward Tomasz Napierala and
            Robert M. Norton and
            Michael Roe and
            Peter Sewell and
            Stacey Son and
            Jonathan Woodruff},
  title = {{CheriABI}: Enforcing Valid Pointer Provenance and Minimizing
              Pointer Privilege in the {POSIX C} Run-time Environment},
  opthowpublished = {Submitted for publication},
  conf = {ASPLOS 2019},
  booktitle = {Proceedings of the 24th ACM International Conference on Architectural Support for Programming Languages and Operating Systems},
  month = apr,
  year = {2019},
  note = {Best paper award},
  url = {http://doi.acm.org/10.1145/3297858.3304042},
  doi = {10.1145/3297858.3304042},
  acmid = {3304042},
  publisher = {ACM},
  optannote = {},
  abstract = {The CHERI architecture allows pointers to be implemented as
capabilities (rather than integer virtual addresses) in a manner that
is compatible with, and strengthens, the
semantics of the C language.
In addition to the spatial protections offered by conventional fat pointers,
CHERI capabilities offer strong integrity, enforced provenance validity, and
access monotonicity.
The stronger guarantees of these architectural capabilities must be reconciled with the real-world behavior of operating systems, run-time environments, and applications.
When the process model, user-kernel interactions, dynamic linking, and memory
management are all considered, we observe that simple derivation of architectural capabilities is insufficient to describe appropriate access to memory.
We bridge this conceptual gap with a notional \emph{abstract capability} that
describes the accesses that should be allowed at a given point in execution,
whether in the kernel or userspace.
To investigate this notion at scale, we describe the first adaptation of a
full C-language operating system (FreeBSD) with an enterprise database
(PostgreSQL) for complete spatial and referential memory safety.
We show that awareness of abstract capabilities, coupled with CHERI
architectural capabilities, can provide more complete protection, strong
compatibility, and acceptable performance overhead compared with the pre-CHERI
baseline and software-only approaches.
Our observations also have potentially significant implications for other
mitigation techniques.},
  apollourl = {https://www.repository.cam.ac.uk/handle/1810/289187},
  pdf = {https://www.cl.cam.ac.uk/research/security/ctsrd/pdfs/201904-asplos-cheriabi.pdf},
  project = {https://www.cl.cam.ac.uk/research/security/ctsrd/cheri/},
  topic = {cheri},
  cheriformal = {true},
  elver = {true},
  optrecent = {true}
}

@comment{{CHERI ISA version 7 technical report (UCAM-CL-TR-927).
  Only the proper name and the acronym in the title are brace-protected, so styles may still recase the rest.
  topic is lower-case cheri, as in the other entries of this file.}}
@techreport{UCAM-CL-TR-927,
  author = {Watson, Robert N. M. and
            Neumann, Peter G. and
            Woodruff, Jonathan and
            Roe, Michael and
            Almatary, Hesham and
            Anderson, Jonathan and
            Baldwin, John and
            Chisnall, David and
            Davis, Brooks and
            Filardo, Nathaniel Wesley and
            Joannou, Alexandre and
            Laurie, Ben and
            Markettos, A. Theodore and
            Moore, Simon W. and
            Murdoch, Steven J. and
            Nienhuis, Kyndylan and
            Norton, Robert and
            Richardson, Alex and
            Rugg, Peter and
            Sewell, Peter and
            Son, Stacey and
            Xia, Hongyan},
  title = {{Capability Hardware Enhanced RISC Instructions}: {CHERI}
           Instruction-Set Architecture (Version 7)},
  year = 2019,
  month = jun,
  pdf = {https://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-927.pdf},
  institution = {University of Cambridge, Computer Laboratory},
  number = {UCAM-CL-TR-927},
  note = {496pp},
  abstract = {This technical report describes CHERI ISAv7, the seventh version of the
Capability Hardware Enhanced RISC Instructions (CHERI) Instruction-Set
Architecture (ISA)
being developed by SRI International and the University of Cambridge.
This design captures nine years of research, development, experimentation,
refinement, formal analysis, and validation through hardware and software
implementation.
CHERI ISAv7 is a substantial enhancement to prior ISA versions.
We differentiate an architecture-neutral protection model vs.
architecture-specific instantiations in 64-bit MIPS, 64-bit RISC-V, and
x86-64.
We have defined a new CHERI Concentrate compression model.
CHERI-RISC-V is more substantially elaborated.
A new compartment-ID register assists in resisting microarchitectural
side-channel attacks.
Experimental features include linear capabilities, capability coloring,
temporal memory safety, and 64-bit capabilities for 32-bit architectures.

CHERI is a {\em hybrid capability-system architecture} that adds new
capability-system primitives to commodity 64-bit RISC ISAs, enabling software
to efficiently implement {\em fine-grained memory protection} and {\em
scalable software compartmentalization}.
Design goals include incremental adoptability within current ISAs and
software stacks, low performance overhead for memory protection, significant
performance improvements for software compartmentalization, formal grounding,
and programmer-friendly underpinnings.
We have focused on providing strong, non-probabilistic, efficient
architectural foundations for the principles of \textit{least privilege} and
\textit{intentional use} in the execution of software at multiple levels of
abstraction, preventing and mitigating vulnerabilities.

The CHERI system architecture purposefully addresses known performance and
robustness gaps in commodity ISAs that hinder the adoption of more secure
programming models centered around the principle of least privilege.
To this end, CHERI blends traditional paged virtual memory with an
in-address-space capability model that includes capability registers,
capability instructions, and tagged memory.
CHERI builds on the C-language fat-pointer literature: its capabilities can
describe fine-grained regions of memory, and can be substituted for data or
code pointers in generated code, protecting data and also improving
control-flow robustness.
Strong capability integrity and monotonicity properties allow the CHERI model
to express a variety of protection properties, from enforcing valid C-language
pointer provenance and bounds checking to implementing the isolation and
controlled communication structures required for software
compartmentalization.

CHERI's hybrid capability-system approach, inspired by the Capsicum security
model, allows incremental adoption of capability-oriented design: software
implementations that are more robust and resilient can be deployed where they
are most needed, while leaving less critical software largely unmodified, but
nevertheless suitably constrained to be incapable of having adverse effects.
Potential deployment scenarios include low-level software Trusted Computing
Bases (TCBs) such as separation kernels, hypervisors, and operating-system
kernels, as well as userspace TCBs such as language runtimes and web browsers.
We also see potential early-use scenarios around particularly high-risk
software libraries (such as data compression, protocol parsing, and image
processing), which are concentrations of both complex and historically
vulnerability-prone code exposed to untrustworthy data sources, while leaving
containing applications unchanged.
  },
  topic = {cheri},
  cheriformal = {true},
  elver = {true},
  optrecent = {true}
}

@comment{{CACM 2019 viewpoint article; record imported from DBLP.}}
@article{DBLP:journals/cacm/MarkettosWMSN19,
  author      = {A. Theodore Markettos and
                 Robert N. M. Watson and
                 Simon W. Moore and
                 Peter Sewell and
                 Peter G. Neumann},
  title       = {Through computer architecture, darkly},
  journal     = {Commun. {ACM}},
  volume      = {62},
  number      = {6},
  pages       = {25--27},
  year        = {2019},
  url         = {https://doi.org/10.1145/3325284},
  doi         = {10.1145/3325284},
  timestamp   = {Tue, 04 Jun 2019 15:14:10 +0200},
  biburl      = {https://dblp.org/rec/bib/journals/cacm/MarkettosWMSN19},
  bibsource   = {dblp computer science bibliography, https://dblp.org},
  topic       = {cheri},
  cheriformal = {true}
}

@comment{{Technical-report version (UCAM-CL-TR-940) of the CHERI rigorous-engineering work.
  Its project and topic fields are switched off via the opt prefix.}}
@techreport{cheri-formal-tr,
  optkey          = {},
  author          = {Kyndylan Nienhuis and
                     Alexandre Joannou and
                     Anthony Fox and
                     Michael Roe and
                     Thomas Bauereiss and
                     Brian Campbell and
                     Matthew Naylor and
                     Robert M. Norton and
                     Simon W. Moore and
                     Peter G. Neumann and
                     Ian Stark and
                     Robert N. M. Watson and
                     Peter Sewell},
  title           = {Rigorous engineering for hardware security: Formal modelling and proof in the {CHERI} design and implementation process},
  institution     = {University of Cambridge, Computer Laboratory},
  number          = {UCAM-CL-TR-940},
  opthowpublished = {Draft},
  month           = sep,
  year            = {2019},
  optnote         = {},
  optannote       = {},
  abstract        = {The root causes of many security vulnerabilities include a pernicious combination of two problems, often regarded as inescapable aspects of computing.  First, the protection mechanisms provided by the mainstream processor architecture and C/C++ language abstractions, dating back to the 1970s and before, provide only coarse-grain virtual-memory-based protection.  Second, mainstream system engineering relies almost exclusively on test-and-debug methods, with (at best) prose specifications.  These methods have historically sufficed commercially for much of the computer industry, but they fail to prevent large numbers of exploitable bugs, and the security problems that this causes are becoming ever more acute.

In this paper we show how more rigorous engineering methods can be applied to the development of a new security-enhanced processor architecture, with its accompanying hardware implementation and software stack.  We use formal models of the complete instruction-set architecture (ISA) at the heart of the design and engineering process, both in lightweight ways that support and improve normal engineering practice -- as documentation, in emulators used as a test oracle for hardware and for running software, and for test generation -- and for formal verification.  We formalise key intended security properties of the design, and establish that these hold with mechanised proof.  This is for the same complete ISA models (complete enough to boot operating systems), without idealisation.

We do this for CHERI, an architecture with \emph{hardware capabilities} that supports fine-grained memory protection and scalable secure compartmentalisation, while offering a smooth adoption path for existing software.  CHERI is a maturing research architecture, developed since 2010, with work now underway to explore its possible adoption in mass-market commercial processors.  The rigorous engineering work described here has been an integral part of its development to date, enabling more rapid and confident experimentation, and boosting confidence in the design.
},
  optproject      = {https://www.cl.cam.ac.uk/research/security/ctsrd/cheri/},
  opttopic        = {cheri},
  cheriformal     = {true},
  optrecent       = {true},
  pdf             = {https://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-940.pdf}
}

@comment{{Technical-report version (UCAM-CL-TR-932) of the CheriABI paper.
  Only the names in the title are brace-protected, so styles may still recase the rest.}}
@techreport{UCAM-CL-TR-932,
  author = {Davis, Brooks and
            Watson, Robert N. M. and
            Richardson, Alexander and
            Neumann, Peter G. and
            Moore, Simon W. and
            Baldwin, John and
            Chisnall, David and
            Clarke, James and
            Filardo, Nathaniel Wesley and
            Gudka, Khilan and
            Joannou, Alexandre and
            Laurie, Ben and
            Markettos, A. Theodore and
            Maste, J. Edward and
            Mazzinghi, Alfredo and
            Napierala, Edward Tomasz and
            Norton, Robert M. and
            Roe, Michael and
            Sewell, Peter and
            Son, Stacey and
            Woodruff, Jonathan},
  title = {{CheriABI}: Enforcing valid pointer provenance and
           minimizing pointer privilege in the {POSIX C} run-time
           environment},
  year = 2019,
  month = jan,
  pdf = {https://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-932.pdf},
  institution = {University of Cambridge, Computer Laboratory},
  number = {UCAM-CL-TR-932},
  abstract = {The CHERI architecture allows pointers to be implemented as capabilities (rather than integer virtual addresses) in a manner that is compatible with, and strengthens, the semantics of the C language. In addition to the spatial protections offered by conventional fat pointers, CHERI capabilities offer strong integrity, enforced provenance validity, and access monotonicity.

The stronger guarantees of these architectural capabilities must be reconciled with the real-world behavior of operating systems, run-time environments, and applications. When the process model, user-kernel interactions, dynamic linking, and memory management are all considered, we observe that simple derivation of architectural capabilities is insufficient to describe appropriate access to memory. We bridge this conceptual gap with a notional abstract capability that describes the accesses that should be allowed at a given point in execution, whether in the kernel or userspace.

To investigate this notion at scale, we describe the first adaptation of a full C-language operating system (FreeBSD) with an enterprise database (PostgreSQL) for complete spatial and referential memory safety. We show that awareness of abstract capabilities, coupled with CHERI architectural capabilities, can provide more complete protection, strong compatibility, and acceptable performance overhead compared with the pre-CHERI baseline and software-only approaches. Our observations also have potentially significant implications for other mitigation techniques.},
  project = {https://www.cl.cam.ac.uk/research/security/ctsrd/cheri/},
  topic = {cheri},
  cheriformal = {true},
  optrecent = {false}
}

@comment{{An Introduction to CHERI, technical report UCAM-CL-TR-941.
  Peter G. Neumann is written with his middle initial, as in the other entries of this file.}}
@techreport{UCAM-CL-TR-941,
  author = {Watson, Robert N. M. and
            Moore, Simon W. and
            Sewell, Peter and
            Neumann, Peter G.},
  title = {An Introduction to {CHERI}},
  year = 2019,
  month = sep,
  pdf = {https://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-941.pdf},
  institution = {University of Cambridge, Computer Laboratory},
  number = {UCAM-CL-TR-941},
  abstract = {},
  project = {https://www.cl.cam.ac.uk/research/security/ctsrd/cheri/},
  topic = {cheri},
  cheriformal = {true},
  elver = {true},
  recent = {true}
}

@comment{{IEEE S and P 2020 paper on formal modelling and proof for the CHERI ISA.}}
@inproceedings{cheri-formal-SP2020,
  author          = {Nienhuis, Kyndylan and
                     Joannou, Alexandre and
                     Bauereiss, Thomas and
                     Fox, Anthony and
                     Roe, Michael and
                     Campbell, Brian and
                     Naylor, Matthew and
                     Norton, Robert M. and
                     Moore, Simon W. and
                     Neumann, Peter G. and
                     Stark, Ian and
                     Watson, Robert N. M. and
                     Sewell, Peter},
  title           = {Rigorous engineering for hardware security: Formal modelling and proof in the {CHERI} design and implementation process},
  optcrossref     = {},
  optkey          = {},
  conf            = {Security and Privacy 2020},
  booktitle       = {Proceedings of the 41st IEEE Symposium on Security and Privacy (SP)},
  year            = {2020},
  opteditor       = {},
  optvolume       = {},
  optnumber       = {},
  optseries       = {},
  pages           = {1007--1024},
  month           = may,
  optaddress      = {},
  optorganization = {},
  optpublisher    = {},
  optnote         = {},
  optannote       = {},
  abstract        = {The root causes of many security vulnerabilities include a pernicious combination of two problems, often regarded as inescapable aspects of computing.  First, the protection mechanisms provided by the mainstream processor architecture and C/C++ language abstractions, dating back to the 1970s and before, provide only coarse-grain virtual-memory-based protection.  Second, mainstream system engineering relies almost exclusively on test-and-debug methods, with (at best) prose specifications.  These methods have historically sufficed commercially for much of the computer industry, but they fail to prevent large numbers of exploitable bugs, and the security problems that this causes are becoming ever more acute.

In this paper we show how more rigorous engineering methods can be applied to the development of a new security-enhanced processor architecture, with its accompanying hardware implementation and software stack.  We use formal models of the complete instruction-set architecture (ISA) at the heart of the design and engineering process, both in lightweight ways that support and improve normal engineering practice -- as documentation, in emulators used as a test oracle for hardware and for running software, and for test generation -- and for formal verification.  We formalise key intended security properties of the design, and establish that these hold with mechanised proof.  This is for the same complete ISA models (complete enough to boot operating systems), without idealisation.

We do this for CHERI, an architecture with \emph{hardware capabilities} that supports fine-grained memory protection and scalable secure compartmentalisation, while offering a smooth adoption path for existing software.  CHERI is a maturing research architecture, developed since 2010, with work now underway on an Arm industrial prototype to explore its possible adoption in mass-market commercial processors.  The rigorous engineering work described here has been an integral part of its development to date, enabling more rapid and confident experimentation, and boosting confidence in the design.
},
  pdf             = {https://www.cl.cam.ac.uk/users/pes20/cheri-formal.pdf},
  apollourl       = {https://www.repository.cam.ac.uk/handle/1810/302580},
  publisherurl    = {https://www.computer.org/csdl/proceedings-article/sp/2020/349700b007/1j2Lg3o6fdK},
  doi             = {10.1109/SP40000.2020.00055},
  project         = {https://www.cl.cam.ac.uk/research/security/ctsrd/cheri/},
  topic           = {cheri},
  cheriformal     = {true},
  elver           = {true},
  recent          = {true}
}

@comment{{CHERI C/C++ Programming Guide, technical report UCAM-CL-TR-947.
  Only CHERI and C/C++ are brace-protected in the title, so styles may still recase the rest.}}
@techreport{UCAM-CL-TR-947,
  author = {Watson, Robert N. M. and
            Richardson, Alexander and
            Davis, Brooks and
            Baldwin, John and
            Chisnall, David and
            Clarke, Jessica and
            Filardo, Nathaniel and
            Moore, Simon W. and
            Napierala, Edward and
            Sewell, Peter and
            Neumann, Peter G.},
  title = {{CHERI} {C/C++} Programming Guide},
  year = 2020,
  month = jun,
  pdf = {https://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-947.pdf},
  institution = {University of Cambridge, Computer Laboratory},
  number = {UCAM-CL-TR-947},
  abstract = {This document is a brief introduction to the CHERI C/C++ programming languages. We explain the principles underlying these language variants, and their grounding in CHERI’s multiple architectural instantiations: CHERI-MIPS, CHERI-RISC-V, and Arm’s Morello. We describe the most commonly encountered differences between these dialects and C/C++ on conventional architectures, and where existing software may require minor changes. We document new compiler warnings and errors that may be experienced compiling code with the CHERI Clang/LLVM compiler, and suggest how they may be addressed through typically minor source-code changes. We explain how modest language extensions allow selected software, such as memory allocators, to further refine permissions and bounds on pointers. This guidance is based on our experience adapting the FreeBSD operating-system userspace, and applications such as PostgreSQL and WebKit, to run in a CHERI C/C++ capability-based programming environment. We conclude by recommending further reading.},
  project = {https://www.cl.cam.ac.uk/research/security/ctsrd/cheri/},
  topic = {cheri},
  cheriformal = {true},
  elver = {true},
  recent = {true}
}

@comment{{ESOP 2022 paper on verified security of the Morello architecture.
  Each field name appears once; the opt-prefixed fields hold the DBLP variants of editor, title, booktitle, series and volume.}}
@inproceedings{morello-proofs-esop2022,
  author = {Bauereiss, Thomas and
            Campbell, Brian and
            Sewell, Thomas and
            Armstrong, Alasdair and
            Esswood, Lawrence and
            Stark, Ian and
            Barnes, Graeme and
            Watson, Robert N. M. and
            Sewell, Peter},
  title = {Verified Security for the {Morello} Capability-enhanced
           Prototype {Arm} Architecture},
  optcrossref = {},
  optkey = {},
  conf = {ESOP 2022},
  booktitle = {Proceedings of the 31st European Symposium on Programming},
  year = {2022},
  optnumber = {},
  month = apr,
  optaddress = {},
  optorganization = {},
  opteditor = {Ilya Sergey},
  opttitle = {Verified Security for the Morello Capability-enhanced Prototype Arm
               Architecture},
  optbooktitle = {Programming Languages and Systems - 31st European Symposium on Programming,
               {ESOP} 2022, Held as Part of the European Joint Conferences on Theory
               and Practice of Software, {ETAPS} 2022, Munich, Germany, April 2-7,
               2022, Proceedings},
  optseries = {Lecture Notes in Computer Science},
  optvolume = {13240},
  pages = {174--203},
  publisher = {Springer},
  url = {https://doi.org/10.1007/978-3-030-99336-8\_7},
  doi = {10.1007/978-3-030-99336-8\_7},
  timestamp = {Fri, 01 Apr 2022 15:49:28 +0200},
  biburl = {https://dblp.org/rec/conf/esop/BauereissCSAESB22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  optnote = {},
  optannote = {},
  abstract = {Memory safety bugs continue to be a major source of security vulnerabilities in our critical infrastructure.  The CHERI project has proposed extending conventional architectures with hardware-supported \emph{capabilities} to enable fine-grained memory protection and scalable compartmentalisation, allowing historically memory-unsafe C and C++ to be adapted to deterministically mitigate large classes of vulnerabilities, while requiring only minor changes to existing system software sources. Arm is currently designing and building Morello, a CHERI-enabled prototype architecture, processor, SoC, and board, extending the high-per\-for\-mance Neoverse N1, to enable industrial evaluation of CHERI and pave the way for potential mass-market adoption.  However, for such a major new security-oriented architecture feature, it is important to establish high confidence that it does provide the intended protections, and that cannot be done with conventional engineering techniques.

In this paper we put the Morello architecture on a solid mathematical footing from the outset.  We define the fundamental security property that Morello aims to provide, reachable capability monotonicity, and prove that the architecture definition satisfies it. This proof is mechanised in Isabelle/HOL, and applies to a translation of the official Arm specification of the Morello instruction-set architecture (ISA) into Isabelle.  The main challenge is handling the complexity and scale of a production architecture: 62,000 lines of specification, translated to 210,000 lines of Isabelle.  We do so by factoring the proof via a narrow abstraction capturing essential properties of arbitrary CHERI ISAs, expressed above a monadic intra-instruction semantics.  We also develop a model-based test generator, which generates instruction-sequence tests that give good specification coverage, used in early testing of the Morello implementation and in Morello QEMU development, and we use Arm's internal test suite to validate our model.

This gives us machine-checked mathematical proofs of whole-ISA security properties of a full-scale industry architecture, at design-time.  To the best of our knowledge, this is the first demonstration that that is feasible, and it significantly increases confidence in Morello.
},
  pdf = {http://www.cl.cam.ac.uk/~pes20/morello-proofs-esop2022.pdf},
  project = {https://www.cl.cam.ac.uk/research/security/ctsrd/cheri/},
  topic = {cheri},
  topictwo = {ISA_semantics},
  cheriformal = {true},
  elver = {true},
  recent = {true}
}

@misc{cheri-formal-blog,
  author       = {Peter Sewell and Thomas Bauereiss and Brian Campbell and Robert N. M. Watson},
  title        = {Formal {CHERI}: rigorous engineering and design-time proof of full-scale architecture security properties},
  howpublished = {Blog post, \url{https://www.lightbluetouchpaper.org/2022/07/22/formal-cheri/}},
  url          = {https://www.lightbluetouchpaper.org/2022/07/22/formal-cheri/},
  year         = 2022,
  month        = jul,
  project      = {https://www.cl.cam.ac.uk/research/security/ctsrd/cheri/},
  topic        = {cheri},
  cheriformal  = {true},
  elver        = {true},
  recent       = {true},
  optkey       = {},
  optnote      = {},
  optannote    = {}
}

@article{nextwave2023,
  author = {Robert N. M. Watson and Peter Sewell and William Martin},
  title = {Improving Security with Hardware Support: {CHERI} and {Arm's Morello}},
  journal = {The Next Wave (The National Security Agency's review of emerging technologies)},
  year = 2023,
  optkey = {},
  volume = 24,
  number = 1,
  pages = {10--21},
  optmonth = {},
  pdf = {https://media.defense.gov/2023/Jan/23/2003148354/-1/-1/0/TNW_24-1_2023_20230112.PDF},
  note = {ISSN 2640-1789 (print), ISSN 2640-1797 (online)},
  optannote = {},
  project = {https://www.cl.cam.ac.uk/research/security/ctsrd/cheri/},
  topic = {cheri},
  cheriformal = {true},
  elver = {true},
  recent = {true},
  abstract = {
  The CHERI project, from the University of Cambridge and SRI International, extends
instruction-set architectures (ISAs) with unforgeable architectural capabilities, to be
used in place of conventional machine-word addresses to access memory. CHERI, which
stands for Capability Hardware Enhanced Reduced Instruction Set Computer (RISC) Instructions,
deterministically protects C/C++ pointers and other references, and also enables in-address-space software sandboxing. With changes to the compiler and operating system (OS), CHERI
enables new hardware-software security protection models for existing software (typically with
only very minor changes for memory safety):

- Deterministic fine-grained C/C++ memory protection at low overheads; and
- Scalable software compartmentalization, including sandboxed libraries, with interprocess communication performance improvements and function-call-like domain transition.

In a 2020 blog post evaluating CHERI, the Microsoft Security Response Centre (MSRC) wrote:
``We've assessed the theoretical impact of CHERI on all the memory safety vulnerabilities we
received in 2019, and concluded that in its current state, and combined with other mitigations,
it would have deterministically mitigated at least two thirds of all those issues''[1]. Scalable
single-address software sandboxing has the potential to mitigate many more, and to enable a
more disruptive shift to stronger compartmentalized software architectures.

Arm has recently developed the Morello architecture and processor, incorporating the CHERI
protection model into a contemporary high-performance Arm design. Morello is an experimental
prototype extending the existing Armv8-A architecture and Neoverse N1 64-bit processor
design to support CHERI research and evaluation on the path to eventual productization, and
to demonstrate the viability of the CHERI technology using real commercial processes and
manufacturing. Extensive software porting is establishing feasibility. Development boards are
available for research and prototyping as of early 2022, and are already running significant
open-source software stacks, such as an adapted version of the FreeBSD OS and KDE desktop
stack running with strong memory safety [2].

In this article we give an overview of CHERI and Morello, and pointers to full discussions
elsewhere. It is based largely on material from the ``Introduction to CHERI'' [3] and ``Verified
security for the Morello capability-enhanced prototype Arm architecture'' [4] technical reports;
it does not contain new research results.
}
}

@article{10123148,
  author      = {Grisenthwaite, Richard and Barnes, Graeme and Watson, Robert N. M. and Moore, Simon W. and Sewell, Peter and Woodruff, Jonathan},
  title       = {The {Arm Morello} Evaluation Platform---Validating {CHERI}-Based Security in a High-Performance System},
  journal     = {IEEE Micro},
  volume      = 43,
  number      = 3,
  pages       = {50--57},
  year        = 2023,
  doi         = {10.1109/MM.2023.3264676},
  url         = {https://ieeexplore.ieee.org/document/10123148},
  opturl      = {https://www.repository.cam.ac.uk/items/5ce6229c-d21b-4f61-adae-483026648e84},
  topic       = {cheri},
  cheriformal = {true},
  elver       = {true},
  recent      = {true},
  abstract    = {Memory safety issues are a persistent source of security vulnerabilities, with conventional architectures and the C/C++ codebase chronically prone to exploitable errors. The Capability Hardware Enhanced RISC Instructions (CHERI) research project has explored a novel architectural approach to ameliorate such issues using unforgeable hardware capabilities to implement pointers. Morello is an Arm experimental platform for evaluation of CHERI in the Arm architecture context to explore its potential for mass-market adoption. This article describes the Morello Evaluation Platform, covering the motivation and functionality of the Morello architectural hardware extensions; their potential for fine-grained memory safety and software compartmentalization; formally proven security properties; impact on the microarchitecture of the high-performance, out-of-order multiprocessor Arm Morello processor; and the software-enablement program by Arm, the University of Cambridge, and Linaro. Together, this allows a wide range of researchers in both industry and academia to explore and assess the Morello platform.
}
}

@inproceedings{cheri-c-asplos,
  author = {Vadim Zaliva and Kayvan Memarian and Ricardo Almeida and Jessica Clarke and Brooks Davis and Alex Richardson and David Chisnall and Brian Campbell and Ian Stark and Robert N. M. Watson and Peter Sewell},
  title = {Formal Mechanised Semantics of {CHERI C}: Capabilities, Provenance, and Undefined Behaviour},
  booktitle = {\textbf{ASPLOS 2024}: 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems},
  conf = {ASPLOS 2024},
  optcrossref = {},
  optkey = {},
  year = {2024},
  opteditor = {},
  optvolume = {},
  optnumber = {},
  optseries = {},
  optpages = {},
  month = apr,
  optaddress = {},
  optorganization = {},
  optpublisher = {},
  optnote = {Accepted for publication, subject to shepherd approval},
  optannote = {},
  abstract = {Memory safety issues are a persistent source of security vulnerabilities, with conventional architectures and the C codebase chronically prone to exploitable errors. The CHERI research project has shown how one can provide radically improved security for that existing codebase with minimal modification, using unforgeable hardware capabilities in place of machine-word pointers in CHERI dialects of C, implemented as adaptions of Clang/LLVM and GCC. CHERI was first prototyped as extensions of MIPS and RISC-V; it is currently being evaluated by Arm and others with the Arm Morello experimental architecture, processor, and platform, to explore its potential for mass-market adoption, and by Microsoft in their CHERIoT design for embedded cores.

There is thus considerable practical experience with CHERI C implementation and use, but exactly what CHERI C's semantics is (or should be) remains an open question. In this paper, we present the first attempt to rigorously and comprehensively define CHERI C semantics, discuss key semantics design questions relating to capabilities, provenance, and undefined behaviour, and clarify them with semantics in multiple complementary forms: in prose, as an executable semantics adapting the Cerberus C semantics, and mechanised in Coq.

This establishes a solid foundation for CHERI C, for those porting code to it, for compiler implementers, and for future semantics and verification.
},
  pdf = {http://www.cl.cam.ac.uk/users/pes20/asplos24spring-paper110.pdf},
  topic = {cheri},
  cheriformal = {true},
  elver = {true},
  recent = {true}
}

@inproceedings{rems-fmcad16,
  author      = {Brian Campbell and Ian Stark},
  title       = {Extracting Behaviour from an Executable Instruction Set Model},
  booktitle   = {\textbf{FMCAD 2016}: Formal Methods in Computer-Aided Design},
  conf        = {FMCAD 2016},
  editor      = {Ruzica Piskac and Muralidhar Talupur},
  pages       = {33--40},
  year        = {2016},
  month       = oct,
  isbn        = {978-0-9835678-6-8},
  pdf         = {http://homepages.inf.ed.ac.uk/bcampbe2/rems/fmcad16.pdf},
  note        = {Full proceedings \url{http://www.cs.utexas.edu/users/hunt/FMCAD/FMCAD16/proceedings/fmcad-2016-proceedings.pdf}},
  cheriformal = {true},
  abstract    = {Presenting large formal instruction set models as executable functions makes them accessible to engineers and useful for less formal purposes such as simulation.  However, it is more difficult to extract information about the behaviour of individual instructions for reasoning.  We present a method which combines symbolic evaluation and symbolic execution techniques to provide a rule-based view of instruction behaviour, with particular application to automatic test generation for large MIPS-like models.}
}

@article{CHERIconcentrate,
  author = {Jonathan Woodruff and Alexandre Joannou and Hongyan Xia and Anthony Fox and Robert Norton and Thomas Bauereiss and David Chisnall and Brooks Davis and Khilan Gudka and Nathaniel W. Filardo and A. Theodore Markettos and Michael Roe and Peter G. Neumann and Robert N. M. Watson and Simon W. Moore},
  title = {{CHERI Concentrate}: Practical Compressed Capabilities},
  journal = {IEEE Transactions on Computers},
  year = {2019},
  volume = {68},
  number = {10},
  pages = {1455--1469},
  month = oct,
  doi = {10.1109/TC.2019.2914037},
  pdf = {https://www.cl.cam.ac.uk/research/security/ctsrd/pdfs/2019tc-cheri-concentrate.pdf},
  abstract = {We present CHERI Concentrate, a new fat-pointer compression scheme applied to CHERI, the most developed
capability-pointer system at present. Capability fat pointers are a primary candidate to enforce fine-grained and non-bypassable
security properties in future computer systems, although increased pointer size can severely affect performance. Thus, several
proposals for capability compression have been suggested elsewhere that do not support legacy instruction sets, ignore features
critical to the existing software base, and also introduce design inefficiencies to RISC-style processor pipelines. CHERI Concentrate
improves on the state-of-the-art region-encoding efficiency, solves important pipeline problems, and eases semantic restrictions of
compressed encoding, allowing it to protect a full legacy software stack. We present the first quantitative analysis of compiled capability
code, which we use to guide the design of the encoding format. We analyze and extend logic from the open-source CHERI prototype
processor design on FPGA to demonstrate encoding efficiency, minimize delay of pointer arithmetic, and eliminate additional
load-to-use delay. To verify correctness of our proposed high-performance logic, we present a HOL4 machine-checked proof of the
decode and pointer-modify operations. Finally, we measure a 50\% to 75\% reduction in L2 misses for many compiled C-language
benchmarks running under a commodity operating system using compressed 128-bit and 64-bit formats, demonstrating both
compatibility with and increased performance over the uncompressed, 256-bit format.
},
  cheriformal = {true},
  optkey = {},
  optnote = {},
  optannote = {}
}