blob: aa8fdafc551be54e4186003282c8d551de6fe877 [file] [log] [blame]
// Part of the Crubit project, under the Apache License v2.0 with LLVM
// Exceptions. See /LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4
Lukasz Anforowicz55616892022-10-06 09:16:57 -07005use anyhow::{anyhow, ensure, Result};
Lukasz Anforowicz20a3c692022-10-06 08:48:15 -07006use once_cell::sync::Lazy;
Lukasz Anforowicze1aff8c2022-11-15 08:42:31 -08007use proc_macro2::{Ident, TokenStream};
8use quote::{format_ident, quote, ToTokens};
Lukasz Anforowicz434c4692022-11-01 14:05:24 -07009use std::collections::{BTreeSet, HashSet};
10use std::rc::Rc;
Lukasz Anforowiczccf55cb2022-10-05 06:00:57 -070011
// TODO(lukasza): Consider adding more items into `code_gen_utils` (this crate).
// For example, the following items from `src_code_gen.rs` will most likely be
// reused from `cc_bindings_from_rs`:
// - `make_rs_ident`
// - `NamespaceQualifier`
17
Lukasz Anforowicze1aff8c2022-11-15 08:42:31 -080018/// Formats a C++ identifier. Returns an error when `ident` is a C++ reserved
19/// keyword or is an invalid identifier.
Lukasz Anforowicz55616892022-10-06 09:16:57 -070020pub fn format_cc_ident(ident: &str) -> Result<TokenStream> {
Lukasz Anforowiczc51aeb12022-11-07 10:56:18 -080021 ensure!(!ident.is_empty(), "Empty string is not a valid C++ identifier");
22
Lukasz Anforowicz20a3c692022-10-06 08:48:15 -070023 // C++ doesn't have an equivalent of
24 // https://doc.rust-lang.org/rust-by-example/compatibility/raw_identifiers.html and therefore
Lukasz Anforowicz55616892022-10-06 09:16:57 -070025 // an error is returned when `ident` is a C++ reserved keyword.
26 ensure!(
Lukasz Anforowicz20a3c692022-10-06 08:48:15 -070027 !RESERVED_CC_KEYWORDS.contains(ident),
Lukasz Anforowicze4333062022-10-17 14:47:53 -070028 "`{}` is a C++ reserved keyword and can't be used as a C++ identifier",
Lukasz Anforowicz20a3c692022-10-06 08:48:15 -070029 ident
30 );
31
Lukasz Anforowicz55616892022-10-06 09:16:57 -070032 ident.parse().map_err(
33 // Explicitly mapping the error via `anyhow!`, because `LexError` is not `Sync`
34 // (required for `anyhow::Error` to implement `From<LexError>`) and
35 // therefore we can't just use `?`.
36 |lex_error| anyhow!("Can't format `{ident}` as a C++ identifier: {lex_error}"),
37 )
Lukasz Anforowiczccf55cb2022-10-05 06:00:57 -070038}
39
Lukasz Anforowicze1aff8c2022-11-15 08:42:31 -080040/// Makes an 'Ident' to be used in the Rust source code. Escapes Rust keywords.
41/// Panics if `ident` is empty or is otherwise an invalid identifier.
42pub fn make_rs_ident(ident: &str) -> Ident {
43 // TODO(https://github.com/dtolnay/syn/pull/1098): Remove the hardcoded list once syn recognizes
44 // 2018 and 2021 keywords.
45 if ["async", "await", "try", "dyn"].contains(&ident) {
46 return format_ident!("r#{}", ident);
47 }
48 match syn::parse_str::<syn::Ident>(ident) {
49 Ok(_) => format_ident!("{}", ident),
50 Err(_) => format_ident!("r#{}", ident),
51 }
52}
53
Lukasz Anforowicz35ba2fe2022-11-23 15:56:19 -080054/// Representation of `foo::bar::baz::` where each component is either the name
55/// of a C++ namespace, or the name of a Rust module.
56#[derive(Debug, PartialEq, Eq, Clone, Hash, PartialOrd, Ord)]
57// TODO(b/258265044): Make the `Vec<String>` payload private + guarantee
58// additional invariants in an explicit, public `new` method. This will help to
59// catch some error conditions early (e.g. an empty path component may trigger a
60// panic in `make_rs_ident`; a reserved C++ keyword might trigger a late error
61// in `format_for_cc` / `format_cc_ident`).
Lukasz Anforowicz8c1a6c42022-11-23 16:18:09 -080062pub struct NamespaceQualifier(pub Vec<Rc<str>>);
Lukasz Anforowicz35ba2fe2022-11-23 15:56:19 -080063
64impl NamespaceQualifier {
65 pub fn format_for_rs(&self) -> TokenStream {
66 let namespace_rs_idents = self.0.iter().map(|ns| make_rs_ident(ns));
67 quote! { #(#namespace_rs_idents::)* }
68 }
69
70 pub fn format_for_cc(&self) -> Result<TokenStream> {
Lukasz Anforowicza577d822022-12-12 15:00:46 -080071 let namespace_cc_idents = self.cc_idents()?;
Lukasz Anforowicz35ba2fe2022-11-23 15:56:19 -080072 Ok(quote! { #(#namespace_cc_idents::)* })
73 }
Lukasz Anforowicza577d822022-12-12 15:00:46 -080074
75 pub fn format_with_cc_body(&self, body: TokenStream) -> Result<TokenStream> {
76 if self.0.is_empty() {
77 Ok(body)
78 } else {
79 let namespace_cc_idents = self.cc_idents()?;
80 Ok(quote! {
81 namespace #(#namespace_cc_idents)::* {
82 #body
83 }
84 })
85 }
86 }
87
88 fn cc_idents(&self) -> Result<Vec<TokenStream>> {
89 self.0.iter().map(|ns| format_cc_ident(ns)).collect()
90 }
Lukasz Anforowicz35ba2fe2022-11-23 15:56:19 -080091}
92
/// `CcInclude` represents a single `#include ...` directive in C++.
///
/// The derived ordering follows the variant declaration order, so every
/// `SystemHeader` sorts before every `UserHeader`; within a variant, includes
/// are ordered by their path. Reordering the variants would change the order
/// in which a sorted collection of includes is emitted.
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum CcInclude {
    /// A header included with angle brackets: `#include <path>`.
    SystemHeader(&'static str),
    /// A header included with quotes: `#include "path"`.
    UserHeader(Rc<str>),
}

impl CcInclude {
    /// Creates a `CcInclude` that represents `#include <cstddef>` and provides
    /// C++ types like `std::size_t` or `std::ptrdiff_t`. See also
    /// https://en.cppreference.com/w/cpp/header/cstddef
    pub fn cstddef() -> Self {
        Self::SystemHeader("cstddef")
    }

    /// Creates a `CcInclude` that represents `#include <cstdint>` and provides
    /// C++ types like `std::int16_t` or `std::uint32_t`. See also
    /// https://en.cppreference.com/w/cpp/header/cstdint
    pub fn cstdint() -> Self {
        Self::SystemHeader("cstdint")
    }

    /// Creates a `CcInclude` that represents `#include <memory>`.
    /// See also https://en.cppreference.com/w/cpp/header/memory
    pub fn memory() -> Self {
        Self::SystemHeader("memory")
    }

    /// Creates a `CcInclude` that represents `#include <utility>` and provides
    /// C++ functions like `std::move` and C++ types like `std::tuple`.
    /// See also https://en.cppreference.com/w/cpp/header/utility
    pub fn utility() -> Self {
        Self::SystemHeader("utility")
    }

    /// Creates a user include: `#include "some/path/to/header.h"`.
    pub fn user_header(path: Rc<str>) -> Self {
        Self::UserHeader(path)
    }
}
133
134impl ToTokens for CcInclude {
135 fn to_tokens(&self, tokens: &mut TokenStream) {
136 match self {
137 Self::SystemHeader(path) => {
138 let path: TokenStream = path
139 .parse()
140 .expect("`pub` API of `CcInclude` guarantees validity of system includes");
141 quote! { __HASH_TOKEN__ include < #path > __NEWLINE__ }.to_tokens(tokens)
142 }
143 Self::UserHeader(path) => {
144 quote! { __HASH_TOKEN__ include #path __NEWLINE__ }.to_tokens(tokens)
145 }
146 }
147 }
148}
149
150/// Formats a set of `CcInclude`s, trying to follow the guidance from
151/// [the Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html#Names_and_Order_of_Includes).
152pub fn format_cc_includes(set_of_includes: &BTreeSet<CcInclude>) -> TokenStream {
153 let mut tokens = TokenStream::default();
154 let mut iter = set_of_includes.iter().peekable();
155 while let Some(include) = iter.next() {
156 include.to_tokens(&mut tokens);
157
158 // Add an empty line between system headers and user headers.
159 if let (CcInclude::SystemHeader(_), Some(CcInclude::UserHeader(_))) = (include, iter.peek())
160 {
161 quote! { __NEWLINE__ }.to_tokens(&mut tokens)
162 }
163 }
164 tokens
165}
166
Lukasz Anforowicz20a3c692022-10-06 08:48:15 -0700167static RESERVED_CC_KEYWORDS: Lazy<HashSet<&'static str>> = Lazy::new(|| {
168 // `RESERVED_CC_KEYWORDS` are based on https://en.cppreference.com/w/cpp/keyword
169 [
170 "alignas",
171 "alignof",
172 "and",
173 "and_eq",
174 "asm",
175 "atomic_cancel",
176 "atomic_commit",
177 "atomic_noexcept",
178 "auto",
179 "bitand",
180 "bitor",
181 "bool",
182 "break",
183 "case",
184 "catch",
185 "char",
186 "char8_t",
187 "char16_t",
188 "char32_t",
189 "class",
190 "compl",
191 "concept",
192 "const",
193 "consteval",
194 "constexpr",
195 "constinit",
196 "const_cast",
197 "continue",
198 "co_await",
199 "co_return",
200 "co_yield",
201 "decltype",
202 "default",
203 "delete",
204 "do",
205 "double",
206 "dynamic_cast",
207 "else",
208 "enum",
209 "explicit",
210 "export",
211 "extern",
212 "false",
213 "float",
214 "for",
215 "friend",
216 "goto",
217 "if",
218 "inline",
219 "int",
220 "long",
221 "mutable",
222 "namespace",
223 "new",
224 "noexcept",
225 "not",
226 "not_eq",
227 "nullptr",
228 "operator",
229 "or",
230 "or_eq",
231 "private",
232 "protected",
233 "public",
234 "reflexpr",
235 "register",
236 "reinterpret_cast",
237 "requires",
238 "return",
239 "short",
240 "signed",
241 "sizeof",
242 "static",
243 "static_assert",
244 "static_cast",
245 "struct",
246 "switch",
247 "synchronized",
248 "template",
249 "this",
250 "thread_local",
251 "throw",
252 "true",
253 "try",
254 "typedef",
255 "typeid",
256 "typename",
257 "union",
258 "unsigned",
259 "using",
260 "virtual",
261 "void",
262 "volatile",
263 "wchar_t",
264 "while",
265 "xor",
266 "xor_eq",
267 ]
268 .into_iter()
269 .collect()
270});
271
/// Unit tests for the C++/Rust identifier formatting helpers, `CcInclude`
/// and `NamespaceQualifier`.
#[cfg(test)]
pub mod tests {
    use super::*;
    use quote::quote;
    use token_stream_matchers::{assert_cc_matches, assert_rs_matches};
    use token_stream_printer::cc_tokens_to_formatted_string_for_tests;

    #[test]
    fn test_format_cc_ident_basic() {
        assert_cc_matches!(
            format_cc_ident("foo").unwrap(),
            quote! { foo }
        );
    }

    #[test]
    fn test_format_cc_ident_reserved_rust_keyword() {
        // A Rust keyword is a perfectly fine C++ identifier.
        assert_cc_matches!(
            format_cc_ident("impl").unwrap(),
            quote! { impl }
        );
    }

    #[test]
    fn test_format_cc_ident_reserved_cc_keyword() {
        let err = format_cc_ident("reinterpret_cast").unwrap_err();
        let msg = err.to_string();
        assert!(msg.contains("`reinterpret_cast`"));
        assert!(msg.contains("C++ reserved keyword"));
    }

    #[test]
    fn test_format_cc_ident_unfinished_group() {
        let err = format_cc_ident("(foo") // No closing `)`.
            .unwrap_err();
        let msg = err.to_string();
        assert!(msg.contains("Can't format `(foo` as a C++ identifier"));
        assert!(msg.contains("cannot parse"));
    }

    #[test]
    fn test_format_cc_ident_unqualified_identifiers() {
        // https://en.cppreference.com/w/cpp/language/identifiers#Unqualified_identifiers

        // These may appear in `IR::Func::name`.
        assert_cc_matches!(
            format_cc_ident("operator==").unwrap(),
            quote! { operator== }
        );
        assert_cc_matches!(
            format_cc_ident("operator new").unwrap(),
            quote! { operator new }
        );

        // This may appear in `IR::Record::cc_name` (although in practice these will
        // be namespace-qualified most of the time).
        assert_cc_matches!(
            format_cc_ident("MyTemplate<int>").unwrap(),
            quote! { MyTemplate<int> }
        );

        // These forms of unqualified identifiers are not used by Crubit in practice.
        assert_cc_matches!(
            format_cc_ident("~MyClass").unwrap(),
            quote! { ~MyClass }
        );
        assert_cc_matches!(
            format_cc_ident(r#" operator "" _km "#).unwrap(),
            quote! { operator "" _km }
        );
    }

    #[test]
    fn test_format_cc_ident_empty() {
        let err = format_cc_ident("").unwrap_err();
        let msg = err.to_string();
        assert_eq!(msg, "Empty string is not a valid C++ identifier");
    }

    #[test]
    fn test_format_cc_ident_qualified_identifiers() {
        // https://en.cppreference.com/w/cpp/language/identifiers#Qualified_identifiers

        // This may appear in `IR::Record::cc_name`.
        assert_cc_matches!(
            format_cc_ident("std::vector<int>").unwrap(),
            quote! { std::vector<int> }
        );
    }

    #[test]
    fn test_make_rs_ident_basic() {
        let id = make_rs_ident("foo");
        assert_rs_matches!(quote! { #id }, quote! { foo });
    }

    #[test]
    fn test_make_rs_ident_reserved_cc_keyword() {
        // A C++ keyword is a perfectly fine Rust identifier.
        let id = make_rs_ident("reinterpret_cast");
        assert_rs_matches!(quote! { #id }, quote! { reinterpret_cast });
    }

    #[test]
    fn test_make_rs_ident_reserved_rust_keyword() {
        let id = make_rs_ident("impl");
        assert_rs_matches!(quote! { #id }, quote! { r#impl });
    }

    #[test]
    #[should_panic]
    fn test_make_rs_ident_unfinished_group() {
        make_rs_ident("(foo"); // No closing `)`.
    }

    #[test]
    #[should_panic]
    fn test_make_rs_ident_empty() {
        make_rs_ident("");
    }

    #[test]
    fn test_cc_include_to_tokens_for_system_header() {
        let include = CcInclude::cstddef();
        assert_cc_matches!(
            quote! { #include },
            quote! {
                __HASH_TOKEN__ include <cstddef>
            }
        );
    }

    #[test]
    fn test_cc_include_to_tokens_for_user_header() {
        let include = CcInclude::user_header("some/path/to/header.h".into());
        assert_cc_matches!(
            quote! { #include },
            quote! {
                __HASH_TOKEN__ include "some/path/to/header.h"
            }
        );
    }

    #[test]
    fn test_cc_include_ord() {
        let cstddef = CcInclude::cstddef();
        let memory = CcInclude::memory();
        let a = CcInclude::user_header("a.h".into());
        let b = CcInclude::user_header("b.h".into());
        assert!(cstddef < memory);
        assert!(cstddef < a);
        assert!(cstddef < b);
        assert!(memory < a);
        assert!(memory < b);
        assert!(a < b);
    }

    #[test]
    fn test_format_cc_includes() {
        let includes = [
            CcInclude::cstddef(),
            CcInclude::memory(),
            CcInclude::user_header("a.h".into()),
            CcInclude::user_header("b.h".into()),
        ]
        .into_iter()
        .collect::<BTreeSet<_>>();

        let tokens = format_cc_includes(&includes);
        let actual =
            cc_tokens_to_formatted_string_for_tests(quote! { __NEWLINE__ #tokens }).unwrap();
        assert_eq!(
            actual,
            r#"
#include <cstddef>
#include <memory>

#include "a.h"
#include "b.h"
"#
        );
    }

    /// Builds a `NamespaceQualifier` from string literals.
    fn create_namespace_qualifier_for_tests(input: &[&str]) -> NamespaceQualifier {
        NamespaceQualifier(input.iter().map(|&s| s.into()).collect())
    }

    #[test]
    fn test_namespace_qualifier_empty() {
        let ns = create_namespace_qualifier_for_tests(&[]);
        let actual_rs = ns.format_for_rs();
        assert!(actual_rs.is_empty());
        let actual_cc = ns.format_for_cc().unwrap();
        assert!(actual_cc.is_empty());
    }

    #[test]
    fn test_namespace_qualifier_basic() {
        let ns = create_namespace_qualifier_for_tests(&["foo", "bar"]);
        let actual_rs = ns.format_for_rs();
        assert_rs_matches!(actual_rs, quote! { foo::bar:: });
        let actual_cc = ns.format_for_cc().unwrap();
        assert_cc_matches!(actual_cc, quote! { foo::bar:: });
    }

    #[test]
    fn test_namespace_qualifier_reserved_rust_keyword() {
        // `impl` is reserved in Rust (so it gets escaped), but not in C++.
        let ns = create_namespace_qualifier_for_tests(&["foo", "impl", "bar"]);
        let actual_rs = ns.format_for_rs();
        assert_rs_matches!(actual_rs, quote! { foo :: r#impl :: bar :: });
        let actual_cc = ns.format_for_cc().unwrap();
        assert_cc_matches!(actual_cc, quote! { foo::impl::bar:: });
    }

    #[test]
    fn test_namespace_qualifier_reserved_cc_keyword() {
        // `reinterpret_cast` is reserved in C++ (so it is an error), but not in Rust.
        let ns = create_namespace_qualifier_for_tests(&["foo", "reinterpret_cast", "bar"]);
        let actual_rs = ns.format_for_rs();
        assert_rs_matches!(actual_rs, quote! { foo :: reinterpret_cast :: bar :: });
        let cc_error = ns.format_for_cc().unwrap_err();
        let msg = cc_error.to_string();
        assert!(msg.contains("`reinterpret_cast`"));
        assert!(msg.contains("C++ reserved keyword"));
    }

    #[test]
    fn test_namespace_qualifier_format_with_cc_body_top_level_namespace() {
        let ns = create_namespace_qualifier_for_tests(&[]);
        assert_cc_matches!(
            ns.format_with_cc_body(quote! { cc body goes here }).unwrap(),
            quote! { cc body goes here },
        );
    }

    #[test]
    fn test_namespace_qualifier_format_with_cc_body_nested_namespace() {
        let ns = create_namespace_qualifier_for_tests(&["foo", "bar", "baz"]);
        assert_cc_matches!(
            ns.format_with_cc_body(quote! { cc body goes here }).unwrap(),
            quote! {
                namespace foo::bar::baz {
                    cc body goes here
                }  // namespace foo::bar::baz
            },
        );
    }
}