Marcel Hlopko | 3164eee | 2021-08-24 20:09:22 +0000 | [diff] [blame] | 1 | // Part of the Crubit project, under the Apache License v2.0 with LLVM |
| 2 | // Exceptions. See /LICENSE for license information. |
| 3 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 4 | |
Marcel Hlopko | ca84ff4 | 2021-12-09 14:15:14 +0000 | [diff] [blame] | 5 | use anyhow::{bail, Result}; |
Rosica Dejanovska | da9105d | 2022-03-30 09:38:16 -0700 | [diff] [blame] | 6 | use proc_macro2::{Delimiter, TokenStream, TokenTree}; |
Lukasz Anforowicz | 54ff318 | 2022-05-06 07:17:58 -0700 | [diff] [blame] | 7 | use std::ffi::{OsStr, OsString}; |
Marcel Hlopko | 65d05f0 | 2021-12-09 12:29:24 +0000 | [diff] [blame] | 8 | use std::fmt::Write as _; |
| 9 | use std::io::Write as _; |
| 10 | use std::process::{Command, Stdio}; |
Marcel Hlopko | 3164eee | 2021-08-24 20:09:22 +0000 | [diff] [blame] | 11 | |
// TODO: The `RustfmtConfig` struct should be replaced with
// `rustfmt_nightly::Config` once we switch to using rustfmt as a library
// (instead of invoking the `rustfmt` executable).
/// Command-line arguments that are handed to the `rustfmt` executable.
#[derive(Clone, Debug)]
pub struct RustfmtConfig {
    /// Arguments in the order in which they are passed to `rustfmt`.
    args: Vec<OsString>,
}

impl RustfmtConfig {
    /// Creates a config that passes the provided `rustfmt_config_path` argument
    /// as `rustfmt`'s `--config-path` cmdline parameter.
    ///
    /// The Crubit-specific overrides (see `append_config_overrides`) are
    /// appended after the `--config-path` argument.
    pub fn from_config_path(rustfmt_config_path: &OsStr) -> Self {
        let mut config_path_arg = OsString::from("--config-path=");
        config_path_arg.push(rustfmt_config_path);
        Self { args: vec![config_path_arg] }.append_config_overrides()
    }

    /// Appends the `--config` overrides that are applied to every config,
    /// whether it was built from a config path or from the defaults.
    fn append_config_overrides(mut self) -> Self {
        self.args.extend([
            // We are representing doc comments as attributes in the token stream and use rustfmt
            // to unpack them again.
            OsString::from("--config=normalize_doc_attributes=true"),
            // We don't want rustfmt to reflow C++ doc comments, so we turn off wrapping globally
            // and reflow generated comments manually.
            OsString::from("--config=wrap_comments=false"),
        ]);
        self
    }
}

impl Default for RustfmtConfig {
    /// Creates the built-in config: `--edition=2021`, `--config=version=Two`,
    /// followed by the overrides that are appended to every config.
    fn default() -> Self {
        Self {
            args: vec![OsString::from("--edition=2021"), OsString::from("--config=version=Two")],
        }
        .append_config_overrides()
    }
}
| 45 | |
Marcel Hlopko | 65d05f0 | 2021-12-09 12:29:24 +0000 | [diff] [blame] | 46 | /// Like `tokens_to_string` but also runs the result through rustfmt. |
Lukasz Anforowicz | 54ff318 | 2022-05-06 07:17:58 -0700 | [diff] [blame] | 47 | pub fn rs_tokens_to_formatted_string( |
| 48 | tokens: TokenStream, |
| 49 | config: &RustfmtConfig, |
| 50 | ) -> Result<String> { |
| 51 | rustfmt(tokens_to_string(tokens)?, config) |
| 52 | } |
| 53 | |
| 54 | /// Like `rs_tokens_to_formatted_string`, but always using a Crubit-internal, |
| 55 | /// default rustfmt config. This should only be called by tests - product code |
| 56 | /// should support custom `rustfmt.toml`. |
| 57 | pub fn rs_tokens_to_formatted_string_for_tests(input: TokenStream) -> Result<String> { |
| 58 | rs_tokens_to_formatted_string(input, &RustfmtConfig::default()) |
Googler | 42d540f | 2021-09-29 06:37:23 +0000 | [diff] [blame] | 59 | } |
| 60 | |
Marcel Hlopko | 65d05f0 | 2021-12-09 12:29:24 +0000 | [diff] [blame] | 61 | /// Produces source code out of the token stream. |
Marcel Hlopko | 3164eee | 2021-08-24 20:09:22 +0000 | [diff] [blame] | 62 | /// |
| 63 | /// Notable features: |
Michael Forster | bee8448 | 2021-10-13 08:35:38 +0000 | [diff] [blame] | 64 | /// * quote! cannot produce a single `#` token (that is not immediately followed |
| 65 | /// by `(`, `[`, `{`, or variable interpolation). For cases when we need `#` |
| 66 | /// to be produced in the C++ source code use the placeholder |
| 67 | /// `__HASH_TOKEN__`. |
| 68 | /// * The Rust tokenizer ignores newlines as they are not significant for Rust. |
| 69 | /// For C++ they are (for example there needs to be a newline after `#include |
| 70 | /// "foo/bar.h"`). We are also using explict newlines for making the generated |
| 71 | /// Rust/C++ source code more readable. Use the placeholder `__NEWLINE__` to |
| 72 | /// insert a newline character. |
Devin Jeanpierre | 2b4182b | 2022-04-19 08:23:50 -0700 | [diff] [blame] | 73 | /// * `TokenStream` cannot encode formatting whitespace, so we use the |
| 74 | /// placeholder `__SPACE__`. |
Michael Forster | bee8448 | 2021-10-13 08:35:38 +0000 | [diff] [blame] | 75 | /// * `TokenStream` cannot encode comments, so we use the placeholder |
| 76 | /// `__COMMENT__`, followed by a string literal. |
Michael Forster | db8101a | 2021-10-08 06:56:03 +0000 | [diff] [blame] | 77 | pub fn tokens_to_string(tokens: TokenStream) -> Result<String> { |
Michael Forster | 0de2b8c | 2021-10-11 08:28:49 +0000 | [diff] [blame] | 78 | let mut result = String::new(); |
Rosica Dejanovska | da9105d | 2022-03-30 09:38:16 -0700 | [diff] [blame] | 79 | tokens_to_string_impl(&mut result, tokens)?; |
| 80 | Ok(result) |
| 81 | } |
| 82 | |
| 83 | fn tokens_to_string_impl(result: &mut String, tokens: TokenStream) -> Result<()> { |
Michael Forster | 0de2b8c | 2021-10-11 08:28:49 +0000 | [diff] [blame] | 84 | let mut it = tokens.into_iter().peekable(); |
| 85 | while let Some(tt) = it.next() { |
Marcel Hlopko | 3164eee | 2021-08-24 20:09:22 +0000 | [diff] [blame] | 86 | match tt { |
| 87 | TokenTree::Ident(ref tt) if tt == "__NEWLINE__" => writeln!(result)?, |
Devin Jeanpierre | 2b4182b | 2022-04-19 08:23:50 -0700 | [diff] [blame] | 88 | TokenTree::Ident(ref tt) if tt == "__SPACE__" => write!(result, " ")?, |
Marcel Hlopko | 3164eee | 2021-08-24 20:09:22 +0000 | [diff] [blame] | 89 | TokenTree::Ident(ref tt) if tt == "__HASH_TOKEN__" => write!(result, "#")?, |
| 90 | |
Michael Forster | 523dbd4 | 2021-10-12 11:05:44 +0000 | [diff] [blame] | 91 | TokenTree::Ident(ref tt) if tt == "__COMMENT__" => { |
| 92 | if let Some(TokenTree::Literal(lit)) = it.next() { |
| 93 | writeln!( |
| 94 | result, |
| 95 | "// {}", |
| 96 | lit.to_string().trim_matches('"').replace("\\n", "\n// ") |
| 97 | )?; |
| 98 | } else { |
| 99 | bail!("__COMMENT__ must be followed by a literal") |
| 100 | } |
| 101 | } |
Rosica Dejanovska | da9105d | 2022-03-30 09:38:16 -0700 | [diff] [blame] | 102 | TokenTree::Group(ref tt) => { |
| 103 | let (open_delimiter, closed_delimiter) = match tt.delimiter() { |
| 104 | Delimiter::Parenthesis => ("(", ")"), |
| 105 | Delimiter::Bracket => ("[", "]"), |
| 106 | Delimiter::Brace => ("{ ", " }"), |
| 107 | Delimiter::None => ("", ""), |
| 108 | }; |
| 109 | write!(result, "{}", open_delimiter)?; |
| 110 | tokens_to_string_impl(result, tt.stream())?; |
| 111 | write!(result, "{}", closed_delimiter)?; |
| 112 | } |
Michael Forster | 0de2b8c | 2021-10-11 08:28:49 +0000 | [diff] [blame] | 113 | _ => { |
| 114 | write!(result, "{}", tt)?; |
Googler | 42d540f | 2021-09-29 06:37:23 +0000 | [diff] [blame] | 115 | |
Michael Forster | 0de2b8c | 2021-10-11 08:28:49 +0000 | [diff] [blame] | 116 | // Insert spaces between tokens only when they are needed to separate |
| 117 | // identifiers or literals from each other. |
| 118 | if is_ident_or_literal(&tt) |
| 119 | && matches!(it.peek(), Some(tt_next) if is_ident_or_literal(tt_next)) |
| 120 | { |
| 121 | write!(result, " ")?; |
| 122 | } |
| 123 | } |
| 124 | } |
Marcel Hlopko | 3164eee | 2021-08-24 20:09:22 +0000 | [diff] [blame] | 125 | } |
Rosica Dejanovska | da9105d | 2022-03-30 09:38:16 -0700 | [diff] [blame] | 126 | Ok(()) |
Marcel Hlopko | 3164eee | 2021-08-24 20:09:22 +0000 | [diff] [blame] | 127 | } |
| 128 | |
Marcel Hlopko | 65d05f0 | 2021-12-09 12:29:24 +0000 | [diff] [blame] | 129 | fn is_ident_or_literal(tt: &TokenTree) -> bool { |
Devin Jeanpierre | 326134a | 2022-04-19 08:19:05 -0700 | [diff] [blame] | 130 | match tt { |
Devin Jeanpierre | 2b4182b | 2022-04-19 08:23:50 -0700 | [diff] [blame] | 131 | TokenTree::Ident(id) => id != "__NEWLINE__" && id != "__SPACE__", |
| 132 | TokenTree::Literal(_) => true, |
Devin Jeanpierre | 326134a | 2022-04-19 08:19:05 -0700 | [diff] [blame] | 133 | _ => false, |
| 134 | } |
Marcel Hlopko | 65d05f0 | 2021-12-09 12:29:24 +0000 | [diff] [blame] | 135 | } |
| 136 | |
Lukasz Anforowicz | 54ff318 | 2022-05-06 07:17:58 -0700 | [diff] [blame] | 137 | fn rustfmt(input: String, config: &RustfmtConfig) -> Result<String> { |
| 138 | // TODO(b/230021743): Avoid hardcoding the path to `rustfmt`. Either: |
| 139 | // - Long-term: TODO(b/231320237): This should use rustfmt as a library as soon |
| 140 | // as b/200503084 is fixed. |
| 141 | // - Short-term: Add a way to specify `rustfmt_exe_path` as a command line |
| 142 | // parameter. Or just return `input` if the executable is not found at the |
| 143 | // given path. |
| 144 | let rustfmt_exe_path: &OsStr = |
| 145 | OsStr::new("third_party/unsupported_toolchains/rust/toolchains/nightly/bin/rustfmt"); |
Marcel Hlopko | 65d05f0 | 2021-12-09 12:29:24 +0000 | [diff] [blame] | 146 | |
Lukasz Anforowicz | 54ff318 | 2022-05-06 07:17:58 -0700 | [diff] [blame] | 147 | let mut child = Command::new(rustfmt_exe_path) |
| 148 | .args(config.args.iter()) |
Marcel Hlopko | 65d05f0 | 2021-12-09 12:29:24 +0000 | [diff] [blame] | 149 | .stdin(Stdio::piped()) |
| 150 | .stdout(Stdio::piped()) |
Lukasz Anforowicz | 97928ef | 2022-02-09 14:29:17 +0000 | [diff] [blame] | 151 | .stderr(Stdio::piped()) |
Marcel Hlopko | 65d05f0 | 2021-12-09 12:29:24 +0000 | [diff] [blame] | 152 | .spawn() |
Lukasz Anforowicz | 54ff318 | 2022-05-06 07:17:58 -0700 | [diff] [blame] | 153 | .unwrap_or_else(|_| panic!("Failed to spawn rustfmt at {:?}", rustfmt_exe_path)); |
Marcel Hlopko | 65d05f0 | 2021-12-09 12:29:24 +0000 | [diff] [blame] | 154 | |
| 155 | let mut stdin = child.stdin.take().expect("Failed to open rustfmt stdin"); |
| 156 | std::thread::spawn(move || { |
| 157 | stdin.write_all(input.as_bytes()).expect("Failed to write to rustfmt stdin"); |
| 158 | }); |
| 159 | let output = child.wait_with_output().expect("Failed to read rustfmt stdout"); |
| 160 | |
| 161 | if !output.status.success() { |
Lukasz Anforowicz | 97928ef | 2022-02-09 14:29:17 +0000 | [diff] [blame] | 162 | bail!("rustfmt reported an error: {}", String::from_utf8_lossy(&output.stderr)); |
Marcel Hlopko | 65d05f0 | 2021-12-09 12:29:24 +0000 | [diff] [blame] | 163 | } |
| 164 | |
Marcel Hlopko | ca84ff4 | 2021-12-09 14:15:14 +0000 | [diff] [blame] | 165 | Ok(String::from_utf8_lossy(&output.stdout).to_string()) |
Marcel Hlopko | 65d05f0 | 2021-12-09 12:29:24 +0000 | [diff] [blame] | 166 | } |
| 167 | |
#[cfg(test)]
mod tests {
    use super::*;

    use super::Result;
    use quote::quote;

    /// Ordinary Rust items are printed compactly, with spaces only where two
    /// identifiers/literals meet and inside braces.
    #[test]
    fn test_simple_token_stream() -> Result<()> {
        let input = quote! {
            struct Foo {}

            impl Bar for Foo {
                fn bar(&self) {}
            }
        };
        let printed = tokens_to_string(input)?;
        assert_eq!(printed, "struct Foo{ }impl Bar for Foo{ fn bar(&self){ } }");
        Ok(())
    }

    /// Adjacent identifiers and literals are kept apart by a single space.
    #[test]
    fn test_space_idents_and_literals() -> Result<()> {
        let printed = tokens_to_string(quote! { foo 42 bar 23 })?;
        assert_eq!(printed, "foo 42 bar 23");
        Ok(())
    }

    /// No whitespace is inserted around punctuation.
    #[test]
    fn test_dont_space_punctuation() -> Result<()> {
        let printed = tokens_to_string(quote! { foo+42+bar+23 })?;
        assert_eq!(printed, "foo+42+bar+23");
        Ok(())
    }

    /// `__NEWLINE__` turns into a newline without any surrounding spaces.
    #[test]
    fn test_newline_token() -> Result<()> {
        let printed = tokens_to_string(quote! { a __NEWLINE__ b })?;
        assert_eq!(printed, "a\nb");
        Ok(())
    }

    /// `__SPACE__` turns into a single space, also next to punctuation.
    #[test]
    fn test_space_token() -> Result<()> {
        let printed = tokens_to_string(quote! { a __SPACE__ = __SPACE__ b })?;
        assert_eq!(printed, "a = b");
        Ok(())
    }

    /// A `__SPACE__` between two identifiers does not produce a double space.
    #[test]
    fn test_redundant_space_token() -> Result<()> {
        let printed = tokens_to_string(quote! { a __SPACE__ b })?;
        assert_eq!(printed, "a b");
        Ok(())
    }

    /// `__HASH_TOKEN__` turns into `#`.
    #[test]
    fn test_hash_token() -> Result<()> {
        let printed = tokens_to_string(quote! { a __HASH_TOKEN__ b })?;
        assert_eq!(printed, "a #b");
        Ok(())
    }

    /// A C++ `#include` of a standard header can be expressed with
    /// `__HASH_TOKEN__`.
    #[test]
    fn test_include_standard_header() -> Result<()> {
        let printed = tokens_to_string(quote! { __HASH_TOKEN__ include <cstddef> })?;
        assert_eq!(printed, "#include<cstddef>");
        Ok(())
    }

    /// A multi-line `__COMMENT__` produces one `//` line per input line.
    #[test]
    fn test_comments() -> Result<()> {
        let printed = tokens_to_string(quote! { __COMMENT__ "line1\nline2" })?;
        assert_eq!(printed, "// line1\n// line2\n");
        Ok(())
    }

    /// `__COMMENT__` without a following literal is rejected.
    #[test]
    fn test_invalid_comment() -> Result<()> {
        let missing_literal = tokens_to_string(quote! { __COMMENT__ });
        assert!(missing_literal.is_err());
        let ident_instead_of_literal = tokens_to_string(quote! { __COMMENT__ ident });
        assert!(ident_instead_of_literal.is_err());
        Ok(())
    }

    #[test]
    fn test_doc_comment() -> Result<()> {
        // token_stream_printer (and rustfmt) don't put a space between /// and the doc
        // comment, if the space is desired, it has to appear in the annotation.
        let single_line =
            rs_tokens_to_formatted_string_for_tests(quote! { #[doc = "hello"] struct X {} })?;
        assert_eq!(single_line, "///hello\nstruct X {}\n");

        let multi_line = rs_tokens_to_formatted_string_for_tests(
            quote! { #[doc = "hello\nworld"] struct X {} },
        )?;
        assert_eq!(multi_line, "///hello\n///world\nstruct X {}\n");
        Ok(())
    }

    /// Leading spaces inside the doc attribute are carried into the comment.
    #[test]
    fn test_doc_comment_leading_spaces() -> Result<()> {
        let single_line =
            rs_tokens_to_formatted_string_for_tests(quote! { #[doc = " hello"] struct X {} })?;
        assert_eq!(single_line, "/// hello\nstruct X {}\n");

        let multi_line = rs_tokens_to_formatted_string_for_tests(
            quote! { #[doc = " hello\n world"] struct X {} },
        )?;
        assert_eq!(multi_line, "/// hello\n/// world\nstruct X {}\n");
        Ok(())
    }

    /// Placeholders are expanded inside delimited groups as well.
    #[test]
    fn test_special_tokens_in_groups() -> Result<()> {
        let newline_in_braces = tokens_to_string(quote! {{ a __NEWLINE__ b }})?;
        assert_eq!(newline_in_braces, "{ a\nb }");

        let space_in_braces = tokens_to_string(quote! {{ a __SPACE__ b }})?;
        assert_eq!(space_in_braces, "{ a b }");

        let comment_in_parens = tokens_to_string(quote! {(a __COMMENT__ "b")})?;
        assert_eq!(comment_in_parens, "(a // b\n)");

        let hash_in_brackets = tokens_to_string(quote! {[__HASH_TOKEN__ a]})?;
        assert_eq!(hash_in_brackets, "[#a]");
        Ok(())
    }
}