// Part of the Crubit project, under the Apache License v2.0 with LLVM
// Exceptions. See /LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

use anyhow::{bail, Context as _, Result};
use proc_macro2::{Delimiter, TokenStream, TokenTree};
use std::ffi::{OsStr, OsString};
use std::fmt::Write as _;
use std::io::Write as _;
use std::process::{Command, Stdio};

// TODO(b/231320237): The `RustfmtConfig` struct should be replaced with
// `rustfmt_nightly::Config` once we switch to using rustfmt as a library
// (instead of invoking the `rustfmt` executable).
/// Describes how the `rustfmt` executable is invoked: where the executable
/// lives and which command-line arguments are passed to it.
#[derive(Clone, Debug)]
pub struct RustfmtConfig {
    /// Path to the `rustfmt` executable.
    exe_path: OsString,

    /// Cmdline arguments to be passed to the `rustfmt` executable.
    cmdline_args: Vec<OsString>,
}

impl RustfmtConfig {
    /// Creates a config that will invoke `rustfmt` at the given
    /// `rustfmt_exe_path`. If `rustfmt_config_path` is a non-empty string,
    /// then a `rustfmt.toml` file at that path will be used to configure
    /// the formatting details; otherwise a default formatting will be used.
    pub fn new(rustfmt_exe_path: &OsStr, rustfmt_config_path: &OsStr) -> Self {
        let cmdline_args = if rustfmt_config_path.is_empty() {
            Self::default_cmdline_args()
        } else {
            Self::cmdline_args_with_custom_config_path(rustfmt_config_path)
        };
        Self { exe_path: rustfmt_exe_path.to_os_string(), cmdline_args }
    }

    /// Creates a config that uses the default formatting and a hard-coded
    /// path to the `rustfmt` executable.
    fn for_testing() -> Self {
        Self {
            exe_path: OsString::from(
                "third_party/unsupported_toolchains/rust/toolchains/nightly/bin/rustfmt",
            ),
            cmdline_args: Self::default_cmdline_args(),
        }
    }

    /// Returns the arguments that point `rustfmt` at the config file at
    /// `rustfmt_config_path`, followed by the always-applied overrides.
    fn cmdline_args_with_custom_config_path(rustfmt_config_path: &OsStr) -> Vec<OsString> {
        let mut config_path_arg = OsString::from("--config-path=");
        config_path_arg.push(rustfmt_config_path);
        Self::append_config_overrides(vec![config_path_arg])
    }

    /// Returns the arguments used when no custom config file was given,
    /// followed by the always-applied overrides.
    fn default_cmdline_args() -> Vec<OsString> {
        Self::append_config_overrides(vec![
            OsString::from("--edition=2021"),
            OsString::from("--config=version=Two"),
        ])
    }

    /// Appends the `--config=...` overrides that are passed to `rustfmt`
    /// regardless of which base arguments were chosen, and returns the
    /// extended argument list.
    fn append_config_overrides(mut cmdline_args: Vec<OsString>) -> Vec<OsString> {
        const OVERRIDES: [&str; 2] = [
            // We are representing doc comments as attributes in the token stream and use
            // rustfmt to unpack them again.
            "--config=normalize_doc_attributes=true",
            // We don't want rustfmt to reflow C++ doc comments, so we turn off wrapping
            // globally and reflow generated comments manually.
            "--config=wrap_comments=false",
        ];
        cmdline_args.extend(OVERRIDES.iter().copied().map(OsString::from));
        cmdline_args
    }
}

Marcel Hlopko65d05f02021-12-09 12:29:24 +000070/// Like `tokens_to_string` but also runs the result through rustfmt.
Lukasz Anforowicz54ff3182022-05-06 07:17:58 -070071pub fn rs_tokens_to_formatted_string(
72 tokens: TokenStream,
73 config: &RustfmtConfig,
74) -> Result<String> {
75 rustfmt(tokens_to_string(tokens)?, config)
76}
77
78/// Like `rs_tokens_to_formatted_string`, but always using a Crubit-internal,
79/// default rustfmt config. This should only be called by tests - product code
80/// should support custom `rustfmt.toml`.
81pub fn rs_tokens_to_formatted_string_for_tests(input: TokenStream) -> Result<String> {
Lukasz Anforowiczd7d68f02022-05-26 07:41:02 -070082 rs_tokens_to_formatted_string(input, &RustfmtConfig::for_testing())
Googler42d540f2021-09-29 06:37:23 +000083}
84
Marcel Hlopko65d05f02021-12-09 12:29:24 +000085/// Produces source code out of the token stream.
Marcel Hlopko3164eee2021-08-24 20:09:22 +000086///
87/// Notable features:
Michael Forsterbee84482021-10-13 08:35:38 +000088/// * quote! cannot produce a single `#` token (that is not immediately followed
89/// by `(`, `[`, `{`, or variable interpolation). For cases when we need `#`
90/// to be produced in the C++ source code use the placeholder
91/// `__HASH_TOKEN__`.
92/// * The Rust tokenizer ignores newlines as they are not significant for Rust.
93/// For C++ they are (for example there needs to be a newline after `#include
94/// "foo/bar.h"`). We are also using explict newlines for making the generated
95/// Rust/C++ source code more readable. Use the placeholder `__NEWLINE__` to
96/// insert a newline character.
Devin Jeanpierre2b4182b2022-04-19 08:23:50 -070097/// * `TokenStream` cannot encode formatting whitespace, so we use the
98/// placeholder `__SPACE__`.
Michael Forsterbee84482021-10-13 08:35:38 +000099/// * `TokenStream` cannot encode comments, so we use the placeholder
100/// `__COMMENT__`, followed by a string literal.
Michael Forsterdb8101a2021-10-08 06:56:03 +0000101pub fn tokens_to_string(tokens: TokenStream) -> Result<String> {
Michael Forster0de2b8c2021-10-11 08:28:49 +0000102 let mut result = String::new();
Rosica Dejanovskada9105d2022-03-30 09:38:16 -0700103 tokens_to_string_impl(&mut result, tokens)?;
104 Ok(result)
105}
106
107fn tokens_to_string_impl(result: &mut String, tokens: TokenStream) -> Result<()> {
Michael Forster0de2b8c2021-10-11 08:28:49 +0000108 let mut it = tokens.into_iter().peekable();
109 while let Some(tt) = it.next() {
Marcel Hlopko3164eee2021-08-24 20:09:22 +0000110 match tt {
111 TokenTree::Ident(ref tt) if tt == "__NEWLINE__" => writeln!(result)?,
Devin Jeanpierre2b4182b2022-04-19 08:23:50 -0700112 TokenTree::Ident(ref tt) if tt == "__SPACE__" => write!(result, " ")?,
Marcel Hlopko3164eee2021-08-24 20:09:22 +0000113 TokenTree::Ident(ref tt) if tt == "__HASH_TOKEN__" => write!(result, "#")?,
114
Michael Forster523dbd42021-10-12 11:05:44 +0000115 TokenTree::Ident(ref tt) if tt == "__COMMENT__" => {
116 if let Some(TokenTree::Literal(lit)) = it.next() {
117 writeln!(
118 result,
119 "// {}",
120 lit.to_string().trim_matches('"').replace("\\n", "\n// ")
121 )?;
122 } else {
123 bail!("__COMMENT__ must be followed by a literal")
124 }
125 }
Rosica Dejanovskada9105d2022-03-30 09:38:16 -0700126 TokenTree::Group(ref tt) => {
127 let (open_delimiter, closed_delimiter) = match tt.delimiter() {
128 Delimiter::Parenthesis => ("(", ")"),
129 Delimiter::Bracket => ("[", "]"),
130 Delimiter::Brace => ("{ ", " }"),
131 Delimiter::None => ("", ""),
132 };
133 write!(result, "{}", open_delimiter)?;
134 tokens_to_string_impl(result, tt.stream())?;
135 write!(result, "{}", closed_delimiter)?;
136 }
Michael Forster0de2b8c2021-10-11 08:28:49 +0000137 _ => {
138 write!(result, "{}", tt)?;
Googler42d540f2021-09-29 06:37:23 +0000139
Michael Forster0de2b8c2021-10-11 08:28:49 +0000140 // Insert spaces between tokens only when they are needed to separate
141 // identifiers or literals from each other.
142 if is_ident_or_literal(&tt)
143 && matches!(it.peek(), Some(tt_next) if is_ident_or_literal(tt_next))
144 {
145 write!(result, " ")?;
146 }
147 }
148 }
Marcel Hlopko3164eee2021-08-24 20:09:22 +0000149 }
Rosica Dejanovskada9105d2022-03-30 09:38:16 -0700150 Ok(())
Marcel Hlopko3164eee2021-08-24 20:09:22 +0000151}
152
Marcel Hlopko65d05f02021-12-09 12:29:24 +0000153fn is_ident_or_literal(tt: &TokenTree) -> bool {
Devin Jeanpierre326134a2022-04-19 08:19:05 -0700154 match tt {
Devin Jeanpierre2b4182b2022-04-19 08:23:50 -0700155 TokenTree::Ident(id) => id != "__NEWLINE__" && id != "__SPACE__",
156 TokenTree::Literal(_) => true,
Devin Jeanpierre326134a2022-04-19 08:19:05 -0700157 _ => false,
158 }
Marcel Hlopko65d05f02021-12-09 12:29:24 +0000159}
160
Lukasz Anforowicz54ff3182022-05-06 07:17:58 -0700161fn rustfmt(input: String, config: &RustfmtConfig) -> Result<String> {
Lukasz Anforowiczd7d68f02022-05-26 07:41:02 -0700162 let mut child = Command::new(&config.exe_path)
163 .args(config.cmdline_args.iter())
Marcel Hlopko65d05f02021-12-09 12:29:24 +0000164 .stdin(Stdio::piped())
165 .stdout(Stdio::piped())
Lukasz Anforowicz97928ef2022-02-09 14:29:17 +0000166 .stderr(Stdio::piped())
Marcel Hlopko65d05f02021-12-09 12:29:24 +0000167 .spawn()
Lukasz Anforowiczd7d68f02022-05-26 07:41:02 -0700168 .unwrap_or_else(|_| panic!("Failed to spawn rustfmt at {:?}", config.exe_path));
Marcel Hlopko65d05f02021-12-09 12:29:24 +0000169
170 let mut stdin = child.stdin.take().expect("Failed to open rustfmt stdin");
171 std::thread::spawn(move || {
172 stdin.write_all(input.as_bytes()).expect("Failed to write to rustfmt stdin");
173 });
174 let output = child.wait_with_output().expect("Failed to read rustfmt stdout");
175
176 if !output.status.success() {
Lukasz Anforowicz97928ef2022-02-09 14:29:17 +0000177 bail!("rustfmt reported an error: {}", String::from_utf8_lossy(&output.stderr));
Marcel Hlopko65d05f02021-12-09 12:29:24 +0000178 }
179
Marcel Hlopkoca84ff42021-12-09 14:15:14 +0000180 Ok(String::from_utf8_lossy(&output.stdout).to_string())
Marcel Hlopko65d05f02021-12-09 12:29:24 +0000181}
182
#[cfg(test)]
mod tests {
    use super::*;

    use super::Result;
    use quote::quote;

    // Ordinary Rust items are printed with only the required separators.
    #[test]
    fn test_simple_token_stream() -> Result<()> {
        let actual = tokens_to_string(quote! {
            struct Foo {}

            impl Bar for Foo {
                fn bar(&self) {}
            }
        })?;
        assert_eq!(actual, "struct Foo{ }impl Bar for Foo{ fn bar(&self){ } }");
        Ok(())
    }

    // Adjacent identifiers and literals are separated by a space.
    #[test]
    fn test_space_idents_and_literals() -> Result<()> {
        let actual = tokens_to_string(quote! { foo 42 bar 23 })?;
        assert_eq!(actual, "foo 42 bar 23");
        Ok(())
    }

    // No spaces are inserted around punctuation.
    #[test]
    fn test_dont_space_punctuation() -> Result<()> {
        let actual = tokens_to_string(quote! { foo+42+bar+23 })?;
        assert_eq!(actual, "foo+42+bar+23");
        Ok(())
    }

    #[test]
    fn test_newline_token() -> Result<()> {
        let actual = tokens_to_string(quote! { a __NEWLINE__ b })?;
        assert_eq!(actual, "a\nb");
        Ok(())
    }

    #[test]
    fn test_space_token() -> Result<()> {
        let actual = tokens_to_string(quote! { a __SPACE__ = __SPACE__ b })?;
        assert_eq!(actual, "a = b");
        Ok(())
    }

    // `__SPACE__` between two identifiers must not produce a double space.
    #[test]
    fn test_redundant_space_token() -> Result<()> {
        let actual = tokens_to_string(quote! { a __SPACE__ b })?;
        assert_eq!(actual, "a b");
        Ok(())
    }

    #[test]
    fn test_hash_token() -> Result<()> {
        let actual = tokens_to_string(quote! { a __HASH_TOKEN__ b })?;
        assert_eq!(actual, "a #b");
        Ok(())
    }

    #[test]
    fn test_include_standard_header() -> Result<()> {
        let actual = tokens_to_string(quote! { __HASH_TOKEN__ include <cstddef> })?;
        assert_eq!(actual, "#include<cstddef>");
        Ok(())
    }

    // Every line of the comment text gets its own `// ` prefix.
    #[test]
    fn test_comments() -> Result<()> {
        let actual = tokens_to_string(quote! { __COMMENT__ "line1\nline2" })?;
        assert_eq!(actual, "// line1\n// line2\n");
        Ok(())
    }

    // `__COMMENT__` without a following literal is an error.
    #[test]
    fn test_invalid_comment() -> Result<()> {
        let nothing_follows = tokens_to_string(quote! { __COMMENT__ });
        assert!(nothing_follows.is_err());
        let ident_follows = tokens_to_string(quote! { __COMMENT__ ident });
        assert!(ident_follows.is_err());
        Ok(())
    }

    #[test]
    fn test_doc_comment() -> Result<()> {
        // token_stream_printer (and rustfmt) don't put a space between /// and the doc
        // comment, if the space is desired, it has to appear in the annotation.
        let single_line =
            rs_tokens_to_formatted_string_for_tests(quote! { #[doc = "hello"] struct X {} })?;
        assert_eq!(single_line, "///hello\nstruct X {}\n");

        let multi_line = rs_tokens_to_formatted_string_for_tests(
            quote! { #[doc = "hello\nworld"] struct X {} },
        )?;
        assert_eq!(multi_line, "///hello\n///world\nstruct X {}\n");
        Ok(())
    }

    // Leading spaces inside the doc attribute are preserved.
    #[test]
    fn test_doc_comment_leading_spaces() -> Result<()> {
        let single_line =
            rs_tokens_to_formatted_string_for_tests(quote! { #[doc = " hello"] struct X {} })?;
        assert_eq!(single_line, "/// hello\nstruct X {}\n");

        let multi_line = rs_tokens_to_formatted_string_for_tests(
            quote! { #[doc = " hello\n world"] struct X {} },
        )?;
        assert_eq!(multi_line, "/// hello\n/// world\nstruct X {}\n");
        Ok(())
    }

    // Placeholders are also expanded when nested inside delimited groups.
    #[test]
    fn test_special_tokens_in_groups() -> Result<()> {
        let newline_in_braces = tokens_to_string(quote! {{ a __NEWLINE__ b }})?;
        assert_eq!(newline_in_braces, "{ a\nb }");

        let space_in_braces = tokens_to_string(quote! {{ a __SPACE__ b }})?;
        assert_eq!(space_in_braces, "{ a b }");

        let comment_in_parens = tokens_to_string(quote! {(a __COMMENT__ "b")})?;
        assert_eq!(comment_in_parens, "(a // b\n)");

        let hash_in_brackets = tokens_to_string(quote! {[__HASH_TOKEN__ a]})?;
        assert_eq!(hash_in_brackets, "[#a]");
        Ok(())
    }
}