Retain struct doc comments from C++ headers.

We should also retain doc comments for functions and fields. That will come in
follow-up CLs.

PiperOrigin-RevId: 400961334
diff --git a/rs_bindings_from_cc/ast_visitor.cc b/rs_bindings_from_cc/ast_visitor.cc
index 009c2a5..726e734 100644
--- a/rs_bindings_from_cc/ast_visitor.cc
+++ b/rs_bindings_from_cc/ast_visitor.cc
@@ -17,8 +17,10 @@
 #include "third_party/llvm/llvm-project/clang/include/clang/AST/Decl.h"
 #include "third_party/llvm/llvm-project/clang/include/clang/AST/DeclCXX.h"
 #include "third_party/llvm/llvm-project/clang/include/clang/AST/Mangle.h"
+#include "third_party/llvm/llvm-project/clang/include/clang/AST/RawCommentList.h"
 #include "third_party/llvm/llvm-project/clang/include/clang/AST/RecordLayout.h"
 #include "third_party/llvm/llvm-project/clang/include/clang/AST/Type.h"
+#include "third_party/llvm/llvm-project/clang/include/clang/Basic/SourceManager.h"
 #include "third_party/llvm/llvm-project/clang/include/clang/Basic/Specifiers.h"
 #include "third_party/llvm/llvm-project/llvm/include/llvm/Support/Casting.h"
 
@@ -100,6 +102,15 @@
 }
 
 bool AstVisitor::VisitRecordDecl(clang::RecordDecl* record_decl) {
+  clang::ASTContext& ctx = record_decl->getASTContext();
+
+  clang::SourceManager& sm = ctx.getSourceManager();
+  clang::RawComment* raw_comment = ctx.getRawCommentForDeclNoCache(record_decl);
+  std::optional<std::string> doc_comment;
+  if (raw_comment != nullptr) {
+    doc_comment = raw_comment->getFormattedText(sm, sm.getDiagnostics());
+  }
+
   std::vector<Field> fields;
   clang::AccessSpecifier default_access = clang::AS_public;
   // The definition is always rewritten, but default access to `kPublic` in case
@@ -169,10 +180,9 @@
       }
     }
   }
-  const clang::ASTRecordLayout& layout =
-      record_decl->getASTContext().getASTRecordLayout(record_decl);
+  const clang::ASTRecordLayout& layout = ctx.getASTRecordLayout(record_decl);
   for (const clang::FieldDecl* field_decl : record_decl->fields()) {
-    auto type = ConvertType(field_decl->getType(), field_decl->getASTContext());
+    auto type = ConvertType(field_decl->getType(), ctx);
     if (!type.ok()) {
       // TODO(b/200239975):  Add diagnostics for declarations we can't import
       return true;
@@ -198,6 +208,7 @@
   }
   ir_.items.push_back(
       Record{.identifier = *record_name,
+             .doc_comment = doc_comment,
              .fields = std::move(fields),
              .size = layout.getSize().getQuantity(),
              .alignment = layout.getAlignment().getQuantity(),
diff --git a/rs_bindings_from_cc/ast_visitor_test.cc b/rs_bindings_from_cc/ast_visitor_test.cc
index 3c521cb..b92afd7 100644
--- a/rs_bindings_from_cc/ast_visitor_test.cc
+++ b/rs_bindings_from_cc/ast_visitor_test.cc
@@ -35,6 +35,15 @@
   return false;
 }
 
+// Matches an IR node that has the given doc comment.
+MATCHER_P(DocCommentIs, doc_comment, "") {
+  if (arg.doc_comment && *arg.doc_comment == doc_comment) return true;
+
+  *result_listener << "actual doc comment: '"
+                   << (arg.doc_comment ? *arg.doc_comment : "<none>") << "'";
+  return false;
+}
+
 // Matches a Func that has the given mangled name.
 MATCHER_P(MangledNameIs, mangled_name, "") {
   if (arg.mangled_name == mangled_name) return true;
@@ -584,5 +593,59 @@
           FieldType(IsSimpleType("f64", "double"))))));
 }
 
+TEST(AstVisitorTest, DocComment) {
+  IR ir = IrFromCc({R"(
+    /// Doc comment
+    ///
+    ///  * with three slashes
+    struct DocCommentSlashes {};
+
+    //! Doc comment
+    //!
+    //!  * with slashes and bang
+    struct DocCommentBang {};
+
+    /** Multiline comment
+
+         * with two stars */
+    struct MultilineCommentTwoStars {};
+
+    // Line comment
+    //
+    //  * with two slashes
+    struct LineComment {};
+
+    /* Multiline comment
+
+        * with one star */
+    struct MultilineOneStar {};
+    )"},
+                   {});
+
+  EXPECT_THAT(
+      ir.items,
+      ElementsAre(VariantWith<Record>(AllOf(
+                      IdentifierIs("DocCommentSlashes"),
+                      DocCommentIs("Doc comment\n\n * with three slashes"))),
+                  VariantWith<Record>(AllOf(
+                      IdentifierIs("DocCommentBang"),
+                      DocCommentIs("Doc comment\n\n * with slashes and bang"))),
+                  // TODO(forster): The bullet point is not retained in this
+                  // case. Instead we get the space at the end. Not sure if this
+                  // can be fixed easily...
+                  VariantWith<Record>(AllOf(
+                      IdentifierIs("MultilineCommentTwoStars"),
+                      DocCommentIs("Multiline comment\n\n with two stars "))),
+                  VariantWith<Record>(AllOf(
+                      IdentifierIs("LineComment"),
+                      DocCommentIs("Line comment\n\n * with two slashes"))),
+                  // TODO(forster): The bullet point is not retained in this
+                  // case. Instead we get the space at the end. Not sure if this
+                  // can be fixed easily...
+                  VariantWith<Record>(AllOf(
+                      IdentifierIs("MultilineOneStar"),
+                      DocCommentIs("Multiline comment\n\n with one star ")))));
+}
+
 }  // namespace
 }  // namespace rs_bindings_from_cc
diff --git a/rs_bindings_from_cc/ir.cc b/rs_bindings_from_cc/ir.cc
index 6145dec..5bdaf4f 100644
--- a/rs_bindings_from_cc/ir.cc
+++ b/rs_bindings_from_cc/ir.cc
@@ -143,6 +143,9 @@
 
   nlohmann::json record;
   record["identifier"] = identifier.ToJson();
+  if (doc_comment) {
+    record["doc_comment"] = *doc_comment;
+  }
   record["fields"] = std::move(json_fields);
   record["size"] = size;
   record["alignment"] = alignment;
diff --git a/rs_bindings_from_cc/ir.h b/rs_bindings_from_cc/ir.h
index 848bc3c..59ea7f4 100644
--- a/rs_bindings_from_cc/ir.h
+++ b/rs_bindings_from_cc/ir.h
@@ -234,6 +234,7 @@
   nlohmann::json ToJson() const;
 
   Identifier identifier;
+  std::optional<std::string> doc_comment;
   std::vector<Field> fields;
   // Size and alignment in bytes.
   int64_t size;
diff --git a/rs_bindings_from_cc/ir.rs b/rs_bindings_from_cc/ir.rs
index 41ac700..19523e6 100644
--- a/rs_bindings_from_cc/ir.rs
+++ b/rs_bindings_from_cc/ir.rs
@@ -89,6 +89,7 @@
 #[derive(Debug, PartialEq, Eq, Hash, Clone, Deserialize)]
 pub struct Record {
     pub identifier: Identifier,
+    pub doc_comment: Option<String>,
     pub fields: Vec<Field>,
     pub size: usize,
     pub alignment: usize,
@@ -263,6 +264,7 @@
         let expected = IR {
             items: vec![Item::Record(Record {
                 identifier: Identifier { identifier: "SomeStruct".to_string() },
+                doc_comment: None,
                 fields: vec![
                     Field {
                         identifier: Identifier { identifier: "public_int".to_string() },
@@ -370,6 +372,7 @@
         let expected = IR {
             items: vec![Item::Record(Record {
                 identifier: Identifier { identifier: "SomeStruct".to_string() },
+                doc_comment: None,
                 fields: vec![Field {
                     identifier: Identifier { identifier: "ptr".to_string() },
                     type_: MappedType {
diff --git a/rs_bindings_from_cc/ir_from_cc.cc b/rs_bindings_from_cc/ir_from_cc.cc
index 951dd3a..b32e8e4 100644
--- a/rs_bindings_from_cc/ir_from_cc.cc
+++ b/rs_bindings_from_cc/ir_from_cc.cc
@@ -35,6 +35,9 @@
 
   std::vector<std::string> args_as_strings(args.begin(), args.end());
   args_as_strings.push_back("--syntax-only");
+  // Needed so that we can copy over non-doc comments that are used as
+  // documentation.
+  args_as_strings.push_back("-fparse-all-comments");
   args_as_strings.push_back(std::string(kVirtualInputPath));
 
   IR ir;
diff --git a/rs_bindings_from_cc/ir_testing.rs b/rs_bindings_from_cc/ir_testing.rs
index af66922..2885ea2 100644
--- a/rs_bindings_from_cc/ir_testing.rs
+++ b/rs_bindings_from_cc/ir_testing.rs
@@ -2,7 +2,10 @@
 // Exceptions. See /LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-use ir::{self, CcType, Func, FuncParam, Identifier, Item, MappedType, Record, RsType, IR};
+use ir::{
+    self, CcType, Func, FuncParam, Identifier, Item, MappedType, Record, RsType, SpecialMemberFunc,
+    IR,
+};
 
 /// Creates an identifier
 pub fn ir_id(name: &str) -> Identifier {
@@ -33,20 +36,24 @@
     })
 }
 
-/// Creates a simple `Item::Record` with a given name.
-pub fn ir_record(name: &str) -> Item {
-    let public_trivial_special = ir::SpecialMemberFunc {
+pub fn ir_public_trivial_special() -> SpecialMemberFunc {
+    SpecialMemberFunc {
         definition: ir::SpecialMemberDefinition::Trivial,
         access: ir::AccessSpecifier::Public,
-    };
+    }
+}
+
+/// Creates a simple `Item::Record` with a given name.
+pub fn ir_record(name: &str) -> Item {
     Item::Record(Record {
         identifier: ir_id(name),
+        doc_comment: None,
         alignment: 0,
         size: 0,
         fields: vec![],
-        copy_constructor: public_trivial_special.clone(),
-        move_constructor: public_trivial_special.clone(),
-        destructor: public_trivial_special.clone(),
+        copy_constructor: ir_public_trivial_special(),
+        move_constructor: ir_public_trivial_special(),
+        destructor: ir_public_trivial_special(),
         is_trivial_abi: true,
     })
 }
diff --git a/rs_bindings_from_cc/rs_bindings_from_cc.cc b/rs_bindings_from_cc/rs_bindings_from_cc.cc
index bf325bb..1dc316e 100644
--- a/rs_bindings_from_cc/rs_bindings_from_cc.cc
+++ b/rs_bindings_from_cc/rs_bindings_from_cc.cc
@@ -50,6 +50,9 @@
       << "please specify at least one header in --public_headers";
 
   std::vector<std::string> command_line(argv, argv + argc);
+  // Needed so that we can copy over non-doc comments that are used as
+  // documentation.
+  command_line.push_back("-fparse-all-comments");
   command_line.push_back(std::string(kVirtualInputPath));
 
   std::string virtual_input_file_content;
diff --git a/rs_bindings_from_cc/src_code_gen.rs b/rs_bindings_from_cc/src_code_gen.rs
index ebd1658..df0879d 100644
--- a/rs_bindings_from_cc/src_code_gen.rs
+++ b/rs_bindings_from_cc/src_code_gen.rs
@@ -123,6 +123,13 @@
 /// Generates Rust source code for a given `Record`.
 fn generate_record(record: &Record) -> Result<TokenStream> {
     let ident = make_ident(&record.identifier.identifier);
+    let doc_comment = match &record.doc_comment {
+        Some(text) => {
+            let doc = format!(" {}", text.replace("\n", "\n "));
+            quote! {#[doc=#doc]}
+        }
+        None => quote! {},
+    };
     let field_idents =
         record.fields.iter().map(|f| make_ident(&f.identifier.identifier)).collect_vec();
     let field_types = record
@@ -153,6 +160,7 @@
             }
         });
     Ok(quote! {
+        #doc_comment
         #[repr(C)]
         pub struct #ident {
             #( #field_accesses #field_idents: #field_types, )*
@@ -229,7 +237,10 @@
 
     let mut child = Command::new(rustfmt)
         // TODO(forster): Add a way to specify this as a command line parameter.
-        .args(&["--config-path", "external/rustfmt/rustfmt.toml"])
+        .args(&[
+            "--config-path=external/rustfmt/rustfmt.toml",
+            "--config=normalize_doc_attributes=true",
+        ])
         .stdin(Stdio::piped())
         .stdout(Stdio::piped())
         .spawn()
@@ -390,7 +401,9 @@
     use super::{generate_rs_api, generate_rs_api_impl};
     use anyhow::anyhow;
     use ir::*;
-    use ir_testing::{ir_func, ir_id, ir_int, ir_int_param, ir_items, ir_record};
+    use ir_testing::{
+        ir_func, ir_id, ir_int, ir_int_param, ir_items, ir_public_trivial_special, ir_record,
+    };
     use quote::quote;
     use token_stream_printer::cc_tokens_to_string;
 
@@ -478,6 +491,7 @@
     fn test_simple_struct() -> Result<()> {
         let ir = ir_items(vec![Item::Record(Record {
             identifier: ir_id("SomeStruct"),
+            doc_comment: None,
             fields: vec![
                 Field {
                     identifier: ir_id("public_int"),
@@ -500,20 +514,12 @@
             ],
             size: 12,
             alignment: 4,
-            move_constructor: SpecialMemberFunc {
-                definition: SpecialMemberDefinition::Trivial,
-                access: AccessSpecifier::Public,
-            },
-            copy_constructor: SpecialMemberFunc {
-                definition: SpecialMemberDefinition::Trivial,
-                access: AccessSpecifier::Public,
-            },
-            destructor: SpecialMemberFunc {
-                definition: SpecialMemberDefinition::Trivial,
-                access: AccessSpecifier::Public,
-            },
+            copy_constructor: ir_public_trivial_special(),
+            move_constructor: ir_public_trivial_special(),
+            destructor: ir_public_trivial_special(),
             is_trivial_abi: true,
         })]);
+
         assert_eq!(
             generate_rs_api(&ir)?,
             rustfmt(
@@ -660,4 +666,28 @@
 
         Ok(())
     }
+
+    #[test]
+    fn test_doc_comment() -> Result<()> {
+        let ir = IR {
+            used_headers: vec![],
+            items: vec![Item::Record(Record {
+                identifier: ir_id("SomeStruct"),
+                doc_comment: Some("Doc Comment\n\n * with bullet".to_string()),
+                alignment: 0,
+                size: 0,
+                fields: vec![],
+                copy_constructor: ir_public_trivial_special(),
+                move_constructor: ir_public_trivial_special(),
+                destructor: ir_public_trivial_special(),
+                is_trivial_abi: true,
+            })],
+        };
+
+        generate_rs_api(&ir)?
+            .find("/// Doc Comment\n///\n///  * with bullet\n")
+            .expect("doc comment missing");
+
+        Ok(())
+    }
 }
diff --git a/rs_bindings_from_cc/test/golden/doc_comment.h b/rs_bindings_from_cc/test/golden/doc_comment.h
new file mode 100644
index 0000000..eac8552
--- /dev/null
+++ b/rs_bindings_from_cc/test/golden/doc_comment.h
@@ -0,0 +1,43 @@
+// Part of the Crubit project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef CRUBIT_RS_BINDINGS_FROM_CC_TEST_GOLDEN_DOC_COMMENT_H_
+#define CRUBIT_RS_BINDINGS_FROM_CC_TEST_GOLDEN_DOC_COMMENT_H_
+
+/// Doc comment
+///
+///  * with three slashes
+struct DocCommentSlashes {
+  int i;
+};
+
+//! Doc comment
+//!
+//!  * with slashes and bang
+struct DocCommentBang {
+  int i;
+};
+
+/** Multiline comment
+
+     * with two stars */
+struct MultilineCommentTwoStars {
+  int i;
+};
+
+// Line comment
+//
+//  * with two slashes
+struct LineComment {
+  int i;
+};
+
+/* Multiline comment
+
+    * with one star */
+struct MultilineOneStar {
+  int i;
+};
+
+#endif  // CRUBIT_RS_BINDINGS_FROM_CC_TEST_GOLDEN_DOC_COMMENT_H_
diff --git a/rs_bindings_from_cc/test/golden/doc_comment_rs_api.rs b/rs_bindings_from_cc/test/golden/doc_comment_rs_api.rs
new file mode 100644
index 0000000..58d8f39
--- /dev/null
+++ b/rs_bindings_from_cc/test/golden/doc_comment_rs_api.rs
@@ -0,0 +1,57 @@
+#![feature(const_ptr_offset_from, const_maybe_uninit_as_ptr, const_raw_ptr_deref)]
+// Part of the Crubit project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+use memoffset_unstable_const::offset_of;
+use static_assertions::const_assert_eq;
+/// Doc comment
+///
+///  * with three slashes
+#[repr(C)]
+pub struct DocCommentSlashes {
+    pub i: i32,
+}
+const_assert_eq!(std::mem::size_of::<DocCommentSlashes>(), 4usize);
+const_assert_eq!(std::mem::align_of::<DocCommentSlashes>(), 4usize);
+const_assert_eq!(offset_of!(DocCommentSlashes, i) * 8, 0usize);
+/// Doc comment
+///
+///  * with slashes and bang
+#[repr(C)]
+pub struct DocCommentBang {
+    pub i: i32,
+}
+const_assert_eq!(std::mem::size_of::<DocCommentBang>(), 4usize);
+const_assert_eq!(std::mem::align_of::<DocCommentBang>(), 4usize);
+const_assert_eq!(offset_of!(DocCommentBang, i) * 8, 0usize);
+/// Multiline comment
+///
+///  with two stars
+#[repr(C)]
+pub struct MultilineCommentTwoStars {
+    pub i: i32,
+}
+const_assert_eq!(std::mem::size_of::<MultilineCommentTwoStars>(), 4usize);
+const_assert_eq!(std::mem::align_of::<MultilineCommentTwoStars>(), 4usize);
+const_assert_eq!(offset_of!(MultilineCommentTwoStars, i) * 8, 0usize);
+/// Line comment
+///
+///  * with two slashes
+#[repr(C)]
+pub struct LineComment {
+    pub i: i32,
+}
+const_assert_eq!(std::mem::size_of::<LineComment>(), 4usize);
+const_assert_eq!(std::mem::align_of::<LineComment>(), 4usize);
+const_assert_eq!(offset_of!(LineComment, i) * 8, 0usize);
+/// Multiline comment
+///
+///  with one star
+#[repr(C)]
+pub struct MultilineOneStar {
+    pub i: i32,
+}
+const_assert_eq!(std::mem::size_of::<MultilineOneStar>(), 4usize);
+const_assert_eq!(std::mem::align_of::<MultilineOneStar>(), 4usize);
+const_assert_eq!(offset_of!(MultilineOneStar, i) * 8, 0usize);
diff --git a/rs_bindings_from_cc/test/golden/doc_comment_rs_api_impl.cc b/rs_bindings_from_cc/test/golden/doc_comment_rs_api_impl.cc
new file mode 100644
index 0000000..695b9ba
--- /dev/null
+++ b/rs_bindings_from_cc/test/golden/doc_comment_rs_api_impl.cc
@@ -0,0 +1,21 @@
+// Part of the Crubit project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <cstddef>
+#include "rs_bindings_from_cc/test/golden/doc_comment.h"
+static_assert(sizeof(DocCommentSlashes) == 4);
+static_assert(alignof(DocCommentSlashes) == 4);
+static_assert(offsetof(DocCommentSlashes, i) * 8 == 0);
+static_assert(sizeof(DocCommentBang) == 4);
+static_assert(alignof(DocCommentBang) == 4);
+static_assert(offsetof(DocCommentBang, i) * 8 == 0);
+static_assert(sizeof(MultilineCommentTwoStars) == 4);
+static_assert(alignof(MultilineCommentTwoStars) == 4);
+static_assert(offsetof(MultilineCommentTwoStars, i) * 8 == 0);
+static_assert(sizeof(LineComment) == 4);
+static_assert(alignof(LineComment) == 4);
+static_assert(offsetof(LineComment, i) * 8 == 0);
+static_assert(sizeof(MultilineOneStar) == 4);
+static_assert(alignof(MultilineOneStar) == 4);
+static_assert(offsetof(MultilineOneStar, i) * 8 == 0);
\ No newline at end of file