Skip to content

Commit ba0d6c9

Browse files
Update generate-copyright
This tool now scans for cargo dependencies and includes any important-looking license files. We do this because cargo package metadata is not sufficient: the Apache-2.0 license, for example, says you have to include any NOTICE file. Also, authors != copyright holders (cargo has the former; we must include the latter).
1 parent 93ea767 commit ba0d6c9

File tree

7 files changed

+320
-12
lines changed

7 files changed

+320
-12
lines changed

Cargo.lock

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1408,6 +1408,8 @@ dependencies = [
14081408
"anyhow",
14091409
"serde",
14101410
"serde_json",
1411+
"tempfile",
1412+
"thiserror",
14111413
]
14121414

14131415
[[package]]

src/bootstrap/src/core/build_steps/run.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,8 @@ impl Step for GenerateCopyright {
217217
let mut cmd = builder.tool_cmd(Tool::GenerateCopyright);
218218
cmd.env("LICENSE_METADATA", &license_metadata);
219219
cmd.env("DEST", &dest);
220+
cmd.env("OUT_DIR", &builder.out);
221+
cmd.env("CARGO", &builder.initial_cargo);
220222
cmd.run(builder);
221223

222224
dest

src/tools/collect-license-metadata/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
name = "collect-license-metadata"
33
version = "0.1.0"
44
edition = "2021"
5+
description = "Runs the reuse tool and caches the output, so rust toolchain devs don't need to have reuse installed"
6+
license = "MIT OR Apache-2.0"
57

68
[dependencies]
79
anyhow = "1.0.65"

src/tools/collect-license-metadata/src/main.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,11 @@ use anyhow::Error;
88

99
use crate::licenses::LicensesInterner;
1010

11+
/// The entry point to the binary.
12+
///
13+
/// You should probably let `bootstrap` execute this program instead of running it directly.
14+
///
15+
/// Run `x.py run collect-license-metadata`
1116
fn main() -> Result<(), Error> {
1217
let reuse_exe: PathBuf = std::env::var_os("REUSE_EXE").expect("Missing REUSE_EXE").into();
1318
let dest: PathBuf = std::env::var_os("DEST").expect("Missing DEST").into();

src/tools/generate-copyright/Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,13 @@
22
name = "generate-copyright"
33
version = "0.1.0"
44
edition = "2021"
5+
description = "Produces a manifest of all the copyrighted materials in the Rust Toolchain"
56

67
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
78

89
[dependencies]
910
anyhow = "1.0.65"
1011
serde = { version = "1.0.147", features = ["derive"] }
1112
serde_json = "1.0.85"
13+
thiserror = "1"
14+
tempfile = "3"
Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,196 @@
1+
//! Gets metadata about a workspace from Cargo
2+
3+
use std::collections::{BTreeMap, BTreeSet};
4+
use std::ffi::{OsStr, OsString};
5+
use std::path::Path;
6+
7+
/// Describes how this module can fail
8+
#[derive(Debug, thiserror::Error)]
9+
pub enum Error {
10+
#[error("Failed to run cargo metadata: {0:?}")]
11+
LaunchingMetadata(#[from] std::io::Error),
12+
#[error("Failed get output from cargo metadata: {0:?}")]
13+
GettingMetadata(String),
14+
#[error("Failed parse JSON output from cargo metadata: {0:?}")]
15+
ParsingJson(#[from] serde_json::Error),
16+
#[error("Failed find expected JSON element {0} in output from cargo metadata")]
17+
MissingJsonElement(&'static str),
18+
#[error("Failed find expected JSON element {0} in output from cargo metadata for package {1}")]
19+
MissingJsonElementForPackage(String, String),
20+
#[error("Failed to run cargo vendor: {0:?}")]
21+
LaunchingVendor(std::io::Error),
22+
#[error("Failed to complete cargo vendor")]
23+
RunningVendor,
24+
}
25+
26+
/// A single out-of-tree package that the toolchain depends on.
///
/// The derived ordering compares fields in declaration order, so a set of
/// dependencies is sorted by name first and then by version.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct Dependency {
    /// Package name, as reported in the package metadata
    pub name: String,
    /// Package version number
    pub version: String,
    /// License the package is distributed under
    pub license: String,
    /// Authors listed in the package metadata
    pub authors: Vec<String>,
    /// Important files shipped with the package, keyed by file name and
    /// holding the file contents.
    ///
    /// This includes *COPYRIGHT*, *NOTICE*, *AUTHOR*, *LICENSE*, and *LICENCE* files, case-insensitive.
    pub notices: BTreeMap<OsString, String>,
}
42+
43+
/// Use `cargo` to get a list of dependencies and their license data.
44+
///
45+
/// This will involve running `cargo vendor` into `${BUILD}/vendor` so we can
46+
/// grab the license files.
47+
///
48+
/// Any dependency with a path beginning with `root_path` is ignored, as we
49+
/// assume `reuse` has covered it already.
50+
pub fn get(
51+
cargo: &Path,
52+
dest: &Path,
53+
root_path: &Path,
54+
manifest_paths: &[&Path],
55+
) -> Result<BTreeSet<Dependency>, Error> {
56+
let mut temp_set = BTreeSet::new();
57+
// Look at the metadata for each manifest
58+
for manifest_path in manifest_paths {
59+
if manifest_path.file_name() != Some(OsStr::new("Cargo.toml")) {
60+
panic!("cargo_manifest::get requires a path to a Cargo.toml file");
61+
}
62+
let metadata_json = get_metadata_json(cargo, manifest_path)?;
63+
let packages = metadata_json["packages"]
64+
.as_array()
65+
.ok_or_else(|| Error::MissingJsonElement("packages array"))?;
66+
for package in packages {
67+
let package =
68+
package.as_object().ok_or_else(|| Error::MissingJsonElement("package object"))?;
69+
let manifest_path = package
70+
.get("manifest_path")
71+
.and_then(|v| v.as_str())
72+
.map(Path::new)
73+
.ok_or_else(|| Error::MissingJsonElement("package.manifest_path"))?;
74+
if manifest_path.starts_with(&root_path) {
75+
// it's an in-tree dependency and reuse covers it
76+
continue;
77+
}
78+
// otherwise it's an out-of-tree dependency
79+
let get_string = |field_name: &str, package_name: &str| {
80+
package.get(field_name).and_then(|v| v.as_str()).ok_or_else(|| {
81+
Error::MissingJsonElementForPackage(
82+
format!("package.{field_name}"),
83+
package_name.to_owned(),
84+
)
85+
})
86+
};
87+
let name = get_string("name", "unknown")?;
88+
let license = get_string("license", name)?;
89+
let version = get_string("version", name)?;
90+
let authors_list = package
91+
.get("authors")
92+
.and_then(|v| v.as_array())
93+
.ok_or_else(|| Error::MissingJsonElement("package.authors"))?;
94+
let authors: Vec<String> =
95+
authors_list.iter().filter_map(|v| v.as_str()).map(|s| s.to_owned()).collect();
96+
temp_set.insert(Dependency {
97+
name: name.to_owned(),
98+
version: version.to_owned(),
99+
license: license.to_owned(),
100+
authors,
101+
notices: BTreeMap::new(),
102+
});
103+
}
104+
}
105+
106+
// Now do a cargo-vendor and grab everything
107+
let vendor_path = dest.join("vendor");
108+
println!("Vendoring deps into {}...", vendor_path.display());
109+
run_cargo_vendor(cargo, &vendor_path, manifest_paths)?;
110+
111+
// Now for each dependency we found, go and grab any important looking files
112+
let mut output = BTreeSet::new();
113+
for mut dep in temp_set {
114+
load_important_files(&mut dep, &vendor_path)?;
115+
output.insert(dep);
116+
}
117+
118+
Ok(output)
119+
}
120+
121+
/// Get cargo-metdata for a package, as JSON
122+
fn get_metadata_json(cargo: &Path, manifest_path: &Path) -> Result<serde_json::Value, Error> {
123+
let metadata_output = std::process::Command::new(cargo)
124+
.arg("metadata")
125+
.arg("--format-version=1")
126+
.arg("--all-features")
127+
.arg("--manifest-path")
128+
.arg(manifest_path)
129+
.env("RUSTC_BOOTSTRAP", "1")
130+
.output()
131+
.map_err(|e| Error::LaunchingMetadata(e))?;
132+
if !metadata_output.status.success() {
133+
return Err(Error::GettingMetadata(
134+
String::from_utf8(metadata_output.stderr).expect("UTF-8 output from cargo"),
135+
));
136+
}
137+
let json = serde_json::from_slice(&metadata_output.stdout)?;
138+
Ok(json)
139+
}
140+
141+
/// Run cargo-vendor, fetching into the given dir
142+
fn run_cargo_vendor(cargo: &Path, dest: &Path, manifest_paths: &[&Path]) -> Result<(), Error> {
143+
let mut vendor_command = std::process::Command::new(cargo);
144+
vendor_command.env("RUSTC_BOOTSTRAP", "1");
145+
vendor_command.arg("vendor");
146+
vendor_command.arg("--quiet");
147+
vendor_command.arg("--versioned-dirs");
148+
for manifest_path in manifest_paths {
149+
vendor_command.arg("-s");
150+
vendor_command.arg(manifest_path);
151+
}
152+
vendor_command.arg(dest);
153+
154+
let vendor_status = vendor_command.status().map_err(|e| Error::LaunchingVendor(e))?;
155+
156+
if !vendor_status.success() {
157+
return Err(Error::RunningVendor);
158+
}
159+
160+
Ok(())
161+
}
162+
163+
/// Add important files off disk into this dependency.
164+
///
165+
/// Maybe one-day Cargo.toml will contain enough information that we don't need
166+
/// to do this manual scraping.
167+
fn load_important_files(dep: &mut Dependency, vendor_root: &Path) -> Result<(), Error> {
168+
let name_version = format!("{}-{}", dep.name, dep.version);
169+
println!("Scraping notices for {}...", name_version);
170+
let dep_vendor_path = vendor_root.join(name_version);
171+
for entry in std::fs::read_dir(dep_vendor_path)? {
172+
let entry = entry?;
173+
let metadata = entry.metadata()?;
174+
let path = entry.path();
175+
if let Some(filename) = path.file_name() {
176+
let lc_filename = filename.to_ascii_lowercase();
177+
let lc_filename_str = lc_filename.to_string_lossy();
178+
let mut keep = false;
179+
for m in ["copyright", "licence", "license", "author", "notice"] {
180+
if lc_filename_str.contains(m) {
181+
keep = true;
182+
break;
183+
}
184+
}
185+
if keep {
186+
if metadata.is_dir() {
187+
// scoop up whole directory
188+
} else if metadata.is_file() {
189+
println!("Scraping {}", filename.to_string_lossy());
190+
dep.notices.insert(filename.to_owned(), std::fs::read_to_string(path)?);
191+
}
192+
}
193+
}
194+
}
195+
Ok(())
196+
}

0 commit comments

Comments
 (0)