Skip to content

Commit 33c2550

Browse files
committed
Ruby: add minimal path transformer support
Supports only a minimal subset of the project layout specification; enough to work with the transformers produced by the CLI when building an overlay database.
1 parent 2ba417f commit 33c2550

File tree

6 files changed

+137
-29
lines changed

6 files changed

+137
-29
lines changed

ruby/extractor/src/extractor.rs

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use clap::Args;
2+
use codeql_extractor::file_paths::PathTransformer;
23
use lazy_static::lazy_static;
34
use rayon::prelude::*;
45
use serde_json;
@@ -81,6 +82,7 @@ pub fn run(options: Options) -> std::io::Result<()> {
8182
let file_list = fs::File::open(file_paths::path_from_string(&options.file_list))?;
8283

8384
let overlay_changed_files: Option<HashSet<PathBuf>> = get_overlay_changed_files();
85+
let path_transformer = file_paths::load_path_transformer()?;
8486

8587
let language: Language = tree_sitter_ruby::LANGUAGE.into();
8688
let erb: Language = tree_sitter_embedded_template::LANGUAGE.into();
@@ -105,7 +107,7 @@ pub fn run(options: Options) -> std::io::Result<()> {
105107
}
106108
_ => {},
107109
}
108-
let src_archive_file = file_paths::path_for(&src_archive_dir, &path, "");
110+
let src_archive_file = file_paths::path_for(&src_archive_dir, &path, "", path_transformer.as_ref());
109111
let mut source = std::fs::read(&path)?;
110112
let mut needs_conversion = false;
111113
let code_ranges;
@@ -118,6 +120,7 @@ pub fn run(options: Options) -> std::io::Result<()> {
118120
&erb_schema,
119121
&mut diagnostics_writer,
120122
&mut trap_writer,
123+
path_transformer.as_ref(),
121124
&path,
122125
&source,
123126
&[],
@@ -162,7 +165,7 @@ pub fn run(options: Options) -> std::io::Result<()> {
162165
"character-decoding-error",
163166
"Character decoding error",
164167
)
165-
.file(&file_paths::normalize_path(&path))
168+
.file(&file_paths::normalize_and_transform_path(&path, path_transformer.as_ref()))
166169
.message(
167170
"Could not decode the file contents as {}: {}. The contents of the file must match the character encoding specified in the {} {}.",
168171
&[
@@ -182,7 +185,7 @@ pub fn run(options: Options) -> std::io::Result<()> {
182185
diagnostics_writer.write(
183186
diagnostics_writer
184187
.new_entry("unknown-character-encoding", "Could not process some files due to an unknown character encoding")
185-
.file(&file_paths::normalize_path(&path))
188+
.file(&file_paths::normalize_and_transform_path(&path, path_transformer.as_ref()))
186189
.message(
187190
"Unknown character encoding {} in {} {}.",
188191
&[
@@ -205,6 +208,7 @@ pub fn run(options: Options) -> std::io::Result<()> {
205208
&schema,
206209
&mut diagnostics_writer,
207210
&mut trap_writer,
211+
path_transformer.as_ref(),
208212
&path,
209213
&source,
210214
&code_ranges,
@@ -215,14 +219,20 @@ pub fn run(options: Options) -> std::io::Result<()> {
215219
} else {
216220
std::fs::copy(&path, &src_archive_file)?;
217221
}
218-
write_trap(&trap_dir, path, &trap_writer, trap_compression)
222+
write_trap(&trap_dir, path, &trap_writer, trap_compression, path_transformer.as_ref())
219223
})
220224
.expect("failed to extract files");
221225

222226
let path = PathBuf::from("extras");
223227
let mut trap_writer = trap::Writer::new();
224228
extractor::populate_empty_location(&mut trap_writer);
225-
let res = write_trap(&trap_dir, path, &trap_writer, trap_compression);
229+
let res = write_trap(
230+
&trap_dir,
231+
path,
232+
&trap_writer,
233+
trap_compression,
234+
path_transformer.as_ref(),
235+
);
226236
if let Ok(output_path) = std::env::var("CODEQL_EXTRACTOR_RUBY_OVERLAY_BASE_METADATA_OUT") {
227237
// We're extracting an overlay base. For now, we don't have any metadata we need to store
228238
// that would get read when extracting the overlay, but the CLI expects us to write
@@ -254,8 +264,14 @@ fn write_trap(
254264
path: PathBuf,
255265
trap_writer: &trap::Writer,
256266
trap_compression: trap::Compression,
267+
path_transformer: Option<&PathTransformer>,
257268
) -> std::io::Result<()> {
258-
let trap_file = file_paths::path_for(trap_dir, &path, trap_compression.extension());
269+
let trap_file = file_paths::path_for(
270+
trap_dir,
271+
&path,
272+
trap_compression.extension(),
273+
path_transformer,
274+
);
259275
std::fs::create_dir_all(trap_file.parent().unwrap())?;
260276
trap_writer.write_to_file(&trap_file, trap_compression)
261277
}

rust/extractor/src/archive.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ impl Archiver {
1515
}
1616

1717
fn try_archive(&self, source: &Path) -> std::io::Result<()> {
18-
let dest = file_paths::path_for(&self.root, source, "");
18+
let dest = file_paths::path_for(&self.root, source, "", None);
1919
if fs::metadata(&dest).is_ok() {
2020
return Ok(());
2121
}

rust/extractor/src/trap.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,7 @@ impl TrapFile {
212212
);
213213
}
214214
pub fn emit_file(&mut self, absolute_path: &Path) -> Label<generated::File> {
215-
let untyped = extractor::populate_file(&mut self.writer, absolute_path);
215+
let untyped = extractor::populate_file(&mut self.writer, absolute_path, None);
216216
// SAFETY: populate_file emits `@file` typed labels
217217
unsafe { Label::from_untyped(untyped) }
218218
}
@@ -268,6 +268,7 @@ impl TrapFileProvider {
268268
&self.trap_dir.join(category),
269269
key.as_ref(),
270270
self.compression.extension(),
271+
None,
271272
);
272273
debug!("creating trap file {}", path.display());
273274
let mut writer = trap::Writer::new();

shared/tree-sitter-extractor/src/extractor/mod.rs

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -67,19 +67,26 @@ pub fn default_subscriber_with_level(
6767
),
6868
)
6969
}
70-
pub fn populate_file(writer: &mut trap::Writer, absolute_path: &Path) -> trap::Label {
70+
pub fn populate_file(
71+
writer: &mut trap::Writer,
72+
absolute_path: &Path,
73+
transformer: Option<&file_paths::PathTransformer>,
74+
) -> trap::Label {
7175
let (file_label, fresh) = writer.global_id(&trap::full_id_for_file(
72-
&file_paths::normalize_path(absolute_path),
76+
&file_paths::normalize_and_transform_path(absolute_path, transformer),
7377
));
7478
if fresh {
7579
writer.add_tuple(
7680
"files",
7781
vec![
7882
trap::Arg::Label(file_label),
79-
trap::Arg::String(file_paths::normalize_path(absolute_path)),
83+
trap::Arg::String(file_paths::normalize_and_transform_path(
84+
absolute_path,
85+
transformer,
86+
)),
8087
],
8188
);
82-
populate_parent_folders(writer, file_label, absolute_path.parent());
89+
populate_parent_folders(writer, file_label, absolute_path.parent(), transformer);
8390
}
8491
file_label
8592
}
@@ -117,16 +124,17 @@ pub fn populate_parent_folders(
117124
writer: &mut trap::Writer,
118125
child_label: trap::Label,
119126
path: Option<&Path>,
127+
transformer: Option<&file_paths::PathTransformer>,
120128
) {
121129
let mut path = path;
122130
let mut child_label = child_label;
123131
loop {
124132
match path {
125133
None => break,
126134
Some(folder) => {
127-
let (folder_label, fresh) = writer.global_id(&trap::full_id_for_folder(
128-
&file_paths::normalize_path(folder),
129-
));
135+
let parent = folder.parent();
136+
let folder = file_paths::normalize_and_transform_path(folder, transformer);
137+
let (folder_label, fresh) = writer.global_id(&trap::full_id_for_folder(&folder));
130138
writer.add_tuple(
131139
"containerparent",
132140
vec![
@@ -137,12 +145,9 @@ pub fn populate_parent_folders(
137145
if fresh {
138146
writer.add_tuple(
139147
"folders",
140-
vec![
141-
trap::Arg::Label(folder_label),
142-
trap::Arg::String(file_paths::normalize_path(folder)),
143-
],
148+
vec![trap::Arg::Label(folder_label), trap::Arg::String(folder)],
144149
);
145-
path = folder.parent();
150+
path = parent;
146151
child_label = folder_label;
147152
} else {
148153
break;
@@ -205,11 +210,12 @@ pub fn extract(
205210
schema: &NodeTypeMap,
206211
diagnostics_writer: &mut diagnostics::LogWriter,
207212
trap_writer: &mut trap::Writer,
213+
transformer: Option<&file_paths::PathTransformer>,
208214
path: &Path,
209215
source: &[u8],
210216
ranges: &[Range],
211217
) {
212-
let path_str = file_paths::normalize_path(path);
218+
let path_str = file_paths::normalize_and_transform_path(path, transformer);
213219
let span = tracing::span!(
214220
tracing::Level::TRACE,
215221
"extract",
@@ -225,7 +231,7 @@ pub fn extract(
225231
parser.set_included_ranges(ranges).unwrap();
226232
let tree = parser.parse(source, None).expect("Failed to parse file");
227233
trap_writer.comment(format!("Auto-generated TRAP file for {}", path_str));
228-
let file_label = populate_file(trap_writer, path);
234+
let file_label = populate_file(trap_writer, path, transformer);
229235
let mut visitor = Visitor::new(
230236
source,
231237
diagnostics_writer,

shared/tree-sitter-extractor/src/extractor/simple.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ impl Extractor {
123123
let mut diagnostics_writer = diagnostics.logger();
124124
let path = PathBuf::from(line).canonicalize()?;
125125
let src_archive_file =
126-
crate::file_paths::path_for(&self.source_archive_dir, &path, "");
126+
crate::file_paths::path_for(&self.source_archive_dir, &path, "", None);
127127
let source = std::fs::read(&path)?;
128128
let mut trap_writer = trap::Writer::new();
129129

@@ -152,6 +152,7 @@ impl Extractor {
152152
&schemas[i],
153153
&mut diagnostics_writer,
154154
&mut trap_writer,
155+
None,
155156
&path,
156157
&source,
157158
&[],
@@ -183,7 +184,7 @@ fn write_trap(
183184
trap_writer: &trap::Writer,
184185
trap_compression: trap::Compression,
185186
) -> std::io::Result<()> {
186-
let trap_file = crate::file_paths::path_for(trap_dir, path, trap_compression.extension());
187+
let trap_file = crate::file_paths::path_for(trap_dir, path, trap_compression.extension(), None);
187188
std::fs::create_dir_all(trap_file.parent().unwrap())?;
188189
trap_writer.write_to_file(&trap_file, trap_compression)
189190
}

shared/tree-sitter-extractor/src/file_paths.rs

Lines changed: 89 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,81 @@
1-
use std::path::{Path, PathBuf};
1+
use std::{
2+
fs,
3+
path::{Path, PathBuf},
4+
};
25

3-
/// Normalizes the path according the common CodeQL specification. Assumes that
4-
/// `path` has already been canonicalized using `std::fs::canonicalize`.
5-
pub fn normalize_path(path: &Path) -> String {
6+
/// The minimum supported path transformation that is needed to support extracting overlay
/// databases. Specifically, it represents a transformer where one path prefix is replaced with a
/// different prefix.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PathTransformer {
    /// Prefix that is looked for at the start of a normalized path.
    pub original: String,
    /// Text that is substituted for `original` when a path starts with it.
    pub replacement: String,
}
13+
14+
/// Normalizes the path according to the common CodeQL specification, and, applies the given path
15+
/// transformer, if any. Assumes that `path` has already been canonicalized using
16+
/// `std::fs::canonicalize`.
17+
pub fn normalize_and_transform_path(path: &Path, transformer: Option<&PathTransformer>) -> String {
18+
let path = normalize_path(path);
19+
match transformer {
20+
Some(transformer) => match path.strip_prefix(&transformer.original) {
21+
Some(suffix) => format!("{}{}", transformer.replacement, suffix),
22+
None => path,
23+
},
24+
None => path,
25+
}
26+
}
27+
28+
/**
29+
* Attempts to load a path transformer.
30+
*
31+
* If the `CODEQL_PATH_TRANSFORMER` environment variable is not set, no transformer has been
32+
* specified and the function returns `Ok(None)`.
33+
*
34+
* If the environment variable is set, the function attempts to load the transformer from the file
35+
* at the * specified path. If this is successful, it returns `Ok(Some(PathTransformer))`.
36+
*
37+
* If the file cannot be read, or if it does not match the minimal subset of the path-transformer
38+
* syntax supported by this extractor, the function returns an error.
39+
*/
40+
pub fn load_path_transformer() -> std::io::Result<Option<PathTransformer>> {
41+
let path = match std::env::var("CODEQL_PATH_TRANSFORMER") {
42+
Ok(p) => p,
43+
Err(_) => return Ok(None),
44+
};
45+
let file_content = fs::read_to_string(path)?;
46+
let lines = file_content
47+
.lines()
48+
.map(|line| line.trim().to_owned())
49+
.filter(|line| !line.is_empty())
50+
.collect::<Vec<String>>();
51+
52+
if lines.len() != 2 {
53+
return Err(unsupported_transformer_error());
54+
}
55+
let replacement = lines[0]
56+
.strip_prefix('#')
57+
.ok_or(unsupported_transformer_error())?;
58+
let original = lines[1]
59+
.strip_suffix("//")
60+
.ok_or(unsupported_transformer_error())?;
61+
62+
Ok(Some(PathTransformer {
63+
original: original.to_owned(),
64+
replacement: replacement.to_owned(),
65+
}))
66+
}
67+
68+
/// Builds the `InvalidData` I/O error describing the only path-transformer shape this extractor
/// accepts.
fn unsupported_transformer_error() -> std::io::Error {
    use std::io::{Error, ErrorKind};

    let message = "This extractor only supports path transformers specifying a single path-prefix rewrite, \
                   with the first line starting with a # and the second line ending with //.";
    Error::new(ErrorKind::InvalidData, message)
}
75+
76+
/// Normalizes the path according to the common CodeQL specification. Assumes that `path` has
77+
/// already been canonicalized using `std::fs::canonicalize`.
78+
fn normalize_path(path: &Path) -> String {
679
if cfg!(windows) {
780
// The way Rust canonicalizes paths doesn't match the CodeQL spec, so we
881
// have to do a bit of work removing certain prefixes and replacing
@@ -93,7 +166,18 @@ pub fn path_from_string(path: &str) -> PathBuf {
93166
result
94167
}
95168

96-
pub fn path_for(dir: &Path, path: &Path, ext: &str) -> PathBuf {
169+
pub fn path_for(
170+
dir: &Path,
171+
path: &Path,
172+
ext: &str,
173+
transformer: Option<&PathTransformer>,
174+
) -> PathBuf {
175+
let path = if transformer.is_some() {
176+
let transformed = normalize_and_transform_path(path, transformer);
177+
PathBuf::from(transformed)
178+
} else {
179+
path.to_path_buf()
180+
};
97181
let mut result = PathBuf::from(dir);
98182
for component in path.components() {
99183
match component {

0 commit comments

Comments
 (0)