Skip to content

Commit 15645d0

Browse files
[IRSim] Adding basic implementation of llvm-sim.
This is a similarity visualization tool that accepts a Module and passes it to the IRSimilarityIdentifier. The resulting SimilarityGroups are output in a JSON file. Tests are found in test/tools/llvm-sim and check for the file not found, a bad module, and that the JSON is created correctly. Reviewers: paquette, jroelofs Differential Revision: https://reviews.llvm.org/D86974
1 parent 2a0ca17 commit 15645d0

File tree

8 files changed

+324
-0
lines changed

8 files changed

+324
-0
lines changed
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
define void @similar_func1() {
2+
entry:
3+
%a = alloca i32, align 4
4+
%b = alloca i32, align 4
5+
%c = alloca i32, align 4
6+
store i32 2, i32* %a, align 4
7+
store i32 3, i32* %b, align 4
8+
store i32 4, i32* %c, align 4
9+
%al = load i32, i32* %a
10+
%bl = load i32, i32* %b
11+
%cl = load i32, i32* %c
12+
ret void
13+
}
14+
15+
define void @similar_func2() {
16+
entry:
17+
%a = alloca i32, align 4
18+
%b = alloca i32, align 4
19+
%c = alloca i32, align 4
20+
store i32 2, i32* %a, align 4
21+
store i32 3, i32* %b, align 4
22+
store i32 4, i32* %c, align 4
23+
%al = load i32, i32* %a
24+
%bl = load i32, i32* %b
25+
%cl = load i32, i32* %c
26+
ret void
27+
}
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# RUN: not llvm-sim %s 2>&1 | FileCheck %s
2+
# RUN: not llvm-sim %s.2 2>&1 | FileCheck %s -check-prefix=EXIST
3+
4+
# File reading error messaging tests.
5+
6+
# CHECK: error: expected top-level entity
7+
8+
# EXIST: error: Could not open input file: No such file or directory
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
# RUN: llvm-sim -o %t %S/Inputs/sim1.ll
2+
# RUN: cat %t | FileCheck %s
3+
4+
# Checking the output of a single module test.
5+
6+
# CHECK: {
7+
# CHECK-NEXT: "1": [
8+
# CHECK-NEXT: {
9+
# CHECK-NEXT: "start": 8,
10+
# CHECK-NEXT: "end": 9
11+
# CHECK-NEXT: },
12+
# CHECK-NEXT: {
13+
# CHECK-NEXT: "start": 18,
14+
# CHECK-NEXT: "end": 19
15+
# CHECK-NEXT: }
16+
# CHECK-NEXT: ],
17+
# CHECK-NEXT: "2": [
18+
# CHECK-NEXT: {
19+
# CHECK-NEXT: "start": 7,
20+
# CHECK-NEXT: "end": 9
21+
# CHECK-NEXT: },
22+
# CHECK-NEXT: {
23+
# CHECK-NEXT: "start": 17,
24+
# CHECK-NEXT: "end": 19
25+
# CHECK-NEXT: }
26+
# CHECK-NEXT: ],
27+
# CHECK-NEXT: "3": [
28+
# CHECK-NEXT: {
29+
# CHECK-NEXT: "start": 6,
30+
# CHECK-NEXT: "end": 9
31+
# CHECK-NEXT: },
32+
# CHECK-NEXT: {
33+
# CHECK-NEXT: "start": 16,
34+
# CHECK-NEXT: "end": 19
35+
# CHECK-NEXT: }
36+
# CHECK-NEXT: ],
37+
# CHECK-NEXT: "4": [
38+
# CHECK-NEXT: {
39+
# CHECK-NEXT: "start": 5,
40+
# CHECK-NEXT: "end": 9
41+
# CHECK-NEXT: },
42+
# CHECK-NEXT: {
43+
# CHECK-NEXT: "start": 15,
44+
# CHECK-NEXT: "end": 19
45+
# CHECK-NEXT: }
46+
# CHECK-NEXT: ],
47+
# CHECK-NEXT: "5": [
48+
# CHECK-NEXT: {
49+
# CHECK-NEXT: "start": 4,
50+
# CHECK-NEXT: "end": 9
51+
# CHECK-NEXT: },
52+
# CHECK-NEXT: {
53+
# CHECK-NEXT: "start": 14,
54+
# CHECK-NEXT: "end": 19
55+
# CHECK-NEXT: }
56+
# CHECK-NEXT: ]
57+
# CHECK-NEXT: }
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# RUN: llvm-sim -o - %S/Inputs/sim1.ll | FileCheck %s
2+
3+
# Checking the output of a single module test.
4+
5+
# CHECK: {
6+
# CHECK-NEXT: "1": [
7+
# CHECK-NEXT: {
8+
# CHECK-NEXT: "start": 8,
9+
# CHECK-NEXT: "end": 9
10+
# CHECK-NEXT: },
11+
# CHECK-NEXT: {
12+
# CHECK-NEXT: "start": 18,
13+
# CHECK-NEXT: "end": 19
14+
# CHECK-NEXT: }
15+
# CHECK-NEXT: ],
16+
# CHECK-NEXT: "2": [
17+
# CHECK-NEXT: {
18+
# CHECK-NEXT: "start": 7,
19+
# CHECK-NEXT: "end": 9
20+
# CHECK-NEXT: },
21+
# CHECK-NEXT: {
22+
# CHECK-NEXT: "start": 17,
23+
# CHECK-NEXT: "end": 19
24+
# CHECK-NEXT: }
25+
# CHECK-NEXT: ],
26+
# CHECK-NEXT: "3": [
27+
# CHECK-NEXT: {
28+
# CHECK-NEXT: "start": 6,
29+
# CHECK-NEXT: "end": 9
30+
# CHECK-NEXT: },
31+
# CHECK-NEXT: {
32+
# CHECK-NEXT: "start": 16,
33+
# CHECK-NEXT: "end": 19
34+
# CHECK-NEXT: }
35+
# CHECK-NEXT: ],
36+
# CHECK-NEXT: "4": [
37+
# CHECK-NEXT: {
38+
# CHECK-NEXT: "start": 5,
39+
# CHECK-NEXT: "end": 9
40+
# CHECK-NEXT: },
41+
# CHECK-NEXT: {
42+
# CHECK-NEXT: "start": 15,
43+
# CHECK-NEXT: "end": 19
44+
# CHECK-NEXT: }
45+
# CHECK-NEXT: ],
46+
# CHECK-NEXT: "5": [
47+
# CHECK-NEXT: {
48+
# CHECK-NEXT: "start": 4,
49+
# CHECK-NEXT: "end": 9
50+
# CHECK-NEXT: },
51+
# CHECK-NEXT: {
52+
# CHECK-NEXT: "start": 14,
53+
# CHECK-NEXT: "end": 19
54+
# CHECK-NEXT: }
55+
# CHECK-NEXT: ]
56+
# CHECK-NEXT: }

llvm/tools/LLVMBuild.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ subdirectories =
5151
llvm-rc
5252
llvm-reduce
5353
llvm-rtdyld
54+
llvm-sim
5455
llvm-size
5556
llvm-split
5657
llvm-undname

llvm/tools/llvm-sim/CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
set(LLVM_LINK_COMPONENTS core support object coverage AsmParser IRReader TransformUtils ipo)
2+
3+
add_llvm_tool(llvm-sim
4+
llvm-sim.cpp
5+
)

llvm/tools/llvm-sim/LLVMBuild.txt

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
;===- ./tools/llvm-sim/LLVMBuild.txt ---------------------------*- Conf -*--===;
2+
;
3+
; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
; See https://llvm.org/LICENSE.txt for license information.
5+
; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
;
7+
;===------------------------------------------------------------------------===;
8+
;
9+
; This is an LLVMBuild description file for the components in this subdirectory.
10+
;
11+
; For more information on the LLVMBuild system, please see:
12+
;
13+
; http://llvm.org/docs/LLVMBuild.html
14+
;
15+
;===------------------------------------------------------------------------===;
16+
17+
[component_0]
18+
type = Tool
19+
name = llvm-sim
20+
parent = Tools
21+
required_libraries = Core Support AsmParser IRReader IPO

llvm/tools/llvm-sim/llvm-sim.cpp

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
//===-- llvm-sim.cpp - Find similar sections of programs -------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This program finds similar sections of a Module, and exports them as a JSON
10+
// file.
11+
//
12+
// To find similarities contained across multiple modules, please use llvm-link
13+
// first to merge the modules.
14+
//
15+
//===----------------------------------------------------------------------===//
16+
17+
#include "llvm/Analysis/IRSimilarityIdentifier.h"
18+
#include "llvm/IRReader/IRReader.h"
19+
#include "llvm/Support/CommandLine.h"
20+
#include "llvm/Support/FileSystem.h"
21+
#include "llvm/Support/InitLLVM.h"
22+
#include "llvm/Support/JSON.h"
23+
#include "llvm/Support/SourceMgr.h"
24+
#include "llvm/Support/ToolOutputFile.h"
25+
26+
using namespace llvm;
27+
using namespace IRSimilarity;
28+
29+
// Command-line option "-o": the path handed to exportToFile() as the
// destination of the JSON output. Its initial value is "-".
// NOTE(review): "-" presumably selects standard output, as the lit test that
// pipes `-o -` into FileCheck suggests -- confirm against ToolOutputFile.
static cl::opt<std::string> OutputFilename("o", cl::desc("Output Filename"),
30+
cl::init("-"),
31+
cl::value_desc("filename"));
32+
33+
// Positional command-line argument: the file passed to parseIRFile() in
// main(). Its initial value is "-".
// NOTE(review): "-" presumably means "read from standard input" -- confirm.
static cl::opt<std::string> InputSourceFile(cl::Positional,
34+
cl::desc("<Source file>"),
35+
cl::init("-"),
36+
cl::value_desc("filename"));
37+
38+
/// Retrieve the unique number \p I was mapped to in parseBitcodeFile.
39+
///
40+
/// \param I - The Instruction to find the instruction number for.
41+
/// \param LLVMInstNum - The mapping of Instructions to their location in the
42+
/// module represented by an unsigned integer.
43+
/// \returns The instruction number for \p I if it exists.
44+
Optional<unsigned>
45+
getPositionInModule(const Instruction *I,
46+
const DenseMap<Instruction *, unsigned> &LLVMInstNum) {
47+
assert(I && "Instruction is nullptr!");
48+
DenseMap<Instruction *, unsigned>::const_iterator It = LLVMInstNum.find(I);
49+
if (It == LLVMInstNum.end())
50+
return None;
51+
return It->second;
52+
}
53+
54+
/// Exports the given SimilarityGroups to a JSON file at \p FilePath.
55+
///
56+
/// \param FilePath - The path to the output location.
57+
/// \param SimSections - The similarity groups to process.
58+
/// \param LLVMInstNum - The mapping of Instructions to their location in the
59+
/// module represented by an unsigned integer.
60+
/// \returns A nonzero error code if there was a failure creating the file.
61+
std::error_code
62+
exportToFile(const StringRef FilePath,
63+
const SimilarityGroupList &SimSections,
64+
const DenseMap<Instruction *, unsigned> &LLVMInstNum) {
65+
std::error_code EC;
66+
std::unique_ptr<ToolOutputFile> Out(
67+
new ToolOutputFile(FilePath, EC, sys::fs::OF_None));
68+
if (EC)
69+
return EC;
70+
71+
json::OStream J(Out->os(), 1);
72+
J.objectBegin();
73+
74+
unsigned SimOption = 1;
75+
// Process each list of SimilarityGroups organized by the Module.
76+
for (const SimilarityGroup &G : SimSections) {
77+
std::string SimOptionStr = std::to_string(SimOption);
78+
J.attributeBegin(SimOptionStr);
79+
J.arrayBegin();
80+
// For each file there is a list of the range where the similarity
81+
// exists.
82+
for (const IRSimilarityCandidate &C : G) {
83+
Optional<unsigned> Start =
84+
getPositionInModule((*C.front()).Inst, LLVMInstNum);
85+
Optional<unsigned> End =
86+
getPositionInModule((*C.back()).Inst, LLVMInstNum);
87+
88+
assert(Start.hasValue() &&
89+
"Could not find instruction number for first instruction");
90+
assert(End.hasValue() &&
91+
"Could not find instruction number for last instruction");
92+
93+
J.object([&] {
94+
J.attribute("start", Start.getValue());
95+
J.attribute("end", End.getValue());
96+
});
97+
}
98+
J.arrayEnd();
99+
J.attributeEnd();
100+
SimOption++;
101+
}
102+
J.objectEnd();
103+
104+
Out->keep();
105+
106+
return EC;
107+
}
108+
109+
int main(int argc, const char *argv[]) {
110+
InitLLVM X(argc, argv);
111+
112+
cl::ParseCommandLineOptions(argc, argv, "LLVM IR Similarity Visualizer\n");
113+
114+
LLVMContext CurrContext;
115+
SMDiagnostic Err;
116+
std::unique_ptr<Module> ModuleToAnalyze =
117+
parseIRFile(InputSourceFile, Err, CurrContext);
118+
119+
if (!ModuleToAnalyze) {
120+
Err.print(argv[0], errs());
121+
return 1;
122+
}
123+
124+
// Mapping from an Instruction pointer to its occurrence in a sequential
125+
// list of all the Instructions in a Module.
126+
DenseMap<Instruction *, unsigned> LLVMInstNum;
127+
128+
// We give each instruction a number, which gives us a start and end value
129+
// for the beginning and end of each IRSimilarityCandidate.
130+
unsigned InstructionNumber = 1;
131+
for (Function &F : *ModuleToAnalyze)
132+
for (BasicBlock &BB : F)
133+
for (Instruction &I : BB.instructionsWithoutDebug())
134+
LLVMInstNum[&I]= InstructionNumber++;
135+
136+
// The similarity identifier we will use to find the similar sections.
137+
IRSimilarityIdentifier SimIdent;
138+
SimilarityGroupList SimilaritySections =
139+
SimIdent.findSimilarity(*ModuleToAnalyze);
140+
141+
std::error_code E =
142+
exportToFile(OutputFilename, SimilaritySections, LLVMInstNum);
143+
if (E) {
144+
errs() << argv[0] << ": " << E.message() << '\n';
145+
return 2;
146+
}
147+
148+
return 0;
149+
}

0 commit comments

Comments
 (0)