Context: I am trying to find out the depth between two CFGs using BFS but the code is not properly working as it’s only showing the results up to depth 0 and 1 but it’s not showing the other depths like 2,3,4 and so on. I am posting here the code and a sample example.
Example:1
define i32 @func1() {
entry:
%a = alloca i32
store i32 10, i32* %a
%b = alloca i32
store i32 20, i32* %b
br label %loop
loop:
%i = phi i32 [ 0, %entry ], [ %next, %loop ]
%val = load i32, i32* %a
%val2 = load i32, i32* %b
%sum = add i32 %val, %val2
%next = add i32 %i, 1
%cmp = icmp slt i32 %next, 10
br i1 %cmp, label %loop, label %exit
exit:
ret i32 %sum
}
Example:2
define i32 @func2() {
entry:
%a = alloca i32
store i32 5, i32* %a
%b = alloca i32
store i32 7, i32* %b
br label %loop
loop:
%i = phi i32 [ 0, %entry ], [ %next, %loop ]
%val = load i32, i32* %a
%val2 = load i32, i32* %b
%sum = add i32 %val, %val2
%next = add i32 %i, 1
%cmp = icmp slt i32 %next, 5
br i1 %cmp, label %loop, label %exit
exit:
ret i32 %sum
}
output : Basic blocks in func2/entry (CFG1) and func2/entry (CFG2) are dissimilar: Instruction count mismatch
Basic blocks in func2/loop (CFG1) and func2/loop (CFG2) are dissimilar: Instruction count mismatch
Basic blocks in func2/exit (CFG1) and func2/exit (CFG2) are dissimilar: Instruction count mismatch
Basic blocks /func2 (CFG1) and /func1 (CFG2) at depth 0 are similar with 3/3 matching instructions
Basic blocks /entry (CFG1) and /entry (CFG2) at depth 1 are identical
Basic blocks /exit (CFG1) and /exit (CFG2) at depth 1 are identical
Basic blocks /loop (CFG1) and /loop (CFG2) at depth 1 are identical
I expected that depth 2 will generate also, but my code only shows depth 0 and 1 for each & every examples I tried
I have tried this
import os
import re
import graphviz
from llvmlite import binding as llvm
from collections import deque
class Instruction:
def __init__(self, instr_type, operands):
self.type = instr_type
self.operands = operands
class BasicBlock:
def __init__(self, instructions):
self.instructions = instructions
def extract_instruction(inst):
inst_str = str(inst)
inst_str = re.sub(r',s*aligns+d+', '', inst_str)
return inst_str
def extract_cfg(ir_content):
dot = graphviz.Digraph()
dot.attr(rankdir='TB')
llvm_module = llvm.parse_assembly(ir_content)
for func in llvm_module.functions:
if not func.is_declaration:
func_name = func.name
for block in func.blocks:
block_name = block.name
label = f"{func_name}_{block_name}nInstructions:n"
for inst in block.instructions:
inst_str = extract_instruction(inst)
label += f" {inst_str}n"
dot.node(f"{func_name}_{block_name}", label=label.strip())
for instr in block.instructions:
if instr.opcode == 'br':
for op in instr.operands:
if isinstance(op, llvm.ValueRef) and op.name:
successor = op.name
dot.edge(f"{func_name}_{block_name}", f"{func_name}_{successor}")
elif instr.opcode == 'ret':
dot.edge(f"{func_name}_{block_name}", f"{func_name}_return")
return dot
def read_ir_file(file_path):
with open(file_path, 'r') as file:
return file.read()
def extract_function_blocks(ir_content):
blocks = {}
llvm_module = llvm.parse_assembly(ir_content)
for func in llvm_module.functions:
if not func.is_declaration:
func_name = func.name
blocks[func_name] = {}
for block in func.blocks:
block_name = block.name
instructions = [extract_instruction(inst) for inst in block.instructions]
blocks[func_name][block_name] = instructions
return blocks
def compare_basic_blocks(cfg1, cfg2):
matching_blocks = 0
total_blocks = 0
compared_pairs = set()
for func_name, blocks1 in cfg1.items():
blocks2 = cfg2.get(func_name, {})
for bb_name, instructions1 in blocks1.items():
total_blocks += 1
instructions2 = blocks2.get(bb_name, [])
pair = (func_name, bb_name)
if pair in compared_pairs:
continue
compared_pairs.add(pair)
if len(instructions1) != len(instructions2):
print(f"Basic blocks in {func_name}/{bb_name} (CFG1) and {func_name}/{bb_name} (CFG2) are dissimilar: Instruction count mismatch")
continue
matching_instructions = 0
for i, (instr1, instr2) in enumerate(zip(instructions1, instructions2)):
if instr1.split()[0] == instr2.split()[0]:
matching_instructions += 1
operands1 = set(instr1.split()[1:])
operands2 = set(instr2.split()[1:])
if operands1 != operands2:
print(f"Basic blocks in {func_name}/{bb_name} (CFG1) and {func_name}/{bb_name} (CFG2) are dissimilar: Operand names mismatch at position {i}")
break
else:
print(f"Basic blocks in {func_name}/{bb_name} (CFG1) and {func_name}/{bb_name} (CFG2) are dissimilar: Instruction type mismatch at position {i}")
break
else:
print(f"Basic blocks in {func_name}/{bb_name} (CFG1) and {func_name}/{bb_name} (CFG2) are similar with {matching_instructions}/{len(instructions1)} matching instructions")
matching_blocks += 1
return matching_blocks, total_blocks
def bfs_compare(cfg1, cfg2):
if not cfg1 or not cfg2:
print("One or both CFGs are empty. Cannot perform depth-level comparison.")
return
visited1 = set()
visited2 = set()
queue1 = deque([(block_name, 0) for block_name in cfg1.keys()])
queue2 = deque([(block_name, 0) for block_name in cfg2.keys()])
depth_blocks1 = {}
depth_blocks2 = {}
compared_pairs = set()
while queue1 and queue2:
bb1, depth1 = queue1.popleft()
bb2, depth2 = queue2.popleft()
visited1.add(bb1)
visited2.add(bb2)
depth_blocks1.setdefault(depth1, []).append(bb1)
depth_blocks2.setdefault(depth2, []).append(bb2)
successors1 = set(cfg1.get(bb1, [])) - visited1
successors2 = set(cfg2.get(bb2, [])) - visited2
queue1.extend([(succ, depth1 + 1) for succ in successors1])
queue2.extend([(succ, depth2 + 1) for succ in successors2])
for depth in set(depth_blocks1.keys()) & set(depth_blocks2.keys()):
blocks1 = depth_blocks1[depth]
blocks2 = depth_blocks2[depth]
if len(blocks1) != len(blocks2):
print(f"Number of basic blocks at depth {depth} in CFG1 and CFG2 are different")
else:
for bb1, bb2 in zip(sorted(blocks1), sorted(blocks2)):
pair = (bb1, bb2)
if pair not in compared_pairs:
compared_pairs.add(pair)
instrs1 = cfg1.get(bb1, [])
instrs2 = cfg2.get(bb2, [])
func1, block1 = bb1.split('/', 1) if '/' in bb1 else ('', bb1)
func2, block2 = bb2.split('/', 1) if '/' in bb2 else ('', bb2)
if instrs1 == instrs2:
print(f"Basic blocks {func1}/{block1} (CFG1) and {func2}/{block2} (CFG2) at depth {depth} are identical")
else:
matching_instructions = 0
for i, (instr1, instr2) in enumerate(zip(instrs1, instrs2)):
if instr1.split()[0] == instr2.split()[0]:
matching_instructions += 1
operands1 = set(instr1.split()[1:])
operands2 = set(instr2.split()[1:])
if operands1 != operands2:
print(f"Basic blocks {func1}/{block1} (CFG1) and {func2}/{block2} (CFG2) at depth {depth} are similar but have operand names mismatch at position {i}")
break
else:
print(f"Basic blocks {func1}/{block1} (CFG1) and {func2}/{block2} (CFG2) at depth {depth} are dissimilar: Instruction type mismatch at position {i}")
break
else:
print(f"Basic blocks {func1}/{block1} (CFG1) and {func2}/{block2} (CFG2) at depth {depth} are similar with {matching_instructions}/{len(instrs1)} matching instructions")
# Directory containing LLVM IR files
dir_path = 'path'
ir_files = [f for f in os.listdir(dir_path) if os.path.isfile(os.path.join(dir_path, f)) and f.endswith('.ll')]
# Load and compare CFGs for each pair of IRs
for i in range(len(ir_files)):
for j in range(i+1, len(ir_files)):
ir1_content = read_ir_file(os.path.join(dir_path, ir_files[i]))
ir2_content = read_ir_file(os.path.join(dir_path, ir_files[j]))
cfg1 = extract_function_blocks(ir1_content)
cfg2 = extract_function_blocks(ir2_content)
print(f"Comparing CFGs for {ir_files[i]} and {ir_files[j]}")
compare_basic_blocks(cfg1, cfg2)
bfs_compare(cfg1, cfg2)
# Generate and save CFGs as PDF files
cfg1_dot = extract_cfg(ir1_content)
cfg2_dot = extract_cfg(ir2_content)
cfg1_dot.render(filename=f"cfg_{ir_files[i]}", format="pdf", directory=dir_path, cleanup=True)
cfg2_dot.render(filename=f"cfg_{ir_files[j]}", format="pdf", directory=dir_path, cleanup=True)
New contributor
tamanna is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.