Thiết kế website giá rẻ

Question

Context: I am trying to find out the depth between two CFGs using BFS but the code is not properly working as it’s only showing the results up to depth 0 and 1 but it’s not showing the other depths like 2,3,4 and so on. I am posting here the code and a sample example.

Example:1

define i32 @func1() {
entry:
  %a = alloca i32
  store i32 10, i32* %a
  %b = alloca i32
  store i32 20, i32* %b
  br label %loop

loop:
  %i = phi i32 [ 0, %entry ], [ %next, %loop ]
  %val = load i32, i32* %a
  %val2 = load i32, i32* %b
  %sum = add i32 %val, %val2
  %next = add i32 %i, 1
  %cmp = icmp slt i32 %next, 10
  br i1 %cmp, label %loop, label %exit

exit:
  ret i32 %sum
}

Example:2

define i32 @func2() {
entry:
  %a = alloca i32
  store i32 5, i32* %a
  %b = alloca i32
  store i32 7, i32* %b
  br label %loop

loop:
  %i = phi i32 [ 0, %entry ], [ %next, %loop ]
  %val = load i32, i32* %a
  %val2 = load i32, i32* %b
  %sum = add i32 %val, %val2
  %next = add i32 %i, 1
  %cmp = icmp slt i32 %next, 5
  br i1 %cmp, label %loop, label %exit

exit:
  ret i32 %sum
}

output : Basic blocks in func2/entry (CFG1) and func2/entry (CFG2) are dissimilar: Instruction count mismatch
Basic blocks in func2/loop (CFG1) and func2/loop (CFG2) are dissimilar: Instruction count mismatch
Basic blocks in func2/exit (CFG1) and func2/exit (CFG2) are dissimilar: Instruction count mismatch
Basic blocks /func2 (CFG1) and /func1 (CFG2) at depth 0 are similar with 3/3 matching instructions
Basic blocks /entry (CFG1) and /entry (CFG2) at depth 1 are identical
Basic blocks /exit (CFG1) and /exit (CFG2) at depth 1 are identical
Basic blocks /loop (CFG1) and /loop (CFG2) at depth 1 are identical 

I expected that depth 2 will generate also, but my code only shows depth 0 and 1 for each & every examples I tried

I have tried this

import os
import re
import graphviz
from llvmlite import binding as llvm
from collections import deque

class Instruction:
    def __init__(self, instr_type, operands):
        self.type = instr_type
        self.operands = operands

class BasicBlock:
    def __init__(self, instructions):
        self.instructions = instructions

def extract_instruction(inst):
    inst_str = str(inst)
    inst_str = re.sub(r',s*aligns+d+', '', inst_str)
    return inst_str

def extract_cfg(ir_content):
    dot = graphviz.Digraph()
    dot.attr(rankdir='TB')
    llvm_module = llvm.parse_assembly(ir_content)
    for func in llvm_module.functions:
        if not func.is_declaration:
            func_name = func.name
            for block in func.blocks:
                block_name = block.name
                label = f"{func_name}_{block_name}nInstructions:n"
                for inst in block.instructions:
                    inst_str = extract_instruction(inst)
                    label += f"  {inst_str}n"
                dot.node(f"{func_name}_{block_name}", label=label.strip())
                for instr in block.instructions:
                    if instr.opcode == 'br':
                        for op in instr.operands:
                            if isinstance(op, llvm.ValueRef) and op.name:
                                successor = op.name
                                dot.edge(f"{func_name}_{block_name}", f"{func_name}_{successor}")
                    elif instr.opcode == 'ret':
                        dot.edge(f"{func_name}_{block_name}", f"{func_name}_return")
    return dot

def read_ir_file(file_path):
    with open(file_path, 'r') as file:
        return file.read()

def extract_function_blocks(ir_content):
    blocks = {}
    llvm_module = llvm.parse_assembly(ir_content)
    for func in llvm_module.functions:
        if not func.is_declaration:
            func_name = func.name
            blocks[func_name] = {}
            for block in func.blocks:
                block_name = block.name
                instructions = [extract_instruction(inst) for inst in block.instructions]
                blocks[func_name][block_name] = instructions
    return blocks

def compare_basic_blocks(cfg1, cfg2):
    matching_blocks = 0
    total_blocks = 0
    compared_pairs = set()

    for func_name, blocks1 in cfg1.items():
        blocks2 = cfg2.get(func_name, {})
        for bb_name, instructions1 in blocks1.items():
            total_blocks += 1
            instructions2 = blocks2.get(bb_name, [])
            pair = (func_name, bb_name)
            if pair in compared_pairs:
                continue
            compared_pairs.add(pair)
            if len(instructions1) != len(instructions2):
                print(f"Basic blocks in {func_name}/{bb_name} (CFG1) and {func_name}/{bb_name} (CFG2) are dissimilar: Instruction count mismatch")
                continue
            matching_instructions = 0
            for i, (instr1, instr2) in enumerate(zip(instructions1, instructions2)):
                if instr1.split()[0] == instr2.split()[0]:
                    matching_instructions += 1
                    operands1 = set(instr1.split()[1:])
                    operands2 = set(instr2.split()[1:])
                    if operands1 != operands2:
                        print(f"Basic blocks in {func_name}/{bb_name} (CFG1) and {func_name}/{bb_name} (CFG2) are dissimilar: Operand names mismatch at position {i}")
                        break
                else:
                    print(f"Basic blocks in {func_name}/{bb_name} (CFG1) and {func_name}/{bb_name} (CFG2) are dissimilar: Instruction type mismatch at position {i}")
                    break
            else:
                print(f"Basic blocks in {func_name}/{bb_name} (CFG1) and {func_name}/{bb_name} (CFG2) are similar with {matching_instructions}/{len(instructions1)} matching instructions")
                matching_blocks += 1
    return matching_blocks, total_blocks

def bfs_compare(cfg1, cfg2):
    if not cfg1 or not cfg2:
        print("One or both CFGs are empty. Cannot perform depth-level comparison.")
        return

    visited1 = set()
    visited2 = set()
    queue1 = deque([(block_name, 0) for block_name in cfg1.keys()])
    queue2 = deque([(block_name, 0) for block_name in cfg2.keys()])
    depth_blocks1 = {}
    depth_blocks2 = {}
    compared_pairs = set()

    while queue1 and queue2:
        bb1, depth1 = queue1.popleft()
        bb2, depth2 = queue2.popleft()
        visited1.add(bb1)
        visited2.add(bb2)
        depth_blocks1.setdefault(depth1, []).append(bb1)
        depth_blocks2.setdefault(depth2, []).append(bb2)
        successors1 = set(cfg1.get(bb1, [])) - visited1
        successors2 = set(cfg2.get(bb2, [])) - visited2
        queue1.extend([(succ, depth1 + 1) for succ in successors1])
        queue2.extend([(succ, depth2 + 1) for succ in successors2])

    for depth in set(depth_blocks1.keys()) & set(depth_blocks2.keys()):
        blocks1 = depth_blocks1[depth]
        blocks2 = depth_blocks2[depth]
        if len(blocks1) != len(blocks2):
            print(f"Number of basic blocks at depth {depth} in CFG1 and CFG2 are different")
        else:
            for bb1, bb2 in zip(sorted(blocks1), sorted(blocks2)):
                pair = (bb1, bb2)
                if pair not in compared_pairs:
                    compared_pairs.add(pair)
                    instrs1 = cfg1.get(bb1, [])
                    instrs2 = cfg2.get(bb2, [])
                    func1, block1 = bb1.split('/', 1) if '/' in bb1 else ('', bb1)
                    func2, block2 = bb2.split('/', 1) if '/' in bb2 else ('', bb2)
                    if instrs1 == instrs2:
                        print(f"Basic blocks {func1}/{block1} (CFG1) and {func2}/{block2} (CFG2) at depth {depth} are identical")
                    else:
                        matching_instructions = 0
                        for i, (instr1, instr2) in enumerate(zip(instrs1, instrs2)):
                            if instr1.split()[0] == instr2.split()[0]:
                                matching_instructions += 1
                                operands1 = set(instr1.split()[1:])
                                operands2 = set(instr2.split()[1:])
                                if operands1 != operands2:
                                    print(f"Basic blocks {func1}/{block1} (CFG1) and {func2}/{block2} (CFG2) at depth {depth} are similar but have operand names mismatch at position {i}")
                                    break
                            else:
                                print(f"Basic blocks {func1}/{block1} (CFG1) and {func2}/{block2} (CFG2) at depth {depth} are dissimilar: Instruction type mismatch at position {i}")
                                break
                        else:
                            print(f"Basic blocks {func1}/{block1} (CFG1) and {func2}/{block2} (CFG2) at depth {depth} are similar with {matching_instructions}/{len(instrs1)} matching instructions")

# Directory containing LLVM IR files
dir_path = 'path'
ir_files = [f for f in os.listdir(dir_path) if os.path.isfile(os.path.join(dir_path, f)) and f.endswith('.ll')]

# Load and compare CFGs for each pair of IRs
for i in range(len(ir_files)):
    for j in range(i+1, len(ir_files)):
        ir1_content = read_ir_file(os.path.join(dir_path, ir_files[i]))
        ir2_content = read_ir_file(os.path.join(dir_path, ir_files[j]))

        cfg1 = extract_function_blocks(ir1_content)
        cfg2 = extract_function_blocks(ir2_content)

        print(f"Comparing CFGs for {ir_files[i]} and {ir_files[j]}")
        compare_basic_blocks(cfg1, cfg2)
        bfs_compare(cfg1, cfg2)

        # Generate and save CFGs as PDF files
        cfg1_dot = extract_cfg(ir1_content)
        cfg2_dot = extract_cfg(ir2_content)
        cfg1_dot.render(filename=f"cfg_{ir_files[i]}", format="pdf", directory=dir_path, cleanup=True)
        cfg2_dot.render(filename=f"cfg_{ir_files[j]}", format="pdf", directory=dir_path, cleanup=True)

Thiết kế website giá rẻ

Danh mục

Depth Search between CFGs in BFS algorithm using python with LLVM IR