Skip to content

29.4 代码理解模块

29.4.1 代码理解概述

代码理解模块是编程 Agent 的另一个核心能力,它能够分析、解释和理解现有代码的功能、结构和设计。代码理解涉及代码解析、语义分析、依赖分析等多个环节。

代码理解流程

输入代码 ↓ 代码解析 ↓ 结构分析 ↓ 语义分析 ↓ 依赖分析 ↓ 功能推断 ↓ 生成解释

29.4.2 代码解析

代码解析器

python

    class CodeParser:
        """Front door that routes source text to a language-specific parser."""

        def __init__(self):
            # One parser instance per supported language, keyed by the
            # lowercase language name used for lookup in ``parse``.
            self.parsers = dict(
                python=PythonParser(),
                javascript=JavaScriptParser(),
                java=JavaParser(),
                cpp=CppParser(),
            )

        def parse(self, code: str, language: str) -> ParsedCode:
            """Parse ``code`` with the parser registered for ``language``.

            Args:
                code: Source text to parse.
                language: Language name; matched case-insensitively against
                    the keys of ``self.parsers``.

            Returns:
                Whatever the selected parser's ``parse`` method returns.

            Raises:
                ValueError: If no parser is registered for ``language``.
            """
            handler = self.parsers.get(language.lower())
            if handler:
                return handler.parse(code)
            raise ValueError(f"Unsupported language: {language}")

    class PythonParser:
        """Parser that summarises Python source using the standard ``ast`` module."""

        def parse(self, code: str) -> ParsedCode:
            """Parse Python source text into a ``ParsedCode`` summary.

            Args:
                code: Python source code.

            Returns:
                A ``ParsedCode`` holding the AST together with the extracted
                classes, functions, imports and module-level variables.

            Raises:
                ValueError: If ``code`` is not syntactically valid Python.
            """
            # Only the parse step is guarded, so unrelated errors raised
            # during extraction are not mislabelled as syntax problems.
            try:
                tree = ast.parse(code)
            except SyntaxError as e:
                # Chain the cause so the original location info is preserved.
                raise ValueError(f"Invalid Python code: {e}") from e

            parsed_code = ParsedCode(
                language='python',
                original_code=code,
                ast=tree
            )
            parsed_code.classes = self._extract_classes(tree)
            parsed_code.functions = self._extract_functions(tree)
            parsed_code.imports = self._extract_imports(tree)
            parsed_code.global_variables = self._extract_global_variables(tree)
            return parsed_code

        def _extract_classes(self, tree: ast.AST) -> List[ClassInfo]:
            """Collect a ``ClassInfo`` for every class definition in the tree.

            Nested classes are included because the whole tree is walked.
            Both ``def`` and ``async def`` members are reported as methods,
            and assignments whose first target is not a plain name are
            skipped rather than stored as ``None``.
            """
            func_types = (ast.FunctionDef, ast.AsyncFunctionDef)
            classes = []

            for node in ast.walk(tree):
                if not isinstance(node, ast.ClassDef):
                    continue

                attributes = [
                    self._extract_attribute(stmt)
                    for stmt in node.body
                    if isinstance(stmt, ast.Assign)
                ]
                classes.append(ClassInfo(
                    name=node.name,
                    bases=[self._get_name(base) for base in node.bases],
                    methods=[self._extract_method(stmt) for stmt in node.body
                             if isinstance(stmt, func_types)],
                    attributes=[attr for attr in attributes if attr is not None],
                    docstring=ast.get_docstring(node)
                ))

            return classes

        def _extract_functions(self, tree: ast.AST) -> List[FunctionInfo]:
            """Collect a ``FunctionInfo`` for every function that is not a method.

            A function counts as a method when it sits directly in a class
            body. Functions nested inside other functions are still reported.
            """
            func_types = (ast.FunctionDef, ast.AsyncFunctionDef)

            # Record the direct function members of every class once, instead
            # of re-walking the whole tree for each function encountered.
            method_ids = set()
            for node in ast.walk(tree):
                if isinstance(node, ast.ClassDef):
                    for stmt in node.body:
                        if isinstance(stmt, func_types):
                            method_ids.add(id(stmt))

            functions = []
            for node in ast.walk(tree):
                if isinstance(node, func_types) and id(node) not in method_ids:
                    functions.append(FunctionInfo(
                        name=node.name,
                        arguments=[arg.arg for arg in node.args.args],
                        return_type=self._get_return_type(node),
                        docstring=ast.get_docstring(node),
                        decorators=[self._get_name(d) for d in node.decorator_list]
                    ))

            return functions

        def _extract_imports(self, tree: ast.AST) -> List[ImportInfo]:
            """Collect one ``ImportInfo`` per imported name.

            ``import a, b`` yields two entries of type ``'import'``;
            ``from m import x, y`` yields two entries of type ``'from'``.
            For relative imports such as ``from . import x`` the ``ast``
            module reports ``module`` as ``None``, which is passed through.
            """
            imports = []

            for node in ast.walk(tree):
                if isinstance(node, ast.Import):
                    imports.extend(
                        ImportInfo(
                            module=alias.name,
                            alias=alias.asname,
                            type='import'
                        )
                        for alias in node.names
                    )
                elif isinstance(node, ast.ImportFrom):
                    imports.extend(
                        ImportInfo(
                            module=node.module,
                            name=alias.name,
                            alias=alias.asname,
                            type='from'
                        )
                        for alias in node.names
                    )

            return imports

        def _extract_global_variables(self, tree: ast.AST) -> List[VariableInfo]:
            """Collect a ``VariableInfo`` for each module-level name assignment.

            Only ``ast.Assign`` statements directly in the module body are
            considered, and only targets that are plain names.
            """
            # Module-level means "directly in the module body". The previous
            # check required a single node to be both an Assign and a Module,
            # which can never hold, so nothing was ever collected.
            body = tree.body if isinstance(tree, ast.Module) else []

            variables = []
            for stmt in body:
                if not isinstance(stmt, ast.Assign):
                    continue
                for target in stmt.targets:
                    if isinstance(target, ast.Name):
                        variables.append(VariableInfo(
                            name=target.id,
                            type=self._infer_type(stmt.value),
                            value=self._get_value(stmt.value)
                        ))

            return variables

        def _extract_method(self, node: ast.FunctionDef) -> MethodInfo:
            """Build a ``MethodInfo`` from a function definition in a class body.

            ``is_static`` / ``is_classmethod`` are set only when the decorator
            is written as the bare name ``staticmethod`` / ``classmethod``.
            """
            decorator_names = {
                d.id for d in node.decorator_list if isinstance(d, ast.Name)
            }
            return MethodInfo(
                name=node.name,
                arguments=[arg.arg for arg in node.args.args],
                return_type=self._get_return_type(node),
                docstring=ast.get_docstring(node),
                is_static='staticmethod' in decorator_names,
                is_classmethod='classmethod' in decorator_names
            )

        def _extract_attribute(self, node: ast.Assign) -> AttributeInfo:
            """Build an ``AttributeInfo`` from a class-body assignment.

            Only the first target is inspected. Returns ``None`` when that
            target is not a plain name (for example tuple unpacking).
            """
            target = node.targets[0]
            if not isinstance(target, ast.Name):
                return None
            return AttributeInfo(
                name=target.id,
                type=self._infer_type(node.value),
                value=self._get_value(node.value)
            )

        def _get_name(self, node: ast.AST) -> str:
            """Render an expression node as readable source-like text.

            Names and dotted attribute chains are rendered directly. String
            constants (forward-reference annotations) yield their content and
            other constants their ``repr``. Any other expression, such as
            ``List[int]`` or a decorator call, is rendered with
            ``ast.unparse`` when available.
            """
            if isinstance(node, ast.Name):
                return node.id
            if isinstance(node, ast.Attribute):
                return f"{self._get_name(node.value)}.{node.attr}"
            if isinstance(node, ast.Constant):
                value = node.value
                return value if isinstance(value, str) else repr(value)

            # ``str(node)`` would only give an opaque "<ast.X object at 0x...>".
            unparse = getattr(ast, "unparse", None)  # available on Python 3.9+
            if unparse is not None:
                return unparse(node)
            return ast.dump(node)

        def _get_return_type(self, node: ast.FunctionDef) -> str:
            """Return the return annotation as text, or ``"None"`` if absent."""
            if node.returns:
                return self._get_name(node.returns)
            return "None"

        def _infer_type(self, node: ast.AST) -> str:
            """Guess a type name from the shape of a value expression.

            Constants report the type name of their value, container literals
            report the container type, and calls report the called name.
            Anything else yields ``"Any"``.
            """
            if isinstance(node, ast.Constant):
                return type(node.value).__name__
            if isinstance(node, ast.Call):
                return self._get_name(node.func)

            literal_types = (
                (ast.List, "list"),
                (ast.Dict, "dict"),
                (ast.Tuple, "tuple"),
                (ast.Set, "set"),
            )
            for node_type, type_name in literal_types:
                if isinstance(node, node_type):
                    return type_name
            return "Any"

        def _get_value(self, node: ast.AST) -> Any:
            """Return the literal value of a constant node, else ``None``."""
            if isinstance(node, ast.Constant):
                return node.value
            return None

## 29.4.3 结构分析

### 结构分析器

```python

    class StructureAnalyzer:
        """Derive structural facts from parsed code.

        Produces the class hierarchy, a call graph, a dependency list and
        complexity metrics for one ``ParsedCode`` object.
        """

        def analyze(self, parsed_code: ParsedCode) -> StructureAnalysis:
            """Run every structural analysis and bundle the results.

            Args:
                parsed_code: Parser output; its ``classes``, ``functions``,
                    ``imports``, ``language``, ``ast`` and ``original_code``
                    attributes are read by the helpers below.

            Returns:
                A ``StructureAnalysis`` with ``class_hierarchy``,
                ``call_graph``, ``dependencies`` and ``complexity`` set.
            """
            analysis = StructureAnalysis()
            analysis.class_hierarchy = self._analyze_class_hierarchy(
                parsed_code.classes
            )
            analysis.call_graph = self._analyze_call_graph(parsed_code)
            analysis.dependencies = self._analyze_dependencies(parsed_code)
            analysis.complexity = self._analyze_complexity(parsed_code)
            return analysis

        def _analyze_class_hierarchy(self,
                                     classes: List[ClassInfo]) -> Dict[str, List[str]]:
            """Map each class name to its list of base-class names."""
            hierarchy = {}
            for cls in classes:
                hierarchy[cls.name] = cls.bases
            return hierarchy

        def _analyze_call_graph(self,
                                parsed_code: ParsedCode) -> Dict[str, List[str]]:
            """Map each function and ``Class.method`` name to the calls it makes."""
            call_graph = {}

            # Free functions are keyed by their bare name.
            for func in parsed_code.functions:
                call_graph[func.name] = self._extract_function_calls(
                    func, parsed_code
                )

            # Methods are keyed as "ClassName.method_name".
            for cls in parsed_code.classes:
                for method in cls.methods:
                    call_graph[f"{cls.name}.{method.name}"] = (
                        self._extract_method_calls(method, cls, parsed_code)
                    )

            return call_graph

        def _extract_function_calls(self, func: FunctionInfo,
                                    parsed_code: ParsedCode) -> List[str]:
            """Return the calls made by ``func``.

            Placeholder: real extraction needs deeper AST analysis of the
            function body, so this currently always returns an empty list.
            """
            return []

        def _extract_method_calls(self, method: MethodInfo,
                                  cls: ClassInfo,
                                  parsed_code: ParsedCode) -> List[str]:
            """Return the calls made by ``method``.

            Placeholder: real extraction needs deeper AST analysis of the
            method body, so this currently always returns an empty list.
            """
            return []

        def _analyze_dependencies(self,
                                  parsed_code: ParsedCode) -> List[Dependency]:
            """List import dependencies followed by inheritance dependencies."""
            dependencies = []

            # Each import becomes an 'external' dependency on its module.
            for imp in parsed_code.imports:
                dependencies.append(Dependency(
                    type='import',
                    source=parsed_code.language,
                    target=imp.module,
                    strength='external'
                ))

            # Each base class becomes a 'strong' inheritance dependency.
            for cls in parsed_code.classes:
                for base in cls.bases:
                    dependencies.append(Dependency(
                        type='inheritance',
                        source=cls.name,
                        target=base,
                        strength='strong'
                    ))

            return dependencies

        def _analyze_complexity(self,
                                parsed_code: ParsedCode) -> ComplexityMetrics:
            """Compute cyclomatic, cognitive and maintainability metrics."""
            metrics = ComplexityMetrics()
            metrics.cyclomatic_complexity = self._calculate_cyclomatic_complexity(
                parsed_code
            )
            metrics.cognitive_complexity = self._calculate_cognitive_complexity(
                parsed_code
            )
            metrics.maintainability_index = self._calculate_maintainability_index(
                parsed_code
            )
            return metrics

        def _calculate_cyclomatic_complexity(self,
                                             parsed_code: ParsedCode) -> float:
            """Count decision points over the whole AST.

            Starts at 1 and adds 1 for each ``if``, ``while``, ``for`` and
            ``except`` handler, plus one per extra operand of every boolean
            operation (``a and b and c`` adds 2).
            """
            complexity = 1  # base path through the code
            for node in ast.walk(parsed_code.ast):
                if isinstance(node, (ast.If, ast.While, ast.For, ast.ExceptHandler)):
                    complexity += 1
                elif isinstance(node, ast.BoolOp):
                    complexity += len(node.values) - 1
            return complexity

        def _calculate_cognitive_complexity(self,
                                            parsed_code: ParsedCode) -> float:
            """Approximate cognitive complexity as 1.5 x cyclomatic complexity."""
            # Simplified stand-in; not a real cognitive-complexity computation.
            return self._calculate_cyclomatic_complexity(parsed_code) * 1.5

        def _calculate_maintainability_index(self,
                                             parsed_code: ParsedCode) -> float:
            """Compute a simplified maintainability index clamped to [0, 100].

            Uses the shape of the classic formula
            ``MI = 171 - 5.2 * ln(V) - 0.23 * G - 16.2 * ln(L)``.
            In that formula V is Halstead volume, G is cyclomatic complexity
            and L is lines of code. Halstead volume is not computed here, so
            this simplified version substitutes the cyclomatic complexity for
            V and the line count for both G and L.
            """
            loc = len(parsed_code.original_code.split('\n'))  # always >= 1
            complexity = self._calculate_cyclomatic_complexity(parsed_code)  # always >= 1
            mi = 171 - 5.2 * math.log(complexity) - 0.23 * loc - 16.2 * math.log(loc)
            return max(0, min(100, mi))

```

## 29.4.4 语义分析

### 语义分析器

```python

class SemanticAnalyzer:
    """Analyse what parsed code means, using an LLM for the open-ended parts.

    NOTE(review): ``_parse_algorithms`` and ``_parse_design_patterns`` are
    called below but are not defined in this class as shown; they must be
    provided elsewhere (e.g. by a subclass) before the algorithm and
    design-pattern analyses can run.
    """

    def __init__(self, llm_client: LLMClient):
        """Store the client whose async ``complete(prompt)`` method is used."""
        self.llm_client = llm_client

    async def analyze(self, parsed_code: ParsedCode,
                     structure: StructureAnalysis) -> SemanticAnalysis:
        """Run all semantic analyses and bundle the results.

        Args:
            parsed_code: Parser output for the code under analysis.
            structure: Structural analysis; its ``class_hierarchy`` is
                included in the design-pattern prompt.

        Returns:
            A ``SemanticAnalysis`` with ``purpose``, ``algorithms``,
            ``design_patterns`` and ``data_flow`` set.
        """
        analysis = SemanticAnalysis()
        analysis.purpose = await self._analyze_purpose(parsed_code)
        analysis.algorithms = await self._analyze_algorithms(parsed_code)
        analysis.design_patterns = await self._analyze_design_patterns(
            parsed_code,
            structure
        )
        analysis.data_flow = await self._analyze_data_flow(parsed_code)
        return analysis

    async def _analyze_purpose(self,
                              parsed_code: ParsedCode) -> str:
        """Ask the LLM for a concise description of what the code does."""
        prompt = f"""
        分析以下代码的主要目的和功能:

        {parsed_code.original_code}

        请用简洁的语言描述这段代码的主要功能。
        """

        return await self.llm_client.complete(prompt)

    async def _analyze_algorithms(self,
                                 parsed_code: ParsedCode) -> List[AlgorithmInfo]:
        """Ask the LLM which algorithms the code uses, with complexities.

        The prompt requests JSON; the raw response is handed to
        ``self._parse_algorithms``.
        """
        prompt = f"""
        识别以下代码中使用的算法:

        {parsed_code.original_code}

        请识别:
        1. 使用的主要算法(排序、搜索、图算法等)
        2. 算法的时间复杂度
        3. 算法的空间复杂度

        以 JSON 格式返回结果。
        """

        response = await self.llm_client.complete(prompt)
        return self._parse_algorithms(response)

    async def _analyze_design_patterns(self,
                                      parsed_code: ParsedCode,
                                      structure: StructureAnalysis) -> List[str]:
        """Ask the LLM which design patterns the code uses.

        The prompt embeds the extracted classes, functions and class
        hierarchy; the raw response is handed to
        ``self._parse_design_patterns``.
        """
        prompt = f"""
        识别以下代码中使用的设计模式:

        类:{parsed_code.classes}
        函数:{parsed_code.functions}
        类层次结构:{structure.class_hierarchy}

        请识别使用的设计模式。
        """

        response = await self.llm_client.complete(prompt)
        return self._parse_design_patterns(response)

    async def _analyze_data_flow(self,
                                parsed_code: ParsedCode) -> DataFlowAnalysis:
        """Summarise inputs, outputs and transformations of the code.

        Declared ``async`` for a uniform interface with the other analyses;
        the work itself is synchronous and makes no LLM call.
        """
        analysis = DataFlowAnalysis()
        analysis.inputs = self._identify_inputs(parsed_code)
        analysis.outputs = self._identify_outputs(parsed_code)
        analysis.transformations = self._identify_transformations(parsed_code)
        return analysis

    def _identify_inputs(self, parsed_code: ParsedCode) -> List[str]:
        """Return the distinct argument names of all functions and methods.

        Names appear in first-seen order (functions first, then methods), so
        the result is stable across runs. Method receivers such as ``self``
        are included because they are part of ``arguments``.
        """
        names = []
        for func in parsed_code.functions:
            names.extend(func.arguments)
        for cls in parsed_code.classes:
            for method in cls.methods:
                names.extend(method.arguments)

        # dict.fromkeys removes duplicates while keeping insertion order;
        # a plain set() would give an order that varies between runs.
        return list(dict.fromkeys(names))

    def _identify_outputs(self, parsed_code: ParsedCode) -> List[str]:
        """Describe every function or method whose return type is not "None".

        Functions are rendered as ``name() -> type`` and methods as
        ``Class.name() -> type``.
        """
        outputs = [
            f"{func.name}() -> {func.return_type}"
            for func in parsed_code.functions
            if func.return_type != "None"
        ]

        for cls in parsed_code.classes:
            for method in cls.methods:
                if method.return_type != "None":
                    outputs.append(f"{cls.name}.{method.name}() -> {method.return_type}")

        return outputs

    def _identify_transformations(self,
                                 parsed_code: ParsedCode) -> List[str]:
        """Return names of functions that look like data transformations.

        Simplified heuristic: a function qualifies when its lowercased name
        contains one of a few keywords. Methods are not inspected.
        """
        keywords = ['transform', 'convert', 'process', 'compute']
        return [
            func.name
            for func in parsed_code.functions
            if any(keyword in func.name.lower() for keyword in keywords)
        ]

```

## 29.4.5 代码解释生成

### 解释生成器
python
    class ExplanationGenerator:
        """Turn parsed, structural and semantic results into LLM-written explanations."""

        def __init__(self, llm_client: LLMClient):
            """Store the client whose async ``complete(prompt)`` method is used."""
            self.llm_client = llm_client

        async def generate_explanation(self,
                                       parsed_code: ParsedCode,
                                       structure: StructureAnalysis,
                                       semantic: SemanticAnalysis) -> CodeExplanation:
            """Generate every part of the explanation for one piece of code.

            Args:
                parsed_code: Parser output (classes and functions are used).
                structure: Structural analysis, forwarded to the class and
                    function explanation helpers.
                semantic: Semantic analysis; ``purpose`` and ``algorithms``
                    are used.

            Returns:
                A ``CodeExplanation`` with ``overview``,
                ``class_explanations``, ``function_explanations``,
                ``algorithm_explanations`` and ``usage_examples`` set.
            """
            explanation = CodeExplanation()

            explanation.overview = await self._generate_overview(
                parsed_code,
                semantic
            )
            explanation.class_explanations = await self._generate_class_explanations(
                parsed_code.classes,
                structure
            )
            explanation.function_explanations = await self._generate_function_explanations(
                parsed_code.functions,
                structure
            )
            explanation.algorithm_explanations = await self._generate_algorithm_explanations(
                semantic.algorithms
            )
            explanation.usage_examples = await self._generate_usage_examples(
                parsed_code
            )

            return explanation

        async def _generate_overview(self,
                                     parsed_code: ParsedCode,
                                     semantic: SemanticAnalysis) -> str:
            """Ask the LLM for an overall summary of the code."""
            prompt = f"""
            为以下代码生成总体概述:
            代码目的:{semantic.purpose}
            类:{[c.name for c in parsed_code.classes]}
            函数:{[f.name for f in parsed_code.functions]}
            请生成一个清晰的总体概述,包括:
            1. 代码的主要功能
            2. 主要组件
            3. 整体架构
            """

            return await self.llm_client.complete(prompt)

        async def _generate_class_explanations(self,
                                               classes: List[ClassInfo],
                                               structure: StructureAnalysis) -> Dict[str, str]:
            """Ask the LLM to explain each class; results are keyed by class name."""
            explanations = {}

            for cls in classes:
                # ``if a`` skips None placeholders that may appear in attributes.
                prompt = f"""
                为以下类生成详细解释:
                类名:{cls.name}
                父类:{cls.bases}
                方法:{[m.name for m in cls.methods]}
                属性:{[a.name for a in cls.attributes if a]}
                文档字符串:{cls.docstring}
                请生成详细的类解释,包括:
                1. 类的职责
                2. 主要方法的功能
                3. 使用场景
                """
                explanations[cls.name] = await self.llm_client.complete(prompt)

            return explanations

        async def _generate_function_explanations(self,
                                                  functions: List[FunctionInfo],
                                                  structure: StructureAnalysis) -> Dict[str, str]:
            """Ask the LLM to explain each function; results are keyed by function name."""
            explanations = {}

            for func in functions:
                prompt = f"""
                为以下函数生成详细解释:
                函数名:{func.name}
                参数:{func.arguments}
                返回类型:{func.return_type}
                文档字符串:{func.docstring}
                请生成详细的函数解释,包括:
                1. 函数的功能
                2. 参数说明
                3. 返回值说明
                4. 使用示例
                """
                explanations[func.name] = await self.llm_client.complete(prompt)

            return explanations

        async def _generate_algorithm_explanations(self,
                                                   algorithms: List[AlgorithmInfo]) -> Dict[str, str]:
            """Ask the LLM to explain each algorithm; results are keyed by algorithm name."""
            explanations = {}

            for algo in algorithms:
                prompt = f"""
                为以下算法生成详细解释:
                算法名称:{algo.name}
                时间复杂度:{algo.time_complexity}
                空间复杂度:{algo.space_complexity}
                请生成详细的算法解释,包括:
                1. 算法原理
                2. 实现细节
                3. 优缺点分析
                4. 适用场景
                """
                explanations[algo.name] = await self.llm_client.complete(prompt)

            return explanations

        async def _generate_usage_examples(self,
                                           parsed_code: ParsedCode) -> List[str]:
            """Ask the LLM for usage examples.

            One response is collected per class, followed by one per function
            for the first three functions only.
            """
            examples = []

            # One example request per class.
            for cls in parsed_code.classes:
                prompt = f"""
                为以下类生成使用示例:
                类名:{cls.name}
                方法:{[m.name for m in cls.methods]}
                请生成 2-3 个实用的使用示例。
                """
                examples.append(await self.llm_client.complete(prompt))

            # Only the first three functions get examples.
            for func in parsed_code.functions[:3]:
                prompt = f"""
                为以下函数生成使用示例:
                函数名:{func.name}
                参数:{func.arguments}
                请生成 1-2 个实用的使用示例。
                """
                examples.append(await self.llm_client.complete(prompt))

            return examples

基于 MIT 许可发布