Coverage for src/c2puml/core/verifier.py: 78%
154 statements
« prev ^ index » next coverage.py v7.10.4, created at 2025-08-20 03:53 +0000
« prev ^ index » next coverage.py v7.10.4, created at 2025-08-20 03:53 +0000
1#!/usr/bin/env python3
2"""
3Verification module for C to PlantUML converter
5Performs sanity checks on the parsed model to ensure values make sense for C code.
6"""
8import logging
9import re
10from typing import List, Tuple
12from ..models import Alias, Enum, Field, FileModel, Function, ProjectModel, Struct, Union
class ModelVerifier:
    """Verifies the sanity of a parsed C code model.

    The verifier walks a project model and collects human-readable issue
    strings in ``self.issues``. It never raises for a "bad" model; callers
    inspect the ``(is_valid, issues)`` tuple returned by :meth:`verify_model`.
    """

    # C identifier: a letter or underscore followed by letters, digits or
    # underscores. Used with ``fullmatch`` so no anchors are needed.
    _IDENTIFIER_RE = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")

    # Text made up exclusively of brackets, whitespace and backslashes, which
    # is what a mis-parsed type or initializer typically degenerates into.
    _BRACKET_NOISE_RE = re.compile(r"[\[\]{}()\s\\]+")

    # Garbled anonymous extraction such as '} name; struct {'. It may occur
    # anywhere in the text, so it is used with ``search``.
    _GARBLED_ANONYMOUS_RE = re.compile(r"\}\s+\w+;\s*struct\s*\{")

    # Closing bracket -> the opening bracket it must pair with.
    _BRACKET_PAIRS = {")": "(", "]": "[", "}": "{"}

    def __init__(self):
        self.logger = logging.getLogger(__name__)
        self.issues: List[str] = []

    def verify_model(self, model: "ProjectModel") -> Tuple[bool, List[str]]:
        """
        Verify the sanity of the entire model

        Args:
            model: The ProjectModel to verify

        Returns:
            Tuple of (is_valid, list_of_issues)
        """
        # Start from a clean slate so the verifier instance can be reused.
        self.issues = []

        # Project-level data
        self._verify_project_data(model)

        # Invariants: filenames as keys and include_relations ownership
        self._verify_filename_keys_and_relations(model)

        # Per-file checks
        for file_path, file_model in model.files.items():
            self._verify_file(file_path, file_model)

        is_valid = not self.issues

        if is_valid:
            self.logger.info("Model verification passed - all values look sane")
        else:
            self.logger.warning("Model verification found %d issues:", len(self.issues))
            for issue in self.issues:
                self.logger.warning(" - %s", issue)

        return is_valid, self.issues

    def _verify_project_data(self, model: "ProjectModel") -> None:
        """Record an issue for each missing piece of project-level data."""
        if not model.project_name or not model.project_name.strip():
            self.issues.append("Project name is empty or whitespace")

        if not model.source_folder or not model.source_folder.strip():
            self.issues.append("Source folder is empty or whitespace")

        if not model.files:
            self.issues.append("No files found in project")

    def _verify_file(self, file_path: str, file_model: "FileModel") -> None:
        """Verify a single file model and everything it contains.

        Args:
            file_path: Key under which the file is stored in the project model;
                used to locate the problem in issue messages.
            file_model: The file model to check.
        """
        # File-level data
        if not file_model.file_path or not file_model.file_path.strip():
            self.issues.append(f"File path is empty in {file_model.name}")

        if not file_model.name or not file_model.name.strip():
            self.issues.append(f"File name is empty in {file_path}")

        # Anonymous extraction sanity: the same child must not be listed twice
        # under one parent.
        if file_model.anonymous_relationships:
            for parent, children in file_model.anonymous_relationships.items():
                seen = set()
                for child in children:
                    if child in seen:
                        self.issues.append(
                            f"Duplicate extracted anonymous entity '{child}' for parent '{parent}' in {file_path}"
                        )
                    seen.add(child)

        for struct_name, struct in file_model.structs.items():
            self._verify_struct(file_path, struct_name, struct)

        for enum_name, enum in file_model.enums.items():
            self._verify_enum(file_path, enum_name, enum)

        for union_name, union in file_model.unions.items():
            self._verify_union(file_path, union_name, union)

        for function in file_model.functions:
            self._verify_function(file_path, function)

        for global_var in file_model.globals:
            self._verify_global(file_path, global_var)

        for alias_name, alias in file_model.aliases.items():
            self._verify_alias(file_path, alias_name, alias)

    def _verify_struct(self, file_path: str, struct_name: str, struct: "Struct") -> None:
        """Verify a struct definition: its key, its own name and its fields."""
        if not self._is_valid_identifier(struct_name):
            self.issues.append(f"Invalid struct name '{struct_name}' in {file_path}")

        if not struct.name or not struct.name.strip():
            self.issues.append(f"Struct name is empty in {file_path}")

        for field in struct.fields:
            self._verify_field(file_path, f"struct {struct_name}", field)

    def _verify_enum(self, file_path: str, enum_name: str, enum: "Enum") -> None:
        """Verify an enum definition: its key, its own name and its values."""
        if not self._is_valid_identifier(enum_name):
            self.issues.append(f"Invalid enum name '{enum_name}' in {file_path}")

        if not enum.name or not enum.name.strip():
            self.issues.append(f"Enum name is empty in {file_path}")

        for enum_value in enum.values:
            if not enum_value.name or not enum_value.name.strip():
                self.issues.append(
                    f"Enum value name is empty in enum {enum_name} in {file_path}"
                )
            elif not self._is_valid_identifier(enum_value.name):
                self.issues.append(
                    f"Invalid enum value name '{enum_value.name}' in enum {enum_name} in {file_path}"
                )

    def _verify_union(self, file_path: str, union_name: str, union: "Union") -> None:
        """Verify a union definition: its key, its own name and its fields."""
        if not self._is_valid_identifier(union_name):
            self.issues.append(f"Invalid union name '{union_name}' in {file_path}")

        if not union.name or not union.name.strip():
            self.issues.append(f"Union name is empty in {file_path}")

        for field in union.fields:
            self._verify_field(file_path, f"union {union_name}", field)

    def _verify_function(self, file_path: str, function: "Function") -> None:
        """Verify a function definition: name, return type and parameters."""
        if not function.name or not function.name.strip():
            self.issues.append(f"Function name is empty in {file_path}")
        elif not self._is_valid_identifier(function.name):
            self.issues.append(
                f"Invalid function name '{function.name}' in {file_path}"
            )

        if not function.return_type or not function.return_type.strip():
            self.issues.append(
                f"Function return type is empty for '{function.name}' in {file_path}"
            )

        for param in function.parameters:
            # The variadic marker '...' is not a named parameter; skip it.
            if param.name == "...":
                continue
            self._verify_field(file_path, f"function {function.name}", param)

    def _verify_global(self, file_path: str, global_var: "Field") -> None:
        """Verify a global variable (checked exactly like any other field)."""
        self._verify_field(file_path, "global", global_var)

    def _verify_alias(self, file_path: str, alias_name: str, alias: "Alias") -> None:
        """Verify a type alias (typedef): its key, name and original type."""
        if not self._is_valid_identifier(alias_name):
            self.issues.append(f"Invalid alias name '{alias_name}' in {file_path}")

        if not alias.name or not alias.name.strip():
            self.issues.append(f"Alias name is empty in {file_path}")

        if not alias.original_type or not alias.original_type.strip():
            self.issues.append(
                f"Alias original type is empty for '{alias_name}' in {file_path}"
            )

    def _verify_filename_keys_and_relations(self, model: "ProjectModel") -> None:
        """Check filename-key invariant and include_relations placement."""
        for key, fm in model.files.items():
            # Keys should be filenames (equal to FileModel.name)
            if key != fm.name:
                self.issues.append(
                    f"Model.files key '{key}' does not match FileModel.name '{fm.name}'"
                )
            # Only .c files should carry include_relations; others must be
            # empty. A missing name is treated as "not a .c file" so that it is
            # reported as an issue instead of raising AttributeError.
            name = fm.name or ""
            if not name.endswith(".c") and fm.include_relations:
                self.issues.append(
                    f"Header/non-C file '{fm.name}' has include_relations; expected empty"
                )

    def _verify_field(self, file_path: str, context: str, field: "Field") -> None:
        """Verify a field (struct field, function parameter, global variable).

        Args:
            file_path: File the field belongs to (for issue messages).
            context: Description of the owner, e.g. ``"struct Foo"``.
            field: The field to check; its name, type and value are inspected.
        """
        # Name
        if not field.name or not field.name.strip():
            self.issues.append(f"Field name is empty in {context} in {file_path}")
        elif not self._is_valid_identifier(field.name):
            self.issues.append(
                f"Invalid field name '{field.name}' in {context} in {file_path}"
            )

        # Type
        if not field.type or not field.type.strip():
            self.issues.append(
                f"Field type is empty for '{field.name}' in {context} in {file_path}"
            )
        elif self._is_suspicious_type(field.type):
            self.issues.append(
                f"Suspicious field type '{field.type}' for '{field.name}' in {context} in {file_path}"
            )

        # Value (optional; only checked when present)
        if field.value and self._is_suspicious_value(field.value):
            self.issues.append(
                f"Suspicious field value '{field.value}' for '{field.name}' in {context} in {file_path}"
            )

    def _is_valid_identifier(self, name: str) -> bool:
        """Return True if *name* (ignoring surrounding whitespace) is a valid C identifier."""
        if not name or not name.strip():
            return False

        return self._IDENTIFIER_RE.fullmatch(name.strip()) is not None

    def _is_suspicious_type(self, type_str: str) -> bool:
        """Return True if a type string looks like the result of a parsing error.

        A type is suspicious when it is empty, consists only of brackets,
        whitespace and backslashes, contains a garbled anonymous-struct
        fragment, has unbalanced brackets, or contains more than 5 newlines or
        more than 10 backslashes.
        """
        if not type_str or not type_str.strip():
            return True

        type_str = type_str.strip()

        # Nothing but brackets / whitespace / backslashes
        if self._BRACKET_NOISE_RE.fullmatch(type_str):
            return True

        # Garbled anonymous extraction anywhere in the text; a start-anchored
        # match would miss it inside an otherwise balanced type string.
        if self._GARBLED_ANONYMOUS_RE.search(type_str):
            return True

        if self._has_unbalanced_brackets(type_str):
            return True

        # Excessive newlines or backslashes
        return type_str.count("\n") > 5 or type_str.count("\\") > 10

    def _is_suspicious_value(self, value: str) -> bool:
        """Return True if a value string looks like the result of a parsing error.

        A value is suspicious when it is empty, consists only of brackets,
        whitespace and backslashes, has unbalanced brackets, or contains more
        than 3 newlines or more than 5 backslashes.
        """
        if not value or not value.strip():
            return True

        value = value.strip()

        # Nothing but brackets / whitespace / backslashes
        if self._BRACKET_NOISE_RE.fullmatch(value):
            return True

        if self._has_unbalanced_brackets(value):
            return True

        # Excessive newlines or backslashes
        return value.count("\n") > 3 or value.count("\\") > 5

    def _has_unbalanced_brackets(self, text: str) -> bool:
        """Return True if (), [] and {} in *text* are not properly nested and closed."""
        pairs = self._BRACKET_PAIRS
        stack = []

        for char in text:
            if char in "([{":
                stack.append(char)
            elif char in pairs:
                # A closer with nothing open, or closing the wrong bracket
                if not stack or stack.pop() != pairs[char]:
                    return True

        return bool(stack)  # Unclosed brackets