Coverage for src/c2puml/core/verifier.py: 78%

154 statements  

« prev     ^ index     » next       coverage.py v7.10.4, created at 2025-08-20 03:53 +0000

1#!/usr/bin/env python3 

2""" 

3Verification module for C to PlantUML converter 

4 

5Performs sanity checks on the parsed model to ensure values make sense for C code. 

6""" 

7 

8import logging 

9import re 

10from typing import List, Tuple 

11 

12from ..models import Alias, Enum, Field, FileModel, Function, ProjectModel, Struct, Union 

13 

14 

class ModelVerifier:
    """Run sanity checks over a parsed C project model.

    Every problem found is recorded as a human-readable string in
    ``self.issues``. The verifier never raises for a bad model; callers
    inspect the result of :meth:`verify_model` instead.
    """

    # A C identifier: a letter or underscore, then letters, digits, underscores.
    _IDENTIFIER_RE = re.compile(r"[A-Za-z_][A-Za-z0-9_]*")

    # Text consisting solely of brackets, whitespace and backslashes is what a
    # failed parse typically leaves behind.
    _ONLY_PUNCTUATION_RE = re.compile(r"[\[\]\{\}\(\)\s\\]+")

    # Garbled anonymous-struct extraction such as '} name; struct {'.
    _GARBLED_ANONYMOUS_RE = re.compile(r"\}\s+\w+;\s*struct\s*\{")

    # Closing bracket -> the opening bracket it must pair with.
    _BRACKET_PAIRS = {")": "(", "]": "[", "}": "{"}

    def __init__(self):
        self.logger = logging.getLogger(__name__)
        self.issues: List[str] = []

    def verify_model(self, model: ProjectModel) -> Tuple[bool, List[str]]:
        """
        Verify the sanity of the entire model

        Args:
            model: The ProjectModel to verify

        Returns:
            Tuple of (is_valid, list_of_issues)
        """
        # Start from a fresh list so results of earlier runs do not leak in.
        self.issues = []

        self._verify_project_data(model)
        self._verify_filename_keys_and_relations(model)

        for file_path, file_model in model.files.items():
            self._verify_file(file_path, file_model)

        is_valid = not self.issues

        if is_valid:
            self.logger.info("Model verification passed - all values look sane")
        else:
            self.logger.warning(
                "Model verification found %d issues:", len(self.issues)
            )
            for issue in self.issues:
                self.logger.warning("  - %s", issue)

        return is_valid, self.issues

    def _verify_project_data(self, model: ProjectModel) -> None:
        """Verify project-level data (name, source folder, presence of files)."""
        if not model.project_name or not model.project_name.strip():
            self.issues.append("Project name is empty or whitespace")

        if not model.source_folder or not model.source_folder.strip():
            self.issues.append("Source folder is empty or whitespace")

        if not model.files:
            self.issues.append("No files found in project")

    def _verify_file(self, file_path: str, file_model: FileModel) -> None:
        """Verify a single file model and every entity it contains.

        Args:
            file_path: Key of the file in ``model.files``; used in messages.
            file_model: The FileModel to check.
        """
        if not file_model.file_path or not file_model.file_path.strip():
            self.issues.append(f"File path is empty in {file_model.name}")

        if not file_model.name or not file_model.name.strip():
            self.issues.append(f"File name is empty in {file_path}")

        # Anonymous extraction sanity: the same child must not be listed twice
        # under one parent. Each repeat occurrence is reported separately.
        if file_model.anonymous_relationships:
            for parent, children in file_model.anonymous_relationships.items():
                seen = set()
                for child in children:
                    if child in seen:
                        self.issues.append(
                            f"Duplicate extracted anonymous entity '{child}' for parent '{parent}' in {file_path}"
                        )
                    seen.add(child)

        for struct_name, struct in file_model.structs.items():
            self._verify_struct(file_path, struct_name, struct)

        for enum_name, enum in file_model.enums.items():
            self._verify_enum(file_path, enum_name, enum)

        for union_name, union in file_model.unions.items():
            self._verify_union(file_path, union_name, union)

        for function in file_model.functions:
            self._verify_function(file_path, function)

        for global_var in file_model.globals:
            self._verify_global(file_path, global_var)

        for alias_name, alias in file_model.aliases.items():
            self._verify_alias(file_path, alias_name, alias)

    def _verify_struct(self, file_path: str, struct_name: str, struct: Struct) -> None:
        """Verify a struct definition: its key, its own name and its fields."""
        if not self._is_valid_identifier(struct_name):
            self.issues.append(f"Invalid struct name '{struct_name}' in {file_path}")

        if not struct.name or not struct.name.strip():
            self.issues.append(f"Struct name is empty in {file_path}")

        for field in struct.fields:
            self._verify_field(file_path, f"struct {struct_name}", field)

    def _verify_enum(self, file_path: str, enum_name: str, enum: Enum) -> None:
        """Verify an enum definition: its key, its own name and its values."""
        if not self._is_valid_identifier(enum_name):
            self.issues.append(f"Invalid enum name '{enum_name}' in {file_path}")

        if not enum.name or not enum.name.strip():
            self.issues.append(f"Enum name is empty in {file_path}")

        for enum_value in enum.values:
            if not enum_value.name or not enum_value.name.strip():
                self.issues.append(
                    f"Enum value name is empty in enum {enum_name} in {file_path}"
                )
            elif not self._is_valid_identifier(enum_value.name):
                self.issues.append(
                    f"Invalid enum value name '{enum_value.name}' in enum {enum_name} in {file_path}"
                )

    def _verify_union(self, file_path: str, union_name: str, union: Union) -> None:
        """Verify a union definition: its key, its own name and its fields."""
        if not self._is_valid_identifier(union_name):
            self.issues.append(f"Invalid union name '{union_name}' in {file_path}")

        if not union.name or not union.name.strip():
            self.issues.append(f"Union name is empty in {file_path}")

        for field in union.fields:
            self._verify_field(file_path, f"union {union_name}", field)

    def _verify_function(self, file_path: str, function: Function) -> None:
        """Verify a function definition: name, return type and parameters."""
        if not function.name or not function.name.strip():
            self.issues.append(f"Function name is empty in {file_path}")
        elif not self._is_valid_identifier(function.name):
            self.issues.append(
                f"Invalid function name '{function.name}' in {file_path}"
            )

        if not function.return_type or not function.return_type.strip():
            self.issues.append(
                f"Function return type is empty for '{function.name}' in {file_path}"
            )

        for param in function.parameters:
            # The variadic marker is not an identifier and has no real type.
            if param.name == "...":
                continue
            self._verify_field(file_path, f"function {function.name}", param)

    def _verify_global(self, file_path: str, global_var: Field) -> None:
        """Verify a global variable"""
        self._verify_field(file_path, "global", global_var)

    def _verify_alias(self, file_path: str, alias_name: str, alias: Alias) -> None:
        """Verify a type alias (typedef): its key, name and original type."""
        if not self._is_valid_identifier(alias_name):
            self.issues.append(f"Invalid alias name '{alias_name}' in {file_path}")

        if not alias.name or not alias.name.strip():
            self.issues.append(f"Alias name is empty in {file_path}")

        if not alias.original_type or not alias.original_type.strip():
            self.issues.append(
                f"Alias original type is empty for '{alias_name}' in {file_path}"
            )

    def _verify_filename_keys_and_relations(self, model: ProjectModel) -> None:
        """Check filename-key invariant and include_relations placement."""
        for key, fm in model.files.items():
            # Keys should be filenames (equal to FileModel.name).
            if key != fm.name:
                self.issues.append(
                    f"Model.files key '{key}' does not match FileModel.name '{fm.name}'"
                )
            # Only .c files should carry include_relations; others must be
            # empty. A missing name is reported by _verify_file, so treat it
            # as "not a .c file" here instead of raising AttributeError.
            if not (fm.name or "").endswith(".c") and fm.include_relations:
                self.issues.append(
                    f"Header/non-C file '{fm.name}' has include_relations; expected empty"
                )

    def _verify_field(self, file_path: str, context: str, field: Field) -> None:
        """Verify a field (struct field, function parameter, global variable).

        Args:
            file_path: File the field belongs to; used in messages.
            context: Description of the owner, e.g. ``"struct foo"``.
            field: The Field whose name, type and value are checked.
        """
        if not field.name or not field.name.strip():
            self.issues.append(f"Field name is empty in {context} in {file_path}")
        elif not self._is_valid_identifier(field.name):
            self.issues.append(
                f"Invalid field name '{field.name}' in {context} in {file_path}"
            )

        if not field.type or not field.type.strip():
            self.issues.append(
                f"Field type is empty for '{field.name}' in {context} in {file_path}"
            )
        elif self._is_suspicious_type(field.type):
            self.issues.append(
                f"Suspicious field type '{field.type}' for '{field.name}' in {context} in {file_path}"
            )

        # A missing value is normal; only a present value is inspected.
        if field.value and self._is_suspicious_value(field.value):
            self.issues.append(
                f"Suspicious field value '{field.value}' for '{field.name}' in {context} in {file_path}"
            )

    def _is_valid_identifier(self, name: str) -> bool:
        """Check if a name is a valid C identifier (surrounding whitespace ignored)."""
        if not name or not name.strip():
            return False

        return self._IDENTIFIER_RE.fullmatch(name.strip()) is not None

    def _is_suspicious_type(self, type_str: str) -> bool:
        """Check if a type string looks like the result of a parsing error.

        Returns:
            True when the string is empty, consists only of brackets,
            whitespace and backslashes, contains the garbled anonymous-struct
            pattern anywhere, has unbalanced brackets, or holds more than
            5 newlines or more than 10 backslashes.
        """
        if not type_str or not type_str.strip():
            return True

        type_str = type_str.strip()

        # search(), not match(): the garbled fragment usually sits in the
        # middle of the string, after a balanced 'struct { ... ' prefix.
        if self._GARBLED_ANONYMOUS_RE.search(type_str):
            return True

        return self._looks_garbled(type_str, max_newlines=5, max_backslashes=10)

    def _is_suspicious_value(self, value: str) -> bool:
        """Check if a value string looks like the result of a parsing error.

        Returns:
            True when the string is empty, consists only of brackets,
            whitespace and backslashes, has unbalanced brackets, or holds
            more than 3 newlines or more than 5 backslashes.
        """
        if not value or not value.strip():
            return True

        return self._looks_garbled(value.strip(), max_newlines=3, max_backslashes=5)

    def _looks_garbled(self, text: str, max_newlines: int, max_backslashes: int) -> bool:
        """Apply the heuristics shared by type and value checks to stripped text."""
        if self._ONLY_PUNCTUATION_RE.fullmatch(text):
            return True

        if self._has_unbalanced_brackets(text):
            return True

        return text.count("\n") > max_newlines or text.count("\\") > max_backslashes

    def _has_unbalanced_brackets(self, text: str) -> bool:
        """Check if text has unbalanced or wrongly nested (), [] or {}."""
        stack = []

        for char in text:
            if char in "([{":
                stack.append(char)
            elif char in self._BRACKET_PAIRS:
                # A closer with no opener, or with the wrong opener on top.
                if not stack or stack.pop() != self._BRACKET_PAIRS[char]:
                    return True

        return bool(stack)  # Unclosed brackets