Coverage for src/c2puml/core/verifier.py: 78%

1#!/usr/bin/env python3

2"""

3Verification module for C to PlantUML converter

5Performs sanity checks on the parsed model to ensure values make sense for C code.

6"""

8import logging

9import re

10from typing import List, Tuple

12from ..models import Alias, Enum, Field, FileModel, Function, ProjectModel, Struct, Union

class ModelVerifier:
    """Verifies the sanity of a parsed C code model.

    Each ``_verify_*`` method appends human-readable messages to
    ``self.issues``; nothing is raised for a failed check.  ``verify_model``
    is the public entry point and resets the issue list on every call.
    """

    # ASCII C identifier: a letter or underscore, then letters, digits or
    # underscores.  Used with ``fullmatch`` on the stripped name.
    _IDENTIFIER_RE = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")

    # Text consisting solely of square/curly brackets, whitespace (newlines
    # included) and backslashes -- a typical leftover of a failed parse.
    _ONLY_PUNCTUATION_RE = re.compile(r"[\[\]\{\}\s\\]+")

    # Garbled anonymous extraction such as '} name; struct {'.  Applied with
    # ``search`` because the fragment can occur anywhere inside a type string.
    _GARBLED_ANONYMOUS_RE = re.compile(r"}\s+\w+;\s*struct\s*\{")

    def __init__(self):
        self.logger = logging.getLogger(__name__)
        self.issues = []

    def verify_model(self, model: ProjectModel) -> Tuple[bool, List[str]]:
        """
        Verify the sanity of the entire model

        Args:
            model: The ProjectModel to verify

        Returns:
            Tuple of (is_valid, list_of_issues); ``is_valid`` is True exactly
            when the list of issues is empty.
        """
        # Start from a fresh list so results of earlier calls are not mixed in.
        self.issues = []

        # Verify project-level data
        self._verify_project_data(model)

        # Invariants: filenames as keys and include_relations ownership
        self._verify_filename_keys_and_relations(model)

        # Verify each file
        for file_path, file_model in model.files.items():
            self._verify_file(file_path, file_model)

        is_valid = not self.issues

        if self.issues:
            self.logger.warning("Model verification found %d issues:", len(self.issues))
            for issue in self.issues:
                self.logger.warning(" - %s", issue)
        else:
            self.logger.info("Model verification passed - all values look sane")

        return is_valid, self.issues

    @staticmethod
    def _is_blank(text) -> bool:
        """Return True when *text* is None, empty, or whitespace only."""
        return not text or not text.strip()

    def _verify_project_data(self, model: ProjectModel) -> None:
        """Verify project-level data (name, source folder, presence of files)."""
        if self._is_blank(model.project_name):
            self.issues.append("Project name is empty or whitespace")

        if self._is_blank(model.source_folder):
            self.issues.append("Source folder is empty or whitespace")

        if not model.files:
            self.issues.append("No files found in project")

    def _verify_file(self, file_path: str, file_model: FileModel) -> None:
        """Verify a single file model and every entity it contains.

        Args:
            file_path: Key under which the file is stored in the model; used
                in issue messages.
            file_model: The FileModel to check.
        """
        # Verify file-level data
        if self._is_blank(file_model.file_path):
            self.issues.append(f"File path is empty in {file_model.name}")

        if self._is_blank(file_model.name):
            self.issues.append(f"File name is empty in {file_path}")

        # Anonymous extraction sanity: the same child must not be listed twice
        # under one parent.  Every repeated occurrence yields one issue.
        for parent, children in (file_model.anonymous_relationships or {}).items():
            seen = set()
            for child in children:
                if child in seen:
                    self.issues.append(
                        f"Duplicate extracted anonymous entity '{child}' for parent '{parent}' in {file_path}"
                    )
                seen.add(child)

        # Verify structs
        for struct_name, struct in file_model.structs.items():
            self._verify_struct(file_path, struct_name, struct)

        # Verify enums
        for enum_name, enum in file_model.enums.items():
            self._verify_enum(file_path, enum_name, enum)

        # Verify unions
        for union_name, union in file_model.unions.items():
            self._verify_union(file_path, union_name, union)

        # Verify functions
        for function in file_model.functions:
            self._verify_function(file_path, function)

        # Verify globals
        for global_var in file_model.globals:
            self._verify_global(file_path, global_var)

        # Verify aliases
        for alias_name, alias in file_model.aliases.items():
            self._verify_alias(file_path, alias_name, alias)

    def _verify_struct(self, file_path: str, struct_name: str, struct: Struct) -> None:
        """Verify a struct definition: its key, its own name and its fields."""
        if not self._is_valid_identifier(struct_name):
            self.issues.append(f"Invalid struct name '{struct_name}' in {file_path}")

        if self._is_blank(struct.name):
            self.issues.append(f"Struct name is empty in {file_path}")

        # Verify fields
        for field in struct.fields:
            self._verify_field(file_path, f"struct {struct_name}", field)

    def _verify_enum(self, file_path: str, enum_name: str, enum: Enum) -> None:
        """Verify an enum definition: its key, its own name and its values."""
        if not self._is_valid_identifier(enum_name):
            self.issues.append(f"Invalid enum name '{enum_name}' in {file_path}")

        if self._is_blank(enum.name):
            self.issues.append(f"Enum name is empty in {file_path}")

        # Verify enum values: an empty name and an invalid name are reported
        # as distinct problems, never both for the same value.
        for enum_value in enum.values:
            if self._is_blank(enum_value.name):
                self.issues.append(
                    f"Enum value name is empty in enum {enum_name} in {file_path}"
                )
            elif not self._is_valid_identifier(enum_value.name):
                self.issues.append(
                    f"Invalid enum value name '{enum_value.name}' in enum {enum_name} in {file_path}"
                )

    def _verify_union(self, file_path: str, union_name: str, union: Union) -> None:
        """Verify a union definition: its key, its own name and its fields."""
        if not self._is_valid_identifier(union_name):
            self.issues.append(f"Invalid union name '{union_name}' in {file_path}")

        if self._is_blank(union.name):
            self.issues.append(f"Union name is empty in {file_path}")

        # Verify fields
        for field in union.fields:
            self._verify_field(file_path, f"union {union_name}", field)

    def _verify_function(self, file_path: str, function: Function) -> None:
        """Verify a function definition: name, return type and parameters."""
        if self._is_blank(function.name):
            self.issues.append(f"Function name is empty in {file_path}")
        elif not self._is_valid_identifier(function.name):
            self.issues.append(
                f"Invalid function name '{function.name}' in {file_path}"
            )

        if self._is_blank(function.return_type):
            self.issues.append(
                f"Function return type is empty for '{function.name}' in {file_path}"
            )

        # Verify parameters.  The variadic marker '...' is not an identifier
        # and carries no type, so it is skipped rather than reported.
        for param in function.parameters:
            if param.name == "...":
                continue
            self._verify_field(file_path, f"function {function.name}", param)

    def _verify_global(self, file_path: str, global_var: Field) -> None:
        """Verify a global variable"""
        self._verify_field(file_path, "global", global_var)

    def _verify_alias(self, file_path: str, alias_name: str, alias: Alias) -> None:
        """Verify a type alias (typedef): key, own name and original type."""
        if not self._is_valid_identifier(alias_name):
            self.issues.append(f"Invalid alias name '{alias_name}' in {file_path}")

        if self._is_blank(alias.name):
            self.issues.append(f"Alias name is empty in {file_path}")

        if self._is_blank(alias.original_type):
            self.issues.append(
                f"Alias original type is empty for '{alias_name}' in {file_path}"
            )

    def _verify_filename_keys_and_relations(self, model: ProjectModel) -> None:
        """Check filename-key invariant and include_relations placement."""
        for key, fm in model.files.items():
            # Keys should be filenames (equal to FileModel.name)
            if key != fm.name:
                self.issues.append(
                    f"Model.files key '{key}' does not match FileModel.name '{fm.name}'"
                )
            # Only .c files should carry include_relations; others must be
            # empty.  A missing name is treated as "not a .c file" instead of
            # raising AttributeError, so that the remaining checks still run.
            if not (fm.name or "").endswith(".c") and fm.include_relations:
                self.issues.append(
                    f"Header/non-C file '{fm.name}' has include_relations; expected empty"
                )

    def _verify_field(self, file_path: str, context: str, field: Field) -> None:
        """Verify a field (struct field, function parameter, global variable)

        Args:
            file_path: File the field belongs to; used in issue messages.
            context: Description of the owner, e.g. ``"struct foo"``.
            field: Object exposing ``name``, ``type`` and ``value``.
        """
        # Check for invalid names
        if self._is_blank(field.name):
            self.issues.append(f"Field name is empty in {context} in {file_path}")
        elif not self._is_valid_identifier(field.name):
            self.issues.append(
                f"Invalid field name '{field.name}' in {context} in {file_path}"
            )

        # Check for invalid types
        if self._is_blank(field.type):
            self.issues.append(
                f"Field type is empty for '{field.name}' in {context} in {file_path}"
            )
        elif self._is_suspicious_type(field.type):
            self.issues.append(
                f"Suspicious field type '{field.type}' for '{field.name}' in {context} in {file_path}"
            )

        # Check for suspicious values; a missing or empty value is acceptable.
        if field.value and self._is_suspicious_value(field.value):
            self.issues.append(
                f"Suspicious field value '{field.value}' for '{field.name}' in {context} in {file_path}"
            )

    def _is_valid_identifier(self, name: str) -> bool:
        """Check if a name is a valid C identifier

        Surrounding whitespace is ignored.  Only ASCII letters, digits and
        underscores are accepted, and the first character must not be a digit.
        """
        if self._is_blank(name):
            return False
        return self._IDENTIFIER_RE.fullmatch(name.strip()) is not None

    def _is_suspicious_type(self, type_str: str) -> bool:
        """Check if a type string looks suspicious

        A type is suspicious when it is blank, consists only of
        brackets/whitespace/backslashes, contains a garbled anonymous
        extraction fragment ('} name; struct {') anywhere, has unbalanced
        brackets, or holds more than 5 newlines or more than 10 backslashes.
        """
        if self._is_blank(type_str):
            return True

        type_str = type_str.strip()

        # Obvious parsing errors
        if self._ONLY_PUNCTUATION_RE.fullmatch(type_str):
            return True
        if self._GARBLED_ANONYMOUS_RE.search(type_str):
            return True

        # Check for unbalanced brackets
        if self._has_unbalanced_brackets(type_str):
            return True

        # Check for excessive newlines or backslashes
        return type_str.count("\n") > 5 or type_str.count("\\") > 10

    def _is_suspicious_value(self, value: str) -> bool:
        """Check if a value string looks suspicious

        A value is suspicious when it is blank, consists only of
        brackets/whitespace/backslashes, has unbalanced brackets, or holds
        more than 3 newlines or more than 5 backslashes.
        """
        if self._is_blank(value):
            return True

        value = value.strip()

        # Obvious parsing errors
        if self._ONLY_PUNCTUATION_RE.fullmatch(value):
            return True

        # Check for unbalanced brackets
        if self._has_unbalanced_brackets(value):
            return True

        # Check for excessive newlines or backslashes
        return value.count("\n") > 3 or value.count("\\") > 5

    def _has_unbalanced_brackets(self, text: str) -> bool:
        """Check if text has unbalanced brackets

        Round, square and curly brackets are tracked together, so both a
        missing partner and wrong nesting (e.g. ``"(]"``) count as unbalanced.
        Brackets inside string or character literals are not treated specially.
        """
        stack = []
        bracket_pairs = {")": "(", "]": "[", "}": "{"}

        for char in text:
            if char in "([{":
                stack.append(char)
            elif char in bracket_pairs:
                # A closer with nothing open, or closing the wrong opener.
                if not stack or stack.pop() != bracket_pairs[char]:
                    return True

        return bool(stack)  # Unclosed brackets