Coverage for src/c2puml/models.py: 90%

251 statements  

coverage.py v7.10.4, created at 2025-08-20 03:53 +0000

#!/usr/bin/env python3
"""
Data models for C to PlantUML converter
"""

import json
from dataclasses import asdict, dataclass, field
from typing import Dict, List, Optional, Set


@dataclass
class Field:
    """Represents a field in a struct or global variable"""

    name: str
    type: str
    value: Optional[str] = None

    def __repr__(self):
        if self.value is not None:
            return f"Field(name={self.name}, type={self.type}, value={self.value})"
        return f"Field(name={self.name}, type={self.type})"

    def __post_init__(self):
        """Validate field data after initialization"""
        if not isinstance(self.name, str):
            raise ValueError(
                f"Field name must be a string, got {type(self.name)}: {repr(self.name)}"
            )
        if not self.type or not isinstance(self.type, str):
            raise ValueError(
                f"Field type must be a non-empty string, got {type(self.type)}: {repr(self.type)}"
            )

        # Additional validation: ensure name and type are not just whitespace
        if not self.name.strip():
            raise ValueError(
                f"Field name cannot be empty or whitespace, got: {repr(self.name)}"
            )
        if not self.type.strip():
            raise ValueError(
                f"Field type cannot be empty or whitespace, got: {repr(self.type)}"
            )
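
# Illustrative sketch of the Field contract (the names and types below are
# hypothetical, not parser output): validation runs in __post_init__, so a
# malformed field fails at construction time rather than later in the pipeline.
def _field_usage_example() -> None:
    counter = Field(name="counter", type="uint32_t", value="0")
    print(repr(counter))  # Field(name=counter, type=uint32_t, value=0)
    try:
        Field(name="broken", type="   ")  # whitespace-only type is rejected
    except ValueError as exc:
        print(f"rejected: {exc}")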


# TypedefRelation class removed - tag names moved to struct/enum/union


@dataclass
class IncludeRelation:
    """Represents an include relationship"""

    source_file: str
    included_file: str
    depth: int

    def __post_init__(self):
        """Validate include relation data after initialization"""
        if not self.source_file or not isinstance(self.source_file, str):
            raise ValueError("Source file must be a non-empty string")
        if not self.included_file or not isinstance(self.included_file, str):
            raise ValueError("Included file must be a non-empty string")
        if not isinstance(self.depth, int) or self.depth < 0:
            raise ValueError("Depth must be a non-negative integer")


@dataclass
class Function:
    """Represents a function"""

    name: str
    return_type: str
    parameters: List[Field] = field(default_factory=list)
    is_static: bool = False
    is_declaration: bool = False
    is_inline: bool = False

    def __post_init__(self):
        """Validate function data after initialization"""
        if not self.name or not isinstance(self.name, str):
            raise ValueError("Function name must be a non-empty string")
        if not self.return_type or not isinstance(self.return_type, str):
            raise ValueError("Function return type must be a non-empty string")

        # Convert parameters to Field objects if they're dictionaries
        if self.parameters:
            converted_params = []
            for param in self.parameters:
                if isinstance(param, dict):
                    converted_params.append(Field(**param))
                else:
                    converted_params.append(param)
            self.parameters = converted_params
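
# Illustrative sketch of Function's parameter normalization (the signature shown
# is hypothetical): dict parameters, e.g. freshly loaded from JSON, are converted
# to Field objects by __post_init__, so callers can mix both forms.
def _function_usage_example() -> Function:
    fn = Function(
        name="gpio_write",
        return_type="void",
        parameters=[{"name": "pin", "type": "uint8_t"}, Field("level", "int")],
        is_static=True,
    )
    # Both parameters are now Field instances regardless of their input form.
    return fn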


@dataclass
class Struct:
    """Represents a C struct"""

    name: str
    fields: List[Field] = field(default_factory=list)
    methods: List[Function] = field(default_factory=list)
    tag_name: str = ""  # Tag name for typedef structs
    uses: List[str] = field(
        default_factory=list
    )  # Non-primitive types used by this struct

    def __post_init__(self):
        """Validate struct data after initialization"""
        if not self.name or not isinstance(self.name, str):
            raise ValueError("Struct name must be a non-empty string")


@dataclass
class EnumValue:
    """Represents a single value within a C enum"""

    name: str
    value: Optional[str] = None

    def __post_init__(self):
        if not self.name or not isinstance(self.name, str):
            raise ValueError("Enum value name must be a non-empty string")


@dataclass
class Enum:
    """Represents a C enum"""

    name: str
    values: List[EnumValue] = field(default_factory=list)
    tag_name: str = ""  # Tag name for typedef enums

    def __post_init__(self):
        if not self.name or not isinstance(self.name, str):
            raise ValueError("Enum name must be a non-empty string")
        # Convert any string values to EnumValue
        self.values = [
            v if isinstance(v, EnumValue) else EnumValue(v) for v in self.values
        ]
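
# Illustrative sketch of Enum normalization (hypothetical enum): plain strings
# are wrapped in EnumValue by __post_init__, while explicit EnumValue entries
# keep their assigned value.
def _enum_usage_example() -> Enum:
    state = Enum(
        name="state_t",
        values=["STATE_IDLE", EnumValue("STATE_RUNNING", value="1")],
        tag_name="state",
    )
    # state.values now holds EnumValue objects for both entries.
    return state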


@dataclass
class Union:
    """Represents a C union"""

    name: str
    fields: List[Field] = field(default_factory=list)
    tag_name: str = ""  # Tag name for typedef unions
    uses: List[str] = field(
        default_factory=list
    )  # Non-primitive types used by this union

    def __post_init__(self):
        """Validate union data after initialization"""
        if not self.name or not isinstance(self.name, str):
            raise ValueError("Union name must be a non-empty string")


@dataclass
class Alias:
    """Represents a type alias (typedef)"""

    name: str
    original_type: str
    uses: List[str] = field(
        default_factory=list
    )  # Non-primitive types used by this alias

    def __post_init__(self):
        """Validate alias data after initialization"""
        if not self.name or not isinstance(self.name, str):
            raise ValueError("Alias name must be a non-empty string")
        if not self.original_type or not isinstance(self.original_type, str):
            raise ValueError("Original type must be a non-empty string")


@dataclass
class FileModel:
    """Represents a parsed C/C++ file"""

    file_path: str

    name: str = ""  # Filename extracted from file_path
    structs: Dict[str, Struct] = field(default_factory=dict)
    enums: Dict[str, Enum] = field(default_factory=dict)
    functions: List[Function] = field(default_factory=list)
    globals: List[Field] = field(default_factory=list)
    includes: Set[str] = field(default_factory=set)
    macros: List[str] = field(default_factory=list)
    aliases: Dict[str, Alias] = field(default_factory=dict)
    unions: Dict[str, Union] = field(default_factory=dict)
    include_relations: List[IncludeRelation] = field(default_factory=list)
    anonymous_relationships: Dict[str, List[str]] = field(
        default_factory=dict
    )  # parent -> [child1, child2, ...]
    placeholder_headers: Set[str] = field(
        default_factory=set
    )  # Headers shown as empty (placeholders) in diagrams

    def __post_init__(self):
        """Extract filename from file_path if not provided"""
        if not self.name:
            from pathlib import Path

            self.name = Path(self.file_path).name


    def to_dict(self) -> dict:
        """Convert to dictionary for JSON serialization"""
        data = asdict(self)

        # Convert set to list for JSON serialization and sort for consistency
        data["includes"] = sorted(list(self.includes))
        # Serialize placeholder headers as sorted list
        data["placeholder_headers"] = sorted(list(self.placeholder_headers))
        # Convert include_relations to list of dicts and sort for consistency
        data["include_relations"] = sorted(
            [asdict(rel) for rel in self.include_relations],
            key=lambda x: (x["source_file"], x["included_file"]),
        )
        # Sort all dictionary fields for consistent ordering
        data["structs"] = dict(sorted(data["structs"].items()))
        data["enums"] = dict(sorted(data["enums"].items()))
        data["unions"] = dict(sorted(data["unions"].items()))
        data["aliases"] = dict(sorted(data["aliases"].items()))
        data["macros"] = sorted(data["macros"])
        # Sort anonymous relationships for consistent ordering
        data["anonymous_relationships"] = {
            k: sorted(v) for k, v in sorted(data["anonymous_relationships"].items())
        }
        # Sort functions and globals by name (they are already objects, not dicts)
        data["functions"] = sorted(self.functions, key=lambda x: x.name)
        data["globals"] = sorted(self.globals, key=lambda x: x.name)
        # Convert functions and globals to dicts after sorting
        data["functions"] = [asdict(f) for f in data["functions"]]
        data["globals"] = [asdict(g) for g in data["globals"]]

        # Sort "uses" arrays in structs, unions, and aliases for consistent ordering
        for struct_data in data["structs"].values():
            if "uses" in struct_data:
                struct_data["uses"] = sorted(struct_data["uses"])
        for union_data in data["unions"].values():
            if "uses" in union_data:
                union_data["uses"] = sorted(union_data["uses"])
        for alias_data in data["aliases"].values():
            if "uses" in alias_data:
                alias_data["uses"] = sorted(alias_data["uses"])

        # Tag names are now stored in struct/enum/union objects
        return data


    @classmethod
    def from_dict(cls, data: dict) -> "FileModel":
        """Create from dictionary"""
        # Convert list back to set
        includes = set(data.get("includes", []))

        # Convert globals back to Field objects
        globals_data = data.get("globals", [])
        globals = [Field(**g) if isinstance(g, dict) else g for g in globals_data]

        # Convert functions back to Function objects
        functions_data = data.get("functions", [])
        functions = [
            Function(**f) if isinstance(f, dict) else f for f in functions_data
        ]

        # Convert structs back to Struct objects
        structs_data = data.get("structs", {})
        structs = {}
        for name, struct_data in structs_data.items():
            if isinstance(struct_data, dict):
                fields = [
                    Field(**field) if isinstance(field, dict) else field
                    for field in struct_data.get("fields", [])
                ]
                methods = [
                    Function(**method) if isinstance(method, dict) else method
                    for method in struct_data.get("methods", [])
                ]
                structs[name] = Struct(
                    name=struct_data.get("name", name),
                    fields=fields,
                    methods=methods,
                    tag_name=struct_data.get("tag_name", ""),
                    uses=struct_data.get("uses", []),
                )
            else:
                structs[name] = struct_data

        # Convert enums back to Enum objects
        enums_data = data.get("enums", {})
        enums = {}
        for name, enum_data in enums_data.items():
            if isinstance(enum_data, dict):
                values = [
                    EnumValue(**val) if isinstance(val, dict) else EnumValue(val)
                    for val in enum_data.get("values", [])
                ]
                enums[name] = Enum(name=enum_data.get("name", name), values=values)
            else:
                enums[name] = enum_data

        # Tag names are now stored in struct/enum/union objects

        # Convert include_relations back to IncludeRelation objects
        include_relations_data = data.get("include_relations", [])
        include_relations = [
            IncludeRelation(**rel) if isinstance(rel, dict) else rel
            for rel in include_relations_data
        ]

        # Convert unions back to Union objects
        unions_data = data.get("unions", {})
        unions = {}
        for name, union_data in unions_data.items():
            if isinstance(union_data, dict):
                fields = [
                    Field(**field) if isinstance(field, dict) else field
                    for field in union_data.get("fields", [])
                ]
                unions[name] = Union(
                    name=union_data.get("name", name),
                    fields=fields,
                    tag_name=union_data.get("tag_name", ""),
                    uses=union_data.get("uses", []),
                )
            else:
                unions[name] = union_data

        # Convert aliases back to Alias objects
        aliases_data = data.get("aliases", {})
        aliases = {}
        for name, alias_data in aliases_data.items():
            if isinstance(alias_data, dict):
                aliases[name] = Alias(
                    name=alias_data.get("name", name),
                    original_type=alias_data.get("original_type", ""),
                    uses=alias_data.get("uses", []),
                )
            else:
                # Handle legacy format where aliases was Dict[str, str]
                aliases[name] = Alias(name=name, original_type=alias_data, uses=[])

        # Create new data dict with converted objects
        new_data = data.copy()
        new_data["includes"] = includes
        new_data["globals"] = globals
        new_data["functions"] = functions
        new_data["structs"] = structs
        new_data["enums"] = enums
        new_data["unions"] = unions
        new_data["aliases"] = aliases
        new_data["include_relations"] = include_relations
        # Load placeholder headers (if present)
        new_data["placeholder_headers"] = set(data.get("placeholder_headers", []))

        return cls(**new_data)
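
# Illustrative round trip (hypothetical file content): to_dict() yields a
# JSON-friendly dict with deterministic, sorted ordering, and from_dict()
# rebuilds an equivalent FileModel, converting nested dicts back into
# Struct/Enum/Union/Alias/Field/Function objects.
def _file_model_roundtrip_example() -> FileModel:
    original = FileModel(
        file_path="src/sensor.c",
        structs={"sensor_t": Struct(name="sensor_t", fields=[Field("id", "uint8_t")])},
        includes={"sensor.h", "stdint.h"},
        macros=["SENSOR_MAX"],
    )
    data = original.to_dict()  # sets become sorted lists, objects become dicts
    return FileModel.from_dict(data)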


@dataclass
class ProjectModel:
    """Represents a complete C/C++ project"""

    project_name: str
    source_folder: str
    files: Dict[str, FileModel] = field(default_factory=dict)

    def __post_init__(self):
        """Validate project model data after initialization"""
        if not self.project_name or not isinstance(self.project_name, str):
            raise ValueError("Project name must be a non-empty string")
        if not self.source_folder or not isinstance(self.source_folder, str):
            raise ValueError("Source folder must be a non-empty string")


    def save(self, file_path: str) -> None:
        """Save model to JSON file"""
        data = {
            "project_name": self.project_name,
            "source_folder": self.source_folder,
            "files": {
                path: file_model.to_dict()
                for path, file_model in sorted(self.files.items())
            },
        }

        try:
            with open(file_path, "w", encoding="utf-8") as f:
                json.dump(data, f, indent=2, ensure_ascii=False, sort_keys=True)
        except Exception as e:
            raise ValueError(f"Failed to save model to {file_path}: {e}") from e


    @classmethod
    def from_dict(cls, data: dict) -> "ProjectModel":
        """Create from dictionary"""
        files = {
            path: FileModel.from_dict(file_data)
            for path, file_data in data.get("files", {}).items()
        }

        return cls(
            project_name=data.get("project_name", "Unknown"),
            source_folder=data.get("source_folder", data.get("project_root", "")),
            files=files,
        )

    @classmethod
    def load(cls, file_path: str) -> "ProjectModel":
        """Load model from JSON file"""
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                data = json.load(f)
            return cls.from_dict(data)
        except Exception as e:
            raise ValueError(f"Failed to load model from {file_path}: {e}") from e
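
    # Illustrative persistence sketch (hypothetical paths and names): save()
    # writes the whole project as deterministic, sorted JSON, and load()
    # restores it through from_dict().
    @staticmethod
    def _save_load_roundtrip_example(json_path: str = "model.json") -> "ProjectModel":
        model = ProjectModel(project_name="demo", source_folder="./src")
        model.files["src/sensor.c"] = FileModel(file_path="src/sensor.c")
        model.save(json_path)
        return ProjectModel.load(json_path)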


    def update_uses_fields(self):
        """Update all uses fields across the entire project model"""
        # Collect all available type names from the entire project
        available_types = set()
        for file_model in self.files.values():
            available_types.update(file_model.structs.keys())
            available_types.update(file_model.enums.keys())
            available_types.update(file_model.unions.keys())
            available_types.update(file_model.aliases.keys())

        # Update uses fields for all structures in all files
        for file_model in self.files.values():
            # Update struct uses
            for struct in file_model.structs.values():
                filtered_uses = []
                for struct_field in struct.fields:
                    field_uses = self._extract_non_primitive_types(
                        struct_field.type, available_types
                    )
                    filtered_uses.extend(field_uses)
                struct.uses = list(set(filtered_uses))

            # Update union uses
            for union in file_model.unions.values():
                filtered_uses = []
                for union_field in union.fields:
                    field_uses = self._extract_non_primitive_types(
                        union_field.type, available_types
                    )
                    filtered_uses.extend(field_uses)
                union.uses = list(set(filtered_uses))

            # Update alias uses
            for alias in file_model.aliases.values():
                alias.uses = self._extract_non_primitive_types(
                    alias.original_type, available_types
                )
                # Remove the alias name from its own uses list
                if alias.name in alias.uses:
                    alias.uses.remove(alias.name)


    def _extract_non_primitive_types(
        self, type_str: str, available_types: set
    ) -> list:
        """Extract non-primitive type names from a type string that exist in available_types"""
        # Define primitive types
        primitive_types = {
            "void",
            "char",
            "short",
            "int",
            "long",
            "float",
            "double",
            "signed",
            "unsigned",
            "const",
            "volatile",
            "static",
            "extern",
            "auto",
            "register",
            "inline",
            "restrict",
            "size_t",
            "ptrdiff_t",
            "int8_t",
            "int16_t",
            "int32_t",
            "int64_t",
            "uint8_t",
            "uint16_t",
            "uint32_t",
            "uint64_t",
            "intptr_t",
            "uintptr_t",
            "bool",
            "true",
            "false",
            "NULL",
            "nullptr",
        }

        # Remove common C keywords and operators
        import re

        # Split by common delimiters and operators
        parts = re.split(r"[\[\]\(\)\{\}\s\*&,;]", type_str)

        # Extract potential type names that exist in available_types
        types = []
        for part in parts:
            part = part.strip()
            if part and len(part) > 1 and part not in primitive_types:
                # Check if it looks like a type name (starts with letter, contains letters/numbers/underscores)
                if re.match(r"^[a-zA-Z_][a-zA-Z0-9_]*$", part):
                    # Only include if it exists in available_types
                    if part in available_types:
                        types.append(part)

        return list(set(types))  # Remove duplicates
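
# Illustrative sketch of project-wide "uses" resolution (hypothetical types):
# update_uses_fields() first collects every struct/enum/union/alias name defined
# anywhere in the project, then records, per struct/union/alias, which of those
# names appear in its field or target types; primitives and unknown names are
# filtered out by _extract_non_primitive_types().
def _update_uses_example() -> ProjectModel:
    header = FileModel(file_path="src/sensor.h")
    header.structs["sensor_t"] = Struct(
        name="sensor_t",
        fields=[Field("id", "uint8_t"), Field("cfg", "sensor_config_t *")],
    )
    header.aliases["sensor_config_t"] = Alias(
        name="sensor_config_t", original_type="struct sensor_config"
    )
    project = ProjectModel(
        project_name="demo", source_folder="./src", files={"src/sensor.h": header}
    )
    project.update_uses_fields()
    # sensor_t now lists ["sensor_config_t"]; "uint8_t" is skipped as primitive.
    return project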