Coverage for src/c2puml/models.py: 90%
251 statements
« prev ^ index » next coverage.py v7.10.4, created at 2025-08-20 03:53 +0000
« prev ^ index » next coverage.py v7.10.4, created at 2025-08-20 03:53 +0000
1#!/usr/bin/env python3
2"""
3Data models for C to PlantUML converter
4"""
6import json
7from dataclasses import asdict, dataclass, field
8from typing import Dict, List, Optional, Set
@dataclass
class Field:
    """A named, typed entry such as a struct field or a global variable.

    ``value`` is optional; when it is ``None`` it is left out of the repr.
    """

    name: str
    type: str
    value: Optional[str] = None

    def __post_init__(self):
        """Check ``name`` and ``type`` right after construction.

        Raises:
            ValueError: if ``name`` is not a string, if ``type`` is empty or
                not a string, or if either one is blank / whitespace only.
        """
        # The checks run in a fixed order so the first failing rule decides
        # which message the caller sees.
        name_is_str = isinstance(self.name, str)
        if not name_is_str:
            raise ValueError(
                f"Field name must be a string, got {type(self.name)}: {repr(self.name)}"
            )

        type_is_valid = self.type and isinstance(self.type, str)
        if not type_is_valid:
            raise ValueError(
                f"Field type must be a non-empty string, got {type(self.type)}: {repr(self.type)}"
            )

        # Both values are known to be strings here; reject blank ones.
        if self.name.strip() == "":
            raise ValueError(
                f"Field name cannot be empty or whitespace, got: {repr(self.name)}"
            )
        if self.type.strip() == "":
            raise ValueError(
                f"Field type cannot be empty or whitespace, got: {repr(self.type)}"
            )

    def __repr__(self):
        """Return a compact repr that mentions ``value`` only when it is set."""
        if self.value is None:
            return f"Field(name={self.name}, type={self.type})"
        return f"Field(name={self.name}, type={self.type}, value={self.value})"
46# TypedefRelation class removed - tag names moved to struct/enum/union
@dataclass
class IncludeRelation:
    """One include edge: ``source_file`` includes ``included_file``.

    ``depth`` is a non-negative integer stored alongside the edge.
    """

    source_file: str
    included_file: str
    depth: int

    def __post_init__(self):
        """Validate the relation right after construction.

        Raises:
            ValueError: if either file name is empty or not a string, or if
                ``depth`` is not an ``int`` or is negative.
        """
        # Both path attributes follow the same rule; check them in
        # declaration order so the first bad one is reported.
        path_checks = (
            (self.source_file, "Source file must be a non-empty string"),
            (self.included_file, "Included file must be a non-empty string"),
        )
        for value, message in path_checks:
            if not (value and isinstance(value, str)):
                raise ValueError(message)

        depth = self.depth
        if not isinstance(depth, int) or depth < 0:
            raise ValueError("Depth must be a non-negative integer")
@dataclass
class Function:
    """A function: name, return type, parameters and three boolean flags.

    Parameters may be passed as ``Field`` objects or as dicts of ``Field``
    keyword arguments; dicts are turned into ``Field`` objects on init.
    """

    name: str
    return_type: str
    parameters: List[Field] = field(default_factory=list)
    is_static: bool = False
    is_declaration: bool = False
    is_inline: bool = False

    def __post_init__(self):
        """Validate the name and return type, then normalize ``parameters``.

        Raises:
            ValueError: if ``name`` or ``return_type`` is empty or not a
                string (``Field`` may also raise while converting a dict).
        """
        if not (self.name and isinstance(self.name, str)):
            raise ValueError("Function name must be a non-empty string")
        if not (self.return_type and isinstance(self.return_type, str)):
            raise ValueError("Function return type must be a non-empty string")

        # An empty parameter collection is left exactly as it was given.
        if not self.parameters:
            return

        # Dict entries become Field objects; anything else is kept as-is.
        self.parameters = [
            Field(**param) if isinstance(param, dict) else param
            for param in self.parameters
        ]
@dataclass
class Struct:
    """A C struct: its name, fields, methods, typedef tag and used types."""

    name: str
    fields: List[Field] = field(default_factory=list)
    methods: List[Function] = field(default_factory=list)
    # Tag name for typedef structs
    tag_name: str = ""
    # Non-primitive types used by this struct
    uses: List[str] = field(default_factory=list)

    def __post_init__(self):
        """Reject an empty or non-string struct name.

        Raises:
            ValueError: if ``name`` is falsy or not a ``str``.
        """
        name_ok = self.name and isinstance(self.name, str)
        if not name_ok:
            raise ValueError("Struct name must be a non-empty string")
@dataclass
class EnumValue:
    """A single enumerator: a required name and an optional value string."""

    name: str
    value: Optional[str] = None

    def __post_init__(self):
        """Reject an empty or non-string enumerator name.

        Raises:
            ValueError: if ``name`` is falsy or not a ``str``.
        """
        name_ok = self.name and isinstance(self.name, str)
        if not name_ok:
            raise ValueError("Enum value name must be a non-empty string")
@dataclass
class Enum:
    """Represents a C enum.

    ``values`` may hold ``EnumValue`` objects, plain enumerator names, or
    dicts of ``EnumValue`` keyword arguments (the shape produced by
    ``dataclasses.asdict``). All entries are normalized to ``EnumValue``
    objects in ``__post_init__``.
    """

    name: str
    values: List[EnumValue] = field(default_factory=list)
    tag_name: str = ""  # Tag name for typedef enums

    def __post_init__(self):
        """Validate the enum name and normalize ``values``.

        Raises:
            ValueError: if ``name`` is empty or not a string, or if an entry
                cannot be turned into a valid ``EnumValue``.
        """
        if not self.name or not isinstance(self.name, str):
            raise ValueError("Enum name must be a non-empty string")

        normalized = []
        for item in self.values:
            if isinstance(item, EnumValue):
                normalized.append(item)
            elif isinstance(item, dict):
                # A dict was previously handed to EnumValue() as the name and
                # always failed validation; expand it the same way Function
                # expands dict parameters into Field objects.
                normalized.append(EnumValue(**item))
            else:
                # Anything else is treated as the enumerator name.
                normalized.append(EnumValue(item))
        self.values = normalized
@dataclass
class Union:
    """A C union: its name, member fields, typedef tag and used types."""

    name: str
    fields: List[Field] = field(default_factory=list)
    # Tag name for typedef unions
    tag_name: str = ""
    # Non-primitive types used by this union
    uses: List[str] = field(default_factory=list)

    def __post_init__(self):
        """Reject an empty or non-string union name.

        Raises:
            ValueError: if ``name`` is falsy or not a ``str``.
        """
        name_ok = self.name and isinstance(self.name, str)
        if not name_ok:
            raise ValueError("Union name must be a non-empty string")
@dataclass
class Alias:
    """A typedef: the alias ``name``, the ``original_type`` and used types."""

    name: str
    original_type: str
    # Non-primitive types used by this alias
    uses: List[str] = field(default_factory=list)

    def __post_init__(self):
        """Validate the alias right after construction.

        Raises:
            ValueError: if ``name`` or ``original_type`` is empty or not a
                string.
        """
        # Checked in declaration order so a bad name is reported first.
        required = (
            (self.name, "Alias name must be a non-empty string"),
            (self.original_type, "Original type must be a non-empty string"),
        )
        for value, message in required:
            if not (value and isinstance(value, str)):
                raise ValueError(message)
@dataclass
class FileModel:
    """Represents a parsed C/C++ file.

    Holds everything collected for one file, grouped by kind, and converts
    to and from the plain-dict form used for JSON serialization
    (``to_dict`` / ``from_dict``).
    """

    file_path: str

    name: str = ""  # Filename extracted from file_path
    structs: Dict[str, Struct] = field(default_factory=dict)
    enums: Dict[str, Enum] = field(default_factory=dict)
    functions: List[Function] = field(default_factory=list)
    globals: List[Field] = field(default_factory=list)
    includes: Set[str] = field(default_factory=set)
    macros: List[str] = field(default_factory=list)
    aliases: Dict[str, Alias] = field(default_factory=dict)
    unions: Dict[str, Union] = field(default_factory=dict)
    include_relations: List[IncludeRelation] = field(default_factory=list)
    # parent -> [child1, child2, ...]
    anonymous_relationships: Dict[str, List[str]] = field(default_factory=dict)
    # Headers shown as empty (placeholders) in diagrams
    placeholder_headers: Set[str] = field(default_factory=set)

    def __post_init__(self):
        """Extract filename from file_path if not provided"""
        if not self.name:
            from pathlib import Path

            self.name = Path(self.file_path).name

    def to_dict(self) -> dict:
        """Convert to a dictionary for JSON serialization.

        Sets become sorted lists, and every mapping and list whose order is
        not meaningful is sorted so that the same model always produces the
        same output.

        Returns:
            A dict of built-in containers and strings mirroring this model.
        """
        data = asdict(self)

        # Sets are not JSON serializable; emit them as sorted lists.
        data["includes"] = sorted(self.includes)
        data["placeholder_headers"] = sorted(self.placeholder_headers)

        data["include_relations"] = sorted(
            (asdict(rel) for rel in self.include_relations),
            key=lambda rel: (rel["source_file"], rel["included_file"]),
        )

        # Sort the name-keyed mappings for consistent ordering.
        for key in ("structs", "enums", "unions", "aliases"):
            data[key] = dict(sorted(data[key].items()))
        data["macros"] = sorted(data["macros"])
        data["anonymous_relationships"] = {
            parent: sorted(children)
            for parent, children in sorted(data["anonymous_relationships"].items())
        }

        # Functions and globals are sorted by name on the model objects and
        # only then converted to dicts.
        data["functions"] = [
            asdict(func) for func in sorted(self.functions, key=lambda f: f.name)
        ]
        data["globals"] = [
            asdict(var) for var in sorted(self.globals, key=lambda g: g.name)
        ]

        # Sort the "uses" arrays in structs, unions, and aliases as well.
        for key in ("structs", "unions", "aliases"):
            for entry in data[key].values():
                if "uses" in entry:
                    entry["uses"] = sorted(entry["uses"])

        # Tag names are stored in the struct/enum/union entries themselves.
        return data

    @classmethod
    def from_dict(cls, data: dict) -> "FileModel":
        """Create a ``FileModel`` from the dict form produced by ``to_dict``.

        Nested dicts are rebuilt into their model classes; entries that are
        already model objects are kept as they are. Keys of ``data`` that are
        not converted here are passed to the constructor unchanged.

        Args:
            data: Mapping of ``FileModel`` attribute names to values.

        Returns:
            The reconstructed ``FileModel``.

        Raises:
            ValueError: if a nested entry fails its model's validation.
            TypeError: if ``data`` lacks ``file_path`` or holds a key that is
                not a ``FileModel`` attribute.
        """

        def as_fields(items):
            # Local names avoid shadowing dataclasses.field.
            return [Field(**item) if isinstance(item, dict) else item for item in items]

        def as_functions(items):
            return [
                Function(**item) if isinstance(item, dict) else item for item in items
            ]

        # Lists in the serialized form go back to sets.
        includes = set(data.get("includes", []))

        global_fields = as_fields(data.get("globals", []))
        functions = as_functions(data.get("functions", []))

        structs = {}
        for key, raw in data.get("structs", {}).items():
            if not isinstance(raw, dict):
                structs[key] = raw
                continue
            structs[key] = Struct(
                name=raw.get("name", key),
                fields=as_fields(raw.get("fields", [])),
                methods=as_functions(raw.get("methods", [])),
                tag_name=raw.get("tag_name", ""),
                uses=raw.get("uses", []),
            )

        enums = {}
        for key, raw in data.get("enums", {}).items():
            if not isinstance(raw, dict):
                enums[key] = raw
                continue
            values = []
            for val in raw.get("values", []):
                if isinstance(val, EnumValue):
                    # Already a model object; re-wrapping it would fail
                    # EnumValue's name validation.
                    values.append(val)
                elif isinstance(val, dict):
                    values.append(EnumValue(**val))
                else:
                    values.append(EnumValue(val))
            enums[key] = Enum(
                name=raw.get("name", key),
                values=values,
                # Restore the tag name like structs and unions do; without
                # this a save/load round trip drops it.
                tag_name=raw.get("tag_name", ""),
            )

        include_relations = [
            IncludeRelation(**rel) if isinstance(rel, dict) else rel
            for rel in data.get("include_relations", [])
        ]

        unions = {}
        for key, raw in data.get("unions", {}).items():
            if not isinstance(raw, dict):
                unions[key] = raw
                continue
            unions[key] = Union(
                name=raw.get("name", key),
                fields=as_fields(raw.get("fields", [])),
                tag_name=raw.get("tag_name", ""),
                uses=raw.get("uses", []),
            )

        aliases = {}
        for key, raw in data.get("aliases", {}).items():
            if isinstance(raw, dict):
                aliases[key] = Alias(
                    name=raw.get("name", key),
                    original_type=raw.get("original_type", ""),
                    uses=raw.get("uses", []),
                )
            else:
                # Handle legacy format where aliases was Dict[str, str]
                aliases[key] = Alias(name=key, original_type=raw, uses=[])

        # Start from a shallow copy so untouched keys (file_path, name,
        # macros, anonymous_relationships, ...) pass through as given.
        new_data = data.copy()
        new_data["includes"] = includes
        new_data["globals"] = global_fields
        new_data["functions"] = functions
        new_data["structs"] = structs
        new_data["enums"] = enums
        new_data["unions"] = unions
        new_data["aliases"] = aliases
        new_data["include_relations"] = include_relations
        # Load placeholder headers (if present)
        new_data["placeholder_headers"] = set(data.get("placeholder_headers", []))

        return cls(**new_data)
@dataclass
class ProjectModel:
    """Represents a complete C/C++ project.

    A project is a name, a source folder and a mapping of path -> FileModel.
    It can be saved to and loaded from JSON, and can recompute the ``uses``
    lists of every struct, union and alias it contains.
    """

    project_name: str
    source_folder: str
    files: Dict[str, FileModel] = field(default_factory=dict)

    # Names never reported as "used" types. Deliberately left without an
    # annotation so the dataclass machinery does not treat it as a field.
    _PRIMITIVE_TYPES = frozenset(
        {
            "void",
            "char",
            "short",
            "int",
            "long",
            "float",
            "double",
            "signed",
            "unsigned",
            "const",
            "volatile",
            "static",
            "extern",
            "auto",
            "register",
            "inline",
            "restrict",
            "size_t",
            "ptrdiff_t",
            "int8_t",
            "int16_t",
            "int32_t",
            "int64_t",
            "uint8_t",
            "uint16_t",
            "uint32_t",
            "uint64_t",
            "intptr_t",
            "uintptr_t",
            "bool",
            "true",
            "false",
            "NULL",
            "nullptr",
        }
    )

    def __post_init__(self):
        """Validate project model data after initialization.

        Raises:
            ValueError: if ``project_name`` or ``source_folder`` is empty or
                not a string.
        """
        if not self.project_name or not isinstance(self.project_name, str):
            raise ValueError("Project name must be a non-empty string")
        if not self.source_folder or not isinstance(self.source_folder, str):
            raise ValueError("Source folder must be a non-empty string")

    def save(self, file_path: str) -> None:
        """Save the model to a JSON file.

        Files are written in sorted path order and JSON keys are sorted.

        Args:
            file_path: Destination path; opened for writing as UTF-8.

        Raises:
            ValueError: wrapping any error raised while writing.
        """
        data = {
            "project_name": self.project_name,
            "source_folder": self.source_folder,
            "files": {
                path: file_model.to_dict()
                for path, file_model in sorted(self.files.items())
            },
        }

        try:
            with open(file_path, "w", encoding="utf-8") as f:
                json.dump(data, f, indent=2, ensure_ascii=False, sort_keys=True)
        except Exception as e:
            raise ValueError(f"Failed to save model to {file_path}: {e}") from e

    @classmethod
    def from_dict(cls, data: dict) -> "ProjectModel":
        """Create a project from its dictionary form.

        ``project_name`` defaults to ``"Unknown"``; ``source_folder`` falls
        back to the ``project_root`` key when it is absent.

        Args:
            data: Mapping with ``project_name``, ``source_folder`` and
                ``files`` (path -> file dict).

        Returns:
            The reconstructed ``ProjectModel``.
        """
        files = {
            path: FileModel.from_dict(file_data)
            for path, file_data in data.get("files", {}).items()
        }

        return cls(
            project_name=data.get("project_name", "Unknown"),
            source_folder=data.get("source_folder", data.get("project_root", "")),
            files=files,
        )

    @classmethod
    def load(cls, file_path: str) -> "ProjectModel":
        """Load a model from a JSON file.

        Args:
            file_path: Path of a UTF-8 JSON file.

        Raises:
            ValueError: wrapping any error raised while reading, parsing or
                rebuilding the model.
        """
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                data = json.load(f)
            return cls.from_dict(data)
        except Exception as e:
            raise ValueError(f"Failed to load model from {file_path}: {e}") from e

    def update_uses_fields(self):
        """Update all ``uses`` fields across the entire project model.

        Every struct, union and alias gets a sorted, duplicate-free list of
        the project-defined type names that appear in its field types (or,
        for an alias, in its original type). An alias never lists itself.
        """
        # Collect all available type names from the entire project.
        available_types = set()
        for file_model in self.files.values():
            available_types.update(file_model.structs.keys())
            available_types.update(file_model.enums.keys())
            available_types.update(file_model.unions.keys())
            available_types.update(file_model.aliases.keys())

        for file_model in self.files.values():
            for struct in file_model.structs.values():
                struct.uses = self._collect_field_uses(struct.fields, available_types)

            for union in file_model.unions.values():
                union.uses = self._collect_field_uses(union.fields, available_types)

            for alias in file_model.aliases.values():
                # Remove the alias name from its own uses list.
                alias.uses = [
                    used
                    for used in self._extract_non_primitive_types(
                        alias.original_type, available_types
                    )
                    if used != alias.name
                ]

    def _collect_field_uses(self, fields, available_types: set) -> list:
        """Return the sorted, unique known types used by the given fields."""
        found = set()
        for member in fields:
            found.update(
                self._extract_non_primitive_types(member.type, available_types)
            )
        return sorted(found)

    def _extract_non_primitive_types(self, type_str: str, available_types: set) -> list:
        """Extract type names from ``type_str`` that exist in ``available_types``.

        The string is split on brackets, parentheses, braces, whitespace,
        ``*``, ``&``, ``,`` and ``;``. A token is kept when it is longer than
        one character, is not in the primitive/keyword table, looks like an
        identifier, and is present in ``available_types``.

        Args:
            type_str: A C type expression.
            available_types: Names of the types defined in the project.

        Returns:
            Sorted list of unique matching names. Sorting keeps the result
            identical across runs regardless of string hash randomization.
        """
        import re

        found = set()
        # Splitting on \s already removes whitespace, so tokens need no strip.
        for part in re.split(r"[\[\]\(\)\{\}\s\*&,;]", type_str):
            if len(part) <= 1 or part in self._PRIMITIVE_TYPES:
                continue
            # Check that it looks like a type name (starts with a letter or
            # underscore, then letters/digits/underscores).
            if not re.match(r"^[a-zA-Z_][a-zA-Z0-9_]*$", part):
                continue
            if part in available_types:
                found.add(part)

        return sorted(found)