Coverage for src/c2puml/core/transformer.py: 57%

873 statements  

coverage.py v7.10.4, created at 2025-08-20 03:53 +0000

#!/usr/bin/env python3

"""
Transformer module for C to PlantUML converter - Step 2: Transform model based on
configuration
"""

import json
import logging
import re
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Pattern, Set, Tuple, Union as TypingUnion
from collections import deque

from ..models import (
    Alias,
    Enum,
    EnumValue,
    Field,
    FileModel,
    Function,
    IncludeRelation,
    ProjectModel,
    Struct,
    Union,
)


class Transformer:
    """Main transformer class for Step 2: Transform model based on configuration"""

    def __init__(self) -> None:
        self.logger = logging.getLogger(__name__)

    def transform(
        self, model_file: str, config_file: str, output_file: Optional[str] = None
    ) -> str:
        """
        Step 2: Transform model based on configuration

        Args:
            model_file: Input JSON model file path
            config_file: Configuration file path
            output_file: Output transformed model file path (optional, defaults to
                model_file)

        Returns:
            Path to the transformed model file
        """
        self.logger.info("Step 2: Transforming model: %s", model_file)

        # Load the model and configuration
        model = self._load_model(model_file)
        config = self._load_config(config_file)

        # Apply transformations
        transformed_model = self._apply_transformations(model, config)

        # Save transformed model
        output_path = output_file or model_file
        self._save_model(transformed_model, output_path)

        self.logger.info("Step 2 complete! Transformed model saved to: %s", output_path)
        return output_path
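
    # A minimal usage sketch (illustrative, not part of the module); the file
    # names are invented, and a "model.json" produced by the Step 1 parser is
    # assumed:
    #
    #     from c2puml.core.transformer import Transformer
    #
    #     transformer = Transformer()
    #     output = transformer.transform(
    #         "model.json", "config.json", output_file="model_transformed.json"
    #     )
    #     print(output)  # -> "model_transformed.json"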

    def _load_model(self, model_file: str) -> ProjectModel:
        """Load model from JSON file"""
        model_path = Path(model_file)
        if not model_path.exists():
            raise FileNotFoundError(f"Model file not found: {model_file}")

        try:
            model = ProjectModel.load(model_file)
            self.logger.debug("Loaded model with %d files", len(model.files))
            return model
        except Exception as e:
            raise ValueError(f"Failed to load model from {model_file}: {e}") from e

    def _load_config(self, config_file: str) -> Dict[str, Any]:
        """Load configuration from JSON file"""
        config_path = Path(config_file)
        if not config_path.exists():
            raise FileNotFoundError(f"Configuration file not found: {config_file}")

        try:
            with open(config_file, "r", encoding="utf-8") as f:
                config = json.load(f)

            self.logger.debug("Loaded configuration from: %s", config_file)
            return config

        except Exception as e:
            raise ValueError(
                f"Failed to load configuration from {config_file}: {e}"
            ) from e

    def _apply_transformations(
        self, model: ProjectModel, config: Dict[str, Any]
    ) -> ProjectModel:
        """Apply all configured transformations to the model"""
        self.logger.info("Applying transformations to model")

        # Apply comprehensive file filtering (moved from parser)
        if "file_filters" in config:
            model = self._apply_file_filters(model, config["file_filters"])

        # Support backward compatibility - convert single 'transformations' to container format
        config = self._ensure_backward_compatibility(config)

        # Discover and apply transformation containers
        model = self._apply_transformation_containers(model, config)

        # Apply simplified depth-based include processing
        if self._should_process_include_relations(config):
            model = self._process_include_relations_simplified(model, config)

        self.logger.info(
            "Transformations complete. Model now has %d files", len(model.files)
        )
        return model
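
    # For orientation, a configuration exercising each branch above might look
    # like the following sketch (key names follow the code in this module;
    # the concrete patterns and values are invented):
    #
    #     {
    #         "file_filters": {"include": ["\\.c$", "\\.h$"], "exclude": ["test_"]},
    #         "include_depth": 3,
    #         "transformations_01_cleanup": {"remove": {"macros": ["^DEBUG_"]}},
    #         "transformations_02_style": {"rename": {"functions": {"^old_": "new_"}}}
    #     }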

    def _apply_transformation_containers(
        self, model: ProjectModel, config: Dict[str, Any]
    ) -> ProjectModel:
        """Discover and apply transformation containers in alphabetical order"""
        transformation_containers = self._discover_transformation_containers(config)

        if not transformation_containers:
            return model

        for container_name, transformation_config in transformation_containers:
            self.logger.info("Applying transformation container: %s", container_name)
            model = self._apply_single_transformation_container(
                model, transformation_config, container_name
            )
            self._log_model_state_after_container(model, container_name)

        return model

    def _log_model_state_after_container(
        self, model: ProjectModel, container_name: str
    ) -> None:
        """Log model state after applying a transformation container"""
        total_elements = sum(
            len(file_model.structs) + len(file_model.enums) + len(file_model.unions) +
            len(file_model.functions) + len(file_model.globals) + len(file_model.macros) +
            len(file_model.aliases)
            for file_model in model.files.values()
        )
        self.logger.info(
            "After %s: model contains %d files with %d total elements",
            container_name, len(model.files), total_elements
        )

    def _should_process_include_relations(self, config: Dict[str, Any]) -> bool:
        """Check if include relations should be processed based on global or file-specific settings"""
        # Check global include_depth
        if config.get("include_depth", 1) > 1:
            return True

        # Check file-specific include_depth settings
        if "file_specific" in config:
            for file_config in config["file_specific"].values():
                if file_config.get("include_depth", 1) > 1:
                    return True

        return False

    def _discover_transformation_containers(self, config: Dict[str, Any]) -> List[Tuple[str, Dict[str, Any]]]:
        """
        Discover all transformation containers and sort them alphabetically

        Returns:
            List of (container_name, transformation_config) tuples sorted by name
        """
        transformation_containers = [
            (key, value)
            for key, value in config.items()
            if key.startswith("transformations") and isinstance(value, dict)
        ]

        # Sort alphabetically by container name
        transformation_containers.sort(key=lambda x: x[0])

        self.logger.info(
            "Discovered %d transformation containers: %s",
            len(transformation_containers),
            [name for name, _ in transformation_containers]
        )

        return transformation_containers

    def _ensure_backward_compatibility(self, config: Dict[str, Any]) -> Dict[str, Any]:
        """
        Ensure backward compatibility by converting the old single 'transformations'
        key to the new container format
        """
        # Make a copy to avoid modifying the original
        config = config.copy()

        # Check if the old format is used (single 'transformations' key)
        if self._is_legacy_transformation_format(config):
            self.logger.info("Converting legacy 'transformations' format to container format")

            # Move old transformations to a default container
            old_transformations = config.pop("transformations")
            config["transformations_00_default"] = old_transformations

            self.logger.debug("Converted to container: transformations_00_default")

        return config

    def _is_legacy_transformation_format(self, config: Dict[str, Any]) -> bool:
        """Check if configuration uses the legacy transformation format"""
        return (
            "transformations" in config and
            not any(key.startswith("transformations_") for key in config.keys())
        )
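
    # Sketch of the conversion performed above (container names beyond the
    # default are invented): a legacy config {"transformations": {...}} becomes
    # {"transformations_00_default": {...}}, and the "00" prefix sorts it ahead
    # of explicitly numbered containers such as "transformations_01_cleanup"
    # when containers are applied in alphabetical order.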

    def _apply_single_transformation_container(
        self,
        model: ProjectModel,
        transformation_config: Dict[str, Any],
        container_name: str
    ) -> ProjectModel:
        """
        Apply a single transformation container

        Args:
            model: Project model to transform
            transformation_config: Single transformation container configuration
            container_name: Name of the container for logging

        Returns:
            Transformed project model
        """
        self.logger.debug("Processing transformation container: %s", container_name)

        # Determine target files for this container
        target_files = self._get_target_files(model, transformation_config)

        # Apply transformations in a specific order: remove -> rename -> add.
        # This order ensures that removals happen first, then renaming with
        # deduplication, then additions to the cleaned model.
        model = self._apply_remove_operations(model, transformation_config, target_files, container_name)
        model = self._apply_rename_operations(model, transformation_config, target_files, container_name)
        model = self._apply_add_operations(model, transformation_config, target_files, container_name)

        return model

    def _get_target_files(
        self, model: ProjectModel, transformation_config: Dict[str, Any]
    ) -> Set[str]:
        """Determine which files to apply transformations to based on file_selection"""
        selected_files = transformation_config.get("file_selection", [])

        # Validate that file_selection is a list
        if not isinstance(selected_files, list):
            selected_files = []
            self.logger.warning("Invalid file_selection format; must be a list, defaulting to empty list")

        # Determine which files to apply transformations to
        if not selected_files:
            target_files = set(model.files.keys())
            self.logger.debug("No file selection specified, applying to all %d files", len(target_files))
        else:
            target_files = self._match_files_by_patterns(model, selected_files)
            self.logger.debug(
                "File selection patterns %s matched %d files: %s",
                selected_files, len(target_files), list(target_files)
            )

        return target_files

    def _match_files_by_patterns(
        self, model: ProjectModel, patterns: List[str]
    ) -> Set[str]:
        """Match files based on selection patterns"""
        target_files = set()
        for pattern in patterns:
            for file_path in model.files.keys():
                if self._matches_pattern(file_path, pattern):
                    target_files.add(file_path)
        return target_files
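
    # Illustrative sketch of the matching semantics (file paths invented):
    # with model.files keys {"src/main.c", "src/util.c", "include/util.h"} and
    # file_selection ["util"], _match_files_by_patterns returns
    # {"src/util.c", "include/util.h"}, because _matches_pattern uses
    # re.search rather than a full-string match.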

    def _apply_remove_operations(
        self,
        model: ProjectModel,
        transformation_config: Dict[str, Any],
        target_files: Set[str],
        container_name: str
    ) -> ProjectModel:
        """Apply remove operations for a transformation container"""
        if "remove" not in transformation_config:
            return model

        self.logger.debug("Applying remove operations for container: %s", container_name)

        # Collect typedef names BEFORE removing them for type reference cleanup
        removed_typedef_names = self._collect_typedef_names_for_removal(
            model, transformation_config["remove"], target_files
        )

        model = self._apply_removals(model, transformation_config["remove"], target_files)

        # Clean up type references after typedef removal using pre-collected names
        if removed_typedef_names:
            self.logger.debug("Calling type reference cleanup for container: %s", container_name)
            self._cleanup_type_references_by_names(model, removed_typedef_names)

        return model

    def _apply_rename_operations(
        self,
        model: ProjectModel,
        transformation_config: Dict[str, Any],
        target_files: Set[str],
        container_name: str
    ) -> ProjectModel:
        """Apply rename operations for a transformation container"""
        if "rename" not in transformation_config:
            return model

        self.logger.debug("Applying rename operations for container: %s", container_name)
        return self._apply_renaming(model, transformation_config["rename"], target_files)

    def _apply_add_operations(
        self,
        model: ProjectModel,
        transformation_config: Dict[str, Any],
        target_files: Set[str],
        container_name: str
    ) -> ProjectModel:
        """Apply add operations for a transformation container"""
        if "add" not in transformation_config:
            return model

        self.logger.debug("Applying add operations for container: %s", container_name)
        return self._apply_additions(model, transformation_config["add"], target_files)

    def _collect_typedef_names_for_removal(
        self,
        model: ProjectModel,
        remove_config: Dict[str, Any],
        target_files: Set[str]
    ) -> Set[str]:
        """Collect typedef names that will be removed for type reference cleanup"""
        removed_typedef_names = set()

        if "typedef" not in remove_config:
            return removed_typedef_names

        typedef_patterns = remove_config["typedef"]
        compiled_patterns = self._compile_patterns(typedef_patterns)

        if not compiled_patterns:
            return removed_typedef_names

        for file_path in target_files:
            if file_path in model.files:
                file_model = model.files[file_path]
                for alias_name in file_model.aliases.keys():
                    if self._matches_any_pattern(alias_name, compiled_patterns):
                        removed_typedef_names.add(alias_name)

        self.logger.debug("Pre-identified typedefs for removal: %s", list(removed_typedef_names))
        return removed_typedef_names

    def _process_include_relations_simplified(
        self, model: ProjectModel, config: Dict[str, Any]
    ) -> ProjectModel:
        """
        Simplified include processing following a structured depth-based approach:
        1. Each include structure has a single root C file
        2. Process the C file's direct includes through filters first
        3. Then recursively process header files' includes with filtering
        4. Continue until include_depth is reached
        """
        global_include_depth = config.get("include_depth", 1)
        file_specific_config = config.get("file_specific", {})
        include_filter_local_only = config.get("include_filter_local_only", False)
        always_show_includes = config.get("always_show_includes", False)

        self.logger.info(
            "Processing includes with simplified depth-based approach (global_depth=%d)",
            global_include_depth
        )

        # Clear all existing include relations
        for file_model in model.files.values():
            file_model.include_relations = []

        # Create a filename-to-file-model mapping for quick lookup
        file_map = {}
        for file_model in model.files.values():
            filename = Path(file_model.name).name
            file_map[filename] = file_model

        # Process each C file as a root with its own include structure
        c_files = [fm for fm in model.files.values() if fm.name.endswith(".c")]

        for root_file in c_files:
            self._process_root_c_file_includes(
                root_file, file_map, global_include_depth, file_specific_config,
                include_filter_local_only, always_show_includes
            )

        return model

    def _process_root_c_file_includes(
        self,
        root_file: FileModel,
        file_map: Dict[str, FileModel],
        global_include_depth: int,
        file_specific_config: Dict[str, Any],
        include_filter_local_only: bool,
        always_show_includes: bool
    ) -> None:
        """
        Process includes for a single root C file following the simplified approach:
        - Start with the root C file
        - Apply filters at each depth level
        - Process layer by layer until the maximum depth is reached
        """
        root_filename = Path(root_file.name).name

        # Get file-specific settings or use global defaults
        include_depth = global_include_depth
        include_filters = []

        if root_filename in file_specific_config:
            file_config = file_specific_config[root_filename]
            include_depth = file_config.get("include_depth", global_include_depth)
            include_filters = file_config.get("include_filter", [])

        # If configured to keep only the local header, ensure its filter pattern is present
        if include_filter_local_only:
            local_header_pattern = f"^{Path(root_filename).stem}\\.h$"
            if local_header_pattern not in include_filters:
                include_filters.append(local_header_pattern)

        # Skip processing if depth is 1 or less (no include relations needed)
        if include_depth <= 1:
            self.logger.debug(
                "Skipping include processing for %s (depth=%d)",
                root_filename, include_depth
            )
            return

        # Compile filter patterns
        compiled_filters = []
        if include_filters:
            try:
                compiled_filters = [re.compile(pattern) for pattern in include_filters]
                self.logger.debug(
                    "Compiled %d filter patterns for %s",
                    len(compiled_filters), root_filename
                )
            except re.error as e:
                self.logger.warning(
                    "Invalid regex pattern for %s: %s", root_filename, e
                )

        self.logger.debug(
            "Processing includes for root C file %s (depth=%d, filters=%d)",
            root_filename, include_depth, len(compiled_filters)
        )

        # Track processed files to avoid cycles
        processed_files = set()

        # Reset placeholder headers for this root file context
        try:
            root_file.placeholder_headers.clear()
        except Exception:
            # In case the model was loaded without this field
            root_file.placeholder_headers = set()

        # Process includes level by level using a BFS approach
        current_level = [root_file]  # Start with the root C file

        for depth in range(1, include_depth + 1):
            next_level = []

            self.logger.debug(
                "Processing depth %d for %s (%d files at current level)",
                depth, root_filename, len(current_level)
            )

            for current_file in current_level:
                current_filename = Path(current_file.name).name

                # Skip if already processed to avoid cycles
                if current_filename in processed_files:
                    continue
                processed_files.add(current_filename)

                # Process each include in the current file
                for include_name in current_file.includes:
                    # Determine if this include is filtered out by patterns
                    filtered_out_by_patterns = False
                    if compiled_filters:
                        if not any(pattern.search(include_name) for pattern in compiled_filters):
                            if always_show_includes:
                                filtered_out_by_patterns = True
                                self.logger.debug(
                                    "Include %s filtered by patterns at depth %d for %s, but will be shown as placeholder",
                                    include_name, depth, root_filename
                                )
                            else:
                                self.logger.debug(
                                    "Filtered out include %s at depth %d for %s",
                                    include_name, depth, root_filename
                                )
                                continue

                    # Check if the included file exists in our project
                    if include_name not in file_map:
                        self.logger.debug(
                            "Include %s not found in project files (depth %d, root %s)",
                            include_name, depth, root_filename
                        )
                        continue

                    # Prevent self-references
                    if include_name == current_filename:
                        self.logger.debug(
                            "Skipping self-reference %s at depth %d for %s",
                            include_name, depth, root_filename
                        )
                        continue

                    # Check for duplicate relations to prevent cycles
                    existing_relation = any(
                        rel.source_file == current_filename and rel.included_file == include_name
                        for rel in root_file.include_relations
                    )

                    if existing_relation:
                        self.logger.debug(
                            "Skipping duplicate relation %s -> %s for %s",
                            current_filename, include_name, root_filename
                        )
                        continue

                    # Prevent processing files that would create cycles (already processed)
                    if include_name in processed_files:
                        self.logger.debug(
                            "Skipping already processed file %s to prevent cycle for %s",
                            include_name, root_filename
                        )
                        continue

                    # Create and add the include relation to the root C file
                    relation = IncludeRelation(
                        source_file=current_filename,
                        included_file=include_name,
                        depth=depth
                    )
                    root_file.include_relations.append(relation)

                    self.logger.debug(
                        "Added include relation: %s -> %s (depth %d) for root %s",
                        current_filename, include_name, depth, root_filename
                    )

                    # If filtered out by patterns and always_show_includes is enabled, mark as placeholder
                    if filtered_out_by_patterns:
                        try:
                            root_file.placeholder_headers.add(include_name)
                        except Exception:
                            root_file.placeholder_headers = {include_name}
                        # Do not process further includes/content for this header
                        continue

                    # Add the included file to the next level for further processing
                    included_file = file_map[include_name]
                    if included_file not in next_level and include_name not in processed_files:
                        next_level.append(included_file)

            # Move to the next level for the next iteration
            current_level = next_level

            # Break if there are no more files to process
            if not current_level:
                self.logger.debug(
                    "No more files to process at depth %d for %s",
                    depth + 1, root_filename
                )
                break

        self.logger.debug(
            "Completed include processing for %s: %d relations generated",
            root_filename, len(root_file.include_relations)
        )
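
    # Worked sketch of the BFS above (file names invented): given
    # main.c -> a.h, a.h -> b.h, b.h -> c.h and include_depth=2, depth 1
    # records main.c -> a.h and depth 2 records a.h -> b.h; b.h -> c.h is
    # never visited because the loop stops after depth 2. All relations land
    # on the root file's include_relations list, tagged with the depth at
    # which they were found.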

    def _apply_file_filters(
        self, model: ProjectModel, filters: Dict[str, Any]
    ) -> ProjectModel:
        """Apply user-configured file-level filters (important filtering is already
        done in the parser)"""
        include_patterns = self._compile_patterns(filters.get("include", []))
        exclude_patterns = self._compile_patterns(filters.get("exclude", []))

        if not include_patterns and not exclude_patterns:
            return model

        filtered_files = {}
        for file_path, file_model in model.files.items():
            if self._should_include_file(file_path, include_patterns, exclude_patterns):
                filtered_files[file_path] = file_model

        model.files = filtered_files
        self.logger.debug(
            "User file filtering: %d files after filtering", len(model.files)
        )
        return model

    def _apply_include_filters(
        self, model: ProjectModel, include_filters: Dict[str, List[str]]
    ) -> ProjectModel:
        """Apply include filters for each root file based on regex patterns

        Args:
            model: The project model to apply filters to
            include_filters: Dictionary mapping root files to their include filter patterns
        """
        self.logger.info(
            "Applying include filters for %d root files", len(include_filters)
        )

        # Compile regex patterns for each root file
        compiled_filters = {}
        for root_file, patterns in include_filters.items():
            try:
                compiled_filters[root_file] = [
                    re.compile(pattern) for pattern in patterns
                ]
                self.logger.debug(
                    "Compiled %d patterns for root file: %s", len(patterns), root_file
                )
            except re.error as e:
                self.logger.warning(
                    "Invalid regex pattern for root file %s: %s", root_file, e
                )
                # Skip invalid patterns for this root file
                continue

        if not compiled_filters:
            self.logger.warning(
                "No valid include filters found, skipping include filtering"
            )
            return model

        # Create a mapping from header files to their root C files
        header_to_root = self._create_header_to_root_mapping(model)

        # Apply filters to each file in the model
        for file_path, file_model in model.files.items():
            # Find the root file for this file
            root_file = self._find_root_file_with_mapping(
                file_path, file_model, header_to_root
            )

            if root_file in compiled_filters:
                # Apply filtering (preserve includes arrays, filter include_relations)
                self._filter_include_relations(
                    file_model, compiled_filters[root_file], root_file
                )

        return model

    def _create_header_to_root_mapping(self, model: ProjectModel) -> Dict[str, str]:
        """Create a mapping from header files to their root C files"""
        header_to_root = {}

        # First, map C files to themselves
        c_files = []
        for file_path, file_model in model.files.items():
            if file_model.name.endswith(".c"):
                header_to_root[file_model.name] = file_model.name
                c_files.append(file_model.name)

        # Then, map header files to their corresponding C files
        for file_path, file_model in model.files.items():
            if not file_model.name.endswith(".c"):  # It's a header file
                # Strategy 1: Look for a C file with the same base name
                header_base_name = Path(file_model.name).stem
                matching_c_file = header_base_name + ".c"

                if matching_c_file in [Path(c_file).name for c_file in c_files]:
                    header_to_root[file_model.name] = matching_c_file
                else:
                    # Strategy 2: Find which C file includes this header
                    including_c_files = []
                    for c_file_path, c_file_model in model.files.items():
                        if (c_file_model.name.endswith(".c") and
                                file_model.name in c_file_model.includes):
                            including_c_files.append(c_file_model.name)

                    if including_c_files:
                        # Use the first C file that includes this header
                        header_to_root[file_model.name] = including_c_files[0]
                    elif c_files:
                        # Strategy 3: Fall back to the first available C file
                        header_to_root[file_model.name] = c_files[0]

        return header_to_root
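
    # Illustrative sketch of the three strategies (file names invented):
    # utils.h maps to utils.c when a C file with the same base name exists;
    # a helpers.h with no helpers.c maps to the first .c file that includes
    # it; and an orphan header included by nothing falls back to the first
    # .c file in the model.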

    def _find_root_file_with_mapping(
        self, file_path: str, file_model: FileModel, header_to_root: Dict[str, str]
    ) -> str:
        """Find the root C file for a given file using the header mapping"""
        if file_model.name.endswith(".c"):
            return file_model.name

        # For header files, use the mapping
        return header_to_root.get(file_model.name, file_model.name)

    def _find_root_file(self, file_path: str, file_model: FileModel) -> str:
        """Find the root C file for a given file"""
        filename = Path(file_path).name

        # If it's a .c file, it's its own root
        if filename.endswith(".c"):
            return filename

        # For header files, find the corresponding .c file
        base_name = Path(file_path).stem

        # Look for a .c file with the same base name
        if base_name and not filename.startswith("."):
            return base_name + ".c"

        # Fallback: use the filename as root (original behavior)
        return filename

    def _filter_include_relations(
        self, file_model: FileModel, patterns: List[re.Pattern], root_file: str
    ) -> None:
        """Unified include_relations filtering, preserving includes arrays."""
        self.logger.debug(
            "Filtering include_relations for file %s (root: %s)", file_model.name, root_file
        )

        original_relations_count = len(file_model.include_relations)
        filtered_relations: List[IncludeRelation] = []

        for relation in file_model.include_relations:
            if self._matches_any_pattern(relation.included_file, patterns):
                filtered_relations.append(relation)
            else:
                self.logger.debug(
                    "Filtered out include relation: %s -> %s (root: %s)",
                    relation.source_file,
                    relation.included_file,
                    root_file,
                )

        file_model.include_relations = filtered_relations

        self.logger.debug(
            "Include filtering for %s: relations %d->%d (includes preserved)",
            file_model.name,
            original_relations_count,
            len(file_model.include_relations),
        )

    # Removed deprecated include filtering wrappers; use _filter_include_relations instead

    def _matches_any_pattern(self, text: str, patterns: List[Pattern[str]]) -> bool:
        """Check if text matches any of the given regex patterns"""
        return any(pattern.search(text) for pattern in patterns)

    def _matches_pattern(self, text: str, pattern: str) -> bool:
        """Safe regex match for a single pattern string"""
        try:
            return bool(re.search(pattern, text))
        except re.error as e:
            self.logger.warning("Invalid regex pattern '%s': %s", pattern, e)
            return False

    def _apply_model_transformations(
        self, model: ProjectModel, transformations: Dict[str, Any]
    ) -> ProjectModel:
        """Apply model-level transformations with file selection support"""
        # Get file selection configuration
        selected_files = transformations.get("file_selection", [])

        # Validate that file_selection is a list
        if not isinstance(selected_files, list):
            selected_files = []
            self.logger.warning("Invalid file_selection format; must be a list, defaulting to empty list")

        # Determine which files to apply transformations to.
        # If selected_files is empty or not specified, apply to all files.
        if not selected_files:
            target_files = set(model.files.keys())
            self.logger.debug("No file selection specified, applying to all %d files", len(target_files))
        else:
            # Apply only to selected files
            target_files = set()
            for pattern in selected_files:
                for file_path in model.files.keys():
                    if self._matches_pattern(file_path, pattern):
                        target_files.add(file_path)

            self.logger.debug(
                "File selection patterns %s matched %d files: %s",
                selected_files, len(target_files), list(target_files)
            )

        self.logger.debug(
            "Applying transformations to %d files: %s",
            len(target_files),
            list(target_files),
        )

        # Rename elements
        if "rename" in transformations:
            model = self._apply_renaming(model, transformations["rename"], target_files)

        # Add elements
        if "add" in transformations:
            model = self._apply_additions(model, transformations["add"], target_files)

        # Remove elements
        if "remove" in transformations:
            model = self._apply_removals(model, transformations["remove"], target_files)

            # Clean up type references after typedef removal (guarded by the
            # enclosing check so a missing "remove" key cannot raise a KeyError)
            if "typedef" in transformations["remove"]:
                self._cleanup_type_references(model, transformations["remove"]["typedef"], target_files)

        return model

    def _apply_renaming(
        self, model: ProjectModel, rename_config: Dict[str, Any], target_files: Set[str]
    ) -> ProjectModel:
        """Apply renaming transformations to selected files"""
        self.logger.debug(
            "Applying renaming transformations to %d files", len(target_files)
        )

        # Apply renaming only to target files
        for file_path in target_files:
            if file_path in model.files:
                file_model = model.files[file_path]
                self.logger.debug("Applying renaming to file: %s", file_path)
                self._apply_file_level_renaming(file_model, rename_config)

        # Apply file renaming (affects model.files keys)
        if "files" in rename_config:
            model = self._rename_files(model, rename_config["files"], target_files)

        return model

    def _apply_file_level_renaming(
        self, file_model: FileModel, rename_config: Dict[str, Any]
    ) -> None:
        """Apply all renaming operations to a single file"""
        rename_operations = [
            ("typedef", self._rename_typedefs),
            ("functions", self._rename_functions),
            ("macros", self._rename_macros),
            ("globals", self._rename_globals),
            ("includes", self._rename_includes),
            ("structs", self._rename_structs),
            ("enums", self._rename_enums),
            ("unions", self._rename_unions),
        ]

        for config_key, rename_method in rename_operations:
            if config_key in rename_config:
                rename_method(file_model, rename_config[config_key])

    def _cleanup_type_references(
        self, model: ProjectModel, removed_typedef_patterns: List[str], target_files: Set[str]
    ) -> None:
        """
        Clean up type references after typedef removal

        This method removes type references that point to removed typedefs from:
        - Function parameters and return types
        - Global variable types
        - Struct field types
        """
        self.logger.debug("Starting type reference cleanup with patterns: %s, target_files: %s",
                          removed_typedef_patterns, list(target_files))

        if not removed_typedef_patterns:
            self.logger.debug("No typedef patterns to clean up")
            return

        compiled_patterns = self._compile_patterns(removed_typedef_patterns)
        if not compiled_patterns:
            self.logger.debug("No valid compiled patterns")
            return

        # Track removed type names for cleanup
        removed_types = set()

        # First, collect all removed typedef names from all target files
        for file_path in target_files:
            if file_path in model.files:
                file_model = model.files[file_path]

                # Check what typedefs would be removed from this file
                for alias_name in list(file_model.aliases.keys()):
                    if self._matches_any_pattern(alias_name, compiled_patterns):
                        removed_types.add(alias_name)
                        self.logger.debug("Found removed typedef: %s in file %s", alias_name, file_path)

        self.logger.debug("Total removed types identified: %s", list(removed_types))

        # Clean up type references across all files since typedefs can be used anywhere
        cleaned_count = 0
        for file_path, file_model in model.files.items():
            file_cleaned = 0

            # Clean function parameter and return types
            for func in file_model.functions:
                # Clean return type
                if func.return_type and self._contains_removed_type(func.return_type, removed_types):
                    old_type = func.return_type
                    func.return_type = self._remove_type_references(func.return_type, removed_types)
                    if func.return_type != old_type:
                        file_cleaned += 1
                        self.logger.debug(
                            "Cleaned return type '%s' -> '%s' in function %s",
                            old_type, func.return_type, func.name
                        )

                # Clean parameter types
                for param in func.parameters:
                    if param.type and self._contains_removed_type(param.type, removed_types):
                        old_type = param.type
                        param.type = self._remove_type_references(param.type, removed_types)
                        if param.type != old_type:
                            file_cleaned += 1
                            self.logger.debug(
                                "Cleaned parameter type '%s' -> '%s' for parameter %s",
                                old_type, param.type, param.name
                            )

            # Clean global variable types
            for global_var in file_model.globals:
                if global_var.type and self._contains_removed_type(global_var.type, removed_types):
                    old_type = global_var.type
                    global_var.type = self._remove_type_references(global_var.type, removed_types)
                    if global_var.type != old_type:
                        file_cleaned += 1
                        self.logger.debug(
                            "Cleaned global variable type '%s' -> '%s' for %s",
                            old_type, global_var.type, global_var.name
                        )

            # Clean struct field types
            for struct in file_model.structs.values():
                for field in struct.fields:
                    if field.type and self._contains_removed_type(field.type, removed_types):
                        old_type = field.type
                        field.type = self._remove_type_references(field.type, removed_types)
                        if field.type != old_type:
                            file_cleaned += 1
                            self.logger.debug(
                                "Cleaned struct field type '%s' -> '%s' for %s.%s",
                                old_type, field.type, struct.name, field.name
                            )

            cleaned_count += file_cleaned

        if cleaned_count > 0:
            self.logger.info(
                "Cleaned %d type references to removed typedefs: %s",
                cleaned_count, list(removed_types)
            )

    def _contains_removed_type(self, type_str: str, removed_types: Set[str]) -> bool:
        """Check if a type string contains any of the removed types"""
        if not type_str or not removed_types:
            return False

        # Check for removed type names in the type string.
        # This handles cases like "old_point_t *", "const old_config_t", etc.
        for removed_type in removed_types:
            if removed_type in type_str:
                return True
        return False

    def _remove_type_references(self, type_str: str, removed_types: Set[str]) -> str:
        """Remove references to removed types from a type string"""
        if not type_str or not removed_types:
            return type_str

        cleaned_type = type_str
        for removed_type in removed_types:
            if removed_type in cleaned_type:
                # Replace the removed type with "void" to maintain type safety
                cleaned_type = cleaned_type.replace(removed_type, "void")

        # Clean up any double spaces or other artifacts
        cleaned_type = " ".join(cleaned_type.split())
        return cleaned_type
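
    # Illustrative sketch (type names invented): with removed_types
    # {"old_point_t"}, "const old_point_t *" becomes "const void *". Note
    # that the plain substring replacement also rewrites "old_point_t_list"
    # to "void_list"; the rename path below avoids this by using \b word
    # boundaries.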

    def _cleanup_type_references_by_names(
        self, model: ProjectModel, removed_typedef_names: Set[str]
    ) -> None:
        """
        Clean up type references using pre-collected typedef names

        This method removes type references that point to removed typedefs from:
        - Function parameters and return types
        - Global variable types
        - Struct field types
        """
        if not removed_typedef_names:
            self.logger.debug("No removed typedef names provided")
            return

        self.logger.debug("Cleaning type references for removed typedefs: %s", list(removed_typedef_names))

        # Clean up type references across all files since typedefs can be used anywhere
        cleaned_count = 0
        for file_path, file_model in model.files.items():
            file_cleaned = 0

            # Clean function parameter and return types
            for func in file_model.functions:
                # Clean return type
                if func.return_type and self._contains_removed_type(func.return_type, removed_typedef_names):
                    old_type = func.return_type
                    func.return_type = self._remove_type_references(func.return_type, removed_typedef_names)
                    if func.return_type != old_type:
                        file_cleaned += 1
                        self.logger.debug(
                            "Cleaned return type '%s' -> '%s' in function %s",
                            old_type, func.return_type, func.name
                        )

                # Clean parameter types
                for param in func.parameters:
                    if param.type and self._contains_removed_type(param.type, removed_typedef_names):
                        old_type = param.type
                        param.type = self._remove_type_references(param.type, removed_typedef_names)
                        if param.type != old_type:
                            file_cleaned += 1
                            self.logger.debug(
                                "Cleaned parameter type '%s' -> '%s' for parameter %s",
                                old_type, param.type, param.name
                            )

            # Clean global variable types
            for global_var in file_model.globals:
                if global_var.type and self._contains_removed_type(global_var.type, removed_typedef_names):
                    old_type = global_var.type
                    global_var.type = self._remove_type_references(global_var.type, removed_typedef_names)
                    if global_var.type != old_type:
                        file_cleaned += 1
                        self.logger.debug(
                            "Cleaned global variable type '%s' -> '%s' for %s",
                            old_type, global_var.type, global_var.name
                        )

            # Clean struct field types
            for struct in file_model.structs.values():
                for field in struct.fields:
                    if field.type and self._contains_removed_type(field.type, removed_typedef_names):
                        old_type = field.type
                        field.type = self._remove_type_references(field.type, removed_typedef_names)
                        if field.type != old_type:
                            file_cleaned += 1
                            self.logger.debug(
                                "Cleaned struct field type '%s' -> '%s' for %s.%s",
                                old_type, field.type, struct.name, field.name
                            )

            cleaned_count += file_cleaned
            if file_cleaned > 0:
                self.logger.debug("Cleaned %d type references in file %s", file_cleaned, file_path)

        if cleaned_count > 0:
            self.logger.info(
                "Cleaned %d type references to removed typedefs: %s",
                cleaned_count, list(removed_typedef_names)
            )
        else:
            self.logger.debug("No type references found to clean up")

    def _update_type_references_for_renames(self, file_model: FileModel, typedef_renames: Dict[str, str]) -> None:
        """Update all type references when typedefs are renamed"""
        updated_count = 0

        # Update function return types and parameter types
        for func in file_model.functions:
            # Update return type
            if func.return_type:
                old_type = func.return_type
                new_type = self._update_type_string_for_renames(func.return_type, typedef_renames)
                if new_type != old_type:
                    func.return_type = new_type
                    updated_count += 1
                    self.logger.debug(
                        "Updated return type '%s' -> '%s' in function %s",
                        old_type, new_type, func.name
                    )

            # Update parameter types
            for param in func.parameters:
                if param.type:
                    old_type = param.type
                    new_type = self._update_type_string_for_renames(param.type, typedef_renames)
                    if new_type != old_type:
                        param.type = new_type
                        updated_count += 1
                        self.logger.debug(
                            "Updated parameter type '%s' -> '%s' for parameter %s in function %s",
                            old_type, new_type, param.name, func.name
                        )

        # Update global variable types
        for global_var in file_model.globals:
            if global_var.type:
                old_type = global_var.type
                new_type = self._update_type_string_for_renames(global_var.type, typedef_renames)
                if new_type != old_type:
                    global_var.type = new_type
                    updated_count += 1
                    self.logger.debug(
                        "Updated global variable type '%s' -> '%s' for %s",
                        old_type, new_type, global_var.name
                    )

        # Update struct field types
        for struct in file_model.structs.values():
            for field in struct.fields:
                if field.type:
                    old_type = field.type
                    new_type = self._update_type_string_for_renames(field.type, typedef_renames)
                    if new_type != old_type:
                        field.type = new_type
                        updated_count += 1
                        self.logger.debug(
                            "Updated struct field type '%s' -> '%s' for %s.%s",
                            old_type, new_type, struct.name, field.name
                        )

        # Update union field types
        for union in file_model.unions.values():
            for field in union.fields:
                if field.type:
                    old_type = field.type
                    new_type = self._update_type_string_for_renames(field.type, typedef_renames)
                    if new_type != old_type:
                        field.type = new_type
                        updated_count += 1
                        self.logger.debug(
                            "Updated union field type '%s' -> '%s' for %s.%s",
                            old_type, new_type, union.name, field.name
                        )

        if updated_count > 0:
            self.logger.info(
                "Updated %d type references for renamed typedefs in %s: %s",
                updated_count, file_model.name, typedef_renames
            )

    def _update_type_string_for_renames(self, type_str: str, typedef_renames: Dict[str, str]) -> str:
        """Update a type string by replacing old typedef names with new ones"""
        if not type_str or not typedef_renames:
            return type_str

        updated_type = type_str
        for old_name, new_name in typedef_renames.items():
            # Use word boundaries to avoid partial matches.
            # This handles cases like "old_config_t *", "const old_config_t", etc.
            pattern = r'\b' + re.escape(old_name) + r'\b'
            updated_type = re.sub(pattern, new_name, updated_type)

        return updated_type
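
    # Illustrative sketch (type names invented): with typedef_renames
    # {"old_config_t": "new_config_t"}, "const old_config_t *" becomes
    # "const new_config_t *", while "old_config_t_backup" is left untouched
    # because the \b word boundaries require a full identifier match.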

    def _rename_dict_elements(
        self,
        elements_dict: Dict[str, Any],
        patterns_map: Dict[str, str],
        create_renamed_element: Callable[[str, Any], Any],
        element_type: str,
        file_name: str
    ) -> Dict[str, Any]:
        """Generic method to rename dictionary elements with deduplication"""
        original_count = len(elements_dict)
        seen_names = set()
        deduplicated_elements = {}

        for name, element in elements_dict.items():
            # Apply rename patterns
            new_name = self._apply_rename_patterns(name, patterns_map)

            # Check for duplicates
            if new_name in seen_names:
                self.logger.debug(
                    "Deduplicating %s: removing duplicate '%s' (renamed from '%s')",
                    element_type, new_name, name
                )
                continue

            seen_names.add(new_name)

            # Create an updated element with the new name
            updated_element = create_renamed_element(new_name, element)
            deduplicated_elements[new_name] = updated_element

        removed_count = original_count - len(deduplicated_elements)
        if removed_count > 0:
            self.logger.info(
                "Renamed %ss in %s, removed %d duplicates", element_type, file_name, removed_count
            )

        return deduplicated_elements

    def _rename_list_elements(
        self,
        elements_list: List[Any],
        patterns_map: Dict[str, str],
        get_element_name: Callable[[Any], str],
        create_renamed_element: Callable[[str, Any], Any],
        element_type: str,
        file_name: str
    ) -> List[Any]:
        """Generic method to rename list elements with deduplication"""
        original_count = len(elements_list)
        seen_names = set()
        deduplicated_elements = []

        for element in elements_list:
            name = get_element_name(element)
            # Apply rename patterns
            new_name = self._apply_rename_patterns(name, patterns_map)

            # Check for duplicates
            if new_name in seen_names:
                self.logger.debug(
                    "Deduplicating %s: removing duplicate '%s' (renamed from '%s')",
                    element_type, new_name, name
                )
                continue

            seen_names.add(new_name)

            # Create an updated element with the new name
            updated_element = create_renamed_element(new_name, element)
            deduplicated_elements.append(updated_element)

        removed_count = original_count - len(deduplicated_elements)
        if removed_count > 0:
            self.logger.info(
                "Renamed %ss in %s, removed %d duplicates", element_type, file_name, removed_count
            )

        return deduplicated_elements

    def _apply_rename_patterns(self, original_name: str, patterns_map: Dict[str, str]) -> str:
        """
        Apply rename patterns to an element name

        Args:
            original_name: Original element name
            patterns_map: Dict mapping regex patterns to replacement strings

        Returns:
            Renamed element name (or the original if no patterns match)
        """
        for pattern, replacement in patterns_map.items():
            try:
                # Apply regex substitution
                new_name = re.sub(pattern, replacement, original_name)
                if new_name != original_name:
                    self.logger.debug(
                        "Renamed '%s' to '%s' using pattern '%s'",
                        original_name, new_name, pattern
                    )
                    return new_name
            except re.error as e:
                self.logger.warning(
                    "Invalid regex pattern '%s': %s", pattern, e
                )
                continue

        return original_name
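
    # Illustrative sketch (names invented): with patterns_map
    # {"^legacy_(.*)$": "core_\\1"}, "legacy_parser" is rewritten to
    # "core_parser" and returned immediately; only the first pattern that
    # changes the name wins, so later entries in the map are not applied on
    # top of it.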

    def _rename_typedefs(self, file_model: FileModel, patterns_map: Dict[str, str]) -> None:
        """Rename typedefs with deduplication"""
        if not patterns_map:
            return

        # Track old-to-new name mappings for type reference updates
        typedef_renames = {}

        def create_renamed_alias(name: str, alias: Alias) -> Alias:
            return Alias(name, alias.original_type, alias.uses)

        # Capture renames before applying them
        for old_name in file_model.aliases:
            new_name = self._apply_rename_patterns(old_name, patterns_map)
            if new_name != old_name:
                typedef_renames[old_name] = new_name

        file_model.aliases = self._rename_dict_elements(
            file_model.aliases, patterns_map, create_renamed_alias, "typedef", file_model.name
        )

        # Update type references throughout the file
        if typedef_renames:
            self._update_type_references_for_renames(file_model, typedef_renames)

    def _rename_functions(self, file_model: FileModel, patterns_map: Dict[str, str]) -> None:
        """Rename functions with deduplication"""
        if not patterns_map:
            return

        def get_function_name(func: Function) -> str:
            return func.name

        def create_renamed_function(name: str, func: Function) -> Function:
            return Function(
                name, func.return_type, func.parameters, func.is_static, func.is_declaration
            )

        file_model.functions = self._rename_list_elements(
            file_model.functions, patterns_map, get_function_name,
            create_renamed_function, "function", file_model.name
        )

    def _rename_macros(self, file_model: FileModel, patterns_map: Dict[str, str]) -> None:
        """Rename macros with deduplication"""
        if not patterns_map:
            return

        def get_macro_name(macro: str) -> str:
            # Extract the macro name from the full macro definition
            if macro.startswith("#define "):
                match = re.search(r"#define\s+([A-Za-z_][A-Za-z0-9_]*)", macro)
                if match:
                    return match.group(1)
            return macro

        def create_renamed_macro(name: str, macro: str) -> str:
            # Replace the macro name in the full macro definition
            if macro.startswith("#define "):
                # Use regex to replace the macro name while preserving parameters and value.
                # Pattern matches: #define MACRO_NAME or #define MACRO_NAME(params)
                pattern = r"(#define\s+)([A-Za-z_][A-Za-z0-9_]*)(\s*\([^)]*\))?(.*)?"
                match = re.match(pattern, macro)
                if match:
                    define_part = match.group(1)   # "#define "
                    old_name = match.group(2)      # "OLD_NAME"
                    params = match.group(3) or ""  # "(params)" or ""
                    rest = match.group(4) or ""    # " value" or ""
                    return f"{define_part}{name}{params}{rest}"
            return macro

        file_model.macros = self._rename_list_elements(
            file_model.macros, patterns_map, get_macro_name,
            create_renamed_macro, "macro", file_model.name
        )
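
    # Illustrative sketch (macro text invented): renaming OLD_MAX to NEW_MAX
    # turns "#define OLD_MAX(a, b) ((a) > (b) ? (a) : (b))" into
    # "#define NEW_MAX(a, b) ((a) > (b) ? (a) : (b))"; group 3 of the regex
    # preserves the parameter list and group 4 preserves the body.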

1369 def _rename_globals(self, file_model: FileModel, patterns_map: Dict[str, str]) -> None: 

1370 """Rename global variables with deduplication""" 

1371 if not patterns_map: 

1372 return 

1373 

1374 def get_global_name(global_var: Field) -> str: 

1375 return global_var.name 

1376 

1377 def create_renamed_global(name: str, global_var: Field) -> Field: 

1378 return Field(name, global_var.type) 

1379 

1380 file_model.globals = self._rename_list_elements( 

1381 file_model.globals, patterns_map, get_global_name, 

1382 create_renamed_global, "global", file_model.name 

1383 ) 

1384 

1385 def _rename_includes(self, file_model: FileModel, patterns_map: Dict[str, str]) -> None: 

1386 """Rename includes with deduplication""" 

1387 if not patterns_map: 

1388 return 

1389 

1390 # Rename includes using set-based deduplication 

1391 file_model.includes = self._rename_set_elements( 

1392 file_model.includes, patterns_map, "include", file_model.name 

1393 ) 

1394 

1395 # Also update include_relations with new names 

1396 file_model.include_relations = self._rename_include_relations( 

1397 file_model.include_relations, patterns_map 

1398 ) 

1399 

1400 def _rename_set_elements( 

1401 self, 

1402 elements_set: Set[str], 

1403 patterns_map: Dict[str, str], 

1404 element_type: str, 

1405 file_name: str 

1406 ) -> Set[str]: 

1407 """Generic method to rename set elements with deduplication""" 

1408 original_count = len(elements_set) 

1409 seen_names = set() 

1410 deduplicated_elements = set() 

1411 

1412 for element in elements_set: 

1413 # Apply rename patterns 

1414 new_name = self._apply_rename_patterns(element, patterns_map) 

1415 

1416 # Check for duplicates 

1417 if new_name in seen_names: 

1418 self.logger.debug( 

1419 "Deduplicating %s: removing duplicate '%s' (renamed from '%s')", 

1420 element_type, new_name, element 

1421 ) 

1422 continue 

1423 

1424 seen_names.add(new_name) 

1425 deduplicated_elements.add(new_name) 

1426 

1427 removed_count = original_count - len(deduplicated_elements) 

1428 if removed_count > 0: 

1429 self.logger.info( 

1430 "Renamed %ss in %s, removed %d duplicates", element_type, file_name, removed_count 

1431 ) 

1432 

1433 return deduplicated_elements 

1434 

1435 def _rename_include_relations( 

1436 self, relations: List[IncludeRelation], patterns_map: Dict[str, str] 

1437 ) -> List[IncludeRelation]: 

1438 """Rename include relations with pattern mapping""" 

1439 updated_relations = [] 

1440 for relation in relations: 

1441 new_included_file = self._apply_rename_patterns(relation.included_file, patterns_map) 

1442 updated_relation = IncludeRelation( 

1443 relation.source_file, 

1444 new_included_file, 

1445 relation.depth 

1446 ) 

1447 updated_relations.append(updated_relation) 

1448 return updated_relations 

1449 

1450 def _rename_structs(self, file_model: FileModel, patterns_map: Dict[str, str]) -> None: 

1451 """Rename structs with deduplication""" 

1452 if not patterns_map: 

1453 return 

1454 

1455 def create_renamed_struct(name: str, struct: Struct) -> Struct: 

1456 return Struct(name, struct.fields) 

1457 

1458 file_model.structs = self._rename_dict_elements( 

1459 file_model.structs, patterns_map, create_renamed_struct, "struct", file_model.name 

1460 ) 

1461 

1462 def _rename_enums(self, file_model: FileModel, patterns_map: Dict[str, str]) -> None: 

1463 """Rename enums with deduplication""" 

1464 if not patterns_map: 

1465 return 

1466 

1467 def create_renamed_enum(name: str, enum: Enum) -> Enum: 

1468 return Enum(name, enum.values) 

1469 

1470 file_model.enums = self._rename_dict_elements( 

1471 file_model.enums, patterns_map, create_renamed_enum, "enum", file_model.name 

1472 ) 

1473 

1474 def _rename_unions(self, file_model: FileModel, patterns_map: Dict[str, str]) -> None: 

1475 """Rename unions with deduplication""" 

1476 if not patterns_map: 

1477 return 

1478 

1479 def create_renamed_union(name: str, union: Union) -> Union: 

1480 return Union(name, union.fields) 

1481 

1482 file_model.unions = self._rename_dict_elements( 

1483 file_model.unions, patterns_map, create_renamed_union, "union", file_model.name 

1484 ) 

1485 

1486 def _rename_files(self, model: ProjectModel, patterns_map: Dict[str, str], target_files: Set[str]) -> ProjectModel: 

1487 """Rename files and update model.files keys""" 

1488 if not patterns_map: 

1489 return model 

1490 

1491 updated_files = {} 

1492 # Track mapping of old->new filenames (basenames) 

1493 file_rename_map: Dict[str, str] = {} 

1494 

1495 for file_path, file_model in model.files.items(): 

1496 # Only rename files in target_files 

1497 if file_path in target_files: 

1498 new_file_path = self._apply_rename_patterns(file_path, patterns_map) 

1499 

1500 if new_file_path != file_path: 

1501 # Update file_model.name to match new path 

1502 file_model.name = new_file_path 

1503 file_rename_map[Path(file_path).name] = Path(new_file_path).name 

1504 self.logger.debug("Renamed file: %s -> %s", file_path, new_file_path) 

1505 

1506 updated_files[new_file_path] = file_model 

1507 else: 

1508 # Keep original file unchanged 

1509 updated_files[file_path] = file_model 

1510 

1511 model.files = updated_files 

1512 

1513 # Propagate file renames to includes and include_relations across all files 

1514 if file_rename_map: 

1515 for fm in model.files.values(): 

1516 # Update includes set 

1517 if fm.includes: 

1518 new_includes: Set[str] = set() 

1519 for inc in fm.includes: 

1520 # Apply explicit rename map first; fallback to patterns map 

1521 inc_new = file_rename_map.get(inc, self._apply_rename_patterns(inc, patterns_map)) 

1522 new_includes.add(inc_new) 

1523 fm.includes = new_includes 

1524 

1525 # Update include_relations (both ends) 

1526 if fm.include_relations: 

1527 for rel in fm.include_relations: 

1528 src_new = file_rename_map.get(rel.source_file, self._apply_rename_patterns(rel.source_file, patterns_map)) 

1529 inc_new = file_rename_map.get(rel.included_file, self._apply_rename_patterns(rel.included_file, patterns_map)) 

1530 rel.source_file = src_new 

1531 rel.included_file = inc_new 

1532 

1533 return model 

1534 
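# A minimal sketch of the propagation step above: once a file is
# renamed, every other file's includes are rewritten through the same
# old->new basename map (with _apply_rename_patterns as the fallback),
# so the include graph keeps pointing at names that still exist. The
# names here are hypothetical.
file_rename_map = {"legacy_io.h": "io.h"}
includes = {"legacy_io.h", "string.h"}
includes = {file_rename_map.get(inc, inc) for inc in includes}
# includes == {"io.h", "string.h"}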

1535 def _apply_additions( 

1536 self, model: ProjectModel, add_config: Dict[str, Any], target_files: Set[str] 

1537 ) -> ProjectModel: 

1538 """Apply addition transformations to selected files""" 

1539 self.logger.debug( 

1540 "Applying addition transformations to %d files", len(target_files) 

1541 ) 

1542 

1543 # Apply additions only to target files 

1544 for file_path in target_files: 

1545 if file_path in model.files: 

1546 # Placeholder: addition logic is not yet implemented; it 

1547 # would insert new elements (structs, enums, functions, 

1548 # etc.) into the selected file models. 

1549 self.logger.debug("Applying additions to file: %s", file_path) 

1550 

1551 return model 

1552 

1553 def _apply_removals( 

1554 self, model: ProjectModel, remove_config: Dict[str, Any], target_files: Set[str] 

1555 ) -> ProjectModel: 

1556 """Apply removal transformations to selected files""" 

1557 self.logger.debug( 

1558 "Applying removal transformations to %d files", len(target_files) 

1559 ) 

1560 

1561 # Apply removals only to target files 

1562 for file_path in target_files: 

1563 if file_path in model.files: 

1564 file_model = model.files[file_path] 

1565 self.logger.debug("Applying removals to file: %s", file_path) 

1566 self._apply_file_level_removals(file_model, remove_config) 

1567 

1568 return model 

1569 

1570 def _apply_file_level_removals( 

1571 self, file_model: FileModel, remove_config: Dict[str, Any] 

1572 ) -> None: 

1573 """Apply all removal operations to a single file""" 

1574 removal_operations = [ 

1575 ("typedef", self._remove_typedefs), 

1576 ("functions", self._remove_functions), 

1577 ("macros", self._remove_macros), 

1578 ("globals", self._remove_globals), 

1579 ("includes", self._remove_includes), 

1580 ("structs", self._remove_structs), 

1581 ("enums", self._remove_enums), 

1582 ("unions", self._remove_unions), 

1583 ] 

1584 

1585 for config_key, removal_method in removal_operations: 

1586 if config_key in remove_config: 

1587 removal_method(file_model, remove_config[config_key]) 

1588 
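# A minimal sketch (hypothetical pattern values) of the remove_config
# keys dispatched above; each key holds a list of regex patterns, and
# any key left out leaves that element category untouched.
remove_config = {
    "functions": [r"^debug_"],      # drop debug helpers
    "macros": [r"^LOG_"],           # drop logging macros
    "includes": [r"^internal_"],    # drop private headers
}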

1589 def _remove_dict_elements( 

1590 self, 

1591 elements_dict: Dict[str, Any], 

1592 patterns: List[str], 

1593 element_type: str, 

1594 file_name: str 

1595 ) -> Dict[str, Any]: 

1596 """Generic method to remove dictionary elements matching patterns""" 

1597 if not patterns: 

1598 return elements_dict 

1599 

1600 original_count = len(elements_dict) 

1601 compiled_patterns = self._compile_patterns(patterns) 

1602 

1603 # Filter out elements that match any pattern 

1604 filtered_elements = {} 

1605 for name, element in elements_dict.items(): 

1606 if not self._matches_any_pattern(name, compiled_patterns): 

1607 filtered_elements[name] = element 

1608 else: 

1609 self.logger.debug("Removed %s: %s", element_type, name) 

1610 

1611 removed_count = original_count - len(filtered_elements) 

1612 if removed_count > 0: 

1613 self.logger.info( 

1614 "Removed %d %ss from %s", removed_count, element_type, file_name 

1615 ) 

1616 

1617 return filtered_elements 

1618 
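# A minimal sketch of the filtering performed by _remove_dict_elements,
# shown standalone on a plain dict (element names are hypothetical):
import re

elements = {"internal_state": object(), "public_api": object()}
compiled = [re.compile(r"^internal_")]
kept = {name: el for name, el in elements.items()
        if not any(p.search(name) for p in compiled)}
# kept retains only "public_api"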

1619 def _remove_list_elements( 

1620 self, 

1621 elements_list: List[Any], 

1622 patterns: List[str], 

1623 get_element_name: Callable[[Any], str], 

1624 element_type: str, 

1625 file_name: str 

1626 ) -> List[Any]: 

1627 """Generic method to remove list elements matching patterns""" 

1628 if not patterns: 

1629 return elements_list 

1630 

1631 original_count = len(elements_list) 

1632 compiled_patterns = self._compile_patterns(patterns) 

1633 

1634 # Filter out elements that match any pattern 

1635 filtered_elements = [] 

1636 for element in elements_list: 

1637 name = get_element_name(element) 

1638 if not self._matches_any_pattern(name, compiled_patterns): 

1639 filtered_elements.append(element) 

1640 else: 

1641 self.logger.debug("Removed %s: %s", element_type, name) 

1642 

1643 removed_count = original_count - len(filtered_elements) 

1644 if removed_count > 0: 

1645 self.logger.info( 

1646 "Removed %d %ss from %s", removed_count, element_type, file_name 

1647 ) 

1648 

1649 return filtered_elements 

1650 

1651 def _remove_typedefs(self, file_model: FileModel, patterns: List[str]) -> None: 

1652 """Remove typedefs matching regex patterns""" 

1653 file_model.aliases = self._remove_dict_elements( 

1654 file_model.aliases, patterns, "typedef", file_model.name 

1655 ) 

1656 

1657 def _remove_functions(self, file_model: FileModel, patterns: List[str]) -> None: 

1658 """Remove functions matching regex patterns""" 

1659 def get_function_name(func: Function) -> str: 

1660 return func.name 

1661 

1662 file_model.functions = self._remove_list_elements( 

1663 file_model.functions, patterns, get_function_name, "function", file_model.name 

1664 ) 

1665 

1666 def _remove_macros(self, file_model: FileModel, patterns: List[str]) -> None: 

1667 """Remove macros matching regex patterns""" 

1668 def get_macro_name(macro: str) -> str: 

1669 # Extract macro name from full macro definition 

1671 if macro.startswith("#define "): 

1672 # Extract macro name using regex 

1673 match = re.search(r"#define\s+([A-Za-z_][A-Za-z0-9_]*)", macro) 

1674 if match: 

1675 return match.group(1) 

1676 return macro 

1677 

1678 file_model.macros = self._remove_list_elements( 

1679 file_model.macros, patterns, get_macro_name, "macro", file_model.name 

1680 ) 

1681 
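# A minimal sketch of how get_macro_name reduces a stored macro
# definition to its bare identifier before pattern matching (the macro
# itself is hypothetical):
import re

macro = "#define MAX_BUFFER_SIZE 1024"
match = re.search(r"#define\s+([A-Za-z_][A-Za-z0-9_]*)", macro)
name = match.group(1) if match else macro   # -> "MAX_BUFFER_SIZE"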

1682 def _remove_globals(self, file_model: FileModel, patterns: List[str]) -> None: 

1683 """Remove global variables matching regex patterns""" 

1684 def get_global_name(global_var: Field) -> str: 

1685 return global_var.name 

1686 

1687 file_model.globals = self._remove_list_elements( 

1688 file_model.globals, patterns, get_global_name, "global variable", file_model.name 

1689 ) 

1690 

1691 def _remove_includes(self, file_model: FileModel, patterns: List[str]) -> None: 

1692 """Remove includes matching regex patterns""" 

1693 if not patterns: 

1694 return 

1695 

1696 original_count = len(file_model.includes) 

1697 compiled_patterns = self._compile_patterns(patterns) 

1698 

1699 # Filter out includes that match any pattern 

1700 filtered_includes = set() 

1701 for include in file_model.includes: 

1702 if not self._matches_any_pattern(include, compiled_patterns): 

1703 filtered_includes.add(include) 

1704 else: 

1705 self.logger.debug("Removed include: %s", include) 

1706 

1707 file_model.includes = filtered_includes 

1708 removed_count = original_count - len(file_model.includes) 

1709 

1710 # Also remove matching include_relations 

1711 if removed_count > 0: 

1712 self._remove_matching_include_relations(file_model, compiled_patterns, removed_count) 

1713 

1714 def _remove_matching_include_relations( 

1715 self, file_model: FileModel, compiled_patterns: List[Pattern[str]], removed_includes_count: int 

1716 ) -> None: 

1717 """Remove include relations that match the removed includes""" 

1718 original_relations_count = len(file_model.include_relations) 

1719 filtered_relations = [] 

1720 

1721 for relation in file_model.include_relations: 

1722 if not self._matches_any_pattern(relation.included_file, compiled_patterns): 

1723 filtered_relations.append(relation) 

1724 else: 

1725 self.logger.debug("Removed include relation: %s -> %s", 

1726 relation.source_file, relation.included_file) 

1727 

1728 file_model.include_relations = filtered_relations 

1729 removed_relations_count = original_relations_count - len(file_model.include_relations) 

1730 

1731 self.logger.info( 

1732 "Removed %d includes and %d include relations from %s", 

1733 removed_includes_count, removed_relations_count, file_model.name 

1734 ) 

1735 
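# A minimal sketch of the cascade above: removing an include also drops
# every include_relation whose included_file matches the same patterns,
# keeping both views of the include graph consistent. File names are
# hypothetical; relations are shown as (source, included) pairs.
import re

includes = {"app.h", "secret.h"}
relations = [("main.c", "app.h"), ("main.c", "secret.h")]
patterns = [re.compile(r"^secret")]
includes = {i for i in includes if not any(p.search(i) for p in patterns)}
relations = [r for r in relations if not any(p.search(r[1]) for p in patterns)]
# includes == {"app.h"}; relations == [("main.c", "app.h")]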

1736 def _remove_structs(self, file_model: FileModel, patterns: List[str]) -> None: 

1737 """Remove structs matching regex patterns""" 

1738 file_model.structs = self._remove_dict_elements( 

1739 file_model.structs, patterns, "struct", file_model.name 

1740 ) 

1741 

1742 def _remove_enums(self, file_model: FileModel, patterns: List[str]) -> None: 

1743 """Remove enums matching regex patterns""" 

1744 file_model.enums = self._remove_dict_elements( 

1745 file_model.enums, patterns, "enum", file_model.name 

1746 ) 

1747 

1748 def _remove_unions(self, file_model: FileModel, patterns: List[str]) -> None: 

1749 """Remove unions matching regex patterns""" 

1750 file_model.unions = self._remove_dict_elements( 

1751 file_model.unions, patterns, "union", file_model.name 

1752 ) 

1753 

1754 def _should_include_file( 

1755 self, 

1756 file_path: str, 

1757 include_patterns: List[Pattern[str]], 

1758 exclude_patterns: List[Pattern[str]], 

1759 ) -> bool: 

1760 """Check if a file should be included based on filters""" 

1761 # Check include patterns 

1762 if include_patterns: 

1763 if not any(pattern.search(file_path) for pattern in include_patterns): 

1764 return False 

1765 

1766 # Check exclude patterns 

1767 if exclude_patterns: 

1768 if any(pattern.search(file_path) for pattern in exclude_patterns): 

1769 return False 

1770 

1771 return True 

1772 
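# A minimal sketch of the filter semantics above: include patterns act
# as an allow-list, exclude patterns as a deny-list applied second. The
# filter values are hypothetical.
import re

include = [re.compile(r"\.c$")]
exclude = [re.compile(r"_test\.c$")]

def keep(path: str) -> bool:
    if include and not any(p.search(path) for p in include):
        return False
    return not any(p.search(path) for p in exclude)

# keep("main.c") is True; keep("main_test.c") and keep("api.h") are False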

1773 def _compile_patterns(self, patterns: List[str]) -> List[Pattern[str]]: 

1774 """Compile regex patterns with error handling""" 

1775 compiled_patterns: List[Pattern[str]] = [] 

1776 for pattern in patterns: 

1777 try: 

1778 compiled_patterns.append(re.compile(pattern)) 

1779 except re.error as e: 

1780 self.logger.warning("Invalid regex pattern '%s': %s", pattern, e) 

1781 return compiled_patterns 

1782 
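# A minimal sketch of the error handling above: a malformed pattern is
# logged and skipped, so one bad entry cannot abort the transformation.
import re

patterns = [r"^keep_me", r"[unclosed"]   # second pattern is invalid
compiled = []
for pat in patterns:
    try:
        compiled.append(re.compile(pat))
    except re.error:
        pass                             # the transformer logs a warning here
# len(compiled) == 1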

1783 def _filter_dict(self, items: Dict[str, Any], filters: Dict[str, Any]) -> Dict[str, Any]: 

1784 """Filter a dictionary based on include/exclude patterns""" 

1785 include_patterns = self._compile_patterns(filters.get("include", [])) 

1786 exclude_patterns = self._compile_patterns(filters.get("exclude", [])) 

1787 

1788 filtered = {} 

1789 for name, item in items.items(): 

1790 # Check include patterns 

1791 if include_patterns: 

1792 if not any(pattern.search(name) for pattern in include_patterns): 

1793 continue 

1794 

1795 # Check exclude patterns 

1796 if exclude_patterns: 

1797 if any(pattern.search(name) for pattern in exclude_patterns): 

1798 continue 

1799 

1800 filtered[name] = item 

1801 

1802 return filtered 

1803 

1804 def _filter_list(self, items: List[Any], filters: Dict[str, Any], key: Optional[Callable[[Any], str]] = None) -> List[Any]: 

1805 """Filter a list based on include/exclude patterns""" 

1806 include_patterns = self._compile_patterns(filters.get("include", [])) 

1807 exclude_patterns = self._compile_patterns(filters.get("exclude", [])) 

1808 

1809 filtered = [] 

1810 for item in items: 

1811 item_name = key(item) if key else str(item) 

1812 

1813 # Check include patterns 

1814 if include_patterns: 

1815 if not any(pattern.search(item_name) for pattern in include_patterns): 

1816 continue 

1817 

1818 # Check exclude patterns 

1819 if exclude_patterns: 

1820 if any(pattern.search(item_name) for pattern in exclude_patterns): 

1821 continue 

1822 

1823 filtered.append(item) 

1824 

1825 return filtered 

1826 
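# A minimal sketch of _filter_list with a non-empty include list;
# strings stand in for model objects (a key callable would extract the
# name), and the filter values are hypothetical.
import re

names = ["init_driver", "debug_dump", "read_frame"]
inc = [re.compile(r"^(init|read)_")]
exc = [re.compile(r"_frame$")]
kept = [n for n in names
        if any(p.search(n) for p in inc)
        and not any(p.search(n) for p in exc)]
# kept == ["init_driver"]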

1827 def _dict_to_file_model(self, data: Dict[str, Any]) -> FileModel: 

1828 """Convert dictionary back to FileModel""" 

1829 

1830 # Convert structs 

1831 structs = {} 

1832 for name, struct_data in data.get("structs", {}).items(): 

1833 fields = [ 

1834 Field(f["name"], f["type"]) for f in struct_data.get("fields", []) 

1835 ] 

1836 structs[name] = Struct( 

1837 name, 

1838 fields, 

1839 struct_data.get("methods", []), 

1840 struct_data.get("tag_name", ""), 

1841 struct_data.get("uses", []), 

1842 ) 

1843 

1844 # Convert enums 

1845 enums = {} 

1846 for name, enum_data in data.get("enums", {}).items(): 

1847 values = [] 

1848 for value_data in enum_data.get("values", []): 

1849 if isinstance(value_data, dict): 

1850 values.append( 

1851 EnumValue(value_data["name"], value_data.get("value")) 

1852 ) 

1853 else: 

1854 values.append(EnumValue(value_data)) 

1855 enums[name] = Enum(name, values) 

1856 

1857 # Convert unions 

1858 unions = {} 

1859 for name, union_data in data.get("unions", {}).items(): 

1860 fields = [Field(f["name"], f["type"]) for f in union_data.get("fields", [])] 

1861 unions[name] = Union( 

1862 name, fields, union_data.get("tag_name", ""), union_data.get("uses", []) 

1863 ) 

1864 

1865 # Convert aliases 

1866 aliases = {} 

1867 for name, alias_data in data.get("aliases", {}).items(): 

1868 if isinstance(alias_data, dict): 

1869 aliases[name] = Alias( 

1870 alias_data.get("name", name), 

1871 alias_data.get("original_type", ""), 

1872 alias_data.get("uses", []), 

1873 ) 

1874 else: 

1875 # Handle legacy format where aliases was Dict[str, str] 

1876 aliases[name] = Alias(name, alias_data, []) 

1877 

1878 # Convert functions 

1879 functions = [] 

1880 for func_data in data.get("functions", []): 

1881 parameters = [ 

1882 Field(p["name"], p["type"]) for p in func_data.get("parameters", []) 

1883 ] 

1884 functions.append( 

1885 Function( 

1886 func_data["name"], 

1887 func_data["return_type"], 

1888 parameters, 

1889 is_static=func_data.get("is_static", False), 

1890 is_declaration=func_data.get("is_declaration", False), 

1891 ) 

1892 ) 

1893 

1894 # Convert globals 

1895 globals_list = [] 

1896 for global_data in data.get("globals", []): 

1897 globals_list.append(Field(global_data["name"], global_data["type"])) 

1898 

1899 return FileModel( 

1900 file_path=data["file_path"], 

1901 structs=structs, 

1902 enums=enums, 

1903 unions=unions, 

1904 functions=functions, 

1905 globals=globals_list, 

1906 includes=set(data.get("includes", [])), 

1907 macros=data.get("macros", []), 

1908 aliases=aliases, 

1909 anonymous_relationships=data.get("anonymous_relationships", {}), 

1910 ) 

1911 
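# A minimal sketch of the two alias serializations accepted above; the
# legacy form maps a typedef name straight to its original type, while
# the current form is a full dict. The typedef shown is hypothetical.
legacy = {"u32": "unsigned int"}
current = {"u32": {"name": "u32", "original_type": "unsigned int", "uses": []}}
# Either shape is reconstructed as Alias("u32", "unsigned int", []).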

1912 def _save_model(self, model: ProjectModel, output_file: str) -> None: 

1913 """Save model to JSON file""" 

1914 try: 

1915 model.save(output_file) 

1916 self.logger.debug("Model saved to: %s", output_file) 

1917 except Exception as e: 

1918 raise ValueError(f"Failed to save model to {output_file}: {e}") from e