Coverage for src/c2puml/core/transformer.py: 57%

873 statements  

coverage.py v7.10.4, created at 2025-08-20 03:53 +0000

#!/usr/bin/env python3

"""
Transformer module for C to PlantUML converter - Step 2: Transform model based on
configuration
"""

import json
import logging
import re
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Pattern, Set, Tuple, Union as TypingUnion
from collections import deque

from ..models import (
    Alias,
    Enum,
    EnumValue,
    Field,
    FileModel,
    Function,
    IncludeRelation,
    ProjectModel,
    Struct,
    Union,
)


class Transformer:
    """Main transformer class for Step 2: Transform model based on configuration"""

    def __init__(self) -> None:
        self.logger = logging.getLogger(__name__)

    def transform(
        self, model_file: str, config_file: str, output_file: Optional[str] = None
    ) -> str:
        """
        Step 2: Transform model based on configuration

        Args:
            model_file: Input JSON model file path
            config_file: Configuration file path
            output_file: Output transformed model file path (optional, defaults to
                model_file)

        Returns:
            Path to the transformed model file
        """
        self.logger.info("Step 2: Transforming model: %s", model_file)

        # Load the model and configuration
        model = self._load_model(model_file)
        config = self._load_config(config_file)

        # Apply transformations
        transformed_model = self._apply_transformations(model, config)

        # Save transformed model
        output_path = output_file or model_file
        self._save_model(transformed_model, output_path)

        self.logger.info("Step 2 complete! Transformed model saved to: %s", output_path)
        return output_path
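
    # A minimal usage sketch (illustrative, not part of the module); the file
    # names are invented, and a "model.json" produced by the Step 1 parser is
    # assumed:
    #
    #     from c2puml.core.transformer import Transformer
    #
    #     transformer = Transformer()
    #     output = transformer.transform(
    #         "model.json", "config.json", output_file="model_transformed.json"
    #     )
    #     print(output)  # -> "model_transformed.json"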

    def _load_model(self, model_file: str) -> ProjectModel:
        """Load model from JSON file"""
        model_path = Path(model_file)
        if not model_path.exists():
            raise FileNotFoundError(f"Model file not found: {model_file}")

        try:
            model = ProjectModel.load(model_file)
            self.logger.debug("Loaded model with %d files", len(model.files))
            return model
        except Exception as e:
            raise ValueError(f"Failed to load model from {model_file}: {e}") from e

    def _load_config(self, config_file: str) -> Dict[str, Any]:
        """Load configuration from JSON file"""
        config_path = Path(config_file)
        if not config_path.exists():
            raise FileNotFoundError(f"Configuration file not found: {config_file}")

        try:
            with open(config_file, "r", encoding="utf-8") as f:
                config = json.load(f)

            self.logger.debug("Loaded configuration from: %s", config_file)
            return config

        except Exception as e:
            raise ValueError(
                f"Failed to load configuration from {config_file}: {e}"
            ) from e

    def _apply_transformations(
        self, model: ProjectModel, config: Dict[str, Any]
    ) -> ProjectModel:
        """Apply all configured transformations to the model"""
        self.logger.info("Applying transformations to model")

        # Apply comprehensive file filtering (moved from parser)
        if "file_filters" in config:
            model = self._apply_file_filters(model, config["file_filters"])

        # Support backward compatibility - convert single 'transformations' to container format
        config = self._ensure_backward_compatibility(config)

        # Discover and apply transformation containers
        model = self._apply_transformation_containers(model, config)

        # Apply simplified depth-based include processing
        if self._should_process_include_relations(config):
            model = self._process_include_relations_simplified(model, config)

        self.logger.info(
            "Transformations complete. Model now has %d files", len(model.files)
        )
        return model
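
    # For orientation, a configuration exercising each branch above might look
    # like the following sketch (key names follow the code in this module;
    # the concrete patterns and values are invented):
    #
    #     {
    #         "file_filters": {"include": ["\\.c$", "\\.h$"], "exclude": ["test_"]},
    #         "include_depth": 3,
    #         "transformations_01_cleanup": {"remove": {"macros": ["^DEBUG_"]}},
    #         "transformations_02_style": {"rename": {"functions": {"^old_": "new_"}}}
    #     }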

    def _apply_transformation_containers(
        self, model: ProjectModel, config: Dict[str, Any]
    ) -> ProjectModel:
        """Discover and apply transformation containers in alphabetical order"""
        transformation_containers = self._discover_transformation_containers(config)

        if not transformation_containers:
            return model

        for container_name, transformation_config in transformation_containers:
            self.logger.info("Applying transformation container: %s", container_name)
            model = self._apply_single_transformation_container(
                model, transformation_config, container_name
            )
            self._log_model_state_after_container(model, container_name)

        return model

    def _log_model_state_after_container(
        self, model: ProjectModel, container_name: str
    ) -> None:
        """Log model state after applying a transformation container"""
        total_elements = sum(
            len(file_model.structs) + len(file_model.enums) + len(file_model.unions) +
            len(file_model.functions) + len(file_model.globals) + len(file_model.macros) +
            len(file_model.aliases)
            for file_model in model.files.values()
        )
        self.logger.info(
            "After %s: model contains %d files with %d total elements",
            container_name, len(model.files), total_elements
        )

    def _should_process_include_relations(self, config: Dict[str, Any]) -> bool:
        """Check if include relations should be processed based on global or file-specific settings"""
        # Check global include_depth
        if config.get("include_depth", 1) > 1:
            return True

        # Check file-specific include_depth settings
        if "file_specific" in config:
            for file_config in config["file_specific"].values():
                if file_config.get("include_depth", 1) > 1:
                    return True

        return False

    def _discover_transformation_containers(self, config: Dict[str, Any]) -> List[Tuple[str, Dict[str, Any]]]:
        """
        Discover all transformation containers and sort them alphabetically

        Returns:
            List of (container_name, transformation_config) tuples sorted by name
        """
        transformation_containers = [
            (key, value)
            for key, value in config.items()
            if key.startswith("transformations") and isinstance(value, dict)
        ]

        # Sort alphabetically by container name
        transformation_containers.sort(key=lambda x: x[0])

        self.logger.info(
            "Discovered %d transformation containers: %s",
            len(transformation_containers),
            [name for name, _ in transformation_containers]
        )

        return transformation_containers

    def _ensure_backward_compatibility(self, config: Dict[str, Any]) -> Dict[str, Any]:
        """
        Ensure backward compatibility by converting the old single 'transformations'
        key to the new container format
        """
        # Make a copy to avoid modifying the original
        config = config.copy()

        # Check if the old format is used (single 'transformations' key)
        if self._is_legacy_transformation_format(config):
            self.logger.info("Converting legacy 'transformations' format to container format")

            # Move old transformations to a default container
            old_transformations = config.pop("transformations")
            config["transformations_00_default"] = old_transformations

            self.logger.debug("Converted to container: transformations_00_default")

        return config

    def _is_legacy_transformation_format(self, config: Dict[str, Any]) -> bool:
        """Check if configuration uses the legacy transformation format"""
        return (
            "transformations" in config and
            not any(key.startswith("transformations_") for key in config.keys())
        )
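
    # Sketch of the conversion performed above (container names beyond the
    # default are invented): a legacy config {"transformations": {...}} becomes
    # {"transformations_00_default": {...}}, and the "00" prefix sorts it ahead
    # of explicitly numbered containers such as "transformations_01_cleanup"
    # when containers are applied in alphabetical order.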

    def _apply_single_transformation_container(
        self,
        model: ProjectModel,
        transformation_config: Dict[str, Any],
        container_name: str
    ) -> ProjectModel:
        """
        Apply a single transformation container

        Args:
            model: Project model to transform
            transformation_config: Single transformation container configuration
            container_name: Name of the container for logging

        Returns:
            Transformed project model
        """
        self.logger.debug("Processing transformation container: %s", container_name)

        # Determine target files for this container
        target_files = self._get_target_files(model, transformation_config)

        # Apply transformations in a specific order: remove -> rename -> add.
        # This order ensures that removals happen first, then renaming with
        # deduplication, then additions to the cleaned model.
        model = self._apply_remove_operations(model, transformation_config, target_files, container_name)
        model = self._apply_rename_operations(model, transformation_config, target_files, container_name)
        model = self._apply_add_operations(model, transformation_config, target_files, container_name)

        return model

    def _get_target_files(
        self, model: ProjectModel, transformation_config: Dict[str, Any]
    ) -> Set[str]:
        """Determine which files to apply transformations to based on file_selection"""
        selected_files = transformation_config.get("file_selection", [])

        # Validate that file_selection is a list
        if not isinstance(selected_files, list):
            selected_files = []
            self.logger.warning("Invalid file_selection format; must be a list, defaulting to empty list")

        # Determine which files to apply transformations to
        if not selected_files:
            target_files = set(model.files.keys())
            self.logger.debug("No file selection specified, applying to all %d files", len(target_files))
        else:
            target_files = self._match_files_by_patterns(model, selected_files)
            self.logger.debug(
                "File selection patterns %s matched %d files: %s",
                selected_files, len(target_files), list(target_files)
            )

        return target_files

    def _match_files_by_patterns(
        self, model: ProjectModel, patterns: List[str]
    ) -> Set[str]:
        """Match files based on selection patterns"""
        target_files = set()
        for pattern in patterns:
            for file_path in model.files.keys():
                if self._matches_pattern(file_path, pattern):
                    target_files.add(file_path)
        return target_files
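
    # Illustrative sketch of the matching semantics (file paths invented):
    # with model.files keys {"src/main.c", "src/util.c", "include/util.h"} and
    # file_selection ["util"], _match_files_by_patterns returns
    # {"src/util.c", "include/util.h"}, because _matches_pattern uses
    # re.search rather than a full-string match.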

    def _apply_remove_operations(
        self,
        model: ProjectModel,
        transformation_config: Dict[str, Any],
        target_files: Set[str],
        container_name: str
    ) -> ProjectModel:
        """Apply remove operations for a transformation container"""
        if "remove" not in transformation_config:
            return model

        self.logger.debug("Applying remove operations for container: %s", container_name)

        # Collect typedef names BEFORE removing them for type reference cleanup
        removed_typedef_names = self._collect_typedef_names_for_removal(
            model, transformation_config["remove"], target_files
        )

        model = self._apply_removals(model, transformation_config["remove"], target_files)

        # Clean up type references after typedef removal using pre-collected names
        if removed_typedef_names:
            self.logger.debug("Calling type reference cleanup for container: %s", container_name)
            self._cleanup_type_references_by_names(model, removed_typedef_names)

        return model

    def _apply_rename_operations(
        self,
        model: ProjectModel,
        transformation_config: Dict[str, Any],
        target_files: Set[str],
        container_name: str
    ) -> ProjectModel:
        """Apply rename operations for a transformation container"""
        if "rename" not in transformation_config:
            return model

        self.logger.debug("Applying rename operations for container: %s", container_name)
        return self._apply_renaming(model, transformation_config["rename"], target_files)

    def _apply_add_operations(
        self,
        model: ProjectModel,
        transformation_config: Dict[str, Any],
        target_files: Set[str],
        container_name: str
    ) -> ProjectModel:
        """Apply add operations for a transformation container"""
        if "add" not in transformation_config:
            return model

        self.logger.debug("Applying add operations for container: %s", container_name)
        return self._apply_additions(model, transformation_config["add"], target_files)

    def _collect_typedef_names_for_removal(
        self,
        model: ProjectModel,
        remove_config: Dict[str, Any],
        target_files: Set[str]
    ) -> Set[str]:
        """Collect typedef names that will be removed for type reference cleanup"""
        removed_typedef_names = set()

        if "typedef" not in remove_config:
            return removed_typedef_names

        typedef_patterns = remove_config["typedef"]
        compiled_patterns = self._compile_patterns(typedef_patterns)

        if not compiled_patterns:
            return removed_typedef_names

        for file_path in target_files:
            if file_path in model.files:
                file_model = model.files[file_path]
                for alias_name in file_model.aliases.keys():
                    if self._matches_any_pattern(alias_name, compiled_patterns):
                        removed_typedef_names.add(alias_name)

        self.logger.debug("Pre-identified typedefs for removal: %s", list(removed_typedef_names))
        return removed_typedef_names

    def _process_include_relations_simplified(
        self, model: ProjectModel, config: Dict[str, Any]
    ) -> ProjectModel:
        """
        Simplified include processing following a structured depth-based approach:
        1. Each include structure has a single root C file
        2. Process the C file's direct includes through filters first
        3. Then recursively process header files' includes with filtering
        4. Continue until include_depth is reached
        """
        global_include_depth = config.get("include_depth", 1)
        file_specific_config = config.get("file_specific", {})
        include_filter_local_only = config.get("include_filter_local_only", False)
        always_show_includes = config.get("always_show_includes", False)

        self.logger.info(
            "Processing includes with simplified depth-based approach (global_depth=%d)",
            global_include_depth
        )

        # Clear all existing include relations
        for file_model in model.files.values():
            file_model.include_relations = []

        # Create a filename-to-file-model mapping for quick lookup
        file_map = {}
        for file_model in model.files.values():
            filename = Path(file_model.name).name
            file_map[filename] = file_model

        # Process each C file as a root with its own include structure
        c_files = [fm for fm in model.files.values() if fm.name.endswith(".c")]

        for root_file in c_files:
            self._process_root_c_file_includes(
                root_file, file_map, global_include_depth, file_specific_config,
                include_filter_local_only, always_show_includes
            )

        return model

    def _process_root_c_file_includes(
        self,
        root_file: FileModel,
        file_map: Dict[str, FileModel],
        global_include_depth: int,
        file_specific_config: Dict[str, Any],
        include_filter_local_only: bool,
        always_show_includes: bool
    ) -> None:
        """
        Process includes for a single root C file following the simplified approach:
        - Start with the root C file
        - Apply filters at each depth level
        - Process layer by layer until the maximum depth is reached
        """
        root_filename = Path(root_file.name).name

        # Get file-specific settings or use global defaults
        include_depth = global_include_depth
        include_filters = []

        if root_filename in file_specific_config:
            file_config = file_specific_config[root_filename]
            include_depth = file_config.get("include_depth", global_include_depth)
            include_filters = file_config.get("include_filter", [])

        # If configured to keep only the local header, ensure its filter pattern is present
        if include_filter_local_only:
            local_header_pattern = f"^{Path(root_filename).stem}\\.h$"
            if local_header_pattern not in include_filters:
                include_filters.append(local_header_pattern)

        # Skip processing if depth is 1 or less (no include relations needed)
        if include_depth <= 1:
            self.logger.debug(
                "Skipping include processing for %s (depth=%d)",
                root_filename, include_depth
            )
            return

        # Compile filter patterns
        compiled_filters = []
        if include_filters:
            try:
                compiled_filters = [re.compile(pattern) for pattern in include_filters]
                self.logger.debug(
                    "Compiled %d filter patterns for %s",
                    len(compiled_filters), root_filename
                )
            except re.error as e:
                self.logger.warning(
                    "Invalid regex pattern for %s: %s", root_filename, e
                )

        self.logger.debug(
            "Processing includes for root C file %s (depth=%d, filters=%d)",
            root_filename, include_depth, len(compiled_filters)
        )

        # Track processed files to avoid cycles
        processed_files = set()

        # Reset placeholder headers for this root file context
        try:
            root_file.placeholder_headers.clear()
        except Exception:
            # In case the model was loaded without this field
            root_file.placeholder_headers = set()

        # Process includes level by level using a BFS approach
        current_level = [root_file]  # Start with the root C file

        for depth in range(1, include_depth + 1):
            next_level = []

            self.logger.debug(
                "Processing depth %d for %s (%d files at current level)",
                depth, root_filename, len(current_level)
            )

            for current_file in current_level:
                current_filename = Path(current_file.name).name

                # Skip if already processed to avoid cycles
                if current_filename in processed_files:
                    continue
                processed_files.add(current_filename)

                # Process each include in the current file
                for include_name in current_file.includes:
                    # Determine if this include is filtered out by patterns
                    filtered_out_by_patterns = False
                    if compiled_filters:
                        if not any(pattern.search(include_name) for pattern in compiled_filters):
                            if always_show_includes:
                                filtered_out_by_patterns = True
                                self.logger.debug(
                                    "Include %s filtered by patterns at depth %d for %s, but will be shown as placeholder",
                                    include_name, depth, root_filename
                                )
                            else:
                                self.logger.debug(
                                    "Filtered out include %s at depth %d for %s",
                                    include_name, depth, root_filename
                                )
                                continue

                    # Check if the included file exists in our project
                    if include_name not in file_map:
                        self.logger.debug(
                            "Include %s not found in project files (depth %d, root %s)",
                            include_name, depth, root_filename
                        )
                        continue

                    # Prevent self-references
                    if include_name == current_filename:
                        self.logger.debug(
                            "Skipping self-reference %s at depth %d for %s",
                            include_name, depth, root_filename
                        )
                        continue

                    # Check for duplicate relations to prevent cycles
                    existing_relation = any(
                        rel.source_file == current_filename and rel.included_file == include_name
                        for rel in root_file.include_relations
                    )

                    if existing_relation:
                        self.logger.debug(
                            "Skipping duplicate relation %s -> %s for %s",
                            current_filename, include_name, root_filename
                        )
                        continue

                    # Prevent processing files that would create cycles (already processed)
                    if include_name in processed_files:
                        self.logger.debug(
                            "Skipping already processed file %s to prevent cycle for %s",
                            include_name, root_filename
                        )
                        continue

                    # Create and add the include relation to the root C file
                    relation = IncludeRelation(
                        source_file=current_filename,
                        included_file=include_name,
                        depth=depth
                    )
                    root_file.include_relations.append(relation)

                    self.logger.debug(
                        "Added include relation: %s -> %s (depth %d) for root %s",
                        current_filename, include_name, depth, root_filename
                    )

                    # If filtered out by patterns and always_show_includes is enabled, mark as placeholder
                    if filtered_out_by_patterns:
                        try:
                            root_file.placeholder_headers.add(include_name)
                        except Exception:
                            root_file.placeholder_headers = {include_name}
                        # Do not process further includes/content for this header
                        continue

                    # Add the included file to the next level for further processing
                    included_file = file_map[include_name]
                    if included_file not in next_level and include_name not in processed_files:
                        next_level.append(included_file)

            # Move to the next level for the next iteration
            current_level = next_level

            # Break if there are no more files to process
            if not current_level:
                self.logger.debug(
                    "No more files to process at depth %d for %s",
                    depth + 1, root_filename
                )
                break

        self.logger.debug(
            "Completed include processing for %s: %d relations generated",
            root_filename, len(root_file.include_relations)
        )
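
    # Worked sketch of the BFS above (file names invented): given
    # main.c -> a.h, a.h -> b.h, b.h -> c.h and include_depth=2, depth 1
    # records main.c -> a.h and depth 2 records a.h -> b.h; b.h -> c.h is
    # never visited because the loop stops after depth 2. All relations land
    # on the root file's include_relations list, tagged with the depth at
    # which they were found.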

    def _apply_file_filters(
        self, model: ProjectModel, filters: Dict[str, Any]
    ) -> ProjectModel:
        """Apply user-configured file-level filters (important filtering is already
        done in the parser)"""
        include_patterns = self._compile_patterns(filters.get("include", []))
        exclude_patterns = self._compile_patterns(filters.get("exclude", []))

        if not include_patterns and not exclude_patterns:
            return model

        filtered_files = {}
        for file_path, file_model in model.files.items():
            if self._should_include_file(file_path, include_patterns, exclude_patterns):
                filtered_files[file_path] = file_model

        model.files = filtered_files
        self.logger.debug(
            "User file filtering: %d files after filtering", len(model.files)
        )
        return model

    def _apply_include_filters(
        self, model: ProjectModel, include_filters: Dict[str, List[str]]
    ) -> ProjectModel:
        """Apply include filters for each root file based on regex patterns

        Args:
            model: The project model to apply filters to
            include_filters: Dictionary mapping root files to their include filter patterns
        """
        self.logger.info(
            "Applying include filters for %d root files", len(include_filters)
        )

        # Compile regex patterns for each root file
        compiled_filters = {}
        for root_file, patterns in include_filters.items():
            try:
                compiled_filters[root_file] = [
                    re.compile(pattern) for pattern in patterns
                ]
                self.logger.debug(
                    "Compiled %d patterns for root file: %s", len(patterns), root_file
                )
            except re.error as e:
                self.logger.warning(
                    "Invalid regex pattern for root file %s: %s", root_file, e
                )
                # Skip invalid patterns for this root file
                continue

        if not compiled_filters:
            self.logger.warning(
                "No valid include filters found, skipping include filtering"
            )
            return model

        # Create a mapping from header files to their root C files
        header_to_root = self._create_header_to_root_mapping(model)

        # Apply filters to each file in the model
        for file_path, file_model in model.files.items():
            # Find the root file for this file
            root_file = self._find_root_file_with_mapping(
                file_path, file_model, header_to_root
            )

            if root_file in compiled_filters:
                # Apply filtering (preserve includes arrays, filter include_relations)
                self._filter_include_relations(
                    file_model, compiled_filters[root_file], root_file
                )

        return model

    def _create_header_to_root_mapping(self, model: ProjectModel) -> Dict[str, str]:
        """Create a mapping from header files to their root C files"""
        header_to_root = {}

        # First, map C files to themselves
        c_files = []
        for file_path, file_model in model.files.items():
            if file_model.name.endswith(".c"):
                header_to_root[file_model.name] = file_model.name
                c_files.append(file_model.name)

        # Then, map header files to their corresponding C files
        for file_path, file_model in model.files.items():
            if not file_model.name.endswith(".c"):  # It's a header file
                # Strategy 1: Look for a C file with the same base name
                header_base_name = Path(file_model.name).stem
                matching_c_file = header_base_name + ".c"

                if matching_c_file in [Path(c_file).name for c_file in c_files]:
                    header_to_root[file_model.name] = matching_c_file
                else:
                    # Strategy 2: Find which C file includes this header
                    including_c_files = []
                    for c_file_path, c_file_model in model.files.items():
                        if (c_file_model.name.endswith(".c") and
                                file_model.name in c_file_model.includes):
                            including_c_files.append(c_file_model.name)

                    if including_c_files:
                        # Use the first C file that includes this header
                        header_to_root[file_model.name] = including_c_files[0]
                    elif c_files:
                        # Strategy 3: Fall back to the first available C file
                        header_to_root[file_model.name] = c_files[0]

        return header_to_root
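
    # Illustrative sketch of the three strategies (file names invented):
    # utils.h maps to utils.c when a C file with the same base name exists;
    # a helpers.h with no helpers.c maps to the first .c file that includes
    # it; and an orphan header included by nothing falls back to the first
    # .c file in the model.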

    def _find_root_file_with_mapping(
        self, file_path: str, file_model: FileModel, header_to_root: Dict[str, str]
    ) -> str:
        """Find the root C file for a given file using the header mapping"""
        if file_model.name.endswith(".c"):
            return file_model.name

        # For header files, use the mapping
        return header_to_root.get(file_model.name, file_model.name)

    def _find_root_file(self, file_path: str, file_model: FileModel) -> str:
        """Find the root C file for a given file"""
        filename = Path(file_path).name

        # If it's a .c file, it's its own root
        if filename.endswith(".c"):
            return filename

        # For header files, find the corresponding .c file
        base_name = Path(file_path).stem

        # Look for a .c file with the same base name
        if base_name and not filename.startswith("."):
            return base_name + ".c"

        # Fallback: use the filename as root (original behavior)
        return filename

    def _filter_include_relations(
        self, file_model: FileModel, patterns: List[re.Pattern], root_file: str
    ) -> None:
        """Unified include_relations filtering, preserving includes arrays."""
        self.logger.debug(
            "Filtering include_relations for file %s (root: %s)", file_model.name, root_file
        )

        original_relations_count = len(file_model.include_relations)
        filtered_relations: List[IncludeRelation] = []

        for relation in file_model.include_relations:
            if self._matches_any_pattern(relation.included_file, patterns):
                filtered_relations.append(relation)
            else:
                self.logger.debug(
                    "Filtered out include relation: %s -> %s (root: %s)",
                    relation.source_file,
                    relation.included_file,
                    root_file,
                )

        file_model.include_relations = filtered_relations

        self.logger.debug(
            "Include filtering for %s: relations %d->%d (includes preserved)",
            file_model.name,
            original_relations_count,
            len(file_model.include_relations),
        )

    # Removed deprecated include filtering wrappers; use _filter_include_relations instead

    def _matches_any_pattern(self, text: str, patterns: List[Pattern[str]]) -> bool:
        """Check if text matches any of the given regex patterns"""
        return any(pattern.search(text) for pattern in patterns)

    def _matches_pattern(self, text: str, pattern: str) -> bool:
        """Safe regex match for a single pattern string"""
        try:
            return bool(re.search(pattern, text))
        except re.error as e:
            self.logger.warning("Invalid regex pattern '%s': %s", pattern, e)
            return False

    def _apply_model_transformations(
        self, model: ProjectModel, transformations: Dict[str, Any]
    ) -> ProjectModel:
        """Apply model-level transformations with file selection support"""
        # Get file selection configuration
        selected_files = transformations.get("file_selection", [])

        # Validate that file_selection is a list
        if not isinstance(selected_files, list):
            selected_files = []
            self.logger.warning("Invalid file_selection format; must be a list, defaulting to empty list")

        # Determine which files to apply transformations to.
        # If selected_files is empty or not specified, apply to all files.
        if not selected_files:
            target_files = set(model.files.keys())
            self.logger.debug("No file selection specified, applying to all %d files", len(target_files))
        else:
            # Apply only to selected files
            target_files = set()
            for pattern in selected_files:
                for file_path in model.files.keys():
                    if self._matches_pattern(file_path, pattern):
                        target_files.add(file_path)

            self.logger.debug(
                "File selection patterns %s matched %d files: %s",
                selected_files, len(target_files), list(target_files)
            )

        self.logger.debug(
            "Applying transformations to %d files: %s",
            len(target_files),
            list(target_files),
        )

        # Rename elements
        if "rename" in transformations:
            model = self._apply_renaming(model, transformations["rename"], target_files)

        # Add elements
        if "add" in transformations:
            model = self._apply_additions(model, transformations["add"], target_files)

        # Remove elements
        if "remove" in transformations:
            model = self._apply_removals(model, transformations["remove"], target_files)

            # Clean up type references after typedef removal (guarded by the
            # enclosing check so a missing "remove" key cannot raise a KeyError)
            if "typedef" in transformations["remove"]:
                self._cleanup_type_references(model, transformations["remove"]["typedef"], target_files)

        return model

    def _apply_renaming(
        self, model: ProjectModel, rename_config: Dict[str, Any], target_files: Set[str]
    ) -> ProjectModel:
        """Apply renaming transformations to selected files"""
        self.logger.debug(
            "Applying renaming transformations to %d files", len(target_files)
        )

        # Apply renaming only to target files
        for file_path in target_files:
            if file_path in model.files:
                file_model = model.files[file_path]
                self.logger.debug("Applying renaming to file: %s", file_path)
                self._apply_file_level_renaming(file_model, rename_config)

        # Apply file renaming (affects model.files keys)
        if "files" in rename_config:
            model = self._rename_files(model, rename_config["files"], target_files)

        return model

    def _apply_file_level_renaming(
        self, file_model: FileModel, rename_config: Dict[str, Any]
    ) -> None:
        """Apply all renaming operations to a single file"""
        rename_operations = [
            ("typedef", self._rename_typedefs),
            ("functions", self._rename_functions),
            ("macros", self._rename_macros),
            ("globals", self._rename_globals),
            ("includes", self._rename_includes),
            ("structs", self._rename_structs),
            ("enums", self._rename_enums),
            ("unions", self._rename_unions),
        ]

        for config_key, rename_method in rename_operations:
            if config_key in rename_config:
                rename_method(file_model, rename_config[config_key])

    def _cleanup_type_references(
        self, model: ProjectModel, removed_typedef_patterns: List[str], target_files: Set[str]
    ) -> None:
        """
        Clean up type references after typedef removal

        This method removes type references that point to removed typedefs from:
        - Function parameters and return types
        - Global variable types
        - Struct field types
        """
        self.logger.debug("Starting type reference cleanup with patterns: %s, target_files: %s",
                          removed_typedef_patterns, list(target_files))

        if not removed_typedef_patterns:
            self.logger.debug("No typedef patterns to clean up")
            return

        compiled_patterns = self._compile_patterns(removed_typedef_patterns)
        if not compiled_patterns:
            self.logger.debug("No valid compiled patterns")
            return

        # Track removed type names for cleanup
        removed_types = set()

        # First, collect all removed typedef names from all target files
        for file_path in target_files:
            if file_path in model.files:
                file_model = model.files[file_path]

                # Check what typedefs would be removed from this file
                for alias_name in list(file_model.aliases.keys()):
                    if self._matches_any_pattern(alias_name, compiled_patterns):
                        removed_types.add(alias_name)
                        self.logger.debug("Found removed typedef: %s in file %s", alias_name, file_path)

        self.logger.debug("Total removed types identified: %s", list(removed_types))

        # Clean up type references across all files since typedefs can be used anywhere
        cleaned_count = 0
        for file_path, file_model in model.files.items():
            file_cleaned = 0

            # Clean function parameter and return types
            for func in file_model.functions:
                # Clean return type
                if func.return_type and self._contains_removed_type(func.return_type, removed_types):
                    old_type = func.return_type
                    func.return_type = self._remove_type_references(func.return_type, removed_types)
                    if func.return_type != old_type:
                        file_cleaned += 1
                        self.logger.debug(
                            "Cleaned return type '%s' -> '%s' in function %s",
                            old_type, func.return_type, func.name
                        )

                # Clean parameter types
                for param in func.parameters:
                    if param.type and self._contains_removed_type(param.type, removed_types):
                        old_type = param.type
                        param.type = self._remove_type_references(param.type, removed_types)
                        if param.type != old_type:
                            file_cleaned += 1
                            self.logger.debug(
                                "Cleaned parameter type '%s' -> '%s' for parameter %s",
                                old_type, param.type, param.name
                            )

            # Clean global variable types
            for global_var in file_model.globals:
                if global_var.type and self._contains_removed_type(global_var.type, removed_types):
                    old_type = global_var.type
                    global_var.type = self._remove_type_references(global_var.type, removed_types)
                    if global_var.type != old_type:
                        file_cleaned += 1
                        self.logger.debug(
                            "Cleaned global variable type '%s' -> '%s' for %s",
                            old_type, global_var.type, global_var.name
                        )

            # Clean struct field types
            for struct in file_model.structs.values():
                for field in struct.fields:
                    if field.type and self._contains_removed_type(field.type, removed_types):
                        old_type = field.type
                        field.type = self._remove_type_references(field.type, removed_types)
                        if field.type != old_type:
                            file_cleaned += 1
                            self.logger.debug(
                                "Cleaned struct field type '%s' -> '%s' for %s.%s",
                                old_type, field.type, struct.name, field.name
                            )

            cleaned_count += file_cleaned

        if cleaned_count > 0:
            self.logger.info(
                "Cleaned %d type references to removed typedefs: %s",
                cleaned_count, list(removed_types)
            )

    def _contains_removed_type(self, type_str: str, removed_types: Set[str]) -> bool:
        """Check if a type string contains any of the removed types"""
        if not type_str or not removed_types:
            return False

        # Check for removed type names in the type string.
        # This handles cases like "old_point_t *", "const old_config_t", etc.
        for removed_type in removed_types:
            if removed_type in type_str:
                return True
        return False

    def _remove_type_references(self, type_str: str, removed_types: Set[str]) -> str:
        """Remove references to removed types from a type string"""
        if not type_str or not removed_types:
            return type_str

        cleaned_type = type_str
        for removed_type in removed_types:
            if removed_type in cleaned_type:
                # Replace the removed type with "void" to maintain type safety
                cleaned_type = cleaned_type.replace(removed_type, "void")

        # Clean up any double spaces or other artifacts
        cleaned_type = " ".join(cleaned_type.split())
        return cleaned_type
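
    # Illustrative sketch (type names invented): with removed_types
    # {"old_point_t"}, "const old_point_t *" becomes "const void *". Note
    # that the plain substring replacement also rewrites "old_point_t_list"
    # to "void_list"; the rename path below avoids this by using \b word
    # boundaries.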

    def _cleanup_type_references_by_names(
        self, model: ProjectModel, removed_typedef_names: Set[str]
    ) -> None:
        """
        Clean up type references using pre-collected typedef names

        This method removes type references that point to removed typedefs from:
        - Function parameters and return types
        - Global variable types
        - Struct field types
        """
        if not removed_typedef_names:
            self.logger.debug("No removed typedef names provided")
            return

        self.logger.debug("Cleaning type references for removed typedefs: %s", list(removed_typedef_names))

        # Clean up type references across all files since typedefs can be used anywhere
        cleaned_count = 0
        for file_path, file_model in model.files.items():
            file_cleaned = 0

            # Clean function parameter and return types
            for func in file_model.functions:
                # Clean return type
                if func.return_type and self._contains_removed_type(func.return_type, removed_typedef_names):
                    old_type = func.return_type
                    func.return_type = self._remove_type_references(func.return_type, removed_typedef_names)
                    if func.return_type != old_type:
                        file_cleaned += 1
                        self.logger.debug(
                            "Cleaned return type '%s' -> '%s' in function %s",
                            old_type, func.return_type, func.name
                        )

                # Clean parameter types
                for param in func.parameters:
                    if param.type and self._contains_removed_type(param.type, removed_typedef_names):
                        old_type = param.type
                        param.type = self._remove_type_references(param.type, removed_typedef_names)
                        if param.type != old_type:
                            file_cleaned += 1
                            self.logger.debug(
                                "Cleaned parameter type '%s' -> '%s' for parameter %s",
                                old_type, param.type, param.name
                            )

            # Clean global variable types
            for global_var in file_model.globals:
                if global_var.type and self._contains_removed_type(global_var.type, removed_typedef_names):
                    old_type = global_var.type
                    global_var.type = self._remove_type_references(global_var.type, removed_typedef_names)
                    if global_var.type != old_type:
                        file_cleaned += 1
                        self.logger.debug(
                            "Cleaned global variable type '%s' -> '%s' for %s",
                            old_type, global_var.type, global_var.name
                        )

            # Clean struct field types
            for struct in file_model.structs.values():
                for field in struct.fields:
                    if field.type and self._contains_removed_type(field.type, removed_typedef_names):
                        old_type = field.type
                        field.type = self._remove_type_references(field.type, removed_typedef_names)
                        if field.type != old_type:
                            file_cleaned += 1
                            self.logger.debug(
                                "Cleaned struct field type '%s' -> '%s' for %s.%s",
                                old_type, field.type, struct.name, field.name
                            )

            cleaned_count += file_cleaned
            if file_cleaned > 0:
                self.logger.debug("Cleaned %d type references in file %s", file_cleaned, file_path)

        if cleaned_count > 0:
            self.logger.info(
                "Cleaned %d type references to removed typedefs: %s",
                cleaned_count, list(removed_typedef_names)
            )
        else:
            self.logger.debug("No type references found to clean up")

    def _update_type_references_for_renames(self, file_model: FileModel, typedef_renames: Dict[str, str]) -> None:
        """Update all type references when typedefs are renamed"""
        updated_count = 0

        # Update function return types and parameter types
        for func in file_model.functions:
            # Update return type
            if func.return_type:
                old_type = func.return_type
                new_type = self._update_type_string_for_renames(func.return_type, typedef_renames)
                if new_type != old_type:
                    func.return_type = new_type
                    updated_count += 1
                    self.logger.debug(
                        "Updated return type '%s' -> '%s' in function %s",
                        old_type, new_type, func.name
                    )

            # Update parameter types
            for param in func.parameters:
                if param.type:
                    old_type = param.type
                    new_type = self._update_type_string_for_renames(param.type, typedef_renames)
                    if new_type != old_type:
                        param.type = new_type
                        updated_count += 1
                        self.logger.debug(
                            "Updated parameter type '%s' -> '%s' for parameter %s in function %s",
                            old_type, new_type, param.name, func.name
                        )

        # Update global variable types
        for global_var in file_model.globals:
            if global_var.type:
                old_type = global_var.type
                new_type = self._update_type_string_for_renames(global_var.type, typedef_renames)
                if new_type != old_type:
                    global_var.type = new_type
                    updated_count += 1
                    self.logger.debug(
                        "Updated global variable type '%s' -> '%s' for %s",
                        old_type, new_type, global_var.name
                    )

        # Update struct field types
        for struct in file_model.structs.values():
            for field in struct.fields:
                if field.type:
                    old_type = field.type
                    new_type = self._update_type_string_for_renames(field.type, typedef_renames)
                    if new_type != old_type:
                        field.type = new_type
                        updated_count += 1
                        self.logger.debug(
                            "Updated struct field type '%s' -> '%s' for %s.%s",
                            old_type, new_type, struct.name, field.name
                        )

        # Update union field types
        for union in file_model.unions.values():
            for field in union.fields:
                if field.type:
                    old_type = field.type
                    new_type = self._update_type_string_for_renames(field.type, typedef_renames)
                    if new_type != old_type:
                        field.type = new_type
                        updated_count += 1
                        self.logger.debug(
                            "Updated union field type '%s' -> '%s' for %s.%s",
                            old_type, new_type, union.name, field.name
                        )

        if updated_count > 0:
            self.logger.info(
                "Updated %d type references for renamed typedefs in %s: %s",
                updated_count, file_model.name, typedef_renames
            )

    def _update_type_string_for_renames(self, type_str: str, typedef_renames: Dict[str, str]) -> str:
        """Update a type string by replacing old typedef names with new ones"""
        if not type_str or not typedef_renames:
            return type_str

        updated_type = type_str
        for old_name, new_name in typedef_renames.items():
            # Use word boundaries to avoid partial matches.
            # This handles cases like "old_config_t *", "const old_config_t", etc.
            pattern = r'\b' + re.escape(old_name) + r'\b'
            updated_type = re.sub(pattern, new_name, updated_type)

        return updated_type
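
    # Illustrative sketch (type names invented): with typedef_renames
    # {"old_config_t": "new_config_t"}, "const old_config_t *" becomes
    # "const new_config_t *", while "old_config_t_backup" is left untouched
    # because the \b word boundaries require a full identifier match.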

    def _rename_dict_elements(
        self,
        elements_dict: Dict[str, Any],
        patterns_map: Dict[str, str],
        create_renamed_element: Callable[[str, Any], Any],
        element_type: str,
        file_name: str
    ) -> Dict[str, Any]:
        """Generic method to rename dictionary elements with deduplication"""
        original_count = len(elements_dict)
        seen_names = set()
        deduplicated_elements = {}

        for name, element in elements_dict.items():
            # Apply rename patterns
            new_name = self._apply_rename_patterns(name, patterns_map)

            # Check for duplicates
            if new_name in seen_names:
                self.logger.debug(
                    "Deduplicating %s: removing duplicate '%s' (renamed from '%s')",
                    element_type, new_name, name
                )
                continue

            seen_names.add(new_name)

            # Create an updated element with the new name
            updated_element = create_renamed_element(new_name, element)
            deduplicated_elements[new_name] = updated_element

        removed_count = original_count - len(deduplicated_elements)
        if removed_count > 0:
            self.logger.info(
                "Renamed %ss in %s, removed %d duplicates", element_type, file_name, removed_count
            )

        return deduplicated_elements

    def _rename_list_elements(
        self,
        elements_list: List[Any],
        patterns_map: Dict[str, str],
        get_element_name: Callable[[Any], str],
        create_renamed_element: Callable[[str, Any], Any],
        element_type: str,
        file_name: str
    ) -> List[Any]:
        """Generic method to rename list elements with deduplication"""
        original_count = len(elements_list)
        seen_names = set()
        deduplicated_elements = []

        for element in elements_list:
            name = get_element_name(element)
            # Apply rename patterns
            new_name = self._apply_rename_patterns(name, patterns_map)

            # Check for duplicates
            if new_name in seen_names:
                self.logger.debug(
                    "Deduplicating %s: removing duplicate '%s' (renamed from '%s')",
                    element_type, new_name, name
                )
                continue

            seen_names.add(new_name)

            # Create an updated element with the new name
            updated_element = create_renamed_element(new_name, element)
            deduplicated_elements.append(updated_element)

        removed_count = original_count - len(deduplicated_elements)
        if removed_count > 0:
            self.logger.info(
                "Renamed %ss in %s, removed %d duplicates", element_type, file_name, removed_count
            )

        return deduplicated_elements

    def _apply_rename_patterns(self, original_name: str, patterns_map: Dict[str, str]) -> str:
        """
        Apply rename patterns to an element name

        Args:
            original_name: Original element name
            patterns_map: Dict mapping regex patterns to replacement strings

        Returns:
            Renamed element name (or the original if no patterns match)
        """
        for pattern, replacement in patterns_map.items():
            try:
                # Apply regex substitution
                new_name = re.sub(pattern, replacement, original_name)
                if new_name != original_name:
                    self.logger.debug(
                        "Renamed '%s' to '%s' using pattern '%s'",
                        original_name, new_name, pattern
                    )
                    return new_name
            except re.error as e:
                self.logger.warning(
                    "Invalid regex pattern '%s': %s", pattern, e
                )
                continue

        return original_name
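
    # Illustrative sketch (names invented): with patterns_map
    # {"^legacy_(.*)$": "core_\\1"}, "legacy_parser" is rewritten to
    # "core_parser" and returned immediately; only the first pattern that
    # changes the name wins, so later entries in the map are not applied on
    # top of it.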

    def _rename_typedefs(self, file_model: FileModel, patterns_map: Dict[str, str]) -> None:
        """Rename typedefs with deduplication"""
        if not patterns_map:
            return

        # Track old-to-new name mappings for type reference updates
        typedef_renames = {}

        def create_renamed_alias(name: str, alias: Alias) -> Alias:
            return Alias(name, alias.original_type, alias.uses)

        # Capture renames before applying them
        for old_name in file_model.aliases:
            new_name = self._apply_rename_patterns(old_name, patterns_map)
            if new_name != old_name:
                typedef_renames[old_name] = new_name

        file_model.aliases = self._rename_dict_elements(
            file_model.aliases, patterns_map, create_renamed_alias, "typedef", file_model.name
        )

        # Update type references throughout the file
        if typedef_renames:
            self._update_type_references_for_renames(file_model, typedef_renames)

    def _rename_functions(self, file_model: FileModel, patterns_map: Dict[str, str]) -> None:
        """Rename functions with deduplication"""
        if not patterns_map:
            return

        def get_function_name(func: Function) -> str:
            return func.name

        def create_renamed_function(name: str, func: Function) -> Function:
            return Function(
                name, func.return_type, func.parameters, func.is_static, func.is_declaration
            )

        file_model.functions = self._rename_list_elements(
            file_model.functions, patterns_map, get_function_name,
            create_renamed_function, "function", file_model.name
        )

    def _rename_macros(self, file_model: FileModel, patterns_map: Dict[str, str]) -> None:
        """Rename macros with deduplication"""
        if not patterns_map:
            return

        def get_macro_name(macro: str) -> str:
            # Extract the macro name from the full macro definition
            if macro.startswith("#define "):
                match = re.search(r"#define\s+([A-Za-z_][A-Za-z0-9_]*)", macro)
                if match:
                    return match.group(1)
            return macro

        def create_renamed_macro(name: str, macro: str) -> str:
            # Replace the macro name in the full macro definition
            if macro.startswith("#define "):
                # Use regex to replace the macro name while preserving parameters and value.
                # Pattern matches: #define MACRO_NAME or #define MACRO_NAME(params)
                pattern = r"(#define\s+)([A-Za-z_][A-Za-z0-9_]*)(\s*\([^)]*\))?(.*)?"
                match = re.match(pattern, macro)
                if match:
                    define_part = match.group(1)   # "#define "
                    old_name = match.group(2)      # "OLD_NAME"
                    params = match.group(3) or ""  # "(params)" or ""
                    rest = match.group(4) or ""    # " value" or ""
                    return f"{define_part}{name}{params}{rest}"
            return macro

        file_model.macros = self._rename_list_elements(
            file_model.macros, patterns_map, get_macro_name,
            create_renamed_macro, "macro", file_model.name
        )
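
    # Illustrative sketch (macro text invented): renaming OLD_MAX to NEW_MAX
    # turns "#define OLD_MAX(a, b) ((a) > (b) ? (a) : (b))" into
    # "#define NEW_MAX(a, b) ((a) > (b) ? (a) : (b))"; group 3 of the regex
    # preserves the parameter list and group 4 preserves the body.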

1369 def _rename_globals(self, file_model: FileModel, patterns_map: Dict[str, str]) -> None: 

1370 """Rename global variables with deduplication""" 

1371 if not patterns_map: 

1372 return 

1373 

1374 def get_global_name(global_var: Field) -> str: 

1375 return global_var.name 

1376 

1377 def create_renamed_global(name: str, global_var: Field) -> Field: 

1378 return Field(name, global_var.type) 

1379 

1380 file_model.globals = self._rename_list_elements( 

1381 file_model.globals, patterns_map, get_global_name, 

1382 create_renamed_global, "global", file_model.name 

1383 ) 

1384 

1385 def _rename_includes(self, file_model: FileModel, patterns_map: Dict[str, str]) -> None: 

1386 """Rename includes with deduplication""" 

1387 if not patterns_map: 

1388 return 

1389 

1390 # Rename includes using set-based deduplication 

1391 file_model.includes = self._rename_set_elements( 

1392 file_model.includes, patterns_map, "include", file_model.name 

1393 ) 

1394 

1395 # Also update include_relations with new names 

1396 file_model.include_relations = self._rename_include_relations( 

1397 file_model.include_relations, patterns_map 

1398 ) 

1399 

1400 def _rename_set_elements( 

1401 self, 

1402 elements_set: Set[str], 

1403 patterns_map: Dict[str, str], 

1404 element_type: str, 

1405 file_name: str 

1406 ) -> Set[str]: 

1407 """Generic method to rename set elements with deduplication""" 

1408 original_count = len(elements_set) 

1409 seen_names = set() 

1410 deduplicated_elements = set() 

1411 

1412 for element in elements_set: 

1413 # Apply rename patterns 

1414 new_name = self._apply_rename_patterns(element, patterns_map) 

1415 

1416 # Check for duplicates 

1417 if new_name in seen_names: 

1418 self.logger.debug( 

1419 "Deduplicating %s: removing duplicate '%s' (renamed from '%s')", 

1420 element_type, new_name, element 

1421 ) 

1422 continue 

1423 

1424 seen_names.add(new_name) 

1425 deduplicated_elements.add(new_name) 

1426 

1427 removed_count = original_count - len(deduplicated_elements) 

1428 if removed_count > 0: 

1429 self.logger.info( 

1430 "Renamed %ss in %s, removed %d duplicates", element_type, file_name, removed_count 

1431 ) 

1432 

1433 return deduplicated_elements 

1434 

1435 def _rename_include_relations( 

1436 self, relations: List[IncludeRelation], patterns_map: Dict[str, str] 

1437 ) -> List[IncludeRelation]: 

1438 """Rename include relations with pattern mapping""" 

1439 updated_relations = [] 

1440 for relation in relations: 

1441 new_included_file = self._apply_rename_patterns(relation.included_file, patterns_map) 

1442 updated_relation = IncludeRelation( 

1443 relation.source_file, 

1444 new_included_file, 

1445 relation.depth 

1446 ) 

1447 updated_relations.append(updated_relation) 

1448 return updated_relations 

1449 

1450 def _rename_structs(self, file_model: FileModel, patterns_map: Dict[str, str]) -> None: 

1451 """Rename structs with deduplication""" 

1452 if not patterns_map: 

1453 return 

1454 

1455 def create_renamed_struct(name: str, struct: Struct) -> Struct: 

1456 return Struct(name, struct.fields) 

1457 

1458 file_model.structs = self._rename_dict_elements( 

1459 file_model.structs, patterns_map, create_renamed_struct, "struct", file_model.name 

1460 ) 

1461 

1462 def _rename_enums(self, file_model: FileModel, patterns_map: Dict[str, str]) -> None: 

1463 """Rename enums with deduplication""" 

1464 if not patterns_map: 

1465 return 

1466 

1467 def create_renamed_enum(name: str, enum: Enum) -> Enum: 

1468 return Enum(name, enum.values) 

1469 

1470 file_model.enums = self._rename_dict_elements( 

1471 file_model.enums, patterns_map, create_renamed_enum, "enum", file_model.name 

1472 ) 

1473 

1474 def _rename_unions(self, file_model: FileModel, patterns_map: Dict[str, str]) -> None: 

1475 """Rename unions with deduplication""" 

1476 if not patterns_map: 

1477 return 

1478 

1479 def create_renamed_union(name: str, union: Union) -> Union: 

1480 return Union(name, union.fields) 

1481 

1482 file_model.unions = self._rename_dict_elements( 

1483 file_model.unions, patterns_map, create_renamed_union, "union", file_model.name 

1484 ) 

1485 

1486 def _rename_files(self, model: ProjectModel, patterns_map: Dict[str, str], target_files: Set[str]) -> ProjectModel: 

1487 """Rename files and update model.files keys""" 

1488 if not patterns_map: 

1489 return model 

1490 

1491 updated_files = {} 

1492 # Track mapping of old->new filenames (basenames) 

1493 file_rename_map: Dict[str, str] = {} 

1494 

1495 for file_path, file_model in model.files.items(): 

1496 # Only rename files in target_files 

1497 if file_path in target_files: 

1498 new_file_path = self._apply_rename_patterns(file_path, patterns_map) 

1499 

1500 if new_file_path != file_path: 

1501 # Update file_model.name to match new path 

1502 file_model.name = new_file_path 

1503 file_rename_map[Path(file_path).name] = Path(new_file_path).name 

1504 self.logger.debug("Renamed file: %s -> %s", file_path, new_file_path) 

1505 

1506 updated_files[new_file_path] = file_model 

1507 else: 

1508 # Keep original file unchanged 

1509 updated_files[file_path] = file_model 

1510 

1511 model.files = updated_files 

1512 

1513 # Propagate file renames to includes and include_relations across all files 

1514 if file_rename_map: 

1515 for fm in model.files.values(): 

1516 # Update includes set 

1517 if fm.includes: 

1518 new_includes: Set[str] = set() 

1519 for inc in fm.includes: 

1520 # Apply explicit rename map first; fallback to patterns map 

1521 inc_new = file_rename_map.get(inc, self._apply_rename_patterns(inc, patterns_map)) 

1522 new_includes.add(inc_new) 

1523 fm.includes = new_includes 

1524 

1525 # Update include_relations (both ends) 

1526 if fm.include_relations: 

1527 for rel in fm.include_relations: 

1528 src_new = file_rename_map.get(rel.source_file, self._apply_rename_patterns(rel.source_file, patterns_map)) 

1529 inc_new = file_rename_map.get(rel.included_file, self._apply_rename_patterns(rel.included_file, patterns_map)) 

1530 rel.source_file = src_new 

1531 rel.included_file = inc_new 

1532 

1533 return model 

1534 
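# A minimal sketch of the propagation step above: once a file is
# renamed, every other file's includes are rewritten through the same
# old->new basename map (with _apply_rename_patterns as the fallback),
# so the include graph keeps pointing at names that still exist. The
# names here are hypothetical.
file_rename_map = {"legacy_io.h": "io.h"}
includes = {"legacy_io.h", "string.h"}
includes = {file_rename_map.get(inc, inc) for inc in includes}
# includes == {"io.h", "string.h"}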

1535 def _apply_additions( 

1536 self, model: ProjectModel, add_config: Dict[str, Any], target_files: Set[str] 

1537 ) -> ProjectModel: 

1538 """Apply addition transformations to selected files""" 

1539 self.logger.debug( 

1540 "Applying addition transformations to %d files", len(target_files) 

1541 ) 

1542 

1543 # Apply additions only to target files 

1544 for file_path in target_files: 

1545 if file_path in model.files: 

1546 # Placeholder: addition logic is not yet implemented; it 

1547 # would insert new elements (structs, enums, functions, 

1548 # etc.) into the selected file models. 

1549 self.logger.debug("Applying additions to file: %s", file_path) 

1550 

1551 return model 

1552 

1553 def _apply_removals( 

1554 self, model: ProjectModel, remove_config: Dict[str, Any], target_files: Set[str] 

1555 ) -> ProjectModel: 

1556 """Apply removal transformations to selected files""" 

1557 self.logger.debug( 

1558 "Applying removal transformations to %d files", len(target_files) 

1559 ) 

1560 

1561 # Apply removals only to target files 

1562 for file_path in target_files: 

1563 if file_path in model.files: 

1564 file_model = model.files[file_path] 

1565 self.logger.debug("Applying removals to file: %s", file_path) 

1566 self._apply_file_level_removals(file_model, remove_config) 

1567 

1568 return model 

1569 

1570 def _apply_file_level_removals( 

1571 self, file_model: FileModel, remove_config: Dict[str, Any] 

1572 ) -> None: 

1573 """Apply all removal operations to a single file""" 

1574 removal_operations = [ 

1575 ("typedef", self._remove_typedefs), 

1576 ("functions", self._remove_functions), 

1577 ("macros", self._remove_macros), 

1578 ("globals", self._remove_globals), 

1579 ("includes", self._remove_includes), 

1580 ("structs", self._remove_structs), 

1581 ("enums", self._remove_enums), 

1582 ("unions", self._remove_unions), 

1583 ] 

1584 

1585 for config_key, removal_method in removal_operations: 

1586 if config_key in remove_config: 

1587 removal_method(file_model, remove_config[config_key]) 

1588 
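# A minimal sketch (hypothetical pattern values) of the remove_config
# keys dispatched above; each key holds a list of regex patterns, and
# any key left out leaves that element category untouched.
remove_config = {
    "functions": [r"^debug_"],      # drop debug helpers
    "macros": [r"^LOG_"],           # drop logging macros
    "includes": [r"^internal_"],    # drop private headers
}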

1589 def _remove_dict_elements( 

1590 self, 

1591 elements_dict: Dict[str, Any], 

1592 patterns: List[str], 

1593 element_type: str, 

1594 file_name: str 

1595 ) -> Dict[str, Any]: 

1596 """Generic method to remove dictionary elements matching patterns""" 

1597 if not patterns: 

1598 return elements_dict 

1599 

1600 original_count = len(elements_dict) 

1601 compiled_patterns = self._compile_patterns(patterns) 

1602 

1603 # Filter out elements that match any pattern 

1604 filtered_elements = {} 

1605 for name, element in elements_dict.items(): 

1606 if not self._matches_any_pattern(name, compiled_patterns): 

1607 filtered_elements[name] = element 

1608 else: 

1609 self.logger.debug("Removed %s: %s", element_type, name) 

1610 

1611 removed_count = original_count - len(filtered_elements) 

1612 if removed_count > 0: 

1613 self.logger.info( 

1614 "Removed %d %ss from %s", removed_count, element_type, file_name 

1615 ) 

1616 

1617 return filtered_elements 

1618 
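# A minimal sketch of the filtering performed by _remove_dict_elements,
# shown standalone on a plain dict (element names are hypothetical):
import re

elements = {"internal_state": object(), "public_api": object()}
compiled = [re.compile(r"^internal_")]
kept = {name: el for name, el in elements.items()
        if not any(p.search(name) for p in compiled)}
# kept retains only "public_api"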

1619 def _remove_list_elements( 

1620 self, 

1621 elements_list: List[Any], 

1622 patterns: List[str], 

1623 get_element_name: Callable[[Any], str], 

1624 element_type: str, 

1625 file_name: str 

1626 ) -> List[Any]: 

1627 """Generic method to remove list elements matching patterns""" 

1628 if not patterns: 

1629 return elements_list 

1630 

1631 original_count = len(elements_list) 

1632 compiled_patterns = self._compile_patterns(patterns) 

1633 

1634 # Filter out elements that match any pattern 

1635 filtered_elements = [] 

1636 for element in elements_list: 

1637 name = get_element_name(element) 

1638 if not self._matches_any_pattern(name, compiled_patterns): 

1639 filtered_elements.append(element) 

1640 else: 

1641 self.logger.debug("Removed %s: %s", element_type, name) 

1642 

1643 removed_count = original_count - len(filtered_elements) 

1644 if removed_count > 0: 

1645 self.logger.info( 

1646 "Removed %d %ss from %s", removed_count, element_type, file_name 

1647 ) 

1648 

1649 return filtered_elements 

1650 

1651 def _remove_typedefs(self, file_model: FileModel, patterns: List[str]) -> None: 

1652 """Remove typedefs matching regex patterns""" 

1653 file_model.aliases = self._remove_dict_elements( 

1654 file_model.aliases, patterns, "typedef", file_model.name 

1655 ) 

1656 

1657 def _remove_functions(self, file_model: FileModel, patterns: List[str]) -> None: 

1658 """Remove functions matching regex patterns""" 

1659 def get_function_name(func: Function) -> str: 

1660 return func.name 

1661 

1662 file_model.functions = self._remove_list_elements( 

1663 file_model.functions, patterns, get_function_name, "function", file_model.name 

1664 ) 

1665 

1666 def _remove_macros(self, file_model: FileModel, patterns: List[str]) -> None: 

1667 """Remove macros matching regex patterns""" 

1668 def get_macro_name(macro: str) -> str: 

1669 # Extract macro name from full macro definition 

1671 if macro.startswith("#define "): 

1672 # Extract macro name using regex 

1673 match = re.search(r"#define\s+([A-Za-z_][A-Za-z0-9_]*)", macro) 

1674 if match: 

1675 return match.group(1) 

1676 return macro 

1677 

1678 file_model.macros = self._remove_list_elements( 

1679 file_model.macros, patterns, get_macro_name, "macro", file_model.name 

1680 ) 

1681 
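# A minimal sketch of how get_macro_name reduces a stored macro
# definition to its bare identifier before pattern matching (the macro
# itself is hypothetical):
import re

macro = "#define MAX_BUFFER_SIZE 1024"
match = re.search(r"#define\s+([A-Za-z_][A-Za-z0-9_]*)", macro)
name = match.group(1) if match else macro   # -> "MAX_BUFFER_SIZE"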

1682 def _remove_globals(self, file_model: FileModel, patterns: List[str]) -> None: 

1683 """Remove global variables matching regex patterns""" 

1684 def get_global_name(global_var: Field) -> str: 

1685 return global_var.name 

1686 

1687 file_model.globals = self._remove_list_elements( 

1688 file_model.globals, patterns, get_global_name, "global variable", file_model.name 

1689 ) 

1690 

1691 def _remove_includes(self, file_model: FileModel, patterns: List[str]) -> None: 

1692 """Remove includes matching regex patterns""" 

1693 if not patterns: 

1694 return 

1695 

1696 original_count = len(file_model.includes) 

1697 compiled_patterns = self._compile_patterns(patterns) 

1698 

1699 # Filter out includes that match any pattern 

1700 filtered_includes = set() 

1701 for include in file_model.includes: 

1702 if not self._matches_any_pattern(include, compiled_patterns): 

1703 filtered_includes.add(include) 

1704 else: 

1705 self.logger.debug("Removed include: %s", include) 

1706 

1707 file_model.includes = filtered_includes 

1708 removed_count = original_count - len(file_model.includes) 

1709 

1710 # Also remove matching include_relations 

1711 if removed_count > 0: 

1712 self._remove_matching_include_relations(file_model, compiled_patterns, removed_count) 

1713 

1714 def _remove_matching_include_relations( 

1715 self, file_model: FileModel, compiled_patterns: List[Pattern[str]], removed_includes_count: int 

1716 ) -> None: 

1717 """Remove include relations that match the removed includes""" 

1718 original_relations_count = len(file_model.include_relations) 

1719 filtered_relations = [] 

1720 

1721 for relation in file_model.include_relations: 

1722 if not self._matches_any_pattern(relation.included_file, compiled_patterns): 

1723 filtered_relations.append(relation) 

1724 else: 

1725 self.logger.debug("Removed include relation: %s -> %s", 

1726 relation.source_file, relation.included_file) 

1727 

1728 file_model.include_relations = filtered_relations 

1729 removed_relations_count = original_relations_count - len(file_model.include_relations) 

1730 

1731 self.logger.info( 

1732 "Removed %d includes and %d include relations from %s", 

1733 removed_includes_count, removed_relations_count, file_model.name 

1734 ) 

1735 
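# A minimal sketch of the cascade above: removing an include also drops
# every include_relation whose included_file matches the same patterns,
# keeping both views of the include graph consistent. File names are
# hypothetical; relations are shown as (source, included) pairs.
import re

includes = {"app.h", "secret.h"}
relations = [("main.c", "app.h"), ("main.c", "secret.h")]
patterns = [re.compile(r"^secret")]
includes = {i for i in includes if not any(p.search(i) for p in patterns)}
relations = [r for r in relations if not any(p.search(r[1]) for p in patterns)]
# includes == {"app.h"}; relations == [("main.c", "app.h")]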

1736 def _remove_structs(self, file_model: FileModel, patterns: List[str]) -> None: 

1737 """Remove structs matching regex patterns""" 

1738 file_model.structs = self._remove_dict_elements( 

1739 file_model.structs, patterns, "struct", file_model.name 

1740 ) 

1741 

1742 def _remove_enums(self, file_model: FileModel, patterns: List[str]) -> None: 

1743 """Remove enums matching regex patterns""" 

1744 file_model.enums = self._remove_dict_elements( 

1745 file_model.enums, patterns, "enum", file_model.name 

1746 ) 

1747 

1748 def _remove_unions(self, file_model: FileModel, patterns: List[str]) -> None: 

1749 """Remove unions matching regex patterns""" 

1750 file_model.unions = self._remove_dict_elements( 

1751 file_model.unions, patterns, "union", file_model.name 

1752 ) 

1753 

1754 def _should_include_file( 

1755 self, 

1756 file_path: str, 

1757 include_patterns: List[Pattern[str]], 

1758 exclude_patterns: List[Pattern[str]], 

1759 ) -> bool: 

1760 """Check if a file should be included based on filters""" 

1761 # Check include patterns 

1762 if include_patterns: 

1763 if not any(pattern.search(file_path) for pattern in include_patterns): 

1764 return False 

1765 

1766 # Check exclude patterns 

1767 if exclude_patterns: 

1768 if any(pattern.search(file_path) for pattern in exclude_patterns): 

1769 return False 

1770 

1771 return True 

1772 
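# A minimal sketch of the filter semantics above: include patterns act
# as an allow-list, exclude patterns as a deny-list applied second. The
# filter values are hypothetical.
import re

include = [re.compile(r"\.c$")]
exclude = [re.compile(r"_test\.c$")]

def keep(path: str) -> bool:
    if include and not any(p.search(path) for p in include):
        return False
    return not any(p.search(path) for p in exclude)

# keep("main.c") is True; keep("main_test.c") and keep("api.h") are False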

1773 def _compile_patterns(self, patterns: List[str]) -> List[Pattern[str]]: 

1774 """Compile regex patterns with error handling""" 

1775 compiled_patterns: List[Pattern[str]] = [] 

1776 for pattern in patterns: 

1777 try: 

1778 compiled_patterns.append(re.compile(pattern)) 

1779 except re.error as e: 

1780 self.logger.warning("Invalid regex pattern '%s': %s", pattern, e) 

1781 return compiled_patterns 

1782 
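# A minimal sketch of the error handling above: a malformed pattern is
# logged and skipped, so one bad entry cannot abort the transformation.
import re

patterns = [r"^keep_me", r"[unclosed"]   # second pattern is invalid
compiled = []
for pat in patterns:
    try:
        compiled.append(re.compile(pat))
    except re.error:
        pass                             # the transformer logs a warning here
# len(compiled) == 1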

1783 def _filter_dict(self, items: Dict[str, Any], filters: Dict[str, Any]) -> Dict[str, Any]: 

1784 """Filter a dictionary based on include/exclude patterns""" 

1785 include_patterns = self._compile_patterns(filters.get("include", [])) 

1786 exclude_patterns = self._compile_patterns(filters.get("exclude", [])) 

1787 

1788 filtered = {} 

1789 for name, item in items.items(): 

1790 # Check include patterns 

1791 if include_patterns: 

1792 if not any(pattern.search(name) for pattern in include_patterns): 

1793 continue 

1794 

1795 # Check exclude patterns 

1796 if exclude_patterns: 

1797 if any(pattern.search(name) for pattern in exclude_patterns): 

1798 continue 

1799 

1800 filtered[name] = item 

1801 

1802 return filtered 

1803 

1804 def _filter_list(self, items: List[Any], filters: Dict[str, Any], key: Optional[Callable[[Any], str]] = None) -> List[Any]: 

1805 """Filter a list based on include/exclude patterns""" 

1806 include_patterns = self._compile_patterns(filters.get("include", [])) 

1807 exclude_patterns = self._compile_patterns(filters.get("exclude", [])) 

1808 

1809 filtered = [] 

1810 for item in items: 

1811 item_name = key(item) if key else str(item) 

1812 

1813 # Check include patterns 

1814 if include_patterns: 

1815 if not any(pattern.search(item_name) for pattern in include_patterns): 

1816 continue 

1817 

1818 # Check exclude patterns 

1819 if exclude_patterns: 

1820 if any(pattern.search(item_name) for pattern in exclude_patterns): 

1821 continue 

1822 

1823 filtered.append(item) 

1824 

1825 return filtered 

1826 
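# A minimal sketch of _filter_list with a non-empty include list;
# strings stand in for model objects (a key callable would extract the
# name), and the filter values are hypothetical.
import re

names = ["init_driver", "debug_dump", "read_frame"]
inc = [re.compile(r"^(init|read)_")]
exc = [re.compile(r"_frame$")]
kept = [n for n in names
        if any(p.search(n) for p in inc)
        and not any(p.search(n) for p in exc)]
# kept == ["init_driver"]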

1827 def _dict_to_file_model(self, data: Dict[str, Any]) -> FileModel: 

1828 """Convert dictionary back to FileModel""" 

1829 

1830 # Convert structs 

1831 structs = {} 

1832 for name, struct_data in data.get("structs", {}).items(): 

1833 fields = [ 

1834 Field(f["name"], f["type"]) for f in struct_data.get("fields", []) 

1835 ] 

1836 structs[name] = Struct( 

1837 name, 

1838 fields, 

1839 struct_data.get("methods", []), 

1840 struct_data.get("tag_name", ""), 

1841 struct_data.get("uses", []), 

1842 ) 

1843 

1844 # Convert enums 

1845 enums = {} 

1846 for name, enum_data in data.get("enums", {}).items(): 

1847 values = [] 

1848 for value_data in enum_data.get("values", []): 

1849 if isinstance(value_data, dict): 

1850 values.append( 

1851 EnumValue(value_data["name"], value_data.get("value")) 

1852 ) 

1853 else: 

1854 values.append(EnumValue(value_data)) 

1855 enums[name] = Enum(name, values) 

1856 

1857 # Convert unions 

1858 unions = {} 

1859 for name, union_data in data.get("unions", {}).items(): 

1860 fields = [Field(f["name"], f["type"]) for f in union_data.get("fields", [])] 

1861 unions[name] = Union( 

1862 name, fields, union_data.get("tag_name", ""), union_data.get("uses", []) 

1863 ) 

1864 

1865 # Convert aliases 

1866 aliases = {} 

1867 for name, alias_data in data.get("aliases", {}).items(): 

1868 if isinstance(alias_data, dict): 

1869 aliases[name] = Alias( 

1870 alias_data.get("name", name), 

1871 alias_data.get("original_type", ""), 

1872 alias_data.get("uses", []), 

1873 ) 

1874 else: 

1875 # Handle legacy format where aliases was Dict[str, str] 

1876 aliases[name] = Alias(name, alias_data, []) 

1877 

1878 # Convert functions 

1879 functions = [] 

1880 for func_data in data.get("functions", []): 

1881 parameters = [ 

1882 Field(p["name"], p["type"]) for p in func_data.get("parameters", []) 

1883 ] 

1884 functions.append( 

1885 Function( 

1886 func_data["name"], 

1887 func_data["return_type"], 

1888 parameters, 

1889 is_static=func_data.get("is_static", False), 

1890 is_declaration=func_data.get("is_declaration", False), 

1891 ) 

1892 ) 

1893 

1894 # Convert globals 

1895 globals_list = [] 

1896 for global_data in data.get("globals", []): 

1897 globals_list.append(Field(global_data["name"], global_data["type"])) 

1898 

1899 return FileModel( 

1900 file_path=data["file_path"], 

1901 structs=structs, 

1902 enums=enums, 

1903 unions=unions, 

1904 functions=functions, 

1905 globals=globals_list, 

1906 includes=set(data.get("includes", [])), 

1907 macros=data.get("macros", []), 

1908 aliases=aliases, 

1909 anonymous_relationships=data.get("anonymous_relationships", {}), 

1910 ) 

1911 
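# A minimal sketch of the two alias serializations accepted above; the
# legacy form maps a typedef name straight to its original type, while
# the current form is a full dict. The typedef shown is hypothetical.
legacy = {"u32": "unsigned int"}
current = {"u32": {"name": "u32", "original_type": "unsigned int", "uses": []}}
# Either shape is reconstructed as Alias("u32", "unsigned int", []).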

1912 def _save_model(self, model: ProjectModel, output_file: str) -> None: 

1913 """Save model to JSON file""" 

1914 try: 

1915 model.save(output_file) 

1916 self.logger.debug("Model saved to: %s", output_file) 

1917 except Exception as e: 

1918 raise ValueError(f"Failed to save model to {output_file}: {e}") from e