Coverage for src/c2puml/core/transformer.py: 57%
873 statements
coverage.py v7.10.4, created at 2025-08-20 03:53 +0000
#!/usr/bin/env python3
"""
Transformer module for C to PlantUML converter - Step 2: Transform model based on
configuration
"""

import json
import logging
import re
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Pattern, Set, Tuple, Union as TypingUnion
from collections import deque

from ..models import (
    Alias,
    Enum,
    EnumValue,
    Field,
    FileModel,
    Function,
    IncludeRelation,
    ProjectModel,
    Struct,
    Union,
)


class Transformer:
    """Main transformer class for Step 2: Transform model based on configuration"""

    def __init__(self) -> None:
        self.logger = logging.getLogger(__name__)

    def transform(
        self, model_file: str, config_file: str, output_file: Optional[str] = None
    ) -> str:
        """
        Step 2: Transform model based on configuration

        Args:
            model_file: Input JSON model file path
            config_file: Configuration file path
            output_file: Output transformed model file path (optional, defaults to
                model_file)

        Returns:
            Path to the transformed model file
        """
        self.logger.info("Step 2: Transforming model: %s", model_file)

        # Load the model and configuration
        model = self._load_model(model_file)
        config = self._load_config(config_file)

        # Apply transformations
        transformed_model = self._apply_transformations(model, config)

        # Save transformed model
        output_path = output_file or model_file
        self._save_model(transformed_model, output_path)

        self.logger.info("Step 2 complete! Transformed model saved to: %s", output_path)
        return output_path
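
    # Usage sketch (illustrative; the three file paths below are hypothetical):
    # transform() loads the JSON model, applies the configured transformations,
    # and writes the result back, defaulting to overwriting the input model.
    #
    #     transformer = Transformer()
    #     transformer.transform("model.json", "config.json", "model_transformed.json")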
    def _load_model(self, model_file: str) -> ProjectModel:
        """Load model from JSON file"""
        model_path = Path(model_file)
        if not model_path.exists():
            raise FileNotFoundError(f"Model file not found: {model_file}")

        try:
            model = ProjectModel.load(model_file)
            self.logger.debug("Loaded model with %d files", len(model.files))
            return model
        except Exception as e:
            raise ValueError(f"Failed to load model from {model_file}: {e}") from e

    def _load_config(self, config_file: str) -> Dict[str, Any]:
        """Load configuration from JSON file"""
        config_path = Path(config_file)
        if not config_path.exists():
            raise FileNotFoundError(f"Configuration file not found: {config_file}")

        try:
            with open(config_file, "r", encoding="utf-8") as f:
                config = json.load(f)

            self.logger.debug("Loaded configuration from: %s", config_file)
            return config
        except Exception as e:
            raise ValueError(
                f"Failed to load configuration from {config_file}: {e}"
            ) from e

    def _apply_transformations(
        self, model: ProjectModel, config: Dict[str, Any]
    ) -> ProjectModel:
        """Apply all configured transformations to the model"""
        self.logger.info("Applying transformations to model")

        # Apply comprehensive file filtering (moved from parser)
        if "file_filters" in config:
            model = self._apply_file_filters(model, config["file_filters"])

        # Support backward compatibility - convert single 'transformations' to container format
        config = self._ensure_backward_compatibility(config)

        # Discover and apply transformation containers
        model = self._apply_transformation_containers(model, config)

        # Apply simplified depth-based include processing
        if self._should_process_include_relations(config):
            model = self._process_include_relations_simplified(model, config)

        self.logger.info(
            "Transformations complete. Model now has %d files", len(model.files)
        )
        return model

    def _apply_transformation_containers(
        self, model: ProjectModel, config: Dict[str, Any]
    ) -> ProjectModel:
        """Discover and apply transformation containers in alphabetical order"""
        transformation_containers = self._discover_transformation_containers(config)

        if not transformation_containers:
            return model

        for container_name, transformation_config in transformation_containers:
            self.logger.info("Applying transformation container: %s", container_name)
            model = self._apply_single_transformation_container(
                model, transformation_config, container_name
            )
            self._log_model_state_after_container(model, container_name)

        return model

    def _log_model_state_after_container(
        self, model: ProjectModel, container_name: str
    ) -> None:
        """Log model state after applying a transformation container"""
        total_elements = sum(
            len(file_model.structs) + len(file_model.enums) + len(file_model.unions) +
            len(file_model.functions) + len(file_model.globals) + len(file_model.macros) +
            len(file_model.aliases)
            for file_model in model.files.values()
        )
        self.logger.info(
            "After %s: model contains %d files with %d total elements",
            container_name, len(model.files), total_elements
        )

    def _should_process_include_relations(self, config: Dict[str, Any]) -> bool:
        """Check if include relations should be processed based on global or file-specific settings"""
        # Check global include_depth
        if config.get("include_depth", 1) > 1:
            return True

        # Check file-specific include_depth settings
        if "file_specific" in config:
            for file_config in config["file_specific"].values():
                if file_config.get("include_depth", 1) > 1:
                    return True

        return False

    def _discover_transformation_containers(self, config: Dict[str, Any]) -> List[Tuple[str, Dict[str, Any]]]:
        """
        Discover all transformation containers and sort them alphabetically

        Returns:
            List of (container_name, transformation_config) tuples sorted by name
        """
        transformation_containers = [
            (key, value)
            for key, value in config.items()
            if key.startswith("transformations") and isinstance(value, dict)
        ]

        # Sort alphabetically by container name
        transformation_containers.sort(key=lambda x: x[0])

        self.logger.info(
            "Discovered %d transformation containers: %s",
            len(transformation_containers),
            [name for name, _ in transformation_containers]
        )

        return transformation_containers

    def _ensure_backward_compatibility(self, config: Dict[str, Any]) -> Dict[str, Any]:
        """
        Ensure backward compatibility by converting the old single 'transformations'
        key to the new container format
        """
        # Make a copy to avoid modifying the original
        config = config.copy()

        # Check if the old format is used (single 'transformations' key)
        if self._is_legacy_transformation_format(config):
            self.logger.info("Converting legacy 'transformations' format to container format")

            # Move old transformations to a default container
            old_transformations = config.pop("transformations")
            config["transformations_00_default"] = old_transformations

            self.logger.debug("Converted to container: transformations_00_default")

        return config

    def _is_legacy_transformation_format(self, config: Dict[str, Any]) -> bool:
        """Check if configuration uses the legacy transformation format"""
        return (
            "transformations" in config and
            not any(key.startswith("transformations_") for key in config.keys())
        )
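
    # Configuration sketch (illustrative): a legacy config with a single
    # "transformations" key is rewritten into a container named
    # "transformations_00_default"; explicit containers are applied in
    # alphabetical order of their names, e.g.:
    #
    #     {
    #         "transformations_01_cleanup": {"remove": {"functions": ["^debug_"]}},
    #         "transformations_02_rename": {"rename": {"functions": {"^old_": "new_"}}}
    #     }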
    def _apply_single_transformation_container(
        self,
        model: ProjectModel,
        transformation_config: Dict[str, Any],
        container_name: str
    ) -> ProjectModel:
        """
        Apply a single transformation container

        Args:
            model: Project model to transform
            transformation_config: Single transformation container configuration
            container_name: Name of the container for logging

        Returns:
            Transformed project model
        """
        self.logger.debug("Processing transformation container: %s", container_name)

        # Determine target files for this container
        target_files = self._get_target_files(model, transformation_config)

        # Apply transformations in a specific order: remove -> rename -> add.
        # This order ensures that removals happen first, then renaming with
        # deduplication, then additions to the cleaned model.
        model = self._apply_remove_operations(model, transformation_config, target_files, container_name)
        model = self._apply_rename_operations(model, transformation_config, target_files, container_name)
        model = self._apply_add_operations(model, transformation_config, target_files, container_name)

        return model
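
    # Container sketch (illustrative): one container may carry all three
    # operation kinds; they are always applied remove -> rename -> add,
    # regardless of key order in the JSON:
    #
    #     {
    #         "file_selection": [".*\\.c$"],
    #         "remove": {"macros": ["^DEPRECATED_"]},
    #         "rename": {"typedef": {"^legacy_": "core_"}}
    #     }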
    def _get_target_files(
        self, model: ProjectModel, transformation_config: Dict[str, Any]
    ) -> Set[str]:
        """Determine which files to apply transformations to based on file_selection"""
        selected_files = transformation_config.get("file_selection", [])

        # Validate that file_selection is a list
        if not isinstance(selected_files, list):
            selected_files = []
            self.logger.warning("Invalid file_selection format, must be a list, defaulting to empty list")

        # Determine which files to apply transformations to
        if not selected_files:
            target_files = set(model.files.keys())
            self.logger.debug("No file selection specified, applying to all %d files", len(target_files))
        else:
            target_files = self._match_files_by_patterns(model, selected_files)
            self.logger.debug(
                "File selection patterns %s matched %d files: %s",
                selected_files, len(target_files), list(target_files)
            )

        return target_files

    def _match_files_by_patterns(
        self, model: ProjectModel, patterns: List[str]
    ) -> Set[str]:
        """Match files based on selection patterns"""
        target_files = set()
        for pattern in patterns:
            for file_path in model.files.keys():
                if self._matches_pattern(file_path, pattern):
                    target_files.add(file_path)
        return target_files
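
    # Selection sketch (illustrative): an empty "file_selection" targets every
    # file; otherwise each entry is a regex searched against model file paths:
    #
    #     "file_selection": []              # all files
    #     "file_selection": [".*\\.h$"]     # headers only
    #     "file_selection": ["^src/net_"]   # paths starting with src/net_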
    def _apply_remove_operations(
        self,
        model: ProjectModel,
        transformation_config: Dict[str, Any],
        target_files: Set[str],
        container_name: str
    ) -> ProjectModel:
        """Apply remove operations for a transformation container"""
        if "remove" not in transformation_config:
            return model

        self.logger.debug("Applying remove operations for container: %s", container_name)

        # Collect typedef names BEFORE removing them for type reference cleanup
        removed_typedef_names = self._collect_typedef_names_for_removal(
            model, transformation_config["remove"], target_files
        )

        model = self._apply_removals(model, transformation_config["remove"], target_files)

        # Clean up type references after typedef removal using pre-collected names
        if removed_typedef_names:
            self.logger.debug("Calling type reference cleanup for container: %s", container_name)
            self._cleanup_type_references_by_names(model, removed_typedef_names)

        return model

    def _apply_rename_operations(
        self,
        model: ProjectModel,
        transformation_config: Dict[str, Any],
        target_files: Set[str],
        container_name: str
    ) -> ProjectModel:
        """Apply rename operations for a transformation container"""
        if "rename" not in transformation_config:
            return model

        self.logger.debug("Applying rename operations for container: %s", container_name)
        return self._apply_renaming(model, transformation_config["rename"], target_files)

    def _apply_add_operations(
        self,
        model: ProjectModel,
        transformation_config: Dict[str, Any],
        target_files: Set[str],
        container_name: str
    ) -> ProjectModel:
        """Apply add operations for a transformation container"""
        if "add" not in transformation_config:
            return model

        self.logger.debug("Applying add operations for container: %s", container_name)
        return self._apply_additions(model, transformation_config["add"], target_files)

    def _collect_typedef_names_for_removal(
        self,
        model: ProjectModel,
        remove_config: Dict[str, Any],
        target_files: Set[str]
    ) -> Set[str]:
        """Collect typedef names that will be removed, for type reference cleanup"""
        removed_typedef_names = set()

        if "typedef" not in remove_config:
            return removed_typedef_names

        typedef_patterns = remove_config["typedef"]
        compiled_patterns = self._compile_patterns(typedef_patterns)

        if not compiled_patterns:
            return removed_typedef_names

        for file_path in target_files:
            if file_path in model.files:
                file_model = model.files[file_path]
                for alias_name in file_model.aliases.keys():
                    if self._matches_any_pattern(alias_name, compiled_patterns):
                        removed_typedef_names.add(alias_name)

        self.logger.debug("Pre-identified typedefs for removal: %s", list(removed_typedef_names))
        return removed_typedef_names

    def _process_include_relations_simplified(
        self, model: ProjectModel, config: Dict[str, Any]
    ) -> ProjectModel:
        """
        Simplified include processing following a structured depth-based approach:
        1. Each include structure has a single root C file
        2. Process the C file's direct includes through filters first
        3. Then recursively process header files' includes with filtering
        4. Continue until include_depth is reached
        """
        global_include_depth = config.get("include_depth", 1)
        file_specific_config = config.get("file_specific", {})
        include_filter_local_only = config.get("include_filter_local_only", False)
        always_show_includes = config.get("always_show_includes", False)

        self.logger.info(
            "Processing includes with simplified depth-based approach (global_depth=%d)",
            global_include_depth
        )

        # Clear all existing include relations
        for file_model in model.files.values():
            file_model.include_relations = []

        # Create a filename-to-file-model mapping for quick lookup
        file_map = {}
        for file_model in model.files.values():
            filename = Path(file_model.name).name
            file_map[filename] = file_model

        # Process each C file as a root with its own include structure
        c_files = [fm for fm in model.files.values() if fm.name.endswith(".c")]

        for root_file in c_files:
            self._process_root_c_file_includes(
                root_file, file_map, global_include_depth, file_specific_config, include_filter_local_only, always_show_includes
            )

        return model

    def _process_root_c_file_includes(
        self,
        root_file: FileModel,
        file_map: Dict[str, FileModel],
        global_include_depth: int,
        file_specific_config: Dict[str, Any],
        include_filter_local_only: bool,
        always_show_includes: bool
    ) -> None:
        """
        Process includes for a single root C file following the simplified approach:
        - Start with the root C file
        - Apply filters at each depth level
        - Process layer by layer until max depth is reached
        """
        root_filename = Path(root_file.name).name

        # Get file-specific settings or use global defaults
        include_depth = global_include_depth
        include_filters = []

        if root_filename in file_specific_config:
            file_config = file_specific_config[root_filename]
            include_depth = file_config.get("include_depth", global_include_depth)
            include_filters = file_config.get("include_filter", [])

        # If configured to keep only the local header, ensure the filter pattern
        # for the local header is present
        if include_filter_local_only:
            local_header_pattern = f"^{Path(root_filename).stem}\\.h$"
            if local_header_pattern not in include_filters:
                include_filters.append(local_header_pattern)

        # Skip processing if depth is 1 or less (no include relations needed)
        if include_depth <= 1:
            self.logger.debug(
                "Skipping include processing for %s (depth=%d)",
                root_filename, include_depth
            )
            return

        # Compile filter patterns
        compiled_filters = []
        if include_filters:
            try:
                compiled_filters = [re.compile(pattern) for pattern in include_filters]
                self.logger.debug(
                    "Compiled %d filter patterns for %s",
                    len(compiled_filters), root_filename
                )
            except re.error as e:
                self.logger.warning(
                    "Invalid regex pattern for %s: %s", root_filename, e
                )

        self.logger.debug(
            "Processing includes for root C file %s (depth=%d, filters=%d)",
            root_filename, include_depth, len(compiled_filters)
        )

        # Track processed files to avoid cycles
        processed_files = set()

        # Reset placeholder headers for this root file context
        try:
            root_file.placeholder_headers.clear()
        except Exception:
            # In case the model was loaded without this field
            root_file.placeholder_headers = set()

        # Process includes level by level using a BFS approach
        current_level = [root_file]  # Start with the root C file

        for depth in range(1, include_depth + 1):
            next_level = []

            self.logger.debug(
                "Processing depth %d for %s (%d files at current level)",
                depth, root_filename, len(current_level)
            )

            for current_file in current_level:
                current_filename = Path(current_file.name).name

                # Skip if already processed, to avoid cycles
                if current_filename in processed_files:
                    continue
                processed_files.add(current_filename)

                # Process each include in the current file
                for include_name in current_file.includes:
                    # Determine if this include is filtered out by patterns
                    filtered_out_by_patterns = False
                    if compiled_filters:
                        if not any(pattern.search(include_name) for pattern in compiled_filters):
                            if always_show_includes:
                                filtered_out_by_patterns = True
                                self.logger.debug(
                                    "Include %s filtered by patterns at depth %d for %s, but will be shown as placeholder",
                                    include_name, depth, root_filename
                                )
                            else:
                                self.logger.debug(
                                    "Filtered out include %s at depth %d for %s",
                                    include_name, depth, root_filename
                                )
                                continue

                    # Check if the included file exists in our project
                    if include_name not in file_map:
                        self.logger.debug(
                            "Include %s not found in project files (depth %d, root %s)",
                            include_name, depth, root_filename
                        )
                        continue

                    # Prevent self-references
                    if include_name == current_filename:
                        self.logger.debug(
                            "Skipping self-reference %s at depth %d for %s",
                            include_name, depth, root_filename
                        )
                        continue

                    # Check for duplicate relations to prevent cycles
                    existing_relation = any(
                        rel.source_file == current_filename and rel.included_file == include_name
                        for rel in root_file.include_relations
                    )

                    if existing_relation:
                        self.logger.debug(
                            "Skipping duplicate relation %s -> %s for %s",
                            current_filename, include_name, root_filename
                        )
                        continue

                    # Prevent processing files that would create cycles (already processed)
                    if include_name in processed_files:
                        self.logger.debug(
                            "Skipping already processed file %s to prevent cycle for %s",
                            include_name, root_filename
                        )
                        continue

                    # Create and add the include relation to the root C file
                    relation = IncludeRelation(
                        source_file=current_filename,
                        included_file=include_name,
                        depth=depth
                    )
                    root_file.include_relations.append(relation)

                    self.logger.debug(
                        "Added include relation: %s -> %s (depth %d) for root %s",
                        current_filename, include_name, depth, root_filename
                    )

                    # If filtered out by patterns and always_show_includes is
                    # enabled, mark as placeholder
                    if filtered_out_by_patterns:
                        try:
                            root_file.placeholder_headers.add(include_name)
                        except Exception:
                            root_file.placeholder_headers = {include_name}
                        # Do not process further includes/content for this header
                        continue

                    # Add the included file to the next level for further processing
                    included_file = file_map[include_name]
                    if included_file not in next_level and include_name not in processed_files:
                        next_level.append(included_file)

            # Move to the next level for the next iteration
            current_level = next_level

            # Break if there are no more files to process
            if not current_level:
                self.logger.debug(
                    "No more files to process at depth %d for %s",
                    depth + 1, root_filename
                )
                break

        self.logger.debug(
            "Completed include processing for %s: %d relations generated",
            root_filename, len(root_file.include_relations)
        )
    def _apply_file_filters(
        self, model: ProjectModel, filters: Dict[str, Any]
    ) -> ProjectModel:
        """Apply user-configured file-level filters (important filtering already
        done in parser)"""
        include_patterns = self._compile_patterns(filters.get("include", []))
        exclude_patterns = self._compile_patterns(filters.get("exclude", []))

        if not include_patterns and not exclude_patterns:
            return model

        filtered_files = {}
        for file_path, file_model in model.files.items():
            if self._should_include_file(file_path, include_patterns, exclude_patterns):
                filtered_files[file_path] = file_model

        model.files = filtered_files
        self.logger.debug(
            "User file filtering: %d files after filtering", len(model.files)
        )
        return model

    def _apply_include_filters(
        self, model: ProjectModel, include_filters: Dict[str, List[str]]
    ) -> ProjectModel:
        """Apply include filters for each root file based on regex patterns

        Args:
            model: The project model to apply filters to
            include_filters: Dictionary mapping root files to their include filter patterns
        """
        self.logger.info(
            "Applying include filters for %d root files", len(include_filters)
        )

        # Compile regex patterns for each root file
        compiled_filters = {}
        for root_file, patterns in include_filters.items():
            try:
                compiled_filters[root_file] = [
                    re.compile(pattern) for pattern in patterns
                ]
                self.logger.debug(
                    "Compiled %d patterns for root file: %s", len(patterns), root_file
                )
            except re.error as e:
                self.logger.warning(
                    "Invalid regex pattern for root file %s: %s", root_file, e
                )
                # Skip invalid patterns for this root file
                continue

        if not compiled_filters:
            self.logger.warning(
                "No valid include filters found, skipping include filtering"
            )
            return model

        # Create a mapping from header files to their root C files
        header_to_root = self._create_header_to_root_mapping(model)

        # Apply filters to each file in the model
        for file_path, file_model in model.files.items():
            # Find the root file for this file
            root_file = self._find_root_file_with_mapping(
                file_path, file_model, header_to_root
            )

            if root_file in compiled_filters:
                # Apply filtering (preserve includes arrays, filter include_relations)
                self._filter_include_relations(
                    file_model, compiled_filters[root_file], root_file
                )

        return model

    def _create_header_to_root_mapping(self, model: ProjectModel) -> Dict[str, str]:
        """Create a mapping from header files to their root C files"""
        header_to_root = {}

        # First, map C files to themselves
        c_files = []
        for file_path, file_model in model.files.items():
            if file_model.name.endswith(".c"):
                header_to_root[file_model.name] = file_model.name
                c_files.append(file_model.name)

        # Then, map header files to their corresponding C files
        for file_path, file_model in model.files.items():
            if not file_model.name.endswith(".c"):  # It's a header file
                # Strategy 1: Look for a C file with the same base name
                header_base_name = Path(file_model.name).stem
                matching_c_file = header_base_name + ".c"

                if matching_c_file in [Path(c_file).name for c_file in c_files]:
                    header_to_root[file_model.name] = matching_c_file
                else:
                    # Strategy 2: Find which C file includes this header
                    including_c_files = []
                    for c_file_path, c_file_model in model.files.items():
                        if (c_file_model.name.endswith(".c") and
                                file_model.name in c_file_model.includes):
                            including_c_files.append(c_file_model.name)

                    if including_c_files:
                        # Use the first C file that includes this header
                        header_to_root[file_model.name] = including_c_files[0]
                    else:
                        # Strategy 3: Fall back to the first available C file
                        if c_files:
                            header_to_root[file_model.name] = c_files[0]

        return header_to_root
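
    # Mapping sketch (illustrative file names): given utils.c, utils.h, and
    # helpers.h where utils.c includes both headers, the three strategies yield:
    #
    #     utils.c   -> utils.c   # C files map to themselves
    #     utils.h   -> utils.c   # Strategy 1: matching base name
    #     helpers.h -> utils.c   # Strategy 2: first C file that includes it
    #
    # A header no C file includes falls back to the first C file found (Strategy 3).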
    def _find_root_file_with_mapping(
        self, file_path: str, file_model: FileModel, header_to_root: Dict[str, str]
    ) -> str:
        """Find the root C file for a given file using the header mapping"""
        if file_model.name.endswith(".c"):
            return file_model.name

        # For header files, use the mapping
        return header_to_root.get(file_model.name, file_model.name)

    def _find_root_file(self, file_path: str, file_model: FileModel) -> str:
        """Find the root C file for a given file"""
        filename = Path(file_path).name

        # If it's a .c file, it's its own root
        if filename.endswith(".c"):
            return filename

        # For header files, find the corresponding .c file
        base_name = Path(file_path).stem

        # Look for a .c file with the same base name
        if base_name and not filename.startswith("."):
            return base_name + ".c"

        # Fallback: use the filename as root (original behavior)
        return filename

    def _filter_include_relations(
        self, file_model: FileModel, patterns: List[re.Pattern], root_file: str
    ) -> None:
        """Unified include_relations filtering, preserving includes arrays."""
        self.logger.debug(
            "Filtering include_relations for file %s (root: %s)", file_model.name, root_file
        )

        original_relations_count = len(file_model.include_relations)
        filtered_relations: List[IncludeRelation] = []

        for relation in file_model.include_relations:
            if self._matches_any_pattern(relation.included_file, patterns):
                filtered_relations.append(relation)
            else:
                self.logger.debug(
                    "Filtered out include relation: %s -> %s (root: %s)",
                    relation.source_file,
                    relation.included_file,
                    root_file,
                )

        file_model.include_relations = filtered_relations

        self.logger.debug(
            "Include filtering for %s: relations %d->%d (includes preserved)",
            file_model.name,
            original_relations_count,
            len(file_model.include_relations),
        )

    # Removed deprecated include filtering wrappers; use _filter_include_relations instead

    def _matches_any_pattern(self, text: str, patterns: List[Pattern[str]]) -> bool:
        """Check if text matches any of the given regex patterns"""
        return any(pattern.search(text) for pattern in patterns)

    def _matches_pattern(self, text: str, pattern: str) -> bool:
        """Safe regex match for a single pattern string"""
        try:
            return bool(re.search(pattern, text))
        except re.error as e:
            self.logger.warning("Invalid regex pattern '%s': %s", pattern, e)
            return False
    def _apply_model_transformations(
        self, model: ProjectModel, transformations: Dict[str, Any]
    ) -> ProjectModel:
        """Apply model-level transformations with file selection support"""
        # Get file selection configuration
        selected_files = transformations.get("file_selection", [])

        # Validate that file_selection is a list
        if not isinstance(selected_files, list):
            selected_files = []
            self.logger.warning("Invalid file_selection format, must be a list, defaulting to empty list")

        # Determine which files to apply transformations to.
        # If selected_files is empty or not specified, apply to all files.
        if not selected_files:
            target_files = set(model.files.keys())
            self.logger.debug("No file selection specified, applying to all %d files", len(target_files))
        else:
            # Apply only to selected files
            target_files = set()
            for pattern in selected_files:
                for file_path in model.files.keys():
                    if self._matches_pattern(file_path, pattern):
                        target_files.add(file_path)

            self.logger.debug(
                "File selection patterns %s matched %d files: %s",
                selected_files, len(target_files), list(target_files)
            )

        self.logger.debug(
            "Applying transformations to %d files: %s",
            len(target_files),
            list(target_files),
        )

        # Rename elements
        if "rename" in transformations:
            model = self._apply_renaming(model, transformations["rename"], target_files)

        # Add elements
        if "add" in transformations:
            model = self._apply_additions(model, transformations["add"], target_files)

        # Remove elements
        if "remove" in transformations:
            model = self._apply_removals(model, transformations["remove"], target_files)

            # Clean up type references after typedef removal
            if "typedef" in transformations["remove"]:
                self._cleanup_type_references(model, transformations["remove"]["typedef"], target_files)

        return model

    def _apply_renaming(
        self, model: ProjectModel, rename_config: Dict[str, Any], target_files: Set[str]
    ) -> ProjectModel:
        """Apply renaming transformations to selected files"""
        self.logger.debug(
            "Applying renaming transformations to %d files", len(target_files)
        )

        # Apply renaming only to target files
        for file_path in target_files:
            if file_path in model.files:
                file_model = model.files[file_path]
                self.logger.debug("Applying renaming to file: %s", file_path)
                self._apply_file_level_renaming(file_model, rename_config)

        # Apply file renaming (affects model.files keys)
        if "files" in rename_config:
            model = self._rename_files(model, rename_config["files"], target_files)

        return model

    def _apply_file_level_renaming(
        self, file_model: FileModel, rename_config: Dict[str, Any]
    ) -> None:
        """Apply all renaming operations to a single file"""
        rename_operations = [
            ("typedef", self._rename_typedefs),
            ("functions", self._rename_functions),
            ("macros", self._rename_macros),
            ("globals", self._rename_globals),
            ("includes", self._rename_includes),
            ("structs", self._rename_structs),
            ("enums", self._rename_enums),
            ("unions", self._rename_unions),
        ]

        for config_key, rename_method in rename_operations:
            if config_key in rename_config:
                rename_method(file_model, rename_config[config_key])

    def _cleanup_type_references(
        self, model: ProjectModel, removed_typedef_patterns: List[str], target_files: Set[str]
    ) -> None:
        """
        Clean up type references after typedef removal

        This method removes type references that point to removed typedefs from:
        - Function parameters and return types
        - Global variable types
        - Struct field types
        """
        self.logger.debug("Starting type reference cleanup with patterns: %s, target_files: %s",
                          removed_typedef_patterns, list(target_files))

        if not removed_typedef_patterns:
            self.logger.debug("No typedef patterns to clean up")
            return

        compiled_patterns = self._compile_patterns(removed_typedef_patterns)
        if not compiled_patterns:
            self.logger.debug("No valid compiled patterns")
            return

        # Track removed type names for cleanup
        removed_types = set()

        # First, collect all removed typedef names from all target files
        for file_path in target_files:
            if file_path in model.files:
                file_model = model.files[file_path]

                # Check what typedefs would be removed from this file
                for alias_name in list(file_model.aliases.keys()):
                    if self._matches_any_pattern(alias_name, compiled_patterns):
                        removed_types.add(alias_name)
                        self.logger.debug("Found removed typedef: %s in file %s", alias_name, file_path)

        self.logger.debug("Total removed types identified: %s", list(removed_types))

        # Clean up type references across all files, since typedefs can be used anywhere
        cleaned_count = 0
        for file_path, file_model in model.files.items():
            file_cleaned = 0

            # Clean function parameter and return types
            for func in file_model.functions:
                # Clean return type
                if func.return_type and self._contains_removed_type(func.return_type, removed_types):
                    old_type = func.return_type
                    func.return_type = self._remove_type_references(func.return_type, removed_types)
                    if func.return_type != old_type:
                        file_cleaned += 1
                        self.logger.debug(
                            "Cleaned return type '%s' -> '%s' in function %s",
                            old_type, func.return_type, func.name
                        )

                # Clean parameter types
                for param in func.parameters:
                    if param.type and self._contains_removed_type(param.type, removed_types):
                        old_type = param.type
                        param.type = self._remove_type_references(param.type, removed_types)
                        if param.type != old_type:
                            file_cleaned += 1
                            self.logger.debug(
                                "Cleaned parameter type '%s' -> '%s' for parameter %s",
                                old_type, param.type, param.name
                            )

            # Clean global variable types
            for global_var in file_model.globals:
                if global_var.type and self._contains_removed_type(global_var.type, removed_types):
                    old_type = global_var.type
                    global_var.type = self._remove_type_references(global_var.type, removed_types)
                    if global_var.type != old_type:
                        file_cleaned += 1
                        self.logger.debug(
                            "Cleaned global variable type '%s' -> '%s' for %s",
                            old_type, global_var.type, global_var.name
                        )

            # Clean struct field types
            for struct in file_model.structs.values():
                for field in struct.fields:
                    if field.type and self._contains_removed_type(field.type, removed_types):
                        old_type = field.type
                        field.type = self._remove_type_references(field.type, removed_types)
                        if field.type != old_type:
                            file_cleaned += 1
                            self.logger.debug(
                                "Cleaned struct field type '%s' -> '%s' for %s.%s",
                                old_type, field.type, struct.name, field.name
                            )

            cleaned_count += file_cleaned

        if cleaned_count > 0:
            self.logger.info(
                "Cleaned %d type references to removed typedefs: %s",
                cleaned_count, list(removed_types)
            )

    def _contains_removed_type(self, type_str: str, removed_types: Set[str]) -> bool:
        """Check if a type string contains any of the removed types"""
        if not type_str or not removed_types:
            return False

        # Check for removed type names in the type string.
        # This handles cases like "old_point_t *", "const old_config_t", etc.
        for removed_type in removed_types:
            if removed_type in type_str:
                return True
        return False

    def _remove_type_references(self, type_str: str, removed_types: Set[str]) -> str:
        """Remove references to removed types from a type string"""
        if not type_str or not removed_types:
            return type_str

        cleaned_type = type_str
        for removed_type in removed_types:
            if removed_type in cleaned_type:
                # Replace the removed type with "void" to maintain type safety
                cleaned_type = cleaned_type.replace(removed_type, "void")

        # Clean up any double spaces or other artifacts
        cleaned_type = " ".join(cleaned_type.split())
        return cleaned_type
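
    # Cleanup sketch (illustrative): with removed_types = {"old_point_t"}, the
    # substring check and void replacement behave like:
    #
    #     "const old_point_t *" -> "const void *"
    #     "old_point_t[10]"     -> "void[10]"
    #
    # The plain substring match is deliberate here, so pointer and qualified
    # forms are caught without per-form patterns.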
    def _cleanup_type_references_by_names(
        self, model: ProjectModel, removed_typedef_names: Set[str]
    ) -> None:
        """
        Clean up type references using pre-collected typedef names

        This method removes type references that point to removed typedefs from:
        - Function parameters and return types
        - Global variable types
        - Struct field types
        """
        if not removed_typedef_names:
            self.logger.debug("No removed typedef names provided")
            return

        self.logger.debug("Cleaning type references for removed typedefs: %s", list(removed_typedef_names))

        # Clean up type references across all files, since typedefs can be used anywhere
        cleaned_count = 0
        for file_path, file_model in model.files.items():
            file_cleaned = 0

            # Clean function parameter and return types
            for func in file_model.functions:
                # Clean return type
                if func.return_type and self._contains_removed_type(func.return_type, removed_typedef_names):
                    old_type = func.return_type
                    func.return_type = self._remove_type_references(func.return_type, removed_typedef_names)
                    if func.return_type != old_type:
                        file_cleaned += 1
                        self.logger.debug(
                            "Cleaned return type '%s' -> '%s' in function %s",
                            old_type, func.return_type, func.name
                        )

                # Clean parameter types
                for param in func.parameters:
                    if param.type and self._contains_removed_type(param.type, removed_typedef_names):
                        old_type = param.type
                        param.type = self._remove_type_references(param.type, removed_typedef_names)
                        if param.type != old_type:
                            file_cleaned += 1
                            self.logger.debug(
                                "Cleaned parameter type '%s' -> '%s' for parameter %s",
                                old_type, param.type, param.name
                            )

            # Clean global variable types
            for global_var in file_model.globals:
                if global_var.type and self._contains_removed_type(global_var.type, removed_typedef_names):
                    old_type = global_var.type
                    global_var.type = self._remove_type_references(global_var.type, removed_typedef_names)
                    if global_var.type != old_type:
                        file_cleaned += 1
                        self.logger.debug(
                            "Cleaned global variable type '%s' -> '%s' for %s",
                            old_type, global_var.type, global_var.name
                        )

            # Clean struct field types
            for struct in file_model.structs.values():
                for field in struct.fields:
                    if field.type and self._contains_removed_type(field.type, removed_typedef_names):
                        old_type = field.type
                        field.type = self._remove_type_references(field.type, removed_typedef_names)
                        if field.type != old_type:
                            file_cleaned += 1
                            self.logger.debug(
                                "Cleaned struct field type '%s' -> '%s' for %s.%s",
                                old_type, field.type, struct.name, field.name
                            )

            cleaned_count += file_cleaned
            if file_cleaned > 0:
                self.logger.debug("Cleaned %d type references in file %s", file_cleaned, file_path)

        if cleaned_count > 0:
            self.logger.info(
                "Cleaned %d type references to removed typedefs: %s",
                cleaned_count, list(removed_typedef_names)
            )
        else:
            self.logger.debug("No type references found to clean up")

    def _update_type_references_for_renames(self, file_model: FileModel, typedef_renames: Dict[str, str]) -> None:
        """Update all type references when typedefs are renamed"""
        updated_count = 0

        # Update function return types and parameter types
        for func in file_model.functions:
            # Update return type
            if func.return_type:
                old_type = func.return_type
                new_type = self._update_type_string_for_renames(func.return_type, typedef_renames)
                if new_type != old_type:
                    func.return_type = new_type
                    updated_count += 1
                    self.logger.debug(
                        "Updated return type '%s' -> '%s' in function %s",
                        old_type, new_type, func.name
                    )

            # Update parameter types
            for param in func.parameters:
                if param.type:
                    old_type = param.type
                    new_type = self._update_type_string_for_renames(param.type, typedef_renames)
                    if new_type != old_type:
                        param.type = new_type
                        updated_count += 1
                        self.logger.debug(
                            "Updated parameter type '%s' -> '%s' for parameter %s in function %s",
                            old_type, new_type, param.name, func.name
                        )

        # Update global variable types
        for global_var in file_model.globals:
            if global_var.type:
                old_type = global_var.type
                new_type = self._update_type_string_for_renames(global_var.type, typedef_renames)
                if new_type != old_type:
                    global_var.type = new_type
                    updated_count += 1
                    self.logger.debug(
                        "Updated global variable type '%s' -> '%s' for %s",
                        old_type, new_type, global_var.name
                    )

        # Update struct field types
        for struct in file_model.structs.values():
            for field in struct.fields:
                if field.type:
                    old_type = field.type
                    new_type = self._update_type_string_for_renames(field.type, typedef_renames)
                    if new_type != old_type:
                        field.type = new_type
                        updated_count += 1
                        self.logger.debug(
                            "Updated struct field type '%s' -> '%s' for %s.%s",
                            old_type, new_type, struct.name, field.name
                        )

        # Update union field types
        for union in file_model.unions.values():
            for field in union.fields:
                if field.type:
                    old_type = field.type
                    new_type = self._update_type_string_for_renames(field.type, typedef_renames)
                    if new_type != old_type:
                        field.type = new_type
                        updated_count += 1
                        self.logger.debug(
                            "Updated union field type '%s' -> '%s' for %s.%s",
                            old_type, new_type, union.name, field.name
                        )

        if updated_count > 0:
            self.logger.info(
                "Updated %d type references for renamed typedefs in %s: %s",
                updated_count, file_model.name, typedef_renames
            )

    def _update_type_string_for_renames(self, type_str: str, typedef_renames: Dict[str, str]) -> str:
        """Update a type string by replacing old typedef names with new ones"""
        if not type_str or not typedef_renames:
            return type_str

        updated_type = type_str
        for old_name, new_name in typedef_renames.items():
            # Use word boundaries to avoid partial matches.
            # This handles cases like "old_config_t *", "const old_config_t", etc.
            pattern = r'\b' + re.escape(old_name) + r'\b'
            updated_type = re.sub(pattern, new_name, updated_type)

        return updated_type
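
    # Rename-propagation sketch (illustrative): unlike removal cleanup, renames
    # use word boundaries, so a rename {"point_t": "vec2_t"} rewrites
    # "const point_t *" but leaves "my_point_t" untouched:
    #
    #     re.sub(r'\bpoint_t\b', 'vec2_t', 'const point_t *')  # 'const vec2_t *'
    #     re.sub(r'\bpoint_t\b', 'vec2_t', 'my_point_t')       # 'my_point_t'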
    def _rename_dict_elements(
        self,
        elements_dict: Dict[str, Any],
        patterns_map: Dict[str, str],
        create_renamed_element: Callable[[str, Any], Any],
        element_type: str,
        file_name: str
    ) -> Dict[str, Any]:
        """Generic method to rename dictionary elements with deduplication"""
        original_count = len(elements_dict)
        seen_names = set()
        deduplicated_elements = {}

        for name, element in elements_dict.items():
            # Apply rename patterns
            new_name = self._apply_rename_patterns(name, patterns_map)

            # Check for duplicates
            if new_name in seen_names:
                self.logger.debug(
                    "Deduplicating %s: removing duplicate '%s' (renamed from '%s')",
                    element_type, new_name, name
                )
                continue

            seen_names.add(new_name)

            # Create an updated element with the new name
            updated_element = create_renamed_element(new_name, element)
            deduplicated_elements[new_name] = updated_element

        removed_count = original_count - len(deduplicated_elements)
        if removed_count > 0:
            self.logger.info(
                "Renamed %ss in %s, removed %d duplicates", element_type, file_name, removed_count
            )

        return deduplicated_elements

    def _rename_list_elements(
        self,
        elements_list: List[Any],
        patterns_map: Dict[str, str],
        get_element_name: Callable[[Any], str],
        create_renamed_element: Callable[[str, Any], Any],
        element_type: str,
        file_name: str
    ) -> List[Any]:
        """Generic method to rename list elements with deduplication"""
        original_count = len(elements_list)
        seen_names = set()
        deduplicated_elements = []

        for element in elements_list:
            name = get_element_name(element)
            # Apply rename patterns
            new_name = self._apply_rename_patterns(name, patterns_map)

            # Check for duplicates
            if new_name in seen_names:
                self.logger.debug(
                    "Deduplicating %s: removing duplicate '%s' (renamed from '%s')",
                    element_type, new_name, name
                )
                continue

            seen_names.add(new_name)

            # Create an updated element with the new name
            updated_element = create_renamed_element(new_name, element)
            deduplicated_elements.append(updated_element)

        removed_count = original_count - len(deduplicated_elements)
        if removed_count > 0:
            self.logger.info(
                "Renamed %ss in %s, removed %d duplicates", element_type, file_name, removed_count
            )

        return deduplicated_elements

    def _apply_rename_patterns(self, original_name: str, patterns_map: Dict[str, str]) -> str:
        """
        Apply rename patterns to an element name

        Args:
            original_name: Original element name
            patterns_map: Dict mapping regex patterns to replacement strings

        Returns:
            Renamed element name (or the original if no patterns match)
        """
        for pattern, replacement in patterns_map.items():
            try:
                # Apply regex substitution
                new_name = re.sub(pattern, replacement, original_name)
                if new_name != original_name:
                    self.logger.debug(
                        "Renamed '%s' to '%s' using pattern '%s'",
                        original_name, new_name, pattern
                    )
                    return new_name
            except re.error as e:
                self.logger.warning(
                    "Invalid regex pattern '%s': %s", pattern, e
                )
                continue

        return original_name
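
    # Pattern-map sketch (illustrative): the first pattern that changes the
    # name wins, and backreferences work as in re.sub:
    #
    #     patterns_map = {r"^legacy_(\w+)$": r"core_\1"}
    #     # "legacy_init" -> "core_init"; "setup" is returned unchanged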
    def _rename_typedefs(self, file_model: FileModel, patterns_map: Dict[str, str]) -> None:
        """Rename typedefs with deduplication"""
        if not patterns_map:
            return

        # Track old-to-new name mappings for type reference updates
        typedef_renames = {}

        def create_renamed_alias(name: str, alias: Alias) -> Alias:
            return Alias(name, alias.original_type, alias.uses)

        # Capture renames before applying them
        for old_name in file_model.aliases:
            new_name = self._apply_rename_patterns(old_name, patterns_map)
            if new_name != old_name:
                typedef_renames[old_name] = new_name

        file_model.aliases = self._rename_dict_elements(
            file_model.aliases, patterns_map, create_renamed_alias, "typedef", file_model.name
        )

        # Update type references throughout the file
        if typedef_renames:
            self._update_type_references_for_renames(file_model, typedef_renames)

    def _rename_functions(self, file_model: FileModel, patterns_map: Dict[str, str]) -> None:
        """Rename functions with deduplication"""
        if not patterns_map:
            return

        def get_function_name(func: Function) -> str:
            return func.name

        def create_renamed_function(name: str, func: Function) -> Function:
            return Function(
                name, func.return_type, func.parameters, func.is_static, func.is_declaration
            )

        file_model.functions = self._rename_list_elements(
            file_model.functions, patterns_map, get_function_name,
            create_renamed_function, "function", file_model.name
        )

    def _rename_macros(self, file_model: FileModel, patterns_map: Dict[str, str]) -> None:
        """Rename macros with deduplication"""
        if not patterns_map:
            return

        def get_macro_name(macro: str) -> str:
            # Extract the macro name from the full macro definition
            if macro.startswith("#define "):
                match = re.search(r"#define\s+([A-Za-z_][A-Za-z0-9_]*)", macro)
                if match:
                    return match.group(1)
            return macro

        def create_renamed_macro(name: str, macro: str) -> str:
            # Replace the macro name in the full macro definition
            if macro.startswith("#define "):
                # Use a regex to replace the macro name while preserving parameters and value.
                # The pattern matches: #define MACRO_NAME or #define MACRO_NAME(params)
                pattern = r"(#define\s+)([A-Za-z_][A-Za-z0-9_]*)(\s*\([^)]*\))?(.*)?"
                match = re.match(pattern, macro)
                if match:
                    define_part = match.group(1)   # "#define "
                    old_name = match.group(2)      # "OLD_NAME"
                    params = match.group(3) or ""  # "(params)" or ""
                    rest = match.group(4) or ""    # " value" or ""
                    return f"{define_part}{name}{params}{rest}"
            return macro

        file_model.macros = self._rename_list_elements(
            file_model.macros, patterns_map, get_macro_name,
            create_renamed_macro, "macro", file_model.name
        )
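
    # Macro-rename sketch (illustrative): the capture groups keep the parameter
    # list and body intact while only the name changes:
    #
    #     "#define OLD_MAX(a, b) ((a) > (b) ? (a) : (b))"
    #         -> "#define NEW_MAX(a, b) ((a) > (b) ? (a) : (b))"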
    def _rename_globals(self, file_model: FileModel, patterns_map: Dict[str, str]) -> None:
        """Rename global variables with deduplication"""
        if not patterns_map:
            return

        def get_global_name(global_var: Field) -> str:
            return global_var.name

        def create_renamed_global(name: str, global_var: Field) -> Field:
            return Field(name, global_var.type)

        file_model.globals = self._rename_list_elements(
            file_model.globals, patterns_map, get_global_name,
            create_renamed_global, "global", file_model.name
        )

    def _rename_includes(self, file_model: FileModel, patterns_map: Dict[str, str]) -> None:
        """Rename includes with deduplication"""
        if not patterns_map:
            return

        # Rename includes using set-based deduplication
        file_model.includes = self._rename_set_elements(
            file_model.includes, patterns_map, "include", file_model.name
        )

        # Also update include_relations with the new names
        file_model.include_relations = self._rename_include_relations(
            file_model.include_relations, patterns_map
        )

    def _rename_set_elements(
        self,
        elements_set: Set[str],
        patterns_map: Dict[str, str],
        element_type: str,
        file_name: str
    ) -> Set[str]:
        """Generic method to rename set elements with deduplication"""
        original_count = len(elements_set)
        seen_names = set()
        deduplicated_elements = set()

        for element in elements_set:
            # Apply rename patterns
            new_name = self._apply_rename_patterns(element, patterns_map)

            # Check for duplicates
            if new_name in seen_names:
                self.logger.debug(
                    "Deduplicating %s: removing duplicate '%s' (renamed from '%s')",
                    element_type, new_name, element
                )
                continue

            seen_names.add(new_name)
            deduplicated_elements.add(new_name)

        removed_count = original_count - len(deduplicated_elements)
        if removed_count > 0:
            self.logger.info(
                "Renamed %ss in %s, removed %d duplicates", element_type, file_name, removed_count
            )

        return deduplicated_elements

    def _rename_include_relations(
        self, relations: List[IncludeRelation], patterns_map: Dict[str, str]
    ) -> List[IncludeRelation]:
        """Rename include relations with pattern mapping"""
        updated_relations = []
        for relation in relations:
            new_included_file = self._apply_rename_patterns(relation.included_file, patterns_map)
            updated_relation = IncludeRelation(
                relation.source_file,
                new_included_file,
                relation.depth
            )
            updated_relations.append(updated_relation)
        return updated_relations

    def _rename_structs(self, file_model: FileModel, patterns_map: Dict[str, str]) -> None:
        """Rename structs with deduplication"""
        if not patterns_map:
            return

        def create_renamed_struct(name: str, struct: Struct) -> Struct:
            return Struct(name, struct.fields)

        file_model.structs = self._rename_dict_elements(
            file_model.structs, patterns_map, create_renamed_struct, "struct", file_model.name
        )

    def _rename_enums(self, file_model: FileModel, patterns_map: Dict[str, str]) -> None:
        """Rename enums with deduplication"""
        if not patterns_map:
            return

        def create_renamed_enum(name: str, enum: Enum) -> Enum:
            return Enum(name, enum.values)

        file_model.enums = self._rename_dict_elements(
            file_model.enums, patterns_map, create_renamed_enum, "enum", file_model.name
        )

    def _rename_unions(self, file_model: FileModel, patterns_map: Dict[str, str]) -> None:
        """Rename unions with deduplication"""
        if not patterns_map:
            return

        def create_renamed_union(name: str, union: Union) -> Union:
            return Union(name, union.fields)

        file_model.unions = self._rename_dict_elements(
            file_model.unions, patterns_map, create_renamed_union, "union", file_model.name
        )

    def _rename_files(self, model: ProjectModel, patterns_map: Dict[str, str], target_files: Set[str]) -> ProjectModel:
        """Rename files and update model.files keys"""
        if not patterns_map:
            return model

        updated_files = {}
        # Track the mapping of old->new filenames (basenames)
        file_rename_map: Dict[str, str] = {}

        for file_path, file_model in model.files.items():
            # Only rename files in target_files
            if file_path in target_files:
                new_file_path = self._apply_rename_patterns(file_path, patterns_map)

                if new_file_path != file_path:
                    # Update file_model.name to match the new path
                    file_model.name = new_file_path
                    file_rename_map[Path(file_path).name] = Path(new_file_path).name
                    self.logger.debug("Renamed file: %s -> %s", file_path, new_file_path)

                updated_files[new_file_path] = file_model
            else:
                # Keep the original file unchanged
                updated_files[file_path] = file_model

        model.files = updated_files

        # Propagate file renames to includes and include_relations across all files
        if file_rename_map:
            for fm in model.files.values():
                # Update the includes set
                if fm.includes:
                    new_includes: Set[str] = set()
                    for inc in fm.includes:
                        # Apply the explicit rename map first; fall back to the patterns map
                        inc_new = file_rename_map.get(inc, self._apply_rename_patterns(inc, patterns_map))
                        new_includes.add(inc_new)
                    fm.includes = new_includes

                # Update include_relations (both ends)
                if fm.include_relations:
                    for rel in fm.include_relations:
                        src_new = file_rename_map.get(rel.source_file, self._apply_rename_patterns(rel.source_file, patterns_map))
                        inc_new = file_rename_map.get(rel.included_file, self._apply_rename_patterns(rel.included_file, patterns_map))
                        rel.source_file = src_new
                        rel.included_file = inc_new

        return model
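
    # File-rename sketch (illustrative): renaming a header also rewrites every
    # other file's includes and include_relations that mention the old basename,
    # so the rename map doubles as a consistency pass:
    #
    #     patterns_map = {r"_old\.h$": ".h"}
    #     # "config_old.h" becomes "config.h" in model.files, in the renamed
    #     # FileModel's name, and in all other files' includes/include_relations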
1535 def _apply_additions(
1536 self, model: ProjectModel, add_config: Dict[str, Any], target_files: Set[str]
1537 ) -> ProjectModel:
1538 """Apply addition transformations to selected files"""
1539 self.logger.debug(
1540 "Applying addition transformations to %d files", len(target_files)
1541 )
1543 # Apply additions only to target files
1544 for file_path in target_files:
1545 if file_path in model.files:
1546 # Apply addition logic here
1547 # This would handle adding new elements like structs, enums,
1548 # functions, etc.
1549 self.logger.debug("Applying additions to file: %s", file_path)
1551 return model
1553 def _apply_removals(
1554 self, model: ProjectModel, remove_config: Dict[str, Any], target_files: Set[str]
1555 ) -> ProjectModel:
1556 """Apply removal transformations to selected files"""
1557 self.logger.debug(
1558 "Applying removal transformations to %d files", len(target_files)
1559 )
1561 # Apply removals only to target files
1562 for file_path in target_files:
1563 if file_path in model.files:
1564 file_model = model.files[file_path]
1565 self.logger.debug("Applying removals to file: %s", file_path)
1566 self._apply_file_level_removals(file_model, remove_config)
1568 return model
1570 def _apply_file_level_removals(
1571 self, file_model: FileModel, remove_config: Dict[str, Any]
1572 ) -> None:
1573 """Apply all removal operations to a single file"""
1574 removal_operations = [
1575 ("typedef", self._remove_typedefs),
1576 ("functions", self._remove_functions),
1577 ("macros", self._remove_macros),
1578 ("globals", self._remove_globals),
1579 ("includes", self._remove_includes),
1580 ("structs", self._remove_structs),
1581 ("enums", self._remove_enums),
1582 ("unions", self._remove_unions),
1583 ]
1585 for config_key, removal_method in removal_operations:
1586 if config_key in remove_config:
1587 removal_method(file_model, remove_config[config_key])
1589 def _remove_dict_elements(
1590 self,
1591 elements_dict: Dict[str, Any],
1592 patterns: List[str],
1593 element_type: str,
1594 file_name: str
1595 ) -> Dict[str, Any]:
1596 """Generic method to remove dictionary elements matching patterns"""
1597 if not patterns:
1598 return elements_dict
1600 original_count = len(elements_dict)
1601 compiled_patterns = self._compile_patterns(patterns)
1603 # Filter out elements that match any pattern
1604 filtered_elements = {}
1605 for name, element in elements_dict.items():
1606 if not self._matches_any_pattern(name, compiled_patterns):
1607 filtered_elements[name] = element
1608 else:
1609 self.logger.debug("Removed %s: %s", element_type, name)
1611 removed_count = original_count - len(filtered_elements)
1612 if removed_count > 0:
1613 self.logger.info(
1614 "Removed %d %ss from %s", removed_count, element_type, file_name
1615 )
1617 return filtered_elements
1619 def _remove_list_elements(
1620 self,
1621 elements_list: List[Any],
1622 patterns: List[str],
1623 get_element_name: Callable[[Any], str],
1624 element_type: str,
1625 file_name: str
1626 ) -> List[Any]:
1627 """Generic method to remove list elements matching patterns"""
1628 if not patterns:
1629 return elements_list
1631 original_count = len(elements_list)
1632 compiled_patterns = self._compile_patterns(patterns)
1634 # Filter out elements that match any pattern
1635 filtered_elements = []
1636 for element in elements_list:
1637 name = get_element_name(element)
1638 if not self._matches_any_pattern(name, compiled_patterns):
1639 filtered_elements.append(element)
1640 else:
1641 self.logger.debug("Removed %s: %s", element_type, name)
1643 removed_count = original_count - len(filtered_elements)
1644 if removed_count > 0:
1645 self.logger.info(
1646 "Removed %d %ss from %s", removed_count, element_type, file_name
1647 )
1649 return filtered_elements
1651 def _remove_typedefs(self, file_model: FileModel, patterns: List[str]) -> None:
1652 """Remove typedefs matching regex patterns"""
1653 file_model.aliases = self._remove_dict_elements(
1654 file_model.aliases, patterns, "typedef", file_model.name
1655 )
1657 def _remove_functions(self, file_model: FileModel, patterns: List[str]) -> None:
1658 """Remove functions matching regex patterns"""
1659 def get_function_name(func: Function) -> str:
1660 return func.name
1662 file_model.functions = self._remove_list_elements(
1663 file_model.functions, patterns, get_function_name, "function", file_model.name
1664 )
1666 def _remove_macros(self, file_model: FileModel, patterns: List[str]) -> None:
1667 """Remove macros matching regex patterns"""
1668 def get_macro_name(macro: str) -> str:
1669 # Extract the macro name from a full "#define" definition
1670 # (re is imported at module level; no local import is needed)
1671 if macro.startswith("#define "):
1672 # Capture the identifier immediately after "#define"
1673 match = re.search(r"#define\s+([A-Za-z_][A-Za-z0-9_]*)", macro)
1674 if match:
1675 return match.group(1)
1676 return macro
1678 file_model.macros = self._remove_list_elements(
1679 file_model.macros, patterns, get_macro_name, "macro", file_model.name
1680 )
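# Example (illustrative): get_macro_name maps stored definitions to bare
# names, so removal patterns are written against names, not full text:
#
#     '#define MAX_RETRIES 3'  ->  'MAX_RETRIES'
#     '#define MIN(a, b) ...'  ->  'MIN'
#
# e.g. patterns=[r"^MAX_"] removes MAX_RETRIES but leaves MIN untouched.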
1682 def _remove_globals(self, file_model: FileModel, patterns: List[str]) -> None:
1683 """Remove global variables matching regex patterns"""
1684 def get_global_name(global_var: Field) -> str:
1685 return global_var.name
1687 file_model.globals = self._remove_list_elements(
1688 file_model.globals, patterns, get_global_name, "global variable", file_model.name
1689 )
1691 def _remove_includes(self, file_model: FileModel, patterns: List[str]) -> None:
1692 """Remove includes matching regex patterns"""
1693 if not patterns:
1694 return
1696 original_count = len(file_model.includes)
1697 compiled_patterns = self._compile_patterns(patterns)
1699 # Filter out includes that match any pattern
1700 filtered_includes = set()
1701 for include in file_model.includes:
1702 if not self._matches_any_pattern(include, compiled_patterns):
1703 filtered_includes.add(include)
1704 else:
1705 self.logger.debug("Removed include: %s", include)
1707 file_model.includes = filtered_includes
1708 removed_count = original_count - len(file_model.includes)
1710 # Also remove matching include_relations
1711 if removed_count > 0:
1712 self._remove_matching_include_relations(file_model, compiled_patterns, removed_count)
1714 def _remove_matching_include_relations(
1715 self, file_model: FileModel, compiled_patterns: List[Pattern[str]], removed_includes_count: int
1716 ) -> None:
1717 """Remove include relations that match the removed includes"""
1718 original_relations_count = len(file_model.include_relations)
1719 filtered_relations = []
1721 for relation in file_model.include_relations:
1722 if not self._matches_any_pattern(relation.included_file, compiled_patterns):
1723 filtered_relations.append(relation)
1724 else:
1725 self.logger.debug("Removed include relation: %s -> %s",
1726 relation.source_file, relation.included_file)
1728 file_model.include_relations = filtered_relations
1729 removed_relations_count = original_relations_count - len(file_model.include_relations)
1731 self.logger.info(
1732 "Removed %d includes and %d include relations from %s",
1733 removed_includes_count, removed_relations_count, file_model.name
1734 )
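# Example (hypothetical file state): with includes={"util.h", "legacy.h"},
# one relation main.c -> legacy.h, and patterns=[r"^legacy"], the include
# and the relation are removed together, and the summary log reads:
#
#     Removed 1 includes and 1 include relations from main.c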
1736 def _remove_structs(self, file_model: FileModel, patterns: List[str]) -> None:
1737 """Remove structs matching regex patterns"""
1738 file_model.structs = self._remove_dict_elements(
1739 file_model.structs, patterns, "struct", file_model.name
1740 )
1742 def _remove_enums(self, file_model: FileModel, patterns: List[str]) -> None:
1743 """Remove enums matching regex patterns"""
1744 file_model.enums = self._remove_dict_elements(
1745 file_model.enums, patterns, "enum", file_model.name
1746 )
1748 def _remove_unions(self, file_model: FileModel, patterns: List[str]) -> None:
1749 """Remove unions matching regex patterns"""
1750 file_model.unions = self._remove_dict_elements(
1751 file_model.unions, patterns, "union", file_model.name
1752 )
1754 def _should_include_file(
1755 self,
1756 file_path: str,
1757 include_patterns: List[Pattern[str]],
1758 exclude_patterns: List[Pattern[str]],
1759 ) -> bool:
1760 """Check if a file should be included based on filters"""
1761 # Check include patterns
1762 if include_patterns:
1763 if not any(pattern.search(file_path) for pattern in include_patterns):
1764 return False
1766 # Check exclude patterns
1767 if exclude_patterns:
1768 if any(pattern.search(file_path) for pattern in exclude_patterns):
1769 return False
1771 return True
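# Example (illustrative): include patterns act as an allow-list and
# exclude patterns always win; an empty include list admits everything
# not explicitly excluded:
#
#     >>> inc = self._compile_patterns([r"\.c$"])
#     >>> exc = self._compile_patterns([r"test"])
#     >>> self._should_include_file("src/main.c", inc, exc)
#     True
#     >>> self._should_include_file("src/main_test.c", inc, exc)
#     False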
1773 def _compile_patterns(self, patterns: List[str]) -> List[Pattern[str]]:
1774 """Compile regex patterns with error handling"""
1775 compiled_patterns: List[Pattern[str]] = []
1776 for pattern in patterns:
1777 try:
1778 compiled_patterns.append(re.compile(pattern))
1779 except re.error as e:
1780 self.logger.warning("Invalid regex pattern '%s': %s", pattern, e)
1781 return compiled_patterns
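# Example (illustrative): an invalid pattern is logged and skipped rather
# than raised, so one bad entry cannot abort a whole transformation run:
#
#     >>> pats = self._compile_patterns([r"^foo", r"(unclosed"])
#     >>> [p.pattern for p in pats]
#     ['^foo']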
1783 def _filter_dict(self, items: Dict[str, Any], filters: Dict[str, Any]) -> Dict[str, Any]:
1784 """Filter a dictionary based on include/exclude patterns"""
1785 include_patterns = self._compile_patterns(filters.get("include", []))
1786 exclude_patterns = self._compile_patterns(filters.get("exclude", []))
1788 filtered = {}
1789 for name, item in items.items():
1790 # Check include patterns
1791 if include_patterns:
1792 if not any(pattern.search(name) for pattern in include_patterns):
1793 continue
1795 # Check exclude patterns
1796 if exclude_patterns:
1797 if any(pattern.search(name) for pattern in exclude_patterns):
1798 continue
1800 filtered[name] = item
1802 return filtered
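# Example (hypothetical filters; same shape as read via filters.get above):
#
#     >>> items = {"foo_init": 1, "foo_test": 2, "bar": 3}
#     >>> self._filter_dict(items, {"include": [r"^foo"],
#     ...                           "exclude": [r"test"]})
#     {'foo_init': 1}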
1804 def _filter_list(self, items: List[Any], filters: Dict[str, Any], key: Optional[Callable[[Any], str]] = None) -> List[Any]:
1805 """Filter a list based on include/exclude patterns"""
1806 include_patterns = self._compile_patterns(filters.get("include", []))
1807 exclude_patterns = self._compile_patterns(filters.get("exclude", []))
1809 filtered = []
1810 for item in items:
1811 item_name = key(item) if key else str(item)
1813 # Check include patterns
1814 if include_patterns:
1815 if not any(pattern.search(item_name) for pattern in include_patterns):
1816 continue
1818 # Check exclude patterns
1819 if exclude_patterns:
1820 if any(pattern.search(item_name) for pattern in exclude_patterns):
1821 continue
1823 filtered.append(item)
1825 return filtered
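# Example (hypothetical items): the key callable lets the same filter run
# over objects, e.g. functions projected to their names; plain strings
# stand in for Function objects here:
#
#     >>> self._filter_list(["alpha", "beta_test"],
#     ...                   {"exclude": [r"_test$"]}, key=lambda f: f)
#     ['alpha']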
1827 def _dict_to_file_model(self, data: Dict[str, Any]) -> FileModel:
1828 """Convert dictionary back to FileModel"""
1830 # Convert structs
1831 structs = {}
1832 for name, struct_data in data.get("structs", {}).items():
1833 fields = [
1834 Field(f["name"], f["type"]) for f in struct_data.get("fields", [])
1835 ]
1836 structs[name] = Struct(
1837 name,
1838 fields,
1839 struct_data.get("methods", []),
1840 struct_data.get("tag_name", ""),
1841 struct_data.get("uses", []),
1842 )
1844 # Convert enums
1845 enums = {}
1846 for name, enum_data in data.get("enums", {}).items():
1847 values = []
1848 for value_data in enum_data.get("values", []):
1849 if isinstance(value_data, dict):
1850 values.append(
1851 EnumValue(value_data["name"], value_data.get("value"))
1852 )
1853 else:
1854 values.append(EnumValue(value_data))
1855 enums[name] = Enum(name, values)
1857 # Convert unions
1858 unions = {}
1859 for name, union_data in data.get("unions", {}).items():
1860 fields = [Field(f["name"], f["type"]) for f in union_data.get("fields", [])]
1861 unions[name] = Union(
1862 name, fields, union_data.get("tag_name", ""), union_data.get("uses", [])
1863 )
1865 # Convert aliases
1866 aliases = {}
1867 for name, alias_data in data.get("aliases", {}).items():
1868 if isinstance(alias_data, dict):
1869 aliases[name] = Alias(
1870 alias_data.get("name", name),
1871 alias_data.get("original_type", ""),
1872 alias_data.get("uses", []),
1873 )
1874 else:
1875 # Handle legacy format where aliases was Dict[str, str]
1876 aliases[name] = Alias(name, alias_data, [])
1878 # Convert functions
1879 functions = []
1880 for func_data in data.get("functions", []):
1881 parameters = [
1882 Field(p["name"], p["type"]) for p in func_data.get("parameters", [])
1883 ]
1884 functions.append(
1885 Function(
1886 func_data["name"],
1887 func_data["return_type"],
1888 parameters,
1889 is_static=func_data.get("is_static", False),
1890 is_declaration=func_data.get("is_declaration", False),
1891 )
1892 )
1894 # Convert globals
1895 globals_list = []
1896 for global_data in data.get("globals", []):
1897 globals_list.append(Field(global_data["name"], global_data["type"]))
1899 return FileModel(
1900 file_path=data["file_path"],
1901 structs=structs,
1902 enums=enums,
1903 unions=unions,
1904 functions=functions,
1905 globals=globals_list,
1906 includes=set(data.get("includes", [])),
1907 macros=data.get("macros", []),
1908 aliases=aliases,
1909 anonymous_relationships=data.get("anonymous_relationships", {}),
1910 )
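# Example (minimal hypothetical payload; only file_path is required, every
# other key falls back to an empty default):
#
#     >>> fm = self._dict_to_file_model({
#     ...     "file_path": "src/main.c",
#     ...     "functions": [{"name": "main", "return_type": "int"}],
#     ... })
#     >>> fm.functions[0].name
#     'main'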
1912 def _save_model(self, model: ProjectModel, output_file: str) -> None:
1913 """Save model to JSON file"""
1914 try:
1915 model.save(output_file)
1916 self.logger.debug("Model saved to: %s", output_file)
1917 except Exception as e:
1918 raise ValueError(f"Failed to save model to {output_file}: {e}") from e