Coverage for src/c2puml/core/preprocessor.py: 68%
294 statements
« prev ^ index » next coverage.py v7.10.4, created at 2025-08-20 03:53 +0000
« prev ^ index » next coverage.py v7.10.4, created at 2025-08-20 03:53 +0000
1#!/usr/bin/env python3
2"""
3Preprocessor module for C to PlantUML converter.
4Handles #if, #elif, #else, #endif directives and conditional compilation.
5"""
7import logging
8import re
9from dataclasses import dataclass
10from enum import Enum
11from typing import Dict, List, Optional, Set, Tuple
13from .parser_tokenizer import Token, TokenType
class PreprocessorDirective(Enum):
    """Preprocessor directives recognised by this module.

    Each member's value is the directive keyword as written in C source,
    without the leading ``#``.
    """

    # Conditional-compilation directives.
    IF = "if"
    ELIF = "elif"
    ELSE = "else"
    ENDIF = "endif"
    IFDEF = "ifdef"
    IFNDEF = "ifndef"

    # Macro-table directives.
    DEFINE = "define"
    UNDEF = "undef"
@dataclass
class PreprocessorBlock:
    """One conditional region of a token stream.

    Blocks form a tree: ``children`` holds the conditionals nested inside
    this one and ``parent`` points back at the enclosing block (``None`` for
    a top-level block).
    """

    # Directive that opened the block.
    directive: "PreprocessorDirective"
    # Condition text taken from the directive ("" when there is none).
    condition: str
    # Index of the token holding the opening directive.
    start_token: int
    # Index of the token that closes the block; -1 while it is still open.
    end_token: int
    # Whether the tokens of this block should be kept.
    is_active: bool
    # Conditionals nested directly inside this block.
    children: List["PreprocessorBlock"]
    # Enclosing block, if any.
    parent: Optional["PreprocessorBlock"] = None
class PreprocessorEvaluator:
    """Evaluates preprocessor conditions and manages conditional compilation.

    The evaluator keeps a table of known macros, evaluates a pragmatic subset
    of ``#if`` expressions (``defined``, ``!``, ``&&``, ``||``, parentheses and
    the six comparison operators on simple operands) and removes tokens that
    sit in inactive conditional branches.  Arithmetic and function-like macro
    invocation are not supported; unrecognised conditions evaluate to True.
    """

    def __init__(self):
        self.logger = logging.getLogger(__name__)
        # Names of every macro currently defined (with or without a value).
        self.defined_macros: Set[str] = set()
        # Replacement text for macros that were defined with a value.
        self.macro_values: Dict[str, str] = {}
        self.blocks: List["PreprocessorBlock"] = []
        self.current_block_stack: List["PreprocessorBlock"] = []

    # ------------------------------------------------------------------
    # Macro table
    # ------------------------------------------------------------------

    def add_define(self, name: str, value: str = ""):
        """Define ``name``, replacing any previous definition.

        Args:
            name: Macro name.
            value: Replacement text; empty for a value-less ``#define``.
        """
        self.defined_macros.add(name)
        if value:
            self.macro_values[name] = value
        else:
            # Redefining a macro without a value must not leave the old
            # replacement text behind.
            self.macro_values.pop(name, None)

    def add_undef(self, name: str):
        """Remove ``name`` from the macro table; unknown names are ignored."""
        self.defined_macros.discard(name)
        self.macro_values.pop(name, None)

    def is_defined(self, name: str) -> bool:
        """Return True if ``name`` is currently defined."""
        return name in self.defined_macros

    def get_macro_value(self, name: str) -> str:
        """Return the replacement text of ``name`` ("" if it has none)."""
        return self.macro_values.get(name, "")

    # ------------------------------------------------------------------
    # Condition evaluation
    # ------------------------------------------------------------------

    def evaluate_condition(self, condition: str) -> bool:
        """Evaluate the expression of an ``#if`` / ``#elif`` directive.

        ``defined`` operators are resolved first so that their operands are
        not macro-expanded, then macros with a value are substituted, and
        finally the resulting text is evaluated.

        Args:
            condition: Expression text without the directive keyword.

        Returns:
            The truth value of the expression.  An empty condition and any
            expression the evaluator does not understand yield True.
        """
        if not condition.strip():
            return True

        condition = self._expand_defined_operator(condition)
        condition = self._expand_macros(condition)
        return self._evaluate_simple_expression(condition)

    def _expand_defined_operator(self, condition: str) -> str:
        """Replace ``defined(NAME)`` and ``defined NAME`` with ``1`` or ``0``."""

        def replace_defined(match):
            macro_name = match.group(1) or match.group(2)
            return "1" if self.is_defined(macro_name) else "0"

        # \b keeps identifiers that merely end in "defined" untouched.
        return re.sub(
            r"\bdefined\b\s*(?:\(\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*\)"
            r"|([a-zA-Z_][a-zA-Z0-9_]*))",
            replace_defined,
            condition,
        )

    def _expand_macros(self, condition: str) -> str:
        """Substitute every macro that has a value by its replacement text."""
        for macro_name, macro_value in self.macro_values.items():
            if not macro_name:
                continue
            pattern = r"\b" + re.escape(macro_name) + r"\b"
            # A callable replacement inserts the value literally; a plain
            # string would be treated as a template and have its backslash
            # sequences interpreted.
            condition = re.sub(
                pattern, lambda _match, text=macro_value: text, condition
            )
        return condition

    @staticmethod
    def _strip_outer_parentheses(expression: str) -> str:
        """Strip whitespace and parentheses that wrap the whole expression.

        ``(a) && (b)`` is returned unchanged because its first parenthesis
        closes before the end of the text.
        """
        expression = expression.strip()
        while expression.startswith("(") and expression.endswith(")"):
            depth = 0
            closing = -1
            for position, char in enumerate(expression):
                if char == "(":
                    depth += 1
                elif char == ")":
                    depth -= 1
                    if depth == 0:
                        closing = position
                        break
            if closing != len(expression) - 1:
                break
            expression = expression[1:-1].strip()
        return expression

    @staticmethod
    def _split_top_level(
        expression: str, separator: str, maxsplit: int = -1
    ) -> List[str]:
        """Split ``expression`` on ``separator`` outside of parentheses.

        Args:
            expression: Text to split.
            separator: Operator text to split on.
            maxsplit: Maximum number of splits; negative means no limit.

        Returns:
            The pieces in order; a single-element list when the separator
            does not occur at the top level.
        """
        parts: List[str] = []
        depth = 0
        start = 0
        position = 0
        while position < len(expression):
            char = expression[position]
            if char == "(":
                depth += 1
            elif char == ")":
                depth -= 1
            elif (
                depth <= 0
                and expression.startswith(separator, position)
                and (maxsplit < 0 or len(parts) < maxsplit)
            ):
                parts.append(expression[start:position])
                position += len(separator)
                start = position
                continue
            position += 1
        parts.append(expression[start:])
        return parts

    def _evaluate_simple_expression(self, condition: str) -> bool:
        """Evaluate an already macro-expanded condition.

        Operators are tried from loosest to tightest binding (``||``, ``&&``,
        equality, relational, unary ``!``) so that compound conditions are
        split at the right place.

        Returns:
            The truth value; True when the condition is not understood
            (kept for backward compatibility).
        """
        try:
            condition = self._strip_outer_parentheses(condition)

            lowered = condition.lower()
            if lowered in ("1", "true", "yes"):
                return True
            if lowered in ("0", "false", "no"):
                return False

            # Logical operators: || binds looser than &&.
            for separator, combine in (("||", any), ("&&", all)):
                operands = self._split_top_level(condition, separator)
                if len(operands) > 1:
                    return combine(
                        self._evaluate_simple_expression(operand)
                        for operand in operands
                    )

            for symbol in ("==", "!="):
                sides = self._split_top_level(condition, symbol, 1)
                if len(sides) == 2:
                    return self._compare_equality(
                        sides[0], sides[1], negate=(symbol == "!=")
                    )

            # Two-character operators first, otherwise ">=" would be split
            # at ">" and leave "= value" as the right operand.
            for symbol in (">=", "<=", ">", "<"):
                sides = self._split_top_level(condition, symbol, 1)
                if len(sides) == 2:
                    return self._compare_numeric(sides[0], sides[1], symbol)

            if condition.startswith("!"):
                # Peel one negation at a time so that "!!X" equals "X".
                return not self._evaluate_simple_expression(condition[1:])

            # A bare macro name that survived expansion has no value.
            if condition in self.defined_macros:
                return True

            try:
                return bool(int(condition))
            except ValueError:
                pass

            # Default to True for unknown conditions (backward compatibility).
            return True

        except ValueError as e:
            self.logger.warning(
                "Error evaluating preprocessor condition '%s': %s", condition, e
            )
            # Default to True for unknown conditions (backward compatibility).
            return True

    def _compare_equality(self, left: str, right: str, negate: bool) -> bool:
        """Evaluate ``left == right`` (or ``!=`` when ``negate`` is True).

        A side that names a defined macro is replaced by its value and the
        two sides are compared as text.  Otherwise two integer literals are
        compared numerically.  For anything else the legacy behaviour is
        kept: ``==`` is True and ``!=`` compares the raw text.
        """
        left_val = self._strip_outer_parentheses(left)
        right_val = self._strip_outer_parentheses(right)
        left_known = left_val in self.defined_macros
        right_known = right_val in self.defined_macros

        if left_known or right_known:
            lhs = self.get_macro_value(left_val) if left_known else left_val
            rhs = self.get_macro_value(right_val) if right_known else right_val
            return lhs != rhs if negate else lhs == rhs

        try:
            equal = int(left_val) == int(right_val)
        except ValueError:
            return left_val != right_val if negate else True
        return not equal if negate else equal

    def _compare_numeric(self, left: str, right: str, symbol: str) -> bool:
        """Evaluate a relational comparison; False if a side is not numeric."""
        try:
            lhs = float(self._evaluate_operand(left))
            rhs = float(self._evaluate_operand(right))
        except ValueError:
            return False
        if symbol == ">=":
            return lhs >= rhs
        if symbol == "<=":
            return lhs <= rhs
        if symbol == ">":
            return lhs > rhs
        return lhs < rhs

    def _evaluate_operand(self, operand: str) -> str:
        """Return the macro value of ``operand`` or the operand text itself."""
        operand = self._strip_outer_parentheses(operand)
        if operand in self.defined_macros:
            return self.get_macro_value(operand)
        return operand

    # ------------------------------------------------------------------
    # Block parsing
    # ------------------------------------------------------------------

    def parse_preprocessor_blocks(
        self, tokens: List["Token"]
    ) -> List["PreprocessorBlock"]:
        """Build the tree of conditional blocks found in ``tokens``.

        Every branch of a conditional (``#if``/``#ifdef``/``#ifndef``, each
        ``#elif`` and the ``#else``) becomes its own block, so each branch
        carries its own ``is_active`` flag and token range.  ``#define`` and
        ``#undef`` update the macro table only when they sit in active code.
        Directives this module does not know are ignored.

        Args:
            tokens: Token stream of one file.

        Returns:
            The top-level blocks; nested blocks hang off ``children``.  A
            block without a matching ``#endif`` keeps ``end_token == -1``.
        """
        blocks: List["PreprocessorBlock"] = []
        # Branches that are currently open, innermost last.
        stack: List["PreprocessorBlock"] = []
        # Parallel to ``stack``: has any branch of that conditional been
        # active so far?  Decides whether a later #elif/#else may activate.
        branch_taken: List[bool] = []
        openers = (
            PreprocessorDirective.IF,
            PreprocessorDirective.IFDEF,
            PreprocessorDirective.IFNDEF,
        )
        alternatives = (PreprocessorDirective.ELIF, PreprocessorDirective.ELSE)

        for index, token in enumerate(tokens):
            if token.type != TokenType.PREPROCESSOR:
                continue

            directive = self._parse_directive(token.value)

            if directive in openers:
                # _parse_directive falls back to IF for directives it does
                # not know; those must not open a block that never closes.
                if (
                    directive == PreprocessorDirective.IF
                    and not token.value.strip().startswith("#if")
                ):
                    continue
                condition = self._extract_condition(token.value, directive)
                active = self._should_activate_block(directive, condition, stack)
                stack.append(
                    self._open_block(
                        directive, condition, index, active, stack, blocks
                    )
                )
                branch_taken.append(active)

            elif directive in alternatives:
                if not stack:
                    continue
                # Close the previous branch just before this directive and
                # open a sibling branch in the same enclosing block.
                stack.pop().end_token = index - 1
                condition = self._extract_condition(token.value, directive)
                active = self._should_activate_block(
                    directive, condition, stack, branch_taken[-1]
                )
                stack.append(
                    self._open_block(
                        directive, condition, index, active, stack, blocks
                    )
                )
                branch_taken[-1] = branch_taken[-1] or active

            elif directive == PreprocessorDirective.ENDIF:
                if stack:
                    stack.pop().end_token = index
                    branch_taken.pop()

            elif directive == PreprocessorDirective.DEFINE:
                if not stack or stack[-1].is_active:
                    macro_name, macro_value = self._parse_define(token.value)
                    if macro_name:
                        self.add_define(macro_name, macro_value)

            elif directive == PreprocessorDirective.UNDEF:
                if not stack or stack[-1].is_active:
                    macro_name = self._parse_undef(token.value)
                    if macro_name:
                        self.add_undef(macro_name)

        return blocks

    @staticmethod
    def _open_block(
        directive: "PreprocessorDirective",
        condition: str,
        start: int,
        is_active: bool,
        stack: List["PreprocessorBlock"],
        blocks: List["PreprocessorBlock"],
    ) -> "PreprocessorBlock":
        """Create a block and attach it to its parent or to ``blocks``.

        ``stack`` must hold only the blocks that enclose the new one.
        """
        parent = stack[-1] if stack else None
        block = PreprocessorBlock(
            directive=directive,
            condition=condition,
            start_token=start,
            end_token=-1,  # Set when the branch is closed.
            is_active=is_active,
            children=[],
            parent=parent,
        )
        if parent is not None:
            parent.children.append(block)
        else:
            blocks.append(block)
        return block

    def _parse_directive(self, value: str) -> "PreprocessorDirective":
        """Return the directive a preprocessor token starts with.

        Text that matches no known directive is reported as ``IF``.
        """
        value = value.strip()
        # "#ifdef" and "#ifndef" must be tested before their prefix "#if".
        prefixes = (
            ("#ifdef", PreprocessorDirective.IFDEF),
            ("#ifndef", PreprocessorDirective.IFNDEF),
            ("#if", PreprocessorDirective.IF),
            ("#elif", PreprocessorDirective.ELIF),
            ("#else", PreprocessorDirective.ELSE),
            ("#endif", PreprocessorDirective.ENDIF),
            ("#define", PreprocessorDirective.DEFINE),
            ("#undef", PreprocessorDirective.UNDEF),
        )
        for prefix, directive in prefixes:
            if value.startswith(prefix):
                return directive
        return PreprocessorDirective.IF  # Default

    def _extract_condition(
        self, value: str, directive: "PreprocessorDirective"
    ) -> str:
        """Return the condition text of a conditional directive.

        For ``#ifdef`` / ``#ifndef`` this is the macro name, for ``#if`` /
        ``#elif`` the expression.  Other directives, and directives without
        a condition, yield "".
        """
        value = value.strip()

        if directive == PreprocessorDirective.IFDEF:
            match = re.search(r"#ifdef\s+([a-zA-Z_][a-zA-Z0-9_]*)", value)
            return match.group(1) if match else ""
        if directive == PreprocessorDirective.IFNDEF:
            match = re.search(r"#ifndef\s+([a-zA-Z_][a-zA-Z0-9_]*)", value)
            return match.group(1) if match else ""
        # \s* rather than \s+ so that "#if(X)" is accepted as well.
        if directive == PreprocessorDirective.IF:
            match = re.search(r"#if\s*(.+)", value)
            return match.group(1).strip() if match else ""
        if directive == PreprocessorDirective.ELIF:
            match = re.search(r"#elif\s*(.+)", value)
            return match.group(1).strip() if match else ""
        return ""

    def _parse_define(self, value: str) -> Tuple[str, str]:
        """Split a ``#define`` directive into ``(name, value)``.

        The value is everything after the name, stripped; it is "" for a
        value-less define.  ``("", "")`` is returned when no name is found.
        """
        value = value.strip()
        # (.*) instead of (.+): a value is optional, and requiring one made
        # the name give up its last character to satisfy the pattern.
        match = re.search(r"#define\s+([a-zA-Z_][a-zA-Z0-9_]*)(.*)", value)
        if match:
            return match.group(1), match.group(2).strip()
        return "", ""

    def _parse_undef(self, value: str) -> str:
        """Return the macro name of an ``#undef`` directive ("" if absent)."""
        value = value.strip()
        match = re.search(r"#undef\s+([a-zA-Z_][a-zA-Z0-9_]*)", value)
        return match.group(1) if match else ""

    def _should_activate_block(
        self,
        directive: "PreprocessorDirective",
        condition: str,
        stack: List["PreprocessorBlock"],
        branch_taken: bool = False,
    ) -> bool:
        """Decide whether a branch is active.

        Args:
            directive: Directive that opens the branch.
            condition: Its condition text (macro name or expression).
            stack: Blocks enclosing the branch, innermost last.  For
                ``#elif`` / ``#else`` the branch being replaced must already
                have been removed.
            branch_taken: Whether an earlier branch of the same conditional
                was active; only relevant for ``#elif`` / ``#else``.

        Returns:
            True if the tokens of the branch should be kept.
        """
        # Nothing inside an inactive region can be active.
        if stack and not stack[-1].is_active:
            return False

        if directive == PreprocessorDirective.IFDEF:
            return self.is_defined(condition)
        if directive == PreprocessorDirective.IFNDEF:
            return not self.is_defined(condition)
        if directive == PreprocessorDirective.IF:
            return self.evaluate_condition(condition)
        if directive == PreprocessorDirective.ELIF:
            return not branch_taken and self.evaluate_condition(condition)
        if directive == PreprocessorDirective.ELSE:
            return not branch_taken
        return False

    # ------------------------------------------------------------------
    # Token filtering
    # ------------------------------------------------------------------

    def filter_tokens(self, tokens: List["Token"]) -> List["Token"]:
        """Return the tokens that survive conditional compilation.

        Preprocessor directive tokens themselves are always dropped.  Parsing
        the blocks also applies the ``#define`` / ``#undef`` directives found
        in active code to the macro table.
        """
        blocks = self.parse_preprocessor_blocks(tokens)
        return [
            token
            for index, token in enumerate(tokens)
            if token.type != TokenType.PREPROCESSOR
            and self._is_token_in_active_block(index, blocks)
        ]

    def _is_token_in_active_block(
        self, token_index: int, blocks: List["PreprocessorBlock"]
    ) -> bool:
        """Return False if the token lies in any inactive block.

        The innermost block containing the token decides; a token outside
        every block is active.
        """
        for block in blocks:
            if self._is_token_in_block(token_index, block):
                if not block.is_active:
                    return False
                return self._is_token_in_active_block(token_index, block.children)
        return True  # Default to True if not in any block

    def _is_token_in_block(
        self, token_index: int, block: "PreprocessorBlock"
    ) -> bool:
        """Return True if the token index lies within the block's range.

        A block that was never closed (``end_token == -1``) contains no
        token.
        """
        return block.start_token <= token_index <= block.end_token
class PreprocessorManager:
    """High-level interface for preprocessor management.

    Owns one ``PreprocessorEvaluator``; macros added through this manager
    stay in that evaluator for later calls.
    """

    def __init__(self):
        self.evaluator = PreprocessorEvaluator()
        self.logger = logging.getLogger(__name__)

    def process_file(
        self, tokens: List[Token], defines: Optional[Dict[str, str]] = None
    ) -> List[Token]:
        """Process a file's tokens through the preprocessor.

        Args:
            tokens: Token stream of the file.
            defines: Optional macro name -> value mapping to define before
                the tokens are filtered.

        Returns:
            The tokens kept by the evaluator's ``filter_tokens``.
        """
        if defines:
            for name, value in defines.items():
                self.evaluator.add_define(name, value)

        filtered_tokens = self.evaluator.filter_tokens(tokens)

        # Lazy %-style arguments, matching the other debug call in this
        # class, so the message is only built when debug logging is enabled.
        self.logger.debug(
            "Preprocessor: %d tokens -> %d tokens",
            len(tokens),
            len(filtered_tokens),
        )
        return filtered_tokens

    def add_defines_from_content(self, tokens: List[Token]):
        """Add every ``#define`` found in ``tokens`` to the evaluator.

        Defines are collected regardless of any surrounding conditional
        directives.
        """
        for token in tokens:
            if token.type != TokenType.PREPROCESSOR:
                continue
            # Strip first: the evaluator's own parsing strips the token text
            # too, so a leading blank must not hide the directive here.
            if not token.value.strip().startswith("#define"):
                continue
            name, value = self.evaluator._parse_define(token.value)
            if name:
                self.evaluator.add_define(name, value)
                self.logger.debug("Preprocessor: Added define %s = %s", name, value)