I'm able to do simple rules but having trouble evaluating complex rules in nested parenthesis.
Here's nested rule definition I'm trying to evaluate:
definition = '((K00925 K00625),K01895) (K00193+K00197+K00194) (K00577+K00578+K00579+K00580+K00581-K00582-K00583+K00584) (K00399+K00401+K00402) (K22480+K22481+K22482,K03388+K03389+K03390,K08264+K08265,K03388+K03389+K03390+K14127+(K14126+K14128,K22516+K00125))'
Here's the code I have that is almost correct:
import re def rule_splitter(rule: str, split_characters: set = ) -> set: """ Split rule by characters. Args: rule (str): Boolean logical string. split_characters (list): List of characters to split in rule. Returns: set: Unique tokens in a rule. """ rule_decomposed = str(rule) if split_characters: for character in split_characters: character = character.strip() if character: rule_decomposed = rule_decomposed.replace(character, " ") unique_tokens = set(filter(bool, rule_decomposed.split())) return unique_tokens def find_rules(definition: str) -> list: """ Find and extract rules from the definition string. Args: definition (str): Complex boolean logical string with multiple rules. Returns: list: List of extracted rules as strings. """ rules = [] stack = [] current_rule = "" outside_rule = "" for char in definition: if char == '(': if stack: current_rule += char if outside_rule.strip(): rules.append(outside_rule.strip()) outside_rule = "" stack.append(char) elif char == ')': stack.pop() if stack: current_rule += char else: current_rule = f"()" rules.append(current_rule) current_rule = "" else: if stack: current_rule += char else: outside_rule += char # Add any remaining outside_rule at the end of the loop if outside_rule.strip(): rules.append(outside_rule.strip()) return rules def evaluate_rule(rule: str, tokens: set, replace=) -> bool: """ Evaluate a string of boolean logicals. Args: rule (str): Boolean logical string. tokens (set): List of tokens in rule. replace (dict, optional): Replace boolean characters. Defaults to . Returns: bool: Evaluated rule. """ # Handle optional tokens prefixed by '-' rule = re.sub(r'-\w+', '', rule) # Replace characters for standard logical formatting if replace: for character_before, character_after in replace.items(): rule = rule.replace(character_before, character_after) # Split the rule into individual symbols unique_symbols = rule_splitter(rule, replace.values()) # Create a dictionary with the presence of each symbol in the tokens token_to_bool = # Parse and evaluate the rule using a recursive descent parser def parse_expression(expression: str) -> bool: expression = expression.strip() # Handle nested expressions if expression.startswith('(') and expression.endswith(')'): return parse_expression(expression[1:-1]) # Evaluate 'OR' conditions if ' or ' in expression: parts = expression.split(' or ') return any(parse_expression(part) for part in parts) # Evaluate 'AND' conditions elif ' and ' in expression: parts = expression.split(' and ') return all(parse_expression(part) for part in parts) # Evaluate individual token presence else: return token_to_bool.get(expression.strip(), False) return parse_expression(rule) def evaluate_definition(definition: str, tokens: set) -> dict: """ Evaluate a complex definition string involving multiple rules. Args: definition (str): Complex boolean logical string with multiple rules. tokens (set): Set of tokens to check against the rules. Returns: dict: Dictionary with each rule and its evaluated result. """ # Extract individual rules from the definition rules = find_rules(definition) # Evaluate each rule rule_results = <> for rule in rules: try: cleaned_rule = rule[1:-1] if rule.startswith('(') and rule.endswith(')') else rule # Remove outer parentheses if they exist result = evaluate_rule(cleaned_rule, tokens) except SyntaxError: # Handle syntax errors from eval() due to incorrect formatting result = False rule_results[rule] = result return rule_results # Example usage definition = '((K00925 K00625),K01895) (K00193+K00197+K00194) (K00577+K00578+K00579+K00580+K00581-K00582-K00583+K00584) (K00399+K00401+K00402) (K22480+K22481+K22482,K03388+K03389+K03390,K08264+K08265,K03388+K03389+K03390+K14127+(K14126+K14128,K22516+K00125))' tokens = < 'K00925', 'K00625', # 'K01895', 'K00193', 'K00197', 'K00194', 'K00577', 'K00578', 'K00579', 'K00580', 'K00581', # 'K00582', 'K00584', 'K00399', 'K00401', 'K00402', 'K22480', # 'K22481', # 'K22482', 'K03388', 'K03389', 'K03390', # 'K08264', # 'K08265', 'K14127', 'K14126', 'K14128', 'K22516', # 'K00125' >result = evaluate_definition(definition, tokens) # result #
Note that the implementation is splitting the first rule.
Here is the following output I'm expecting:
Here's a graphical representation of the information flow for this complex rule definition:
It should also be able to handle this rule:
rule_edge_case='((K00134,K00150) K00927,K11389)' with the following query tokens: tokens = < 'K00134', 'K00927'>. This should be True because either (K00134 OR K00150) with K00927 are present which is sufficient.
Edit 1: I had an error before and the last rule was actually True and not False.
Edit 2: I've changed "items" to "tokens" and modified which tokens are used for evaluation to capture better edge cases.