1

Given an arbitrary number of TextField inputs (t1, t2, t3, ...) and a custom boolean expression entered as a string in a JTextArea, I need to check whether lines in a file match the custom boolean expression. The expression must support nested parentheses.

Example:

User enters "str1" into t1 , "str2" into t2, "str3" into t3, "str4" into t4, "str5" into t5.

User enters the following into the JTextArea for the custom boolean:

"not ((t1 and not t3) or (t4 and t2)) or t5"

Then based on these inputs, I must filter a file and return lines in the file that match the custom boolean based on a "contains" relationship (e.g. "t1 and not t3" means a line must contain the string t1 and not contain the string t3).

For example a file with the following two lines:

str5

str4 str2

The filter would only return str5 because it is the only line that matches the custom boolean.

I am having trouble even getting started. I have tried to think of a recursive solution but couldn't come up with anything. Also I tried non-recursive solutions but can't come up with anything either.

There is also the problem of the end result boolean needing to take in a parameter (each line in the file). I thought of maybe producing a sequence of operations to perform rather than a boolean that somehow takes in a parameter. But I can't figure out how to get this sequence in the first place.

Here is what I have now. It is very bad and I am thinking of scrapping this approach.

/**
 * Exploratory pre-processing of a compacted boolean expression.
 *
 * getPairs() records the index span of every matched pair of parentheses; the
 * other passes then annotate those spans in place by appending extra
 * comma-separated fields to the span strings. Nothing here evaluates the
 * expression. findAtoms(), computeAtoms() and replaceAtoms() all read the
 * state that getPairs() stores, so getPairs() has to run first.
 */
public class CustomInputParser {
// Parenthesis spans encoded as "openIndex,closeIndex"; later passes append more fields.
private ArrayList<String> pairs;
// The expression text most recently handed to getPairs().
private String inp;
// Filled by computeAtoms(): index of an opening parenthesis -> "innerText#closeIndex".
private HashMap<Integer,String> atomMap;

/** Creates a parser with no spans, an empty expression and an empty atom map. */
public CustomInputParser() {
    this.pairs = null;
    this.inp = "";
    this.atomMap = new HashMap<Integer,String>();
}

/**
 * Marks every span whose inner text contains no further '(' by appending
 * ",@" to its entry in pairs.
 */
public void findAtoms() {
    int i = 0;
    for(String s : this.pairs) {
        String[] indices = s.split(",");
        int begin = Integer.valueOf(indices[0]);
        int end = Integer.valueOf(indices[1]);
        // Text strictly between the two parentheses has no nested group.
        if(!inp.substring(begin+1, end).contains("(")) {

            // set() replaces an element without changing the list size, so the for-each stays valid.
            this.pairs.set(i, this.pairs.get(i) + ",@");
        }
        i++;
    }
    
}

/**
 * For every entry marked with ",@", appends the span's inner text as a
 * fourth field and records it in atomMap under the opening index.
 * Prints pairs and atomMap to standard output afterwards.
 */
public void computeAtoms() {
    int i = 0;
    for(String s : this.pairs) {
        if(s.contains(",@")) {
            String[] indices = s.split(",");
            int begin = Integer.valueOf(indices[0]);
            int end = Integer.valueOf(indices[1]);
                //this.pairs.set(i,this.pairs.get(i).replace(",a", ""));
                // Entry becomes "open,close,@,innerText".
                this.pairs.set(i, this.pairs.get(i) + ","+inp.substring(begin+1, end));
                // Field 3 of the updated entry is the inner text just appended.
                this.atomMap.put(begin,this.pairs.get(i).split(",")[3]+"#"+String.valueOf(end));
            
        }
        i++;
    }
    System.out.println(this.pairs.toString());
    System.out.println(this.atomMap.toString());
}

/**
 * Annotates the entries that contain none of the letters 'o', 'a' or 'n'
 * (presumably the spans computeAtoms() did not expand, since main() encodes
 * the operators with those letters). For each '(' found inside such a span,
 * appends ",index#closeIndex>innerText" when atomMap knows that index and
 * otherwise prefixes the entry with "!". Prints pairs afterwards.
 */
public void replaceAtoms() {
    int i = 0;
    for(String s : this.pairs) {
        if(!(s.contains("o") || s.contains("a") || s.contains("n"))) {
            String[] indices = s.split(",");
            // Start scanning one past the span's own opening parenthesis.
            int begin = Integer.valueOf(indices[0])+1;
            int end = Integer.valueOf(indices[1]);
            for(int j = begin; j < end; j++) {
                if(inp.charAt(j) == '(') {
                    if(atomMap.containsKey(j)) {
                        this.pairs.set(i, this.pairs.get(i) + ","+j+"#"+atomMap.get(j).split("#")[1]+">"+atomMap.get(j).split("#")[0]);
                    }
                    else {
                        // A nested group that is not a recorded atom: flag the entry.
                        this.pairs.set(i,"!"+ this.pairs.get(i));
                    }
                }
            }
            
            
        }
        i++;
    }
    System.out.println(this.pairs.toString());
}
/**
 * Finds every matched pair of parentheses in str using a stack of opening
 * indices. Stores str as the current expression and, when the parentheses
 * balance, stores the result as the current span list.
 *
 * @param str the expression text to scan
 * @return the spans as "openIndex,closeIndex" strings in the order their
 *         closing parentheses appear, or null if the parentheses are unbalanced
 */
public ArrayList<String> getPairs(String str){
    this.inp = str;
    ArrayList<String> res = new ArrayList<String>();
    char[] arr = str.toCharArray();
    Stack<Integer> s = new Stack<Integer>();
    for(int i = 0; i < arr.length; i++) {
        if(arr[i] == '(') {
            
            s.push(i);
        }
        if(arr[i] == ')') {
            if(s.empty()) {
                // Closing parenthesis with no opener.
                return null;
            }
            else {
                Integer start  = s.pop();
                Integer end = Integer.valueOf(i);
                res.add(start.toString() + "," + end.toString());
                
                
            }
        }
        
    }
    
    if(!s.empty()) {
        // At least one opener was never closed.
        return null;
    }
    this.pairs = res;
    return res;
    
}


/**
 * Demo driver: compacts a sample expression and runs the passes in order,
 * printing the intermediate state.
 */
public static void main(String[] args) {
    String x = "((not t1 and ((not t2 or t4) or (t3 or t4))) or (t5 and not t6)) and t7";
    // Compact form: not -> n, and -> a, or -> o, then drop every 't' and every space.
    x = x.replace("not", "n").replace("and","a").replace("or", "o").replace("t", "").replace(" ", "");
    System.out.println(x);
    CustomInputParser c = new CustomInputParser();
    System.out.println(c.getPairs(x).toString());
    c.findAtoms();
    c.computeAtoms();
    c.replaceAtoms();
    
}

}

s_herbert
  • 11
  • 5
  • Your explanation is confused and confusing. Edit your question to describe all inputs, all desired outputs, and the general method for getting from one to the other. You haven't done any of this. Some of it's maybe guessable, but, for example, the role of the file is a mystery. – Gene Sep 23 '21 at 00:29
  • This may help. [Shunting Yard Algorithm](https://en.wikipedia.org/wiki/Shunting-yard_algorithm) – WJS Sep 23 '21 at 00:35
  • @Gene I have updated it with better explanation of inputs and desired output. Thanks. – s_herbert Sep 23 '21 at 00:40
  • It's still a mystery. How do you "match the custom boolean" ? You need to describe how you'd do it without code. – Gene Sep 23 '21 at 00:51
  • Oh I think I guess it now. The terms of the boolean denote **presence** in a line of the file. Is that right? If so you really need to add that important point to the question. – Gene Sep 23 '21 at 00:54
  • @Gene you guessed correctly. I just added that edit. Thanks – s_herbert Sep 23 '21 at 01:07
  • @WSJ Thanks for that. The shunting yard algorithm seems promising. – s_herbert Sep 23 '21 at 01:08
  • I find recursive descent parsing to be simpler than shunting yard. A grammar to start with is `E -> T op T | T $; T -> not T | O; O -> var | '(' E ')'` where op is `and` or `or` and $ is "end of input." This should work fine with either RD or SY. – Gene Sep 23 '21 at 03:58
  • A brilliant intro to recursive descent parsing for expression evaluation is given in the [Dragon Book](https://www.thriftbooks.com/w/compiladores-principios-tecnicas-y-herramientas_alfred-v-aho/248872/item/3908261/?gclid=Cj0KCQjwqKuKBhCxARIsACf4XuHRfZym4DannPz0jBy9wT9YBGklY3RaTuV36heHW04nZAdgw6zdi5gaAoKoEALw_wcB#idiq=3908261&edition=2350562) in a very early chapter. If you can get your hands on a copy, much will become clear. – Gene Sep 23 '21 at 04:10
  • @WJS I was able to implement the shunting yard algorithm to convert the input from infix to postfix notation. But I can't figure out how to implement the postfix stack evaluator since all the resources I've come across are for standard calculation ops (PEMDAS). Thanks again for your suggestion, this definitely seems the way to go! – s_herbert Sep 23 '21 at 05:04

3 Answers

1

The first step is to tokenize the input. Define

// Token kinds produced by the tokenizer: VAR is a variable reference, LP/RP are the
// left and right parentheses, NOT/AND/OR are the operators, END marks the end of input.
enum Token {VAR, LP, RP, NOT, AND, OR, END}

LP and RP are parentheses. Now define a tokenizer class that looks something like this:

// Interface sketch of the scanner; the method bodies are intentionally elided.
class Tokenizer {
  // Takes the expression text to be scanned.
  Tokenizer(String input) {...}
  // Presumably rewinds to the start of the input so the same expression can be scanned again.
  void reset() {...}
  // Returns the next token of the input; END is the last token returned.
  Token getNext() {...}
  // Name of the variable (e.g. "t42"); meaningful right after getNext() returned VAR.
  String getVarName() {...}
}

Calling getNext() on your example in a loop should return

LP LP NOT VAR AND LP LP NOT VAR OR VAR RP OR LP VAR OR VAR RP RP RP OR LP VAR AND NOT VAR RP RP AND VAR END

Calling getVarName() immediately after a VAR has been returned by getNext() gives you the name of the variable (e.g. "t42").

There are many ways to implement little scanners like this. You should do this first and make sure it's bulletproof by testing. Trying to build a parser on top of a flaky scanner is torture.

As I said in comments, I'd consider recursive descent parsing. If you have a suitable grammar, writing an RD parser is a very short step as the Dragon Book (also mentioned above) shows.

A reasonable grammar (using tokens as above) is

Expr -> Term AND Term
     | Term OR Term 
     | Term END

Term -> NOT Term 
     | Opnd

Opnd -> VAR 
     | LP Expr RP

For example, here is how you'd get started. It shows the first rule converted to a function:

// Sketch of a recursive descent evaluator: one method per grammar rule, each of
// which parses its rule and returns the boolean value of what it parsed.
// Placeholders ("...") mark parts left for the reader to fill in.
class Evaluator {
  final Tokenizer tokenizer = ...;     // Contains the expression text.
  final Map<String, Boolean> env = ... // Environment: variables to values.

  Token lookAhead;  // Holds the token we're parsing right now.

  Evaluator(Tokenizer tokenizer, Map<String, Boolean> env) { ... }

  // Moves on to the next token of the input.
  void advance() {
    lookAhead = tokenizer.getNext();
  }

  // Rule: Expr -> Term AND Term | Term OR Term | Term END
  boolean expr() {
    boolean leftHandSide = term();  // Parse the left hand side recursively.
    Token op = lookAhead;           // Remember the operator.
    if (op == Token.END) return leftHandSide; // Oops. That's all.
    advance();                      // Skip past the operator.
    boolean rightHandSide = term(); // Parse the right hand side recursively.
    if (op == Token.AND) return leftHandSide && rightHandSide;  // Evaluate!
    if (op == Token.OR) return leftHandSide || rightHandSide;
    dieWithSyntaxError("Expected op, found " + op);
  }

  // Rule: Term -> NOT Term | Opnd  (body left as an exercise)
  boolean term() {...}

  // Rule: Opnd -> VAR | LP Expr RP  (body left as an exercise)
  boolean opnd() {...}
    
}

The environment is used when a VAR is parsed. Its boolean value is env.get(tokenizer.getVarName()).

So to process the file, you'll

For each line
   For each variable tX in the expression
      See if the line contains the string tX is bound to in its text field.
         If so, put the mapping tX -> true in the environment
         else put tX -> false
   Reset the tokenizer
   Call Evaluator.evaluate(tokenizer, environment)
   If it returns true, print the line, else skip it.

This is the simplest approach I can think of. About 150 lines. Many optimizations are possible.

Added

Well since I can no longer take away the thrill of discovery, here is my version:

import static java.lang.Character.isDigit;
import static java.lang.Character.isWhitespace;
import java.util.HashMap;
import java.util.Map;
import static java.util.stream.Collectors.toMap;

/**
 * Filters text lines with a boolean expression over variables {@code t1, t2, ...}.
 *
 * <p>Each variable is bound to a search string; for a given line the variable is
 * true when the line contains that string. The expression is evaluated directly
 * while it is parsed by a recursive descent parser with this grammar:
 *
 * <pre>
 * expr        -> conjunction { OR conjunction }
 * conjunction -> negated { AND negated }
 * negated     -> NOT negated | operand
 * operand     -> VAR | LP expr RP
 * </pre>
 *
 * <p>So {@code not} binds tighter than {@code and}, which binds tighter than
 * {@code or}; chains such as {@code t1 and t2 and t3} and single-operand groups
 * such as {@code (t1)} are accepted.
 */
public class TextExpressionSearch {
  /** Token kinds: variable, left/right parenthesis, the three operators, end of input. */
  enum Token { VAR, LP, RP, NOT, AND, OR, END }

  /** Splits the expression text into {@link Token}s, one per {@link #getNext()} call. */
  static class Tokenizer {
    final String input;
    int pos = 0;
    /** Name of the variable when the most recent token was {@code VAR}, else null. */
    String var;

    Tokenizer(String input) {
      this.input = input;
    }

    /** Rewinds to the start of the input so the expression can be evaluated again. */
    void reset() {
      pos = 0;
      var = null;
    }

    /** Returns the part of the input consumed so far (used in error messages). */
    String getRead() {
      return input.substring(0, pos);
    }

    /**
     * Scans and returns the next token, skipping leading whitespace.
     *
     * @return the next token, or {@code END} once the input is exhausted
     * @throws AssertionError if the text at the current position is not a token
     */
    Token getNext() {
      var = null;
      while (pos < input.length() && isWhitespace(input.charAt(pos))) {
        ++pos;
      }
      if (pos >= input.length()) {
        return Token.END;
      }
      int start = pos++;
      switch (input.charAt(start)) {
      case 't':
        // A variable is 't' followed by any number of digits.
        while (pos < input.length() && isDigit(input.charAt(pos))) {
          ++pos;
        }
        var = input.substring(start, pos);
        return Token.VAR;
      case '(':
        return Token.LP;
      case ')':
        return Token.RP;
      case 'n':
        if (input.startsWith("ot", pos)) {
          pos += 2;
          return Token.NOT;
        }
        break;
      case 'a':
        if (input.startsWith("nd", pos)) {
          pos += 2;
          return Token.AND;
        }
        break;
      case 'o':
        if (input.startsWith("r", pos)) {
          pos += 1;
          return Token.OR;
        }
        break;
      }
      throw new AssertionError("Can't tokenize: " + input.substring(start));
    }
  }

  /** Parses and evaluates one expression against a variable environment. */
  static class Evaluator {
    final Tokenizer tokenizer;
    final Map<String, Boolean> env;
    /** The token currently being examined by the parser. */
    Token lookAhead;

    /**
     * @param tokenizer source of tokens, positioned at the start of the expression
     * @param env       truth value of every variable the expression may reference
     */
    Evaluator(Tokenizer tokenizer, Map<String, Boolean> env) {
      this.tokenizer = tokenizer;
      this.env = env;
      advance();
    }

    /** Always throws; declared boolean so it can be used in a return statement. */
    boolean die(String msg) {
      throw new AssertionError(msg + "\nRead: " + tokenizer.getRead());
    }

    void advance() {
      lookAhead = tokenizer.getNext();
    }

    /** Consumes the current token, failing if it is not the expected one. */
    void match(Token token) {
      if (lookAhead != token) {
        die("Expected " + token + ", found " + lookAhead);
      }
      advance();
    }

    /**
     * Evaluates the whole expression.
     *
     * @return the value of the expression under the environment
     * @throws AssertionError on a syntax error, trailing input or an undefined variable
     */
    boolean evaluate() {
      boolean exprVal = expr();
      match(Token.END);
      return exprVal;
    }

    /** expr -> conjunction { OR conjunction } */
    boolean expr() {
      boolean value = conjunction();
      while (lookAhead == Token.OR) {
        advance();
        // Parse the right side unconditionally: skipping it via short-circuit
        // would leave its tokens unconsumed.
        boolean rhs = conjunction();
        value = value || rhs;
      }
      return value;
    }

    /** conjunction -> negated { AND negated } */
    boolean conjunction() {
      boolean value = negated();
      while (lookAhead == Token.AND) {
        advance();
        boolean rhs = negated();
        value = value && rhs;
      }
      return value;
    }

    /** negated -> NOT negated | operand */
    boolean negated() {
      switch (lookAhead) {
      case NOT:
        advance();
        return !negated();
      default:
        return operand();
      }
    }

    /** operand -> VAR | LP expr RP */
    boolean operand() {
      switch (lookAhead) {
      case VAR:
        if (!env.containsKey(tokenizer.var)) {
          die("Undefined variable: " + tokenizer.var);
        }
        // Read the value before advancing: advance() overwrites tokenizer.var.
        boolean varVal = env.get(tokenizer.var);
        advance();
        return varVal;
      case LP:
        advance();
        boolean exprVal = expr();
        match(Token.RP);
        return exprVal;
      }
      return die("Expected operand, found " + lookAhead);
    }
  }

  /** Demo: prints the sample lines that satisfy the sample expression. */
  public static void main(String [] args) {
    String expr = "((not t1 and ((not t2 or t4) or (t3 or t4))) or (t5 and not t6)) and t7";
    Map<String, String> bindings = new HashMap<>();
    bindings.put("t1", "str1");
    bindings.put("t2", "str2");
    bindings.put("t3", "str3");
    bindings.put("t4", "str4");
    bindings.put("t5", "str5");
    bindings.put("t6", "str6");
    bindings.put("t7", "str7");
    String [] lines = {"str5 str7", "str4 str2"};
    Tokenizer tokenizer = new Tokenizer(expr);
    for (String line : lines) {
      // A variable is true for this line when the line contains its bound string.
      Map<String, Boolean> env =
          bindings.entrySet().stream()
              .collect(toMap(e -> e.getKey(), e -> line.contains(e.getValue())));
      tokenizer.reset();
      if (new Evaluator(tokenizer, env).evaluate()) {
        System.out.println(line);
      }
    }
  }
}
Gene
  • 46,253
  • 4
  • 58
  • 96
  • I did get a shunting yard implementation working but this seems simpler and more general purpose so I will use this approach. I had never heard of recursive descent parsing before! Thank you for the help. – s_herbert Sep 23 '21 at 06:18
1

You can define a parser that returns a Predicate<String> that tests if a given string satisfies a conditional expression.

/**
 * Compiles a boolean expression into a predicate over text lines.
 *
 * <p>Grammar:
 * <pre>
 * expression = term { "or" term }
 * term       = factor { "and" factor }
 * factor     = [ "not" ] ( "(" expression ")" | identifier )
 * </pre>
 * An identifier is a lower-case letter followed by lower-case letters or digits.
 * A line satisfies an identifier when it contains the string the identifier is
 * mapped to. The map is consulted each time the predicate is tested, so bindings
 * may be changed after parsing.
 *
 * @param s   the expression text
 * @param map identifier -> search string
 * @return a predicate that is true for lines satisfying the expression; testing it
 *         throws RuntimeException if a referenced identifier has no mapping
 * @throws RuntimeException if the expression is syntactically invalid, ends where
 *         an operand is expected, or contains characters that are not part of the
 *         grammar
 */
static Predicate<String> parse(String s, Map<String, String> map) {
    return new Object() {
        // The last alternative turns any other non-blank character into a token of
        // its own, so unsupported input is reported instead of being dropped silently.
        String[] tokens = Pattern.compile("[()]|[a-z][a-z0-9]*|\\S")
            .matcher(s).results()
            .map(MatchResult::group)
            .toArray(String[]::new);
        int length = tokens.length;
        int index = 0;
        // Current token; null once the input is exhausted.
        String token = get();

        // Advances to the next token and returns it (null at end of input).
        String get() {
            return token = index < length ? tokens[index++] : null;
        }

        // Consumes the current token if it equals the expected text.
        boolean eat(String expect) {
            if (expect.equals(token)) {
                get();
                return true;
            }
            return false;
        }

        // True when the token starts with a lower-case ASCII letter; the tokenizer
        // guarantees the remaining characters are then letters or digits.
        boolean isIdentifier(String candidate) {
            char first = candidate.charAt(0);
            return first >= 'a' && first <= 'z';
        }

        Predicate<String> identifier() {
            String id = token;
            return line -> {
                // Looked up at test time so later changes to the map take effect.
                String value = map.get(id);
                if (value == null)
                    throw new RuntimeException(
                        "identifier '" + id + "' undefined");
                return line.contains(value);
            };
        }

        Predicate<String> factor() {
            boolean not = eat("not");
            if (token == null)
                throw new RuntimeException("unexpected end of expression");
            Predicate<String> p;
            switch (token) {
            case "(":
                get();
                p = expression();
                if (!eat(")"))
                    throw new RuntimeException("')' expected");
                break;
            case ")": case "not": case "and": case "or":
                throw new RuntimeException("syntax error at '" + token + "'");
            default:
                if (!isIdentifier(token))
                    throw new RuntimeException("syntax error at '" + token + "'");
                p = identifier();
                get();
                break;
            }
            if (not)
                p = p.negate();
            return p;
        }

        Predicate<String> term() {
            Predicate<String> p = factor();
            while (eat("and"))
                p = p.and(factor());
            return p;
        }

        Predicate<String> expression() {
            Predicate<String> p = term();
            while (eat("or"))
                p = p.or(term());
            return p;
        }

        Predicate<String> parse() {
            Predicate<String> p = expression();
            if (token != null)
                throw new RuntimeException("extra tokens string");
            return p;
        }
    }.parse();
}

test case:

/**
 * Checks the predicate built from the question's sample expression against
 * several lines, then shows that rebinding a variable in the map changes the
 * outcome of the already-built predicate.
 */
@Test
public void testParse() {
    String s = "not ((t1 and not t3) or (t4 and t2)) or t5";
    // Wrapped in a HashMap because the map is modified further down.
    Map<String, String> map = new HashMap<>(Map.of(
        "t1", "str1",
        "t2", "str2",
        "t3", "str3",
        "t4", "str4",
        "t5", "str5"));
    Predicate<String> p = parse(s, map);
    assertTrue(p.test("str5"));
    assertTrue(p.test("str3"));
    assertTrue(p.test("str1 str3"));
    assertFalse(p.test("str1"));
    assertFalse(p.test("str2 str4"));
    // The predicate reads the map on every test, so a variable can be rebound afterwards.
    assertFalse(p.test("str1 FOO"));
    map.put("t5", "FOO");
    assertTrue(p.test("str1 FOO"));
}

syntax:

expression = term { "or" term }
term       = factor { "and" factor }
factor     = [ "not" ] ( "(" expression ")" | identifier )
identifier = letter { letter | digit }
letter     = "a" | "b" | ... | "z"
digit      = "0" | "1" | ... | "9"
0

For posterity, here is my shunting yard solution which includes input validation:

/**
 * Evaluates a custom boolean expression such as
 * {@code "(t1 and not (t2 and t3)) or (t4 and not t5)"} against text lines.
 *
 * <p>A reference {@code tN} is true for a line when the line contains the string
 * stored under key {@code N} in the terms map. The expression is validated and
 * converted from infix to postfix once (shunting yard); the postfix form is then
 * replayed for every line, so {@link #evaluate(String)} may be called repeatedly.
 * Precedence, highest first: {@code not}, {@code and}, {@code or}.
 */
public class CustomInputParser {
    /** Operator stack used while converting to postfix. */
    private final Stack<Character> ops;
    /**
     * Postfix program: operators are the strings "n", "a", "o"; operands are the
     * decimal term keys. Strings keep multi-digit keys intact and cannot be
     * confused with operator characters.
     */
    private final LinkedList<String> postFix;
    /** Binary/unary operator rank; a smaller number binds tighter. */
    private final HashMap<Character, Integer> precedence;
    private final HashMap<Integer, String> termsMap;
    private final String customBool;
    /** True once inToPost() has produced a valid postfix program. */
    private boolean compiled;

    /**
     * @param tMap     term number -> search string (the N of {@code tN})
     * @param custBool the infix boolean expression
     */
    public CustomInputParser(HashMap<Integer, String> tMap, String custBool) {
        this.ops = new Stack<Character>();
        this.postFix = new LinkedList<String>();
        this.termsMap = tMap;
        this.customBool = custBool;
        this.compiled = false;
        this.precedence = new HashMap<Character, Integer>();
        precedence.put('n', 1);
        precedence.put('a', 2);
        precedence.put('o', 3);
    }

    /**
     * Validates the expression and converts it to postfix.
     *
     * @return 0 on success; -1 if the expression is malformed (bad character,
     *         unbalanced parentheses, operator/operand out of place, empty input)
     *         or references a term that is not in the terms map
     */
    private int inToPost() {
        ops.clear();
        postFix.clear();
        compiled = false;
        if (customBool == null || termsMap == null) {
            return -1;
        }
        char[] expr = convertToArr(customBool);
        // Infix grammar check: true while the next token must start an operand
        // (a term, "not" or "("); false while it must be "and", "or" or ")".
        boolean expectOperand = true;
        int i = 0;
        while (i < expr.length) {
            char c = expr[i];
            if (Character.isWhitespace(c)) {
                i++;
                continue;
            }
            if (Character.isDigit(c)) {
                if (!expectOperand) {
                    return -1; // two terms in a row, e.g. "t1 t2"
                }
                int start = i;
                while (i < expr.length && Character.isDigit(expr[i])) {
                    i++;
                }
                Integer key;
                try {
                    key = Integer.valueOf(new String(expr, start, i - start));
                } catch (NumberFormatException e) {
                    return -1; // term number does not fit in an int
                }
                if (!termsMap.containsKey(key)) {
                    return -1; // reference to a term that was never defined
                }
                postFix.offer(key.toString());
                expectOperand = false;
                continue;
            }
            if (!isOp(c)) {
                return -1;
            }
            boolean startsOperand = (c == '(' || c == 'n');
            if (startsOperand != expectOperand) {
                return -1; // e.g. "and and", "t1 not", "()" or a leading "or"
            }
            if (processOp(c) != 0) {
                return -1;
            }
            if (c == 'a' || c == 'o') {
                expectOperand = true;
            }
            i++;
        }
        if (expectOperand) {
            return -1; // empty expression or dangling operator
        }
        while (!ops.empty()) {
            char op = ops.pop();
            if (op == '(') {
                return -1; // no matching close paren for the open paren
            }
            postFix.offer(String.valueOf(op));
        }
        compiled = true;
        return 0;
    }

    private boolean isOp(char c) {
        return c == '(' || c == ')' || c == 'n' || c == 'a' || c == 'o';
    }

    /**
     * Applies one operator or parenthesis to the operator stack.
     *
     * @return 0 on success, -1 if a ")" has no matching "("
     */
    private int processOp(char c) {
        if (c == '(' || c == 'n') {
            // Prefix tokens never force earlier operators out.
            ops.push(c);
            return 0;
        }
        if (c == ')') {
            while (!ops.empty() && ops.peek() != '(') {
                postFix.offer(String.valueOf(ops.pop()));
            }
            if (ops.empty()) {
                return -1; // no matching open paren for the close paren
            }
            ops.pop(); // discard the open paren
            return 0;
        }
        // Binary operator: first emit every stacked operator that binds at least
        // as tightly, not just the top one.
        while (!ops.empty() && ops.peek() != '('
                && precedence.get(ops.peek()) <= precedence.get(c)) {
            postFix.offer(String.valueOf(ops.pop()));
        }
        ops.push(c);
        return 0;
    }

    /**
     * Tests one line against the expression. The expression is compiled on first
     * use if that has not happened yet; the compiled form is not consumed, so this
     * method can be called for any number of lines.
     *
     * @param s the line to test; must not be null
     * @return true if the line satisfies the expression
     * @throws IllegalArgumentException if the expression is invalid
     */
    public boolean evaluate(String s) {
        if (!compiled && inToPost() != 0) {
            throw new IllegalArgumentException("invalid custom boolean: " + customBool);
        }
        Stack<Boolean> eval = new Stack<Boolean>();
        for (String token : postFix) {
            char kind = token.charAt(0);
            if (kind == 'n') {
                eval.push(!eval.pop());
            } else if (kind == 'a') {
                boolean rhs = eval.pop();
                boolean lhs = eval.pop();
                eval.push(lhs && rhs);
            } else if (kind == 'o') {
                boolean rhs = eval.pop();
                boolean lhs = eval.pop();
                eval.push(lhs || rhs);
            } else {
                String term = this.termsMap.get(Integer.valueOf(token));
                // A term removed from the map after compilation matches nothing.
                eval.push(term != null && s.contains(term));
            }
        }
        return eval.pop();
    }

    /**
     * Rewrites the keywords as single letters (not -> n, and -> a, or -> o) and
     * drops the 't' prefix of term references. Whitespace is kept so that adjacent
     * term numbers are not merged; inToPost() skips it.
     */
    private char[] convertToArr(String x) {
        x = x.replace("not", "n").replace("and", "a").replace("or", "o").replace("t", "");
        return x.toCharArray();
    }

    /** Demo: evaluates a sample expression against a sample line. */
    public static void main(String[] args) {
        String customBool = "(t1 and not (t2 and t3)) or (t4 and not t5)";
        HashMap<Integer, String> termsMap = new HashMap<Integer, String>();
        termsMap.put(1, "str1");
        termsMap.put(2, "str2");
        termsMap.put(3, "str3");
        termsMap.put(4, "str4");
        termsMap.put(5, "str5");
        CustomInputParser c = new CustomInputParser(termsMap, customBool);
        if (c.inToPost() != 0) {
            System.out.println("invalid custom boolean");
        } else {
            System.out.println(c.evaluate("str1str5"));
        }
    }

}

s_herbert
  • 11
  • 5