3

So there is this class called StrSubstitutor in org.apache.commons.lang3.text which can take in a map like this:

Map<String, String> params = new HashMap<String, String>();
params.put("name","Vivek");
params.put("id","900");
params.put("somethingelse","blah");

and a template string like this:

    <data>
       <id>${id}</id>
       <name>${name}</name>
       <something>${somethingelse}</something>
    </data>

to produce an output string like this:

   <data>
       <id>900</id>
       <name>Vivek</name>
       <something>blah</something>
   </data>

What I want is the opposite. Is there a way that I can take the output string and the template to populate the map with the template variable as the key and the corresponding value in the string as the value?

PS - the string that I will use is not necessarily always XML. This is just for an example.

EDIT: I think some readers were confused because the variable names and the tag names were the same. The tags are only illustrative and are irrelevant to the problem. It is the variables inside ${} that I am concerned with. I have added another tag to show what I mean.

Vivek V K
  • 1,100
  • 2
  • 15
  • 33
  • Do you know the list of possible tags (like "id", "name", etc.), or do you need a solution for unknown tags? – c0der Oct 02 '16 at 10:32
  • @c0der The tags are irrelevant. If you mean the variables -- yes it is better if it is unknown. Basically I want to parse out anything enclosed by ${} – Vivek V K Oct 02 '16 at 11:55
  • If you want a solution which is generic and efficient you will have to parse this structure. Such simple examples can be parsed by handwritten parsers. If this example is part of a larger specification you might be better considering a parser generator. – CoronA Oct 02 '16 at 12:56

2 Answers2

0

You could convert the format string into a regex and then use this regex to process the input string. Below is an example class:

/**
 * Reverses a simple template substitution: given a format string containing
 * placeholders (for example {@code ${name}}), builds a regular expression in
 * which every placeholder becomes a capturing group {@code (.+)} and every
 * literal run of non-whitespace text is quoted verbatim. Runs of whitespace
 * between literals are matched with {@code \s*}, so the input may use any
 * amount of whitespace there.
 *
 * <p>Whitespace that directly follows a placeholder in the format is not
 * turned into {@code \s*}; such whitespace in the input ends up inside the
 * captured value. Because {@code .} does not match line terminators in the
 * generated pattern, a captured value cannot span multiple lines.
 */
public final class FormatReader {

    /** Compiled regex derived from the format string. */
    private final Pattern formatPattern;

    /** Placeholder names, in the order of their capturing groups. */
    private final List<String> names;

    private FormatReader(
            final Pattern formatPattern,
            final List<String> names) {
        this.formatPattern = formatPattern;
        this.names = names;
    }

    /**
     * Creates a reader that tolerates leading and trailing whitespace in the input.
     *
     * @param prefix text that opens a placeholder, e.g. {@code "${"}
     * @param suffix text that closes a placeholder, e.g. {@code "}"}
     * @param format the template containing the placeholders
     * @return a reader for the given format
     */
    public static FormatReader of(
            final String prefix,
            final String suffix,
            final String format) {
        return of(prefix, suffix, format, true);
    }

    /**
     * Creates a reader for the given format.
     *
     * @param prefix text that opens a placeholder, e.g. {@code "${"}
     * @param suffix text that closes a placeholder, e.g. {@code "}"}
     * @param format the template containing the placeholders
     * @param allowSurroundingWhitespace whether the input may carry extra
     *        whitespace before the first and after the last literal
     * @return a reader for the given format
     * @throws StringIndexOutOfBoundsException if a placeholder is opened with
     *         {@code prefix} but never closed with {@code suffix}
     */
    public static FormatReader of(
            final String prefix,
            final String suffix,
            final String format,
            final boolean allowSurroundingWhitespace) {
        final List<String> names = new ArrayList<>();
        final StringBuilder sb = new StringBuilder("(?m)");

        // 'skip' is true while no literal run is pending, i.e. everything up to
        // the current position has already been written to the pattern.
        // While it is false, 'last' marks where the pending literal run starts.
        boolean skip = allowSurroundingWhitespace;
        if (skip) {
            sb.append("\\s*");
        }

        int last = 0;
        int i = 0;
        while (i < format.length()) {
            if (format.startsWith(prefix, i)) {
                // Flush the pending literal only if there is one. When 'skip'
                // is already true, 'last' is stale (the literal was flushed
                // before a whitespace run, or the previous token was itself a
                // placeholder) and must not be emitted a second time.
                if (!skip) {
                    sb.append(Pattern.quote(format.substring(last, i)));
                }
                sb.append("(.+)");
                skip = true;

                final int off = i + prefix.length();
                final int end = format.indexOf(suffix, off);
                names.add(format.substring(off, end));
                i = end + suffix.length();
                continue;
            }
            if (Character.isWhitespace(format.charAt(i))) {
                if (!skip) {
                    skip = true;
                    // Replace '\s*' with '\s+' if at least one whitespace has to be present
                    sb.append(Pattern.quote(format.substring(last, i))).append("\\s*");
                }
            } else if (skip) {
                // First character of a new literal run.
                last = i;
                skip = false;
            }
            i++;
        }

        if (!skip) {
            // The format ends inside a literal run: flush it.
            sb.append(Pattern.quote(format.substring(last)));
            if (allowSurroundingWhitespace) {
                sb.append("\\s*");
            }
        }
        return new FormatReader(Pattern.compile(sb.toString()), names);
    }

    /**
     * Matches the whole input against the format and returns the captured values.
     *
     * @param input the text produced from the format
     * @return a map from placeholder name to the text found in its position
     * @throws IllegalArgumentException if the input does not match the format
     */
    public Map<String, String> toMap(
            final String input) {
        final Matcher m = formatPattern.matcher(input);
        if (!m.matches()) {
            throw new IllegalArgumentException("Argument does not match format");
        }
        final Map<String, String> map = new HashMap<>();
        // Group g (1-based) belongs to the placeholder name at index g - 1.
        for (int g = 1; g <= m.groupCount(); g++) {
            map.put(names.get(g - 1), m.group(g));
        }
        return map;
    }

    public static void main(
            final String[] args) {
        final FormatReader r = of("${", "}", ""
                + "   <data>\n"
                + "       <id>${id}</id>\n"
                + "       <name>${name}</name>\n"
                + "   </data>");
        final String s = ""
                + "      <data>\n"
                + "        <id>900</id>           "
                + "    <name>Vivek</name>\n"
                + "  </data>    ";
        // The created pattern (accepts any count of whitespace):
        //                             'id'                     'name'
        // (?m)\s*\Q<data>\E\s*\Q<id>\E(.+)\Q</id>\E\s*\Q<name>\E(.+)\Q</name>\E\s*\Q</data>\E\s*
        System.out.println(r.toMap(s)); // {name=Vivek, id=900}
    }
}
Nevay
  • 784
  • 5
  • 9
  • It works when run blindly. A bit of explanation would help. – Vivek V K Oct 02 '16 at 08:09
  • Okay, I think I understand what it does now. Thanks for the answer! I will see if anyone else comes up with an existing library that can do this before I accept your answer. – Vivek V K Oct 02 '16 at 09:19
  • Okay, I now find that the code is sensitive to even a single misplaced whitespace character. That makes sense, since the string can then no longer be matched against the template. Unfortunately, this is a bit too restrictive for my problem. – Vivek V K Oct 02 '16 at 12:29
0

Here is another option :

import java.util.HashMap;
import java.util.Map;

/**
 * Demo: collects the ${...} variable names from a template and then looks up
 * each name's value in the output by searching for a tag pair named exactly
 * like the variable (<name>value</name>).
 */
public class Test{

    public static void main(String[] args){

        // Simulated template; at most one ${...} variable per line is assumed.
        String[] template = {
                "<data>",
                "<id>${id}</id>",
                "<name>${name}</name>",
                "<something>${somethingelse}</something>",
                "</data>"
        };

        String[] output = {
                "<data>",
                "<id>900</id>",
                "<name>Vivek</name>",
                "<somethingelse>blah</somethingelse>",
                "</data>"
        };

        Map<String, String> params = getParams(template);
        getValues(params, output);

        for (Map.Entry<String, String> entry : params.entrySet()) {
            System.out.println(entry.getKey() + " : " + entry.getValue());
        }
    }

    /**
     * Scans every template line for a ${...} variable and registers its name
     * as a key (with a null value) in the returned map.
     */
    private static Map<String, String> getParams(String[] template) {

        Map<String, String> found = new HashMap<String, String>();

        for (String row : template) {

            // Position of the first '$', '{' and '}' in the line.
            int dollarAt = row.indexOf("$");
            int openAt = row.indexOf("{");
            int closeAt = row.indexOf("}");

            boolean allPresent = (dollarAt >= 0) && (openAt >= 0) && (closeAt >= 0);
            // '{' must directly follow '$', and '}' must not come before '{'.
            boolean wellOrdered = ((openAt - dollarAt) == 1) && (closeAt >= openAt);

            if (allPresent && wellOrdered) {
                String name = getParamFromLine(row, openAt + 1, closeAt);
                if (name != null) {
                    found.put(name, null);
                }
            }
        }

        return found;
    }

    /**
     * For every variable name in the map, searches the output lines for
     * <name>...</name> and stores the non-blank text found between the tags.
     * When several lines match, the value from the last matching line is kept.
     */
    private static void getValues(Map<String, String> params, String[] output) {

        for (Map.Entry<String, String> entry : params.entrySet()) {

            String openTag = "<" + entry.getKey() + ">";   // like <name>
            String endTag = "</" + entry.getKey() + ">";   // like </name>

            for (String row : output) {

                // Strip leading and trailing whitespace only.
                String text = row.trim();
                int openAt = text.indexOf(openTag, 0);
                int endAt = text.indexOf(endTag, openAt);

                // Both the opening and the closing tag must be on this line.
                if ((openAt >= 0) && (endAt >= 0)) {
                    String value = getParamFromLine(text, openAt + openTag.length(), endAt);
                    if (value != null) {
                        // Updates the value of an existing key.
                        entry.setValue(value);
                    }
                }
            }
        }
    }

    /**
     * Returns the trimmed text of line between the two indexes (start
     * inclusive, end exclusive), or null when that text is empty or blank.
     */
    private static String getParamFromLine(String line, int indexLeftB, int indexRightB) {

        String trimmed = line.substring(indexLeftB, indexRightB).trim();

        if (trimmed.length() == 0) {
            return null;
        }
        return trimmed;
    }
}
c0der
  • 18,467
  • 6
  • 33
  • 65