3
// Scans an e-mail body for `New<Name>:"value"` pairs and, per field name, gathers the
// distinct quoted values that follow it.
string emailBody = " holla holla testing is for NewFinancial History:\"xyz\"  dsd  NewFinancial History:\"abc\"  NewEBTDI$:\"abc\"  dsds  ";

   // Collapse the body onto a single line: every line break becomes one space.
   emailBody = string.Join(" ", Regex.Split(emailBody.Trim(), @"(?:\r\n|\n|\r)"));
                // Distinct field names: each match starts at the word "New" (which must be followed
                // by a word character) and lazily runs to the first ':'; colons are then removed.
                var keys = Regex.Matches(emailBody, @"\bNew\B(.+?):", RegexOptions.Singleline).OfType<Match>().Select(m => m.Groups[0].Value.Replace(":", "")).Distinct().ToArray();
                foreach (string key in keys)
                {
                    // Distinct, trimmed values found for this key.
                    List<string> valueList = new List<string>();
                    // Pattern: <key>:"<value>", with the value captured in a group named after the cleaned key.
                    // NOTE(review): key is concatenated into the pattern verbatim, so any regex
                    // metacharacter it contains (e.g. '$') is interpreted as regex syntax, not as literal text.
                    string regex = "" + key + ":" + "\"(?<" + GetCleanKey(key) + ">[^\"]*)\"";

                    var matches = Regex.Matches(emailBody, regex, RegexOptions.Singleline);
                    foreach (Match match in matches)
                    {
                        if (match.Success)
                        {
                            // Read the captured value through the same cleaned group name used in the pattern.
                            string value = match.Groups[GetCleanKey(key)].Value;
                            // Record each trimmed value only once.
                            if (!valueList.Contains(value.Trim()))
                            {
                                valueList.Add(value.Trim());
                            }
                        }
                    }

 /// <summary>
 /// Returns <paramref name="key"/> with spaces and a fixed set of punctuation characters removed.
 /// </summary>
 /// <param name="key">The raw field name to clean.</param>
 /// <returns>The characters of <paramref name="key"/> that are not in the removal set, in their original order.</returns>
 public string GetCleanKey(string key)
        {
            // Every character that is dropped from the key.
            const string stripped = " -#$*!@%^&()[]?<>';/\"+~`{}|";

            char[] kept = new char[key.Length];
            int count = 0;
            foreach (char ch in key)
            {
                if (stripped.IndexOf(ch) < 0)
                {
                    kept[count++] = ch;
                }
            }

            return new string(kept, 0, count);
        }

In my above code I am trying to get the value next to NewEBTDI$: which is "abc".

When the field name contains a $ sign, the pattern does not find the value next to the field name.

If the $ is removed and only NewEBTDI is specified, the value is found.

I want to search the value along with the $ sign.

ΩmegaMan
  • 29,542
  • 12
  • 100
  • 122
Savan Patel
  • 357
  • 4
  • 14
  • Please arrange your code properly. It's not readable. –  Jan 21 '16 at 20:17
  • "$" has a special meaning in Regex. Escape it with a \. But in your case you will have to do a String.Replace() method because your regex is generated . You might have other special character as well... – Ric Gaudet Jan 21 '16 at 20:29

2 Answers2

5

The right way of handling characters that have a special meaning in regex, but must be searched as is, is to escape them. You can do this with Regex.Escape. In your case it is the $ sign, which means end of line in regex, if not escaped.

// Builds <key>:"<value>" by concatenation; Regex.Escape makes metacharacters in the key (such as '$') match literally.
string regex = "" + Regex.Escape(key) + ":" + "\"(?<" + Regex.Escape(GetCleanKey(key))
               + ">[^\"]*)\"";

or

// Builds <key>:"<value>" with String.Format: {0} is the escaped key, {1} the name of the capture group.
string regex = String.Format("{0}:\"(?<{1}>[^\"]*)\"",
                             Regex.Escape(key),
                             Regex.Escape(GetCleanKey(key)));

or with VS 2015, using string interpolation:

// Builds <key>:"<value>" as an interpolated string (C# 6 syntax); the escaped key and the group name are embedded inline.
string regex = $"{Regex.Escape(key)}:\"(?<{Regex.Escape(GetCleanKey(key))}>[^\"]*)\"";

(It does look better than that in reality, because the C# editor colors the string parts and the embedded C# expressions differently.)

Olivier Jacot-Descombes
  • 104,806
  • 13
  • 138
  • 188
1

It is unclear what the ultimate goal is, but the $ in a pattern is an anchor that matches either the end of a line or the end of the input, depending on whether RegexOptions.Multiline is set.

Why not just capture the text before the : into a named group, and then capture the quoted value that follows it as well? For example:

// Sample input: three Name:"value" pairs, one of whose names contains a '$'.
var data = "...is for NewFinancial History:\"xyz\"  dsd  NewFinancial History:\"abc\"  NewEBTDI$:\"abc\"  dsds";

// One generic pattern for every field: the name is captured instead of being built into the
// pattern, so characters such as '$' in a name never become part of the regex itself.
var pattern = @"
(?<New>New[^:]+)      # Capture all items after `New` that is *not* (`^`) a `:`, one or more.
:                     # actual `:`
\x22                  # actual quote character begin anchor
(?<InQuotes>[^\x22]+) # text that is not a quote, one or more
\x22                  # actual quote ending anchor
";

// IgnorePatternWhitespace allows us to comment the pattern. Does not affect processing.
// ExplicitCapture restricts captures to the named groups (New, InQuotes).
// Each match is projected into an anonymous object holding the field name and its quoted value.
Regex.Matches(data, pattern, RegexOptions.IgnorePatternWhitespace | RegexOptions.ExplicitCapture)
     .OfType<Match>()
     .Select(mt => new
     {
         NewText = mt.Groups["New"].Value,
         Text = mt.Groups["InQuotes"].Value
     });

Result

enter image description here

Note that I use the hex escape \x22 for the quote character instead of escaping a literal quote inside the C# string. This makes the pattern easier to work with, because it avoids the C# compiler processing an escape that needs to reach the regex engine intact.

ΩmegaMan
  • 29,542
  • 12
  • 100
  • 122