71

I need to be able to extract a string between two tags, for example: "0002" from "morenonxmldata<tag1>0002</tag1>morenonxmldata".

I am using C# and .NET 3.5.

Raktim Biswas
  • 4,011
  • 5
  • 27
  • 32
Ashley
  • 721
  • 1
  • 5
  • 5

10 Answers

126
  // The parenthesised group captures whatever sits between <tag1> and </tag1>.
  Regex regex = new Regex("<tag1>(.*)</tag1>");
  const string input = "morenonxmldata<tag1>0002</tag1>morenonxmldata";
  Match tagMatch = regex.Match(input);
  // Group 1 is the captured text; its Value is "" when the pattern did not match.
  string s = tagMatch.Groups[1].Value;

Or (as mentioned in the comments) to match the minimal subset:

  // ".*?" is lazy: the capture stops at the first "</tag1>" instead of the last one in the input.
  Regex regex = new Regex("<tag1>(.*?)</tag1>");

The Regex class is in the System.Text.RegularExpressions namespace.

Mariano Desanze
  • 7,847
  • 7
  • 46
  • 67
Aaron
  • 9,123
  • 5
  • 40
  • 38
73

Solution without need of regular expression:

/// <summary>
/// Returns the text between the first <c>&lt;tag&gt;</c> and the next <c>&lt;/tag&gt;</c>
/// that follows it in <paramref name="s"/>.
/// </summary>
/// <param name="s">The text to search; may contain arbitrary non-XML data around the tags.</param>
/// <param name="tag">The tag name without angle brackets, e.g. <c>tag1</c>.</param>
/// <returns>
/// The text between the two tags, or <see cref="string.Empty"/> when <paramref name="s"/> is
/// null or empty, or when either the opening or the closing tag is not found.
/// </returns>
string ExtractString(string s, string tag) {
    if (string.IsNullOrEmpty(s)) {
        return string.Empty;
    }

    var startTag = "<" + tag + ">";
    var endTag = "</" + tag + ">";

    // Ordinal comparison: tags are markup, not linguistic text, so culture rules must not apply.
    int tagIndex = s.IndexOf(startTag, System.StringComparison.Ordinal);
    if (tagIndex < 0) {
        // Without this check, -1 + startTag.Length would silently point at an unrelated position.
        return string.Empty;
    }

    int startIndex = tagIndex + startTag.Length;
    int endIndex = s.IndexOf(endTag, startIndex, System.StringComparison.Ordinal);
    if (endIndex < 0) {
        // Without this check, Substring would be called with a negative length and throw.
        return string.Empty;
    }

    return s.Substring(startIndex, endIndex - startIndex);
}
gotqn
  • 42,737
  • 46
  • 157
  • 243
Mehrdad Afshari
  • 414,610
  • 91
  • 852
  • 789
14

Extracting the contents between two known values can be useful later as well, so why not create an extension method for it? Here is what I do; it is short and simple...

  /// <summary>
  /// Returns the text between the first occurrence of <paramref name="startString"/> and the
  /// first occurrence of <paramref name="endString"/> that follows it.
  /// </summary>
  /// <param name="content">The text to search.</param>
  /// <param name="startString">The delimiter that precedes the wanted text.</param>
  /// <param name="endString">The delimiter that follows the wanted text.</param>
  /// <returns>
  /// The text between the delimiters, or <see cref="string.Empty"/> when
  /// <paramref name="content"/> is null or empty, when <paramref name="startString"/> is not
  /// found, or when <paramref name="endString"/> does not occur after it.
  /// </returns>
  /// <exception cref="System.ArgumentNullException">
  /// <paramref name="startString"/> or <paramref name="endString"/> is null.
  /// </exception>
  public static string GetBetween(this string content, string startString, string endString)
  {
      if (startString == null)
      {
          throw new System.ArgumentNullException("startString");
      }

      if (endString == null)
      {
          throw new System.ArgumentNullException("endString");
      }

      if (string.IsNullOrEmpty(content))
      {
          return string.Empty;
      }

      int startIndex = content.IndexOf(startString, System.StringComparison.Ordinal);
      if (startIndex < 0)
      {
          return string.Empty;
      }

      int contentStart = startIndex + startString.Length;

      // Search for the end delimiter only AFTER the start delimiter. Checking that it exists
      // anywhere in the text is not enough: it may occur solely before the start delimiter.
      int endIndex = content.IndexOf(endString, contentStart, System.StringComparison.Ordinal);
      if (endIndex < 0)
      {
          return string.Empty;
      }

      return content.Substring(contentStart, endIndex - contentStart);
  }
Ozesh
  • 6,536
  • 1
  • 25
  • 23
13

A Regex approach using lazy match and back-reference:

// Lazy match plus a back-reference: \1 must repeat the tag name captured by group 1,
// so each match is one <name>...</name> pair.
const string input = "morenonxmldata<tag1>0002</tag1>morenonxmldata<tag2>abc</tag2>asd";
const string pattern = @"<([^>]+)>(.*?)</\1>";

MatchCollection tagMatches = Regex.Matches(input, pattern);
foreach (Match tagMatch in tagMatches)
{
    string tagName = tagMatch.Groups[1].Value;
    string tagContent = tagMatch.Groups[2].Value;

    // Prints one "name=content" line per matched tag pair.
    Console.WriteLine("{0}={1}", tagName, tagContent);
}
Marc Gravell
  • 1,026,079
  • 266
  • 2,566
  • 2,900
7
string input = "Exemple of value between two string FirstString text I want to keep SecondString end of my string";
// Lazily capture the text that sits between "FirstString " and " SecondString ".
Match found = Regex.Match(input, @"FirstString (.+?) SecondString ");
// Despite its name, "match" holds the captured text of group 1, not the Match object.
var match = found.Groups[1].Value;
Matinee LA
  • 71
  • 1
  • 1
3

To get Single/Multiple values without regular expression

// string.Split has no (string, string) overload: several string separators must be passed
// as a string[] together with a StringSplitOptions value.
// Splitting on both the opening and the closing tag leaves the tag contents at the odd
// indexes of the resulting array.

// For Single
// Throws IndexOutOfRangeException when inputString contains neither tag.
var value = inputString.Split(new[] { "<tag1>", "</tag1>" }, System.StringSplitOptions.None)[1];

// For Multiple
var values = inputString.Split(new[] { "<tag1>", "</tag1>" }, System.StringSplitOptions.None).Where((_, index) => index % 2 != 0);
Vijay Nirmal
  • 5,239
  • 4
  • 26
  • 59
  • This is an amazing one-liner without Regex, which I've been struggling with. Now I don't have to use it. Thank you! – jamheadart Dec 05 '19 at 02:03
  • Sadly I got "An attempt was made to initialize a char constant with more than one character." when I tried to replace with another String. I genuinely wish this solution worked, because Regex is a usability nightmare. – Felipe La Rotta Dec 21 '22 at 00:42
  • 1
    @FelipeLaRotta My bad, it should be `"` instead of `'`. I have updated my answer – Vijay Nirmal Dec 21 '22 at 06:21
2

For future reference, I found this code snippet at http://www.mycsharpcorner.com/Post.aspx?postID=15. If you need to search for different "tags", it works very well.

    /// <summary>
    /// Finds the first occurrence of <paramref name="strBegin"/> in <paramref name="strSource"/>
    /// and the first <paramref name="strEnd"/> after it, and returns the text between them
    /// together with the part of the source that follows the end delimiter.
    /// </summary>
    /// <param name="strBegin">The delimiter that precedes the wanted text.</param>
    /// <param name="strEnd">The delimiter that follows the wanted text.</param>
    /// <param name="strSource">The text to search.</param>
    /// <param name="includeBegin">When true, the begin delimiter is kept at the start of the extracted text.</param>
    /// <param name="includeEnd">When true, the end delimiter is kept at the end of the extracted text.</param>
    /// <returns>
    /// A two-element array. Element 0 is the extracted text, or "" when either delimiter is
    /// missing. Element 1 is the text after the end delimiter; it is the whole source when the
    /// begin delimiter is not found, and "" when the begin delimiter is found but the end
    /// delimiter is not.
    /// </returns>
    /// <exception cref="System.ArgumentNullException">Any of the string arguments is null.</exception>
    public static string[] GetStringInBetween(string strBegin,
        string strEnd, string strSource,
        bool includeBegin, bool includeEnd)
    {
        if (strSource == null)
        {
            throw new System.ArgumentNullException("strSource");
        }

        string[] result = { "", "" };

        int beginIndex = strSource.IndexOf(strBegin, System.StringComparison.Ordinal);
        if (beginIndex == -1)
        {
            // stay where we are
            result[1] = strSource;
            return result;
        }

        // Always search for the end delimiter AFTER the begin delimiter, even when the begin
        // delimiter is included in the result; otherwise identical or overlapping delimiters
        // would match inside the begin delimiter itself.
        int contentStart = beginIndex + strBegin.Length;
        int endIndex = strSource.IndexOf(strEnd, contentStart, System.StringComparison.Ordinal);
        if (endIndex == -1)
        {
            return result;
        }

        int afterEnd = endIndex + strEnd.Length;
        int sliceStart = includeBegin ? beginIndex : contentStart;
        int sliceEnd = includeEnd ? afterEnd : endIndex;
        result[0] = strSource.Substring(sliceStart, sliceEnd - sliceStart);

        // advance beyond this segment: the remainder starts right after the end delimiter,
        // regardless of whether the delimiter was included in the extracted text.
        result[1] = strSource.Substring(afterEnd);
        return result;
    }
Nime Cloud
  • 6,162
  • 14
  • 43
  • 75
1

I strip the data before and after the tag.

 using System;
 using System.Collections.Generic;
 using System.Linq;
 using System.Text;
 using System.Threading.Tasks;
 using System.Text.RegularExpressions;

 namespace testApp
 {
     /// <summary>
     /// Console demo: extracts the content of &lt;tag1&gt; by deleting everything around it.
     /// </summary>
     class Program
     {
         static void Main(string[] args)
         {
             // Greedy: matches everything up to and including the last "<tag1>".
             const string leadingPattern = "[\\s\\S]*<tag1>";
             // Matches the first "</tag1>" and everything that follows it.
             const string trailingPattern = "</tag1>[\\s\\S]*";

             string source = "morenonxmldata<tag1>0002</tag1>morenonxmldata";
             string withoutLeading = Regex.Replace(source, leadingPattern, "");
             string tagContent = Regex.Replace(withoutLeading, trailingPattern, "");

             Console.WriteLine(tagContent);
             // Blocks until a line is read from standard input before the program exits.
             Console.ReadLine();
         }
     }
 }
SedJ601
  • 12,173
  • 3
  • 41
  • 59
0

Without RegEx, with some must-have value checking

    /// <summary>
    /// Extracts the text enclosed by <c>&lt;tag&gt;</c> and <c>&lt;/tag&gt;</c> from
    /// <paramref name="soapMessage"/>, falling back to the message boundaries when a tag is missing.
    /// </summary>
    /// <param name="soapMessage">The text to search; returned unchanged when null or empty.</param>
    /// <param name="tag">The tag name without angle brackets.</param>
    /// <returns>
    /// The text from just after the opening tag (or from the start of the message when the
    /// opening tag is absent) up to the closing tag (or to the end of the message when the
    /// closing tag is absent).
    /// </returns>
    public static string ExtractString(string soapMessage, string tag)
    {
        if (string.IsNullOrEmpty(soapMessage))
        {
            return soapMessage;
        }

        string openTag = "<" + tag + ">";
        string closeTag = "</" + tag + ">";

        // Start of the slice: right after the opening tag, or position 0 when it is absent.
        int from = 0;
        int openAt = soapMessage.IndexOf(openTag);
        if (openAt != -1)
        {
            from = openAt + openTag.Length;
        }

        // End of the slice: the closing tag, or the end of the message when it is absent.
        int to = soapMessage.IndexOf(closeTag, from);
        if (to == -1)
        {
            to = soapMessage.Length;
        }

        return soapMessage.Substring(from, to - from);
    }
Tom
  • 373
  • 3
  • 10
-8
    /// <summary>
    /// Returns the text that follows the first occurrence of <paramref name="delimiterFirst"/>
    /// in <paramref name="line"/>, up to the next occurrence of <paramref name="delimiterLast"/>.
    /// </summary>
    /// <param name="line">The text to search.</param>
    /// <param name="delimiterFirst">The delimiter that precedes the wanted text.</param>
    /// <param name="delimiterLast">The delimiter that terminates the wanted text.</param>
    /// <returns>
    /// The text between the delimiters. When <paramref name="delimiterLast"/> is null, empty or
    /// not found after the first delimiter, the rest of the line is returned. When
    /// <paramref name="line"/> or <paramref name="delimiterFirst"/> is null or empty, or the
    /// first delimiter is not found, <see cref="string.Empty"/> is returned.
    /// </returns>
    public string between2finer(string line, string delimiterFirst, string delimiterLast)
    {
        if (string.IsNullOrEmpty(line) || string.IsNullOrEmpty(delimiterFirst))
        {
            return string.Empty;
        }

        // IndexOf is used instead of Split(..., RemoveEmptyEntries): dropping empty entries
        // shifts the array indexes when the line starts with the delimiter or the wanted
        // text is empty, which made the old fixed indexes [1] / [0] wrong or out of range.
        int firstIndex = line.IndexOf(delimiterFirst, System.StringComparison.Ordinal);
        if (firstIndex < 0)
        {
            return string.Empty;
        }

        int start = firstIndex + delimiterFirst.Length;

        int end = -1;
        if (!string.IsNullOrEmpty(delimiterLast))
        {
            end = line.IndexOf(delimiterLast, start, System.StringComparison.Ordinal);
        }

        if (end < 0)
        {
            // No terminating delimiter: take everything up to the end of the line.
            end = line.Length;
        }

        return line.Substring(start, end - start);
    }


    // Click handler demo: pulls the failed recipient out of sample mail headers and shows it.
    private void button1_Click(object sender, EventArgs e)
    {
        // Sample headers; each header line ends with "\n".
        string headers = "Received: from exim by isp2.ihc.ru with local (Exim 4.77) \nX-Failed-Recipients: rmnokixm@gmail.com\nFrom: Mail Delivery System <Mailer-Daemon@isp2.ihc.ru>";

        // The value runs from the header name up to the end of that header line.
        string failedRecipient = between2finer(headers, "X-Failed-Recipients: ", "\n");
        MessageBox.Show(failedRecipient);
    }
Vova Popov
  • 1,053
  • 11
  • 11