Often I extract file names from html text data using regex but I heard the html agility pack is good for parsing html data. how can I use html agility pack to extract all url from html data. Can any one guide me with sample code. Thanks.
This is my code sample which works fine.
using System.Text.RegularExpressions;
private ArrayList GetFilesName(string Source)
{
ArrayList arrayList = new ArrayList();
Regex regex = new Regex("(?<=src=\")([^\"]+)(?=\")", 1);
MatchCollection matchCollection = regex.Matches(Source);
foreach (Match match in matchCollection)
{
if (!match.get_Value().StartsWith("http://"))
{
arrayList.Add(Path.GetFileName(match.get_Value()));
}
match.NextMatch();
}
ArrayList arrayList1 = arrayList;
return arrayList1;
}
private string ReplaceSrc(string Source)
{
Regex regex = new Regex("(?<=src=\")([^\"]+)(?=\")", 1);
MatchCollection matchCollection = regex.Matches(Source);
foreach (Match match in matchCollection)
{
string value = match.get_Value();
string str = string.Concat("images/", Path.GetFileName(value));
Source = Source.Replace(value, str);
match.NextMatch();
}
string source = Source;
return source;
}