i Have created C# Console application which read multiple RSS Feed urls, then take values and put in database.
Now i have problem usualy with wordpress rss feed generator where article image is with tag enclosure.
How to take and parse article image url, if in source exist enclosure, with my code i read all image urls which is in description tags.
Here is my code which read and write in database:
using (var xmlReader = XmlReader.Create(izvorURLX))
{
var rssFormatter = new Rss20FeedFormatter();
rssFormatter.ReadFrom(xmlReader);
foreach (SyndicationItem syndicationItem in rssFormatter.Feed.Items)
{
Console.OutputEncoding = Encoding.UTF8;
string link = syndicationItem.Links[0].Uri.ToString();
var statCat1 = Convert.ToString((""));
foreach (var kategorija in syndicationItem.Categories.Take(1))
{
statCat1 = (kategorija.Name);
}
var rr = syndicationItem.AttributeExtensions.Values;
var LastIZV1 = rssFormatter.Feed.LastUpdatedTime.DateTime;
var SiteTitle = rssFormatter.Feed.Title.Text;
var itemFD = Convert.ToString(syndicationItem.Summary.Text);
string clitemFD = Regex.Replace(itemFD, @"<[^>]*>", String.Empty, RegexOptions.IgnoreCase).Trim();
var ItemItem = Convert.ToString(rssFormatter.Feed.Items);
var ff = rssFormatter.Feed.Items.ToString();
var datumIZV0 = syndicationItem.PublishDate.DateTime;
var nula = Convert.ToDateTime("01.01.0001 00:00:00");
var datumIZVX = Convert.ToDateTime(DateTime.Now);
if (datumIZV0 == nula)
{
datumIZVX = Convert.ToDateTime(DateTime.Now);
}
else
{
datumIZVX = Convert.ToDateTime(datumIZV0);
}
XmlDocument doc = new XmlDocument();
doc.Load(izvorURLX);
var imgSRC = Convert.ToString("");
var reg1 = new Regex("src=(?:\"|\')?(?<imgSrc>[^>]*[^/].?:bmp|jpg|jpeg|gif|png))(?:\"|\')?");
var match1 = reg1.Match(itemFD);
if (match1.Success)
{
Uri UrlImage = new Uri(match1.Groups["imgSrc"].Value, UriKind.Absolute);
imgSRC = UrlImage.ToString();
}
var feedXML = Convert.ToString(izvorURLX);
int KatX = Convert.ToInt32(KatIzv);
var statTitle = Convert.ToString(syndicationItem.Title.Text);
var statLink = Convert.ToString(syndicationItem.Links[0].Uri);
SqlConnection conn = new SqlConnection("Server=localhost\\SQLEXPRESS;Database=RSSFeedAgregator;Integrated Security=true");
conn.Open();
var FeedID = Convert.ToInt32(0);
var LastinDB = Convert.ToDateTime("01.01.0001 00:00:00");
string FeedInDB = Convert.ToString("a");
using (SqlCommand cmdX2 = new SqlCommand("SELECT Feed_ID, Izvor, LastUpd, feed, Kategorija, iID, izvTitle, statCat FROM [dbo].[tbl_feeds]", conn))
{
SqlDataReader readerX = cmdX2.ExecuteReader();
while (readerX.Read())
{
Console.OutputEncoding = Encoding.UTF8;
var feedTxt = Convert.ToString(readerX["feed"]);
FeedID = Convert.ToInt32(readerX["Feed_ID"]);
LastinDB = Convert.ToDateTime(readerX["LastUpd"]);
FeedInDB = Convert.ToString(readerX["feed"]);
}
readerX.Close();
}
bool inList = DB.Contains(clitemFD);
var statIMG = Convert.ToString("");
if (inList == false)
{
Console.WriteLine("false: Ne postoi");
using (SqlCommand cmd1 = new SqlCommand("INSERT INTO tbl_feeds VALUES (" + "@Izvor, @LastUpd, @feed, @Kategorija, @iID, @izvTitle, @statCat, @statTitle, @statLink, @statImage)", conn))
{
cmd1.Parameters.AddWithValue("@Izvor", feedXML);
cmd1.Parameters.AddWithValue("@LastUpd", datumIZVX);
cmd1.Parameters.AddWithValue("@feed", clitemFD);
cmd1.Parameters.AddWithValue("@Kategorija", KatX);
cmd1.Parameters.AddWithValue("@iID", IzvID);
cmd1.Parameters.AddWithValue("@izvTitle", SiteTitle);
cmd1.Parameters.AddWithValue("@statCat", statCat1);
cmd1.Parameters.AddWithValue("@statTitle", statTitle);
cmd1.Parameters.AddWithValue("@statLink", statLink);
cmd1.Parameters.AddWithValue("@statImage", imgSRC);
int rows = cmd1.ExecuteNonQuery();
Console.WriteLine("Uspesno dodadeno nov zapis !");
}
conn.Close();
}
else
{
Console.WriteLine("true: Postoi");
}
}
}
Sorry for my Long code, but i thing better to understanding, and i hope anyone can help me.
UPDATED
Also i found this code, this code read Enclosure URL, but just repeat to read and if source have 10 articles, this code reading 10 times all 10 img url, and in database save just last one.
XmlNodeList items = doc.SelectNodes("//item") ;
for (int i = 0; i < items.Count; i++)
{
var encImg = (items[i].SelectSingleNode("enclosure").Attributes["url"].Value);
}
Anyone can modify this code to work property ?