0

I have created a C# console application that reads multiple RSS feed URLs, then takes the values and stores them in a database.

Now I have a problem, usually with the WordPress RSS feed generator, where the article image is in an enclosure tag.

How can I get and parse the article image URL when the source contains an enclosure tag? With my current code I only read the image URLs that are inside the description tags.

Here is my code, which reads the feed and writes to the database:

// Reads the RSS 2.0 feed at izvorURLX and inserts every item whose cleaned
// description is not yet known (DB.Contains) into tbl_feeds.
// The feed is read once and a single SQL connection is shared by all items;
// both are disposed when the block exits, even if an exception is thrown.
using (var xmlReader = XmlReader.Create(izvorURLX))
using (var conn = new SqlConnection("Server=localhost\\SQLEXPRESS;Database=RSSFeedAgregator;Integrated Security=true"))
{
    Console.OutputEncoding = Encoding.UTF8;

    var rssFormatter = new Rss20FeedFormatter();
    rssFormatter.ReadFrom(xmlReader);

    // First image URL referenced by a src attribute inside the item description.
    // The URL stops at the closing quote, whitespace or '>' so it cannot swallow
    // neighbouring attributes.
    var imgInDescription = new Regex(
        @"src=[""']?(?<imgSrc>[^""'>\s]+\.(?:bmp|jpg|jpeg|gif|png))",
        RegexOptions.IgnoreCase);

    // Values that are identical for every item of this feed.
    var feedXML = Convert.ToString(izvorURLX);
    int KatX = Convert.ToInt32(KatIzv);
    var SiteTitle = rssFormatter.Feed.Title != null
        ? (rssFormatter.Feed.Title.Text ?? string.Empty)
        : string.Empty;

    conn.Open();

    foreach (SyndicationItem syndicationItem in rssFormatter.Feed.Items)
    {
        // Only the first category is stored; empty when the item has none.
        var firstCategory = syndicationItem.Categories.FirstOrDefault();
        var statCat1 = firstCategory != null ? (firstCategory.Name ?? string.Empty) : string.Empty;

        // Raw description (may contain HTML) and its tag-stripped form.
        var itemFD = syndicationItem.Summary != null
            ? (syndicationItem.Summary.Text ?? string.Empty)
            : string.Empty;
        string clitemFD = Regex.Replace(itemFD, @"<[^>]*>", String.Empty, RegexOptions.IgnoreCase).Trim();

        // An item without <pubDate> reports DateTime.MinValue; use "now" instead.
        var datumIZV0 = syndicationItem.PublishDate.DateTime;
        var datumIZVX = datumIZV0 == DateTime.MinValue ? DateTime.Now : datumIZV0;

        // Image URL: prefer an absolute image URL found in the description,
        // otherwise fall back to the item's <enclosure> (if it is an image).
        var imgSRC = string.Empty;
        var match1 = imgInDescription.Match(itemFD);
        Uri imageUri;
        if (match1.Success && Uri.TryCreate(match1.Groups["imgSrc"].Value, UriKind.Absolute, out imageUri))
        {
            imgSRC = imageUri.ToString();
        }
        else
        {
            // Rss20FeedFormatter surfaces <enclosure> as a link with RelationshipType "enclosure".
            var enclosure = syndicationItem.Links.FirstOrDefault(l =>
                l.RelationshipType == "enclosure"
                && l.Uri != null
                && (string.IsNullOrEmpty(l.MediaType)
                    || l.MediaType.StartsWith("image/", StringComparison.OrdinalIgnoreCase)));
            if (enclosure != null)
            {
                imgSRC = enclosure.Uri.ToString();
            }
        }

        var statTitle = syndicationItem.Title != null
            ? (syndicationItem.Title.Text ?? string.Empty)
            : string.Empty;
        var statLink = syndicationItem.Links.Count > 0 && syndicationItem.Links[0].Uri != null
            ? syndicationItem.Links[0].Uri.ToString()
            : string.Empty;

        // Skip items whose cleaned description is already known.
        if (DB.Contains(clitemFD))
        {
            Console.WriteLine("true: Postoi");
            continue;
        }

        Console.WriteLine("false: Ne postoi");
        using (var cmd1 = new SqlCommand("INSERT INTO tbl_feeds VALUES (@Izvor, @LastUpd, @feed, @Kategorija, @iID, @izvTitle, @statCat, @statTitle, @statLink, @statImage)", conn))
        {
            cmd1.Parameters.AddWithValue("@Izvor", feedXML);
            cmd1.Parameters.AddWithValue("@LastUpd", datumIZVX);
            cmd1.Parameters.AddWithValue("@feed", clitemFD);
            cmd1.Parameters.AddWithValue("@Kategorija", KatX);
            cmd1.Parameters.AddWithValue("@iID", IzvID);
            cmd1.Parameters.AddWithValue("@izvTitle", SiteTitle);
            cmd1.Parameters.AddWithValue("@statCat", statCat1);
            cmd1.Parameters.AddWithValue("@statTitle", statTitle);
            cmd1.Parameters.AddWithValue("@statLink", statLink);
            cmd1.Parameters.AddWithValue("@statImage", imgSRC);
            cmd1.ExecuteNonQuery();
            Console.WriteLine("Uspesno dodadeno nov zapis !");
        }
    }
}

Sorry for the long code, but I think it makes the problem easier to understand, and I hope someone can help me.

UPDATED

I also found this code. It reads the enclosure URL, but it repeats the read: if the source has 10 articles, it reads all 10 image URLs 10 times, and only the last one is saved in the database.

// Walks every <item> element of the loaded feed document and reads the
// url attribute of its <enclosure> child, when it has one.
XmlNodeList items = doc.SelectNodes("//item");

for (int i = 0; i < items.Count; i++)
{
    // Not every <item> carries an <enclosure>; skip those instead of
    // dereferencing a null node or a missing url attribute.
    XmlNode enclosure = items[i].SelectSingleNode("enclosure");
    if (enclosure == null || enclosure.Attributes == null || enclosure.Attributes["url"] == null)
    {
        continue;
    }

    var encImg = enclosure.Attributes["url"].Value;
}

Can anyone modify this code to work properly?

Mister XP
  • 63
  • 1
  • 1
  • 9

1 Answers1

0

To be honest, I can't follow your code very well, so I suggest you indent and format it.

For that, you can use Ctrl+K, Ctrl+D.

You can check more shortcuts here.


That said, you can easily find the enclosure URL using this code:

// Prints the <enclosure> URL of every feed item (an empty string when the
// item has no enclosure).
Console.OutputEncoding = Encoding.UTF8;

string link = "";
foreach (SyndicationItem syndicationItem in rssFormatter.Feed.Items)
{
    // Reset per item so an item without an enclosure does not print the
    // previous item's URL.
    link = "";

    // The RSS formatter exposes <enclosure> as a link whose RelationshipType
    // is "enclosure"; selecting by type avoids assuming it sits at index 1.
    foreach (SyndicationLink candidate in syndicationItem.Links)
    {
        if (candidate.RelationshipType == "enclosure" && candidate.Uri != null)
        {
            link = candidate.Uri.ToString();
            break;
        }
    }

    // Prints the Image's src.
    Console.WriteLine("Image src: " + link);
}

The previous code prints:

Image src: http://a1on.mk/wp-content/uploads/2017/07/turcija-ucenici.jpg
Image src: http://a1on.mk/wp-content/uploads/2017/07/vlada-18juli.jpg
Image src: http://a1on.mk/wp-content/uploads/2017/07/tomas-greminger.jpg
Image src: http://a1on.mk/wp-content/uploads/2014/08/toplo.jpg
Image src: http://a1on.mk/wp-content/uploads/2017/06/grncarov.gif
Image src: http://a1on.mk/wp-content/uploads/2015/04/uprava-finansiska-policija.gif
Image src: http://a1on.mk/wp-content/uploads/2017/05/pritvor-turska.jpg
Image src: http://a1on.mk/wp-content/uploads/2017/07/kosarkari-do20.jpg
Image src: http://a1on.mk/wp-content/uploads/2017/07/vardar-fk-nat.jpg
Image src: http://a1on.mk/wp-content/uploads/2017/07/burgas.jpg
Mauricio Arias Olave
  • 2,259
  • 4
  • 25
  • 70