I create a crawler that gets the news from the other website that i defined before for it ,so i use quartz
to run a task in background.the schedule is defined like this :
public class JobBackground : IJob
{
public void Execute(IJobExecutionContext context)
{
for (int j = 1; j <= 920; j++)
{
NewsRepository newsRepository = new NewsRepository();
GoyaAgent Goyaagent = new GoyaAgent();
Task<List<NewsContent>> lst = Goyaagent.parsing("http://www.gooyait.com/page/"+j);
List<NewsContent> enresult = lst.Result;
foreach (NewsContent newsContent in enresult)
{
News newnews = new News();
newnews.Subject = newsContent.Title;
newnews.NewsDate = DateTime.Now;
newnews.NewsBrief = newsContent.abs;
newnews.NewsText = newsContent.Content;
newnews.ShowOnSlide = "Yes";
newnews.GroupId = 1049;
newnews.NewsImageSmall = newsContent.Img;
newnews.NewsImageBig = newsContent.Img;
newnews.Reference = newsContent.Url;
newnews.UserId = "3";
newnews.Visible = "Yes";
newnews.ViewCounter = 0;
newsRepository.Add(newnews);
if (newsRepository.FindBy(i => i.Reference == newsContent.Url).Count() == 0)
newsRepository.Save();
}
}
}
}
The parsing
function :
public async Task<List<NewsContent>> parsing(string newsArchive)
{
List<NewsContent> lstResult = new List<NewsContent>();
try
{
HttpClient http = new HttpClient();
var response = await http.GetByteArrayAsync(newsArchive);
String source = Encoding.GetEncoding("utf-8").GetString(response, 0, response.Length - 1);
source = WebUtility.HtmlDecode(source);
HtmlDocument resultat = new HtmlDocument();
resultat.LoadHtml(source);
List<HtmlNode> toftitle = resultat.DocumentNode.Descendants().Where
(x =>
(x.Name == "div" && x.Attributes["class"] != null &&
x.Attributes["class"].Value.Contains("main-col"))).ToList();
var li = toftitle[0].Descendants().Where
(x =>
(x.Name == "div" && x.Attributes["class"] != null &&
x.Attributes["class"].Value.Contains("base-box blog-post"))).ToList();
foreach (var item in li)
{
NewsContent newsContent = new NewsContent();
newsContent.Url = item.Descendants("a").ToList()[0].GetAttributeValue("href", null);
newsContent.Img = item.Descendants("img").ToList()[0].GetAttributeValue("src", null);
newsContent.Title = item.Descendants("h2").ToList()[0].InnerText;
newsContent.abs = item.Descendants("p").ToList()[0].InnerText;
//finding main news content
var response1 = await http.GetByteArrayAsync(newsContent.Url);
String source1 = Encoding.GetEncoding("utf-8").GetString(response1, 0, response1.Length - 1);
source1 = WebUtility.HtmlDecode(source1);
HtmlDocument resultat1 = new HtmlDocument();
resultat1.LoadHtml(source1);
HtmlNode doc = resultat1.DocumentNode.SelectSingleNode("//div[@class='entry-content']");
HtmlNode node = doc.SelectSingleNode("//div[@class='yasr-visitor-votes']");
if (node != null)
node.ParentNode.RemoveChild(node);
HtmlNode node1 = doc.SelectSingleNode("//div[@class='post-tags']");
if (node1 != null)
node1.ParentNode.RemoveChild(node1);
HtmlNode node2 =
doc.SelectSingleNode("//div[@class='mom-social-share ss-horizontal border-box']");
if (node2 != null)
node2.ParentNode.RemoveChild(node2);
HtmlNode node3 = doc.SelectSingleNode("//script|//style");
if (node3 != null)
node3.ParentNode.RemoveChild(node3);
newsContent.Content = doc.InnerHtml;
lstResult.Add(newsContent);
}
}
catch (Exception e)
{
}
return lstResult;
}
The schedule starts with this code :
public class JobScheduler
{
public static void Start()
{
IScheduler scheduler = StdSchedulerFactory.GetDefaultScheduler();
scheduler.Start();
IJobDetail job = JobBuilder.Create<JobBackground>().Build();
ITrigger trigger = TriggerBuilder.Create().StartNow()
.WithDailyTimeIntervalSchedule
(s =>
s.WithIntervalInHours(24)
.OnEveryDay()
.StartingDailyAt(TimeOfDay.HourAndMinuteOfDay(0, 0))
)
.Build();
scheduler.ScheduleJob(job, trigger);
}
}
There are several problems :
When i run this code the code never executed because ofITrigger trigger = TriggerBuilder.Create().StartNow()
so i have to wait for 24 hours to see the result why ?
And the second problem ,when i upload the code to the web server it doesn't work .why ?