I've got some trouble parsing some XML-data in C#.
Method summary:
The method takes a keyword and then search for that keyword at www.clinicaltrials.com by using the website URI. For instance:
http://www.clinicaltrials.gov/ct2/results?term=ALL&Search=Search&displayxml=true.
This URI will bring up all clinical studies stored at Clinical Trials as XML. Because of the large amount clinical data they have only 20 studies at each page. So to get to the next page you have to add &pg=2, to go to page two. My codes parses trough all the pages and converts each page to a C# objects.
Problem:
The problem is, when it gets to page 13 it crashes with the following error:
InvalidOperationException was unhandled: There is an error in XML document (155, 23)
When I copy the XML for page 13, 12 or any other page close to page 13 into an XML validator it says its fine. And when I search the xml myself i can't find any errors. I was thinking maybe memory is full, but after 240 objects? If i search for a keyword that retrives less then 13 pages of results it works.
The code I've written to retrive and parse the XML you can read here:
public List<search_resultsClinical_study> SearchStudyByKeyword(string keyword)
{
int currentPage = 1;
double numberOfStudiesOnAPage = 20;
double totalPages = 1; //if not it will crash anyways
List<search_results> searchResult = new List<search_results>();
try
{
while (totalPages >= currentPage)
{
//crashes if search is larger then 13 pages... have to figure out why....
string newUri = URI + "ct2/results?term=" + keyword + "&Search=Search&displayxml=true&pg=" + currentPage ;
System.Xml.Serialization.XmlSerializer reader = new System.Xml.Serialization.XmlSerializer(typeof(search_results));
XmlReader xmlReader = XmlReader.Create(newUri);
search_results studies = new search_results();
studies = (search_results)reader.Deserialize(xmlReader);
searchResult.Add(studies);
totalPages = Math.Ceiling((double)studies.count / numberOfStudiesOnAPage);
currentPage += 1;
}
//return searchResult;
//Append all studies to one list, easier to handle for user
List<search_resultsClinical_study> result = new List<search_resultsClinical_study>();
foreach (search_results sr in searchResult)
{
foreach (search_resultsClinical_study cs in sr.clinical_study)
{
result.Add(cs);
}
}
return result;
}
catch (WebException)
{
Debug.Write("404 - Might be a invalid search term ");
return null;
}
}
The error appear at the following line:
studies = (search_results)reader.Deserialize(xmlReader);
search_result class:
/// <remarks/>
[System.Xml.Serialization.XmlTypeAttribute(AnonymousType = true)]
[System.Xml.Serialization.XmlRootAttribute(Namespace = "", IsNullable = false)]
public partial class search_results
{
private string queryField;
private search_resultsClinical_study[] clinical_studyField;
private uint countField;
/// <remarks/>
public string query
{
get
{
return this.queryField;
}
set
{
this.queryField = value;
}
}
/// <remarks/>
[System.Xml.Serialization.XmlElementAttribute("clinical_study")]
public search_resultsClinical_study[] clinical_study
{
get
{
return this.clinical_studyField;
}
set
{
this.clinical_studyField = value;
}
}
/// <remarks/>
[System.Xml.Serialization.XmlAttributeAttribute()]
public uint count
{
get
{
return this.countField;
}
set
{
this.countField = value;
}
}
}
/// <remarks/>
[System.Xml.Serialization.XmlTypeAttribute(AnonymousType = true)]
public partial class search_resultsClinical_study
{
private byte orderField;
private decimal scoreField;
private string nct_idField;
private string urlField;
private string titleField;
private search_resultsClinical_studyStatus statusField;
private string condition_summaryField;
private string last_changedField;
/// <remarks/>
public byte order
{
get
{
return this.orderField;
}
set
{
this.orderField = value;
}
}
/// <remarks/>
public decimal score
{
get
{
return this.scoreField;
}
set
{
this.scoreField = value;
}
}
/// <remarks/>
public string nct_id
{
get
{
return this.nct_idField;
}
set
{
this.nct_idField = value;
}
}
/// <remarks/>
public string url
{
get
{
return this.urlField;
}
set
{
this.urlField = value;
}
}
/// <remarks/>
public string title
{
get
{
return this.titleField;
}
set
{
this.titleField = value;
}
}
/// <remarks/>
public search_resultsClinical_studyStatus status
{
get
{
return this.statusField;
}
set
{
this.statusField = value;
}
}
/// <remarks/>
public string condition_summary
{
get
{
return this.condition_summaryField;
}
set
{
this.condition_summaryField = value;
}
}
/// <remarks/>
public string last_changed
{
get
{
return this.last_changedField;
}
set
{
this.last_changedField = value;
}
}
}
/// <remarks/>
[System.Xml.Serialization.XmlTypeAttribute(AnonymousType = true)]
public partial class search_resultsClinical_studyStatus
{
private string openField;
private string valueField;
/// <remarks/>
[System.Xml.Serialization.XmlAttributeAttribute()]
public string open
{
get
{
return this.openField;
}
set
{
this.openField = value;
}
}
/// <remarks/>
[System.Xml.Serialization.XmlTextAttribute()]
public string Value
{
get
{
return this.valueField;
}
set
{
this.valueField = value;
}
}
}
XML that fails:
http://www.clinicaltrials.gov/ct2/results?term=ALL&Search=Search&displayxml=true&pg=13
Does anyone got a clue why this error appears? I've also added a XmlSchema and tried to generat the C# class based on the XmlSchema instead!
Thanks for help!!!