2

I found a way to clean an XML file of invalid characters, which works fine, but it is a bit slow. The cleaning takes ~10-20s which is not appreciated by users.

It seems like a huge waste of time to use StreamReader/StreamWriter to create a clean file and then read that file with XmlReader. Is it possible to clean each line while XmlReader is reading, or at least to use the StreamReader as the input to XmlReader, to avoid the time spent saving the intermediate file?

I'm trying to get the team who creates the databases to create clean files before uploading them, but it is a slow process...

// Character checking is switched off on the reader itself; the database file is
// still passed through createCleanSDDB first, and the reader is opened on the
// path of the cleaned copy that call returns.
XmlReaderSettings settings = new XmlReaderSettings { CheckCharacters = false };
cleanDatabase = createCleanSDDB(database);
using (XmlReader sddbReader = XmlReader.Create(cleanDatabase, settings))
{
    // Parse XML...
}

    /// <summary>
    /// Copies the file at <paramref name="sddbPath"/> line by line into a temporary
    /// file, passing every line through <c>getCleanLine</c> on the way.
    /// </summary>
    /// <param name="sddbPath">Path of the source file; it is read as UTF-8.</param>
    /// <returns>The path of the temporary file that received the cleaned lines.</returns>
    private string createCleanSDDB(String sddbPath)
    {
        // The target name is obtained before either stream is opened.
        string cleanPath = getTmpFileName();

        using (var source = new StreamReader(sddbPath, Encoding.UTF8))
        using (var sink = new StreamWriter(cleanPath))
        {
            // ReadLine returns null once the end of the input is reached.
            for (string raw = source.ReadLine(); raw != null; raw = source.ReadLine())
            {
                sink.WriteLine(getCleanLine(raw));
            }
        }

        return cleanPath;
    }

    // Matches every UTF-16 code unit that is not part of a legal XML 1.0 character:
    //   1. anything outside TAB, LF, CR and U+0020..U+FFFD (surrogates are inside
    //      that range on purpose and are handled by the next two alternatives),
    //   2. a high surrogate that is not followed by a low surrogate,
    //   3. a low surrogate that is not preceded by a high surrogate.
    // Well-formed surrogate pairs (U+10000..U+10FFFF) are therefore left alone.
    // Note: .NET's \x escape takes exactly two hex digits, so four-digit code
    // points have to be written with \u.
    private static readonly Regex invalidXmlCharsRegex = new Regex(
        @"[^\x09\x0A\x0D\x20-\uFFFD]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]",
        RegexOptions.Compiled | RegexOptions.CultureInvariant);

    /// <summary>
    /// Removes every character that is not allowed by the XML 1.0 <c>Char</c>
    /// production (#x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] |
    /// [#x10000-#x10FFFF]) from a line of text.
    /// </summary>
    /// <param name="dirtyLine">The line to clean.</param>
    /// <returns><paramref name="dirtyLine"/> with all illegal characters removed.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="dirtyLine"/> is <c>null</c>.
    /// </exception>
    private string getCleanLine(string dirtyLine)
    {
        // The regex is built once and reused for every line of the file.
        return invalidXmlCharsRegex.Replace(dirtyLine, "");
    }
Fousk
  • 31
  • 4

0 Answers0