I tried to read .docx and .txt in C# The content from ABC.docx is :
Test1
Test2
My code actually read the ABC.docx but one problem is when the data stored in the sql server the output is like this:
Below is my code:
void WalkDirectoryTree(System.IO.DirectoryInfo root)
{
//System.IO.FileInfo[] files = null;
System.IO.DirectoryInfo[] subDirs = null;
//need to add-in more extension file such as .doc, .ppt, .xlsx
//files = root.GetFiles("*.txt");
var files = root.GetFiles().Where(a => a.Extension.Contains(".docx") || a.Extension.Contains(".txt"));
// files = new string[] { "*.txt", "*.docx" }
//.SelectMany(i => root.GetFiles(i, SearchOption.AllDirectories))
//.ToArray();
//if file is not null, read filename & file extension
if (files != null)
{
foreach (System.IO.FileInfo fi in files)
{
StringBuilder text = new StringBuilder();
Microsoft.Office.Interop.Word.Application word = new Microsoft.Office.Interop.Word.Application();
object miss = System.Reflection.Missing.Value;
//object path = @"I:\def.docx";
object path = fi.FullName;
object readOnly = true;
Microsoft.Office.Interop.Word.Document docs = word.Documents.Open(ref path, ref miss, ref readOnly, ref miss, ref miss, ref miss, ref miss, ref miss, ref miss, ref miss, ref miss, ref miss, ref miss, ref miss, ref miss, ref miss);
for (int i = 0; i < docs.Paragraphs.Count; i++)
{
text.Append(" \r\n " + docs.Paragraphs[i + 1].Range.Text.ToString());
}
//Get the full patch of the file extension
string[] lines = System.IO.File.ReadAllLines(fi.FullName);
//TextReader reader = new FilterReader(fi.FullName);
//StreamReader m = new StreamReader(fi.FullName);
foreach (string line in lines)
{
String[] substrings = fi.FullName.Split('\\');
string strFileName = string.Empty;
string strFileExtension = string.Empty;
if (substrings.Length > 0)
{
strFileName = substrings[ substrings.Length -1 ];
if( !string.IsNullOrEmpty(strFileName) )
{
string[] extensionSplit = strFileName.Split('.');
if (extensionSplit.Length > 0)
{
strFileExtension = extensionSplit[extensionSplit.Length - 1];
}
}
}
else
{
strFileName = fi.FullName;
}
InsertData(strFileName, line.Replace("'",""), fi.FullName,strFileExtension);
}
}
//After searched from root, continue search from subDirectories
subDirs = root.GetDirectories();
#region Exclude all the hidden files from drives
foreach (System.IO.DirectoryInfo dirInfo in subDirs)
{
if ((dirInfo.Attributes & FileAttributes.Hidden) == 0)
{
WalkDirectoryTree(dirInfo);
}
}
#endregion
}
}
Please advice how to store inside the sql server. Thanks.