1

While read the zip package files name (For Ex: TestDocumentááá), which has diacritic character,
It returns symbol character like below.

Actual Result: TestDocument���.docx
Expected Result: TestDocumentááá.docx

Actual Result


    private async void OnInputFileChange(InputFileChangeEventArgs e)
    {
        foreach (IBrowserFile file in e.GetMultipleFiles())
        {
            await using Stream stream = file.OpenReadStream((int)file.Size);
            MemoryStream memoryStream = new MemoryStream((int)file.Size);
            await stream.CopyToAsync(memoryStream);

            memoryStream.Position = 0;
            
            CancellationTokenSource cancellation = new CancellationTokenSource();
            byte[] buffer = new byte[file.Size];
            string UniqueFileName = Path.GetRandomFileName() + ".bin";
            string tmpfile = Path.GetTempFileName();
            tmpfile = e.File.Name;

            int bytesRead = 0;
            while ((bytesRead = await memoryStream.ReadAsync(buffer, cancellation.Token)) != 0)
            {
                using MemoryStream tmpMemoryStream = new MemoryStream(buffer, 0, bytesRead);

                using (var fs = new FileStream(tmpfile, FileMode.Append))
                {
                    tmpMemoryStream.WriteTo(fs);
                }
            };

            using ZipArchive zipArchive = ZipFile.OpenRead(tmpfile);
            foreach (ZipArchiveEntry entry in zipArchive.Entries)
            {
                // Get file name as Symbolioc character
                // entry.FullName ->> TestDocument���.docx

                //byte[] bytes = Encoding.UTF8.GetBytes(entry.FullName);
                //string text2 = Encoding.UTF8.GetString(bytes);
            }
        }
    }

1 Answers1

2

Looks like an encoding issue. You may want to use this ZipArchive constructor instead of ZipFile.OpenRead where you can specify an encoding explicitely, e.g.:

using var zipArchive = new ZipArchive(stream, ZipArchiveMode.Read, false, 
    Encoding.GetEncoding("IBM437");

Older ZIP files are often encoded in IBM437, so try specifying Encoding.GetEncoding("IBM437") as encoding.

Note that if you are not on .NET Framework 4.x, this requires adding Encoding.RegisterProvider(CodePagesEncodingProvider.Instance) at application startup (add the nuget package System.Text.Encoding.CodePages).

Klaus Gütter
  • 11,151
  • 6
  • 31
  • 36