
I have eight 3TB WD Red SATA drives (sdb through sdi) that I use in my pool.

My boot and OS drive is an 850 EVO SSD on sda.

The eight WD drives are attached to a Supermicro AOC-SAS2LP-MV8 add-on card, an 8-channel SAS/SATA adapter (600 MB/s per channel), sitting in a PCIe 3.0 x16 slot running at x8 on a Supermicro C7Z170-OCE-O ATX LGA 1151 DDR4 motherboard.
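(To confirm the slot is really negotiating x8, lspci can report the live link state; the bus address below is a placeholder to be taken from the first command's output:)

# Find the Marvell HBA's PCI address.
lspci | grep -i marvell
# LnkCap shows what the card supports, LnkSta what was actually negotiated.
lspci -vv -s 01:00.0 | grep -E 'LnkCap:|LnkSta:'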

My server and ZFS setup is as follows:

[root@nas ~]# cat /etc/redhat-release
CentOS Linux release 7.2.1511 (Core)

[root@nas ~]# uname -a
Linux nas.whittenberg.domain 3.10.0-327.28.2.el7.x86_64 #1 SMP Wed Aug 3 11:11:39 UTC 2016 x86_64 x86_64 x86_64 GNU/Linux

[root@nas ~]# cat /var/log/dmesg | grep ZFS
[    0.793572] ZFS: Loaded module v0.6.5.7-1, ZFS pool version 5000, ZFS filesystem version 5
[root@nas ~]# cat /var/log/dmesg | grep SPL
[    0.777144] SPL: Loaded module v0.6.5.7-1

[root@nas ~]# cat /etc/modprobe.d/zfs.conf
# prefetch enabled (setting this to 1 would disable it)
options zfs zfs_prefetch_disable=0
# set arc max to 48GB. I have 64GB in my server
options zfs zfs_arc_max=51539607552
# intended to match the 128K recordsize (note: 1310720 bytes is actually 1.25 MiB; 128 KiB would be 131072)
options zfs zfs_vdev_cache_size=1310720
options zfs zfs_vdev_cache_max=1310720
options zfs zfs_read_chunk_size=1310720
options zfs zfs_vdev_cache_bshift=12
# Set these to 10 so we get better IO at the cost of bandwidth
options zfs zfs_vdev_async_read_max_active=10
options zfs zfs_vdev_async_read_min_active=10
options zfs zfs_vdev_async_write_max_active=10
options zfs zfs_vdev_async_write_min_active=10
options zfs zfs_vdev_sync_read_max_active=10
options zfs zfs_vdev_sync_read_min_active=10
options zfs zfs_vdev_sync_write_max_active=10
options zfs zfs_vdev_sync_write_min_active=10
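(To verify these settings actually took effect after a reboot, the live values can be read back from sysfs; the parameter files under /sys/module/zfs/parameters are standard on ZFS on Linux 0.6.x:)

# Print the live value of each tunable set in zfs.conf above.
for p in zfs_prefetch_disable zfs_arc_max zfs_vdev_cache_size \
         zfs_vdev_cache_max zfs_read_chunk_size zfs_vdev_cache_bshift; do
    printf '%-24s' "$p"
    cat "/sys/module/zfs/parameters/$p"
done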

[root@nas ~]# zpool status
  pool: myraid
 state: ONLINE
  scan: scrub repaired 0 in 0h16m with 0 errors on Sun Aug  7 01:40:49 2016
config:

        NAME        STATE     READ WRITE CKSUM
        myraid      ONLINE       0     0     0
          mirror-0  ONLINE       0     0     0
            sdb     ONLINE       0     0     0
            sdc     ONLINE       0     0     0
          mirror-1  ONLINE       0     0     0
            sdd     ONLINE       0     0     0
            sde     ONLINE       0     0     0
          mirror-2  ONLINE       0     0     0
            sdf     ONLINE       0     0     0
            sdg     ONLINE       0     0     0
          mirror-3  ONLINE       0     0     0
            sdh     ONLINE       0     0     0
            sdi     ONLINE       0     0     0

errors: No known data errors

[root@nas ~]# zpool iostat -v
               capacity     operations    bandwidth
pool        alloc   free   read  write   read  write
----------  -----  -----  -----  -----  -----  -----
myraid       382G  10.5T    119     35  14.0M   606K
  mirror    95.5G  2.63T     29      8  3.49M   149K
    sdb         -      -     28      8  3.50M   153K
    sdc         -      -     28      8  3.50M   153K
  mirror    95.5G  2.63T     29      8  3.49M   151K
    sdd         -      -     28      8  3.50M   155K
    sde         -      -     28      8  3.50M   155K
  mirror    95.5G  2.63T     29      8  3.49M   152K
    sdf         -      -     28      8  3.50M   156K
    sdg         -      -     28      8  3.50M   156K
  mirror    95.5G  2.63T     29      9  3.49M   155K
    sdh         -      -     28      9  3.50M   159K
    sdi         -      -     28      9  3.50M   159K
----------  -----  -----  -----  -----  -----  -----

[root@nas ~]# zfs get all
NAME    PROPERTY              VALUE                  SOURCE
myraid  type                  filesystem             -
myraid  creation              Sat Aug  6 21:01 2016  -
myraid  used                  382G                   -
myraid  available             10.2T                  -
myraid  referenced            382G                   -
myraid  compressratio         1.05x                  -
myraid  mounted               yes                    -
myraid  quota                 none                   default
myraid  reservation           none                   default
myraid  recordsize            128K                   local
myraid  mountpoint            /myraid                default
myraid  sharenfs              off                    default
myraid  checksum              fletcher4              local
myraid  compression           lz4                    local
myraid  atime                 off                    local
myraid  devices               on                     default
myraid  exec                  on                     default
myraid  setuid                on                     default
myraid  readonly              off                    default
myraid  zoned                 off                    default
myraid  snapdir               hidden                 default
myraid  aclinherit            restricted             default
myraid  canmount              on                     default
myraid  xattr                 on                     default
myraid  copies                1                      default
myraid  version               5                      -
myraid  utf8only              off                    -
myraid  normalization         none                   -
myraid  casesensitivity       sensitive              -
myraid  vscan                 off                    default
myraid  nbmand                off                    default
myraid  sharesmb              off                    default
myraid  refquota              none                   default
myraid  refreservation        none                   default
myraid  primarycache          all                    local
myraid  secondarycache        all                    default
myraid  usedbysnapshots       0                      -
myraid  usedbydataset         382G                   -
myraid  usedbychildren        1.98M                  -
myraid  usedbyrefreservation  0                      -
myraid  logbias               latency                local
myraid  dedup                 off                    local
myraid  mlslabel              none                   default
myraid  sync                  disabled               local
myraid  refcompressratio      1.05x                  -
myraid  written               382G                   -
myraid  logicalused           403G                   -
myraid  logicalreferenced     403G                   -
myraid  filesystem_limit      none                   default
myraid  snapshot_limit        none                   default
myraid  filesystem_count      none                   default
myraid  snapshot_count        none                   default
myraid  snapdev               hidden                 default
myraid  acltype               off                    default
myraid  context               none                   default
myraid  fscontext             none                   default
myraid  defcontext            none                   default
myraid  rootcontext           none                   default
myraid  relatime              off                    default
myraid  redundant_metadata    all                    default
myraid  overlay               off                    default

[root@nas ~]# zpool get all
NAME    PROPERTY                    VALUE                       SOURCE
myraid  size                        10.9T                       -
myraid  capacity                    3%                          -
myraid  altroot                     -                           default
myraid  health                      ONLINE                      -
myraid  guid                        1068639342092444414         default
myraid  version                     -                           default
myraid  bootfs                      -                           default
myraid  delegation                  on                          default
myraid  autoreplace                 off                         default
myraid  cachefile                   -                           default
myraid  failmode                    wait                        default
myraid  listsnapshots               off                         default
myraid  autoexpand                  off                         default
myraid  dedupditto                  0                           default
myraid  dedupratio                  1.00x                       -
myraid  free                        10.5T                       -
myraid  allocated                   382G                        -
myraid  readonly                    off                         -
myraid  ashift                      0                           default
myraid  comment                     -                           default
myraid  expandsize                  -                           -
myraid  freeing                     0                           default
myraid  fragmentation               1%                          -
myraid  leaked                      0                           default
myraid  feature@async_destroy       enabled                     local
myraid  feature@empty_bpobj         enabled                     local
myraid  feature@lz4_compress        active                      local
myraid  feature@spacemap_histogram  active                      local
myraid  feature@enabled_txg         active                      local
myraid  feature@hole_birth          active                      local
myraid  feature@extensible_dataset  enabled                     local
myraid  feature@embedded_data       active                      local
myraid  feature@bookmarks           enabled                     local
myraid  feature@filesystem_limits   enabled                     local
myraid  feature@large_blocks        enabled                     local

[root@nas ~]# zdb | grep ashift
            ashift: 12
            ashift: 12
            ashift: 12
            ashift: 12

[root@nas ~]# lsblk  -t -e 11,1
NAME                ALIGNMENT MIN-IO OPT-IO PHY-SEC LOG-SEC ROTA SCHED RQ-SIZE  RA WSAME
sda                         0    512      0     512     512    0 cfq       128 128    0B
├─sda1                      0    512      0     512     512    0 cfq       128 128    0B
├─sda2                      0    512      0     512     512    0 cfq       128 128    0B
└─sda3                      0    512      0     512     512    0 cfq       128 128    0B
  ├─centos_nas-swap         0    512      0     512     512    0           128 128    0B
  ├─centos_nas-root         0    512      0     512     512    0           128 128    0B
  └─centos_nas-home         0    512      0     512     512    0           128 128    0B
sdb                         0   4096      0    4096     512    1 noop      128 128    0B
├─sdb1                      0   4096      0    4096     512    1 noop      128 128    0B
└─sdb9                      0   4096      0    4096     512    1 noop      128 128    0B
sdc                         0   4096      0    4096     512    1 noop      128 128    0B
├─sdc1                      0   4096      0    4096     512    1 noop      128 128    0B
└─sdc9                      0   4096      0    4096     512    1 noop      128 128    0B
sdd                         0   4096      0    4096     512    1 noop      128 128    0B
├─sdd1                      0   4096      0    4096     512    1 noop      128 128    0B
└─sdd9                      0   4096      0    4096     512    1 noop      128 128    0B
sde                         0   4096      0    4096     512    1 noop      128 128    0B
├─sde1                      0   4096      0    4096     512    1 noop      128 128    0B
└─sde9                      0   4096      0    4096     512    1 noop      128 128    0B
sdf                         0   4096      0    4096     512    1 noop      128 128    0B
├─sdf1                      0   4096      0    4096     512    1 noop      128 128    0B
└─sdf9                      0   4096      0    4096     512    1 noop      128 128    0B
sdg                         0   4096      0    4096     512    1 noop      128 128    0B
├─sdg1                      0   4096      0    4096     512    1 noop      128 128    0B
└─sdg9                      0   4096      0    4096     512    1 noop      128 128    0B
sdh                         0   4096      0    4096     512    1 noop      128 128    0B
├─sdh1                      0   4096      0    4096     512    1 noop      128 128    0B
└─sdh9                      0   4096      0    4096     512    1 noop      128 128    0B
sdi                         0   4096      0    4096     512    1 noop      128 128    0B
├─sdi1                      0   4096      0    4096     512    1 noop      128 128    0B
└─sdi9                      0   4096      0    4096     512    1 noop      128 128    0B

My problem is that when I read a file for the first time, whether over my 10Gb connection (DAC, PC to server) or via rsync from the pool to the SSD inside the server, I get just over 100 MB/s. If I read the same file a second time, I get 1.2 GB/s across the 10Gb DAC connection and 380 MB/s from the pool to the SSD.
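(While the first, uncached read is in progress, per-vdev throughput shows whether all four mirrors are contributing; the one-second interval is just a convenient sampling choice:)

# Run in a second shell during the copy; each mirror should carry
# roughly a quarter of the total read bandwidth if striping works.
zpool iostat -v myraid 1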

I rebooted the server and ran a test reading from the pool to the SSD:

[root@nas ~]# rsync -h --progress /myraid/testmovie.avi /home/samba/testmovie.avi
testmovie.avi
       1.08G 100%   79.59MB/s    0:00:12 (xfer#1, to-check=0/1)

sent 1.08G bytes  received 31 bytes  80.21M bytes/sec
total size is 1.08G  speedup is 1.00

Then I ran the same copy after the file had been read once:

[root@nas ~]# rsync -h --progress /myraid/testmovie.avi /home/samba/testmovie.avi
testmovie.avi
       1.08G 100%  394.54MB/s    0:00:02 (xfer#1, to-check=0/1)

sent 1.08G bytes  received 31 bytes  433.13M bytes/sec
total size is 1.08G  speedup is 1.00
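(The jump on the second run points at the ARC; these kstat files exist on ZFS on Linux 0.6.x and show whether the rerun was served from RAM and how well prefetch did on the cold read:)

# ARC hits should climb while misses stay flat during the second read.
grep -E '^(hits|misses) ' /proc/spl/kstat/zfs/arcstats
# Prefetch effectiveness during the first, uncached read:
cat /proc/spl/kstat/zfs/zfetchstats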

Any pointers? Shouldn't the first read give me the read speed of four drives?
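(A raw per-disk baseline helps separate ZFS behavior from hardware limits; reading the raw devices with direct I/O is harmless as long as nothing writes to them. A 3TB WD Red typically streams on the order of 150 MB/s, so four mirrors reading in parallel ought to beat a single-disk rate:)

# Sequential read test of each pool member, bypassing the page cache.
for d in sdb sdc sdd sde sdf sdg sdh sdi; do
    echo -n "$d: "
    dd if=/dev/$d of=/dev/null bs=1M count=1024 iflag=direct 2>&1 | tail -n 1
done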

Thanks!

  • I just moved over to btrfs and all my problems are gone :) Uncached read and write speeds over the 10Gb SMB connection are 500 MB/s, and cached reads and writes are over 1 GB/s. The only tuning I did in fstab was "defaults,compress=lzo,autodefrag,x-systemd.device-timeout=0". I created the raid10 with "mkfs.btrfs -f -m raid10 -d raid10 -L myraid /dev/sdb /dev/sdc /dev/sdd /dev/sde /dev/sdf /dev/sdg /dev/sdh /dev/sdi". – CB40 Aug 08 '16 at 02:13
    I'm not sure I'd recommend btrfs these days, but I'm glad you've solved the issue to your satisfaction. If you were using ZFS, there are a few settings that could have helped you. – ewwhite Aug 08 '16 at 03:31
    @CB40 One isolated benchmark caused you to switch from ZFS to BTRFS? What are you going to switch to when BTRFS produces something not quite right? – Andrew Henle Aug 08 '16 at 15:57
  • It was not one isolated benchmark. I have been fighting this for months and could never get my uncached read speeds above 112 MB/s using rsync, Samba over 10Gb Ethernet, timed cp, etc. I tried raidz1, raidz2, raid0, and raid10 with two through four vdevs, multiple versions of ZFS, and lots of tuning. It still would not go over 112 MB/s. I made the switch to btrfs and all those tests give me 500 MB/s+ on uncached files. I did some reading before making the plunge. – CB40 Aug 08 '16 at 20:16
  • http://drdabbles.us/journal/2014/2/15/my-case-for-btrfs-over-zfs.html https://events.linuxfoundation.org/sites/events/files/slides/Btrfs_1.pdf https://www.diva-portal.org/smash/get/diva2:822493/FULLTEXT01.pdf – CB40 Aug 08 '16 at 20:17
  • That first link in particular looks more like a rebuttal to some other blog post about ZFS than an actual comparison of Btrfs to ZFS. The second paragraph of the blog post you linked also pretty much confirms what ewwhite alluded to: "BTRFS /.../ is a filesystem under active development whereas ZFS is stable and mature. That should be anybody's first bullet point, BTW. ZFS has been deployed in production environments for a long while with great success, while BTRFS is only now gaining real traction." In a server deployment, a "filesystem under active development" is likely not what you want! – user Aug 17 '16 at 14:24

0 Answers