Hi there,
I am trying to run DAOS in a real physical environment, but I have run into a problem. Below is the output I get when I try to start the DAOS system with 3 storage nodes.
Could anybody help me? If you need any more information, please let me know. Thanks.
By the way, another question: in the daos_server.yml file, which files should I put into the client_cert_dir (/etc/daos/clients), or should I just leave it empty?
[root@client ~]# clush -w snode[1-3] daos_server start -o /etc/daos/daos_server.yml
snode2: daos_server logging to file /tmp/daos_control.log
snode2: DEBUG 06:28:33.811928 start.go:105: Switching control log level to DEBUG
snode2: DEBUG 06:28:33.812236 netdetect.go:829: Calling ValidateProviderConfig with eno3, ofi+verbs;ofi_rxm
snode2: DEBUG 06:28:33.812276 netdetect.go:880: Input provider string: ofi+verbs;ofi_rxm
snode1: daos_server logging to file /tmp/daos_control.log
snode1: DEBUG 17:27:30.107559 start.go:105: Switching control log level to DEBUG
snode1: DEBUG 17:27:30.107776 netdetect.go:829: Calling ValidateProviderConfig with eno3, ofi+verbs;ofi_rxm
snode1: DEBUG 17:27:30.107811 netdetect.go:880: Input provider string: ofi+verbs;ofi_rxm
snode3: daos_server logging to file /tmp/daos_control.log
snode3: DEBUG 17:28:01.164972 start.go:105: Switching control log level to DEBUG
snode3: DEBUG 17:28:01.165212 netdetect.go:829: Calling ValidateProviderConfig with eno3, ofi+verbs;ofi_rxm
snode3: DEBUG 17:28:01.165251 netdetect.go:880: Input provider string: ofi+verbs;ofi_rxm
snode2: DEBUG 06:28:34.026543 netdetect.go:912: There are 0 hfi1 devices in the system
snode2: DEBUG 06:28:34.026642 netdetect.go:844: Device eno3 supports provider: ofi+verbs;ofi_rxm
snode2: DEBUG 06:28:34.027690 config.go:391: Active config saved to /etc/daos/.daos_server.active.yml (read-only)
snode2: DEBUG 06:28:34.028020 server.go:137: automatic NVMe prepare req: {ForwardableRequest:{Forwarded:false} HugePageCount:4096 PCIWhitelist: TargetUser:root ResetOnly:false}
snode1: DEBUG 17:27:30.326383 netdetect.go:912: There are 0 hfi1 devices in the system
snode1: DEBUG 17:27:30.326540 netdetect.go:844: Device eno3 supports provider: ofi+verbs;ofi_rxm
snode1: DEBUG 17:27:30.327572 config.go:391: Active config saved to /etc/daos/.daos_server.active.yml (read-only)
snode1: DEBUG 17:27:30.327889 server.go:137: automatic NVMe prepare req: {ForwardableRequest:{Forwarded:false} HugePageCount:4096 PCIWhitelist: TargetUser:root ResetOnly:false}
snode3: DEBUG 17:28:01.386394 netdetect.go:912: There are 0 hfi1 devices in the system
snode3: DEBUG 17:28:01.386496 netdetect.go:844: Device eno3 supports provider: ofi+verbs;ofi_rxm
snode3: DEBUG 17:28:01.387691 config.go:391: Active config saved to /etc/daos/.daos_server.active.yml (read-only)
snode3: DEBUG 17:28:01.387992 server.go:137: automatic NVMe prepare req: {ForwardableRequest:{Forwarded:false} HugePageCount:4096 PCIWhitelist: TargetUser:root ResetOnly:false}
snode2: DEBUG 06:28:42.730225 netdetect.go:591: Searching for a device alias for: eno3
snode2: DEBUG 06:28:42.961947 netdetect.go:334: There are 2 children of this parent node.
snode2: DEBUG 06:28:42.962026 netdetect.go:616: Device alias for eno3 is i40iw0
snode3: DEBUG 17:28:10.257330 netdetect.go:591: Searching for a device alias for: eno3
snode2: ERROR: /usr/bin/daos_admin EAL: No available hugepages reported in hugepages-1048576kB
snode3: DEBUG 17:28:10.492866 netdetect.go:334: There are 2 children of this parent node.
snode3: DEBUG 17:28:10.492945 netdetect.go:616: Device alias for eno3 is i40iw1
snode3: ERROR: /usr/bin/daos_admin EAL: No available hugepages reported in hugepages-1048576kB
snode1: DEBUG 17:27:40.062619 netdetect.go:591: Searching for a device alias for: eno3
snode1: DEBUG 17:27:40.294456 netdetect.go:334: There are 2 children of this parent node.
snode1: DEBUG 17:27:40.294538 netdetect.go:616: Device alias for eno3 is i40iw1
snode1: ERROR: /usr/bin/daos_admin EAL: No available hugepages reported in hugepages-1048576kB
snode2: DAOS Control Server (pid 10183) listening on 0.0.0.0:10001
snode2: DEBUG 06:28:45.672802 instance_exec.go:55: instance 0: checking if storage is formatted
snode2: Waiting for DAOS I/O Server instance 0 storage to be ready...
snode2: DEBUG 06:28:45.672850 instance_storage.go:88: /mnt/daos: checking formatting
snode3: DAOS Control Server (pid 11997) listening on 0.0.0.0:10001
snode3: DEBUG 17:28:12.955111 instance_exec.go:55: instance 0: checking if storage is formatted
snode3: Waiting for DAOS I/O Server instance 0 storage to be ready...
snode3: DEBUG 17:28:12.955181 instance_storage.go:88: /mnt/daos: checking formatting
snode1: DAOS Control Server (pid 14644) listening on 0.0.0.0:10001
snode1: DEBUG 17:27:42.844751 instance_exec.go:55: instance 0: checking if storage is formatted
snode1: Waiting for DAOS I/O Server instance 0 storage to be ready...
snode1: DEBUG 17:27:42.844827 instance_storage.go:88: /mnt/daos: checking formatting
snode2: DEBUG 06:28:47.811976 instance_storage.go:104: /mnt/daos (dcpm) needs format: false
snode2: DEBUG 06:28:47.812056 instance_storage.go:135: instance 0: no SCM format required; checking for superblock
snode2: DEBUG 06:28:47.812109 superblock.go:112: /mnt/daos: checking superblock
snode2: DEBUG 06:28:47.813721 instance_storage.go:141: instance 0: superblock not needed
snode2: SCM @ /mnt/daos: 532 GB Total/528 GB Avail
snode2: DEBUG 06:28:47.814325 instance_exec.go:93: instance 0: awaiting DAOS I/O Server init
snode2: DEBUG 06:28:47.814536 exec.go:115: daos_io_server:0 args: [-t 8 -x 0 -f 1 -g daos_server -d /tmp/daos_sockets -s /mnt/daos -n /mnt/daos/daos_nvme.conf -i 10184 -I 0]
snode2: DEBUG 06:28:47.814617 exec.go:116: daos_io_server:0 env: [OFI_INTERFACE=eno3 CRT_TIMEOUT=0 DAOS_MD_CAP=1024 CRT_CTX_SHARE_ADDR=0 CRT_PHY_ADDR_STR=ofi+verbs;ofi_rxm D_LOG_FILE=/tmp/server0.log OFI_PORT=31416 FI_SOCKETS_MAX_CONN_RETRY=1 FI_SOCKETS_CONN_TIMEOUT=2000 OFI_DOMAIN=i40iw0 D_LOG_MASK=ERR]
snode2: Starting I/O server instance 0: /usr/bin/daos_io_server
snode3: DEBUG 17:28:15.086695 instance_storage.go:104: /mnt/daos (dcpm) needs format: false
snode3: DEBUG 17:28:15.086764 instance_storage.go:135: instance 0: no SCM format required; checking for superblock
snode3: DEBUG 17:28:15.086822 superblock.go:112: /mnt/daos: checking superblock
snode3: DEBUG 17:28:15.088428 instance_storage.go:141: instance 0: superblock not needed
snode3: SCM @ /mnt/daos: 532 GB Total/528 GB Avail
snode3: DEBUG 17:28:15.089161 instance_exec.go:93: instance 0: awaiting DAOS I/O Server init
snode3: DEBUG 17:28:15.089521 exec.go:115: daos_io_server:0 args: [-t 8 -x 0 -f 1 -g daos_server -d /tmp/daos_sockets -s /mnt/daos -n /mnt/daos/daos_nvme.conf -i 11998 -I 0]
snode3: DEBUG 17:28:15.089610 exec.go:116: daos_io_server:0 env: [OFI_DOMAIN=i40iw1 DAOS_MD_CAP=1024 FI_SOCKETS_MAX_CONN_RETRY=1 D_LOG_MASK=ERR CRT_PHY_ADDR_STR=ofi+verbs;ofi_rxm OFI_INTERFACE=eno3 OFI_PORT=31416 CRT_CTX_SHARE_ADDR=0 CRT_TIMEOUT=0 FI_SOCKETS_CONN_TIMEOUT=2000 D_LOG_FILE=/tmp/server0.log]
snode3: Starting I/O server instance 0: /usr/bin/daos_io_server
snode2: daos_io_server:0 05/12-06:28:47.96 snode2 Using legacy core allocation algorithm
snode2: instance 0 exited: instance 0 exited prematurely: /usr/bin/daos_io_server (instance 0) exited: exit status 1
snode2: ERROR: removing socket file: removing instance 0 socket file: no dRPC client set (data plane not started?)
snode3: daos_io_server:0 05/11-17:28:15.24 snode3 Using legacy core allocation algorithm
snode3: instance 0 exited: instance 0 exited prematurely: /usr/bin/daos_io_server (instance 0) exited: exit status 1
snode3: ERROR: removing socket file: removing instance 0 socket file: no dRPC client set (data plane not started?)
snode1: DEBUG 17:27:44.741324 instance_storage.go:104: /mnt/daos (dcpm) needs format: false
snode1: DEBUG 17:27:44.741373 instance_storage.go:135: instance 0: no SCM format required; checking for superblock
snode1: DEBUG 17:27:44.741404 superblock.go:112: /mnt/daos: checking superblock
snode1: DEBUG 17:27:44.742695 instance_storage.go:141: instance 0: superblock not needed
snode1: SCM @ /mnt/daos: 532 GB Total/528 GB Avail
snode1: DEBUG 17:27:44.743511 instance.go:382: instance 0: bootstrapping system member: rank 0, addr 10.158.24.33:10001
snode1: DEBUG 17:27:44.743543 instance_exec.go:93: instance 0: awaiting DAOS I/O Server init
snode1: DEBUG 17:27:44.743998 exec.go:115: daos_io_server:0 args: [-t 8 -x 0 -f 1 -g daos_server -d /tmp/daos_sockets -s /mnt/daos -n /mnt/daos/daos_nvme.conf -i 14645 -I 0]
snode1: DEBUG 17:27:44.744066 exec.go:116: daos_io_server:0 env: [FI_SOCKETS_CONN_TIMEOUT=2000 OFI_DOMAIN=i40iw1 DAOS_MD_CAP=1024 D_LOG_MASK=ERR CRT_PHY_ADDR_STR=ofi+verbs;ofi_rxm OFI_PORT=31416 CRT_CTX_SHARE_ADDR=0 CRT_TIMEOUT=0 FI_SOCKETS_MAX_CONN_RETRY=1 D_LOG_FILE=/tmp/server0.log OFI_INTERFACE=eno3]
snode1: Starting I/O server instance 0: /usr/bin/daos_io_server
snode1: daos_io_server:0 05/11-17:27:44.89 snode1 Using legacy core allocation algorithm
snode1: instance 0 exited: instance 0 exited prematurely: /usr/bin/daos_io_server (instance 0) exited: exit status 1
snode1: ERROR: removing socket file: removing instance 0 socket file: no dRPC client set (data plane not started?)