2018-09-01 00:08:36 +00:00
|
|
|
package nvml
|
|
|
|
|
|
|
|
import (
|
|
|
|
"github.com/NVIDIA/gpu-monitoring-tools/bindings/go/nvml"
|
|
|
|
)
|
|
|
|
|
|
|
|
// Initialize nvml library by locating nvml shared object file and calling ldopen
|
|
|
|
func (n *nvmlDriver) Initialize() error {
|
|
|
|
return nvml.Init()
|
|
|
|
}
|
|
|
|
|
|
|
|
// Shutdown stops any further interaction with nvml
|
|
|
|
func (n *nvmlDriver) Shutdown() error {
|
|
|
|
return nvml.Shutdown()
|
|
|
|
}
|
|
|
|
|
|
|
|
// SystemDriverVersion returns installed driver version
|
|
|
|
func (n *nvmlDriver) SystemDriverVersion() (string, error) {
|
|
|
|
return nvml.GetDriverVersion()
|
|
|
|
}
|
|
|
|
|
|
|
|
// DeviceCount reports number of available GPU devices
|
|
|
|
func (n *nvmlDriver) DeviceCount() (uint, error) {
|
|
|
|
return nvml.GetDeviceCount()
|
|
|
|
}
|
|
|
|
|
|
|
|
// DeviceInfoByIndex returns DeviceInfo for index GPU in system device list
|
|
|
|
func (n *nvmlDriver) DeviceInfoByIndex(index uint) (*DeviceInfo, error) {
|
|
|
|
device, err := nvml.NewDevice(index)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
deviceMode, err := device.GetDeviceMode()
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
return &DeviceInfo{
|
|
|
|
UUID: device.UUID,
|
|
|
|
Name: device.Model,
|
|
|
|
MemoryMiB: device.Memory,
|
|
|
|
PowerW: device.Power,
|
|
|
|
BAR1MiB: device.PCI.BAR1,
|
|
|
|
PCIBandwidthMBPerS: device.PCI.Bandwidth,
|
|
|
|
PCIBusID: device.PCI.BusID,
|
|
|
|
CoresClockMHz: device.Clocks.Cores,
|
|
|
|
MemoryClockMHz: device.Clocks.Memory,
|
|
|
|
DisplayState: deviceMode.DisplayInfo.Mode.String(),
|
|
|
|
PersistenceMode: deviceMode.Persistence.String(),
|
|
|
|
}, nil
|
|
|
|
}
|
2018-09-07 17:13:50 +00:00
|
|
|
|
|
|
|
// DeviceInfoByIndex returns DeviceInfo and DeviceStatus for index GPU in system device list
|
|
|
|
func (n *nvmlDriver) DeviceInfoAndStatusByIndex(index uint) (*DeviceInfo, *DeviceStatus, error) {
|
|
|
|
device, err := nvml.NewDevice(index)
|
|
|
|
if err != nil {
|
|
|
|
return nil, nil, err
|
|
|
|
}
|
|
|
|
status, err := device.Status()
|
|
|
|
if err != nil {
|
|
|
|
return nil, nil, err
|
|
|
|
}
|
|
|
|
return &DeviceInfo{
|
|
|
|
UUID: device.UUID,
|
|
|
|
Name: device.Model,
|
|
|
|
MemoryMiB: device.Memory,
|
|
|
|
PowerW: device.Power,
|
|
|
|
BAR1MiB: device.PCI.BAR1,
|
|
|
|
PCIBandwidthMBPerS: device.PCI.Bandwidth,
|
|
|
|
PCIBusID: device.PCI.BusID,
|
|
|
|
CoresClockMHz: device.Clocks.Cores,
|
|
|
|
MemoryClockMHz: device.Clocks.Memory,
|
|
|
|
}, &DeviceStatus{
|
|
|
|
TemperatureC: status.Temperature,
|
|
|
|
GPUUtilization: status.Utilization.GPU,
|
|
|
|
MemoryUtilization: status.Utilization.Memory,
|
|
|
|
EncoderUtilization: status.Utilization.Encoder,
|
|
|
|
DecoderUtilization: status.Utilization.Decoder,
|
|
|
|
UsedMemoryMiB: status.Memory.Global.Used,
|
|
|
|
ECCErrorsL1Cache: status.Memory.ECCErrors.L1Cache,
|
|
|
|
ECCErrorsL2Cache: status.Memory.ECCErrors.L2Cache,
|
|
|
|
ECCErrorsDevice: status.Memory.ECCErrors.Device,
|
|
|
|
PowerUsageW: status.Power,
|
|
|
|
BAR1UsedMiB: status.PCI.BAR1Used,
|
|
|
|
}, nil
|
|
|
|
}
|