// NVIDIA GPU loadable plugin — source.
// - `pendingEnabled` (bool): `true` if ECC will be enabled on the next reboot, `false` if it will be disabled.
/*
** Zabbix
** Copyright (C) 2001-2024 Zabbix SIA
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
** http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
**/
package nvml
// Runner defines the interface for an NVML runner: initializing the NVML
// library, counting and looking up NVIDIA devices, querying the NVML and
// driver versions, and shutting down / releasing the loaded library.
//
// Where a method exists in both a plain and a V2-suffixed form, the V2 form
// uses the NVML v2 interface and the plain form uses the older one.
//
//nolint:interfacebloat
type Runner interface {
// Init initializes the NVML library using the older NVML interface.
Init() error
// InitV2 initializes the NVML library using the NVML v2 interface.
InitV2() error
// GetDeviceCount retrieves the number of NVIDIA devices using the standard NVML interface.
GetDeviceCount() (uint, error)
// GetDeviceCountV2 retrieves the number of NVIDIA devices using the NVML v2 interface.
GetDeviceCountV2() (uint, error)
// GetDeviceByIndexV2 retrieves a handle to an NVIDIA device by its index using the NVML v2 interface.
GetDeviceByIndexV2(index uint) (Device, error)
// GetDeviceByUUID retrieves a handle to an NVIDIA device by its UUID.
GetDeviceByUUID(uuid string) (Device, error)
// GetNVMLVersion retrieves the version of the NVML library currently in use.
GetNVMLVersion() (string, error)
// GetDriverVersion retrieves the version of the NVIDIA driver currently in use.
GetDriverVersion() (string, error)
// ShutdownNVML shuts down NVML and cleans up resources.
ShutdownNVML() error
// Close releases the resources associated with the loaded library in the Runner.
Close() error
}
// Device defines the methods for interacting with a GPU device.
//
//nolint:interfacebloat
type Device interface {
// GetTemperature retrieves the temperature of the NVIDIA device using the default sensor.
GetTemperature() (int, error)
// GetMemoryInfo retrieves memory information for the NVIDIA device.
GetMemoryInfo() (*MemoryInfo, error)