当我们的环境下拥有多块GPU时,有时候需要指定某一块GPU来运行
这样默认是使用第一块GPU:
// Load a TorchScript module; without a device argument the tensors stay on
// the device recorded in the file, and .to(at::kCUDA) then moves the module
// to the current CUDA device (GPU 0 by default).
torch::jit::script::Module module = torch::jit::load(filename);
// `load` returns `Module` by value (see the declarations below), so member
// access uses '.', not '->'.
module.to(at::kCUDA);
以选择gpuid = 1 的GPU为例:
// Select GPU 1 explicitly: passing a Device to load() deserializes the
// weights straight onto that card.
int gpu_id = 1;
torch::jit::script::Module module =
    torch::jit::load(filename, torch::Device(torch::DeviceType::CUDA, gpu_id));
// NOTE: a plain module.to(at::kCUDA) here would move the module back to the
// current CUDA device (GPU 0) and defeat the explicit selection above, so we
// target the same device. (`load` returns `Module` by value, hence '.').
module.to(torch::Device(torch::DeviceType::CUDA, gpu_id));
/// Loads a serialized `Module` from the given `istream`.
///
/// The istream must contain a serialized `Module`, exported via
/// `torch::jit::ExportModule` in C++.
///
/// \param in input stream positioned at the start of the serialized module.
/// \param device optional target device; when set, tensors are restored
///        directly onto it instead of the device recorded at save time.
/// \param extra_files map of extra archive entries to read; filled in place
///        (note the mutable-reference default argument).
TORCH_API Module load(
std::istream& in,
c10::optional<c10::Device> device = c10::nullopt,
ExtraFilesMap& extra_files = default_extra_files);
/// Loads a serialized `Module` from the given `filename`.
///
/// The file stored at the location given in `filename` must contain a
/// serialized `Module`, exported either via `ScriptModule.save()` in
/// Python or `torch::jit::ExportModule` in C++.
///
/// \param filename path of the saved module archive on disk.
/// \param device optional target device; when set, tensors are restored
///        directly onto it instead of the device recorded at save time.
/// \param extra_files map of extra archive entries to read; filled in place
///        (note the mutable-reference default argument).
TORCH_API Module load(
const std::string& filename,
c10::optional<c10::Device> device = c10::nullopt,
ExtraFilesMap& extra_files = default_extra_files);
/// Loads a serialized `Module` from the given `rai`.
///
/// The reader adapter, which is for customized input stream, must contain a
/// serialized `Module`, exported either via `ScriptModule.save()` in
/// Python or `torch::jit::ExportModule` in C++.
///
/// \param rai owning pointer to a custom read adapter; ownership is
///        transferred to the loader.
/// \param device optional target device; when set, tensors are restored
///        directly onto it instead of the device recorded at save time.
/// \param extra_files map of extra archive entries to read; filled in place
///        (note the mutable-reference default argument).
TORCH_API Module load(
std::unique_ptr<caffe2::serialize::ReadAdapterInterface> rai,
c10::optional<c10::Device> device = c10::nullopt,
ExtraFilesMap& extra_files = default_extra_files);
libtorch 加载torchscript模型有三个重载函数,我们这里用模型文件的方式,所以选第二个。
第一个参数传模型文件路径,第二个参数传一个 Device类,看源码:
namespace c10 {
/// An index representing a specific device; e.g., the 1 in GPU 1.
/// A DeviceIndex is not independently meaningful without knowing
/// the DeviceType it is associated with; try to use Device rather
/// than DeviceIndex directly.
using DeviceIndex = int16_t;
/// Represents a compute device on which a tensor is located. A device is
/// uniquely identified by a type, which specifies the type of machine it is
/// (e.g. CPU or CUDA GPU), and a device index or ordinal, which identifies the
/// specific compute device when there is more than one of a certain type. The
/// device index is optional, and in its defaulted state represents (abstractly)
/// "the current device". Further, there are two constraints on the value of the
/// device index, if one is explicitly stored:
/// 1. A negative index represents the current device, a non-negative index
/// represents a specific, concrete device,
/// 2. When the device type is CPU, the device index must be zero.
struct C10_API Device final {
  using Type = DeviceType;

  /// Builds a `Device` out of a `DeviceType` plus an optional ordinal.
  /// Omitting the index (-1) means "whatever the current device is".
  /* implicit */ Device(DeviceType type, DeviceIndex index = -1)
      : type_(type), index_(index) {
    validate();
  }

  /// Builds a `Device` from a textual description such as "cpu", "cuda"
  /// or "cuda:1". The accepted schema is:
  /// `(cpu|cuda)[:<device-index>]`
  /// where `cpu` or `cuda` selects the device type and the optional
  /// `:<device-index>` suffix selects a specific ordinal.
  /* implicit */ Device(const std::string& device_string);

  /// Two devices compare equal when both their type and index agree.
  bool operator==(const Device& other) const noexcept {
    return type_ == other.type_ && index_ == other.index_;
  }

  /// Two devices differ when either their type or their index differs.
  bool operator!=(const Device& other) const noexcept {
    return type_ != other.type_ || index_ != other.index_;
  }

  /// Overwrites the stored device index.
  void set_index(DeviceIndex index) {
    index_ = index;
  }

  /// The kind of device (CPU, CUDA, ...).
  DeviceType type() const noexcept {
    return type_;
  }

  /// The device ordinal, or -1 when no explicit index was given.
  DeviceIndex index() const noexcept {
    return index_;
  }

  /// Whether an explicit (non-default) index is stored.
  bool has_index() const noexcept {
    return index_ != -1;
  }

  /// True iff this is a CUDA device.
  bool is_cuda() const noexcept {
    return type_ == DeviceType::CUDA;
  }

  /// True iff this is a CPU device.
  bool is_cpu() const noexcept {
    return type_ == DeviceType::CPU;
  }

  /// Same string as returned from operator<<.
  std::string str() const;

 private:
  DeviceType type_;
  DeviceIndex index_ = -1;

  // Enforces the class invariants: the index is -1 or non-negative, and
  // a CPU device may only carry index -1 or 0.
  void validate() {
    TORCH_CHECK(index_ >= -1,
        "Device index must be -1 or non-negative, got ", index_);
    TORCH_CHECK(type_ != DeviceType::CPU || index_ <= 0,
        "CPU device index must be -1 or zero, got ", index_);
  }
};
看第一个构造函数,第一个参数设备类型,第二个参数设备索引
设备类型是个枚举:
/// Kind of compute device a tensor/module can live on; backed by int16_t.
/// For GPU selection with libtorch on NVIDIA hardware, use CUDA.
enum class DeviceType : int16_t {
CPU = 0,
CUDA = 1, // CUDA.
MKLDNN = 2, // Reserved for explicit MKLDNN
OPENGL = 3, // OpenGL
OPENCL = 4, // OpenCL
IDEEP = 5, // IDEEP.
HIP = 6, // AMD HIP
FPGA = 7, // FPGA
MSNPU = 8, // MSNPU
XLA = 9, // XLA / TPU
Vulkan = 10, // Vulkan
// NB: If you add more devices:
// - Change the implementations of DeviceTypeName and isValidDeviceType
// in DeviceType.cpp
// - Change the number below
COMPILE_TIME_MAX_DEVICE_TYPES = 11,
ONLY_FOR_TEST = 20901, // This device type is only for test.
};
我们这里是nvidia显卡,选择torch::DeviceType::CUDA
第二个参数就是显卡id,0代表第一块显卡,1代表第二块显卡