# Convert MNIST h5 transformer model to ggml format
# Load the (state_dict) saved model using PyTorch
# Iterate over all variables and write them to a binary file.
# For each variable, write the following:
# - Number of dimensions (int)
# - Name length (int)
# - Dimensions (int[n_dims])
# - Name (char[name_length])
# - Data (float[n_elements], where n_elements is the product of all dimensions)
# At the start of the ggml file we write the model parameters
- 这个简单的版本没有Name的部分,导出的数据最终如下
ggml-model-f32.bin |
注释 |
0x67676d6c |
magic |
2 |
len(fc1.weight.shape) |
784 |
fc1.weight.shape = (500, 784) |
500 |
fc1.weight.shape = (500, 784) |
data |
fc1.weight |
1 |
len(fc1.bias.shape) |
500 |
fc1.bias.shape = (500, ) |
data |
fc1.bias |
2 |
len(fc2.weight.shape) |
500 |
fc2.weight.shape = (10, 500) |
10 |
fc2.weight.shape = (10, 500) |
data |
fc2.weight |
1 |
len(fc2.bias.shape) |
10 |
fc2.bias.shape =(10,) |
data |
fc2.bias |
import sys
import struct
import json
import numpy as np
import re
import torch
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.autograd import Variable
# Convert a PyTorch state_dict file into the simplified ggml binary layout:
#   magic (int32), then for every tensor: n_dims (int32), the dimensions in
#   reversed order (int32 each), and the raw float32 data.

# Require exactly one command-line argument: the path of the saved state_dict.
if len(sys.argv) != 2:
    print("Usage: convert-h5-to-ggml.py model\n")
    # Stop here; continuing would fail on sys.argv[1] below.
    sys.exit(1)

# Input state_dict path and the fixed output path of the ggml model.
state_dict_file = sys.argv[1]
fname_out = "models/mnist/ggml-model-f32.bin"

# Load the state_dict saved by PyTorch, mapping every tensor onto the CPU.
state_dict = torch.load(state_dict_file, map_location=torch.device('cpu'))

# Open the output in binary write mode; the `with` block closes the file
# even if converting one of the tensors raises.
with open(fname_out, "wb") as fout:
    # File identifier: the integer 0x67676d6c packed with struct format "i".
    fout.write(struct.pack("i", 0x67676d6c))  # magic: ggml in hex

    # Walk every entry of the state_dict in its stored order.
    for name in state_dict.keys():
        # Drop size-1 axes and convert the tensor to a NumPy array.
        data = state_dict[name].squeeze().numpy()
        print("Processing variable: " + name + " with shape: ", data.shape)
        n_dims = len(data.shape)

        # Header part 1: number of dimensions.
        fout.write(struct.pack("i", n_dims))

        # Header part 2: the dimensions, last axis first.
        data = data.astype(np.float32)
        for i in range(n_dims):
            fout.write(struct.pack("i", data.shape[n_dims - 1 - i]))

        # Payload: the raw float32 values of the array.
        data.tofile(fout)

print("Done. Output file: " + fname_out)
- tofile 是 NumPy 提供的把数组内容写入文件的函数;读取时使用 fromfile。
$:~/ggml/ggml/examples/mnist$ python3 ./convert-h5-to-ggml.py ./models/mnist/mnist_model.state_dict
OrderedDict([('fc1.weight', tensor([[ 0.0130, 0.0034, -0.0287, ..., -0.0268, -0.0352, -0.0056],
[-0.0134, 0.0077, -0.0028, ..., 0.0356, 0.0143, -0.0107],
[-0.0329, 0.0154, -0.0167, ..., 0.0155, 0.0127, -0.0309],
[-0.0216, -0.0302, 0.0085, ..., 0.0301, 0.0073, 0.0153],
[ 0.0289, 0.0181, 0.0326, ..., 0.0107, -0.0314, -0.0349],
[ 0.0273, 0.0127, 0.0105, ..., 0.0090, -0.0007, 0.0190]])), ('fc1.bias', tensor([ 1.9317e-01, -7.4255e-02, 8.3417e-02, 1.1681e-01, 7.5499e-03,
8.7627e-02, -7.9260e-03, 6.8504e-02, 2.2217e-02, 9.7918e-02,
1.5195e-01, 8.3765e-02, 1.4237e-02, 1.0847e-02, 9.6959e-02,
-1.2500e-01, 4.2406e-02, -2.4611e-02, 5.9198e-03, 8.9767e-02,
1.3460e-03, 2.9106e-02, -4.0620e-02, 9.7568e-02, 8.5670e-02])), ('fc2.weight', tensor([[-0.0197, -0.0814, -0.3992, ..., 0.2697, 0.0386, -0.5380],
[-0.4174, 0.0572, -0.1331, ..., -0.2564, -0.3926, -0.0514],
[-0.2988, -0.1119, 0.0517, ..., 0.3296, 0.0800, 0.0651]])), ('fc2.bias', tensor([-0.1008, -0.1179, -0.0558, -0.0626, 0.0385, -0.0222, 0.0188, -0.1296,
0.1507, 0.0033]))])
Processing variable: fc1.weight with shape: (500, 784)
Processing variable: fc1.bias with shape: (500,)
Processing variable: fc2.weight with shape: (10, 500)
Processing variable: fc2.bias with shape: (10,)
Done. Output file: models/mnist/ggml-model-f32.bin
// Reads exactly `size` bytes from `fin` into `dst`.
// Returns false when the stream fails or ends before `size` bytes were read.
static bool mnist_read_bytes(std::istream & fin, void * dst, size_t size) {
    fin.read(static_cast<char *>(dst), static_cast<std::streamsize>(size));
    return !fin.fail();
}

// Reads one tensor header: an int32 dimension count followed by that many int32 extents.
// Succeeds only if the count equals `n_expected` and every extent is positive, so the
// caller's `ne` array (which must hold `n_expected` entries) can never be overrun.
static bool mnist_read_shape(std::istream & fin, int32_t n_expected, int32_t * ne) {
    int32_t n_dims = 0;
    if (!mnist_read_bytes(fin, &n_dims, sizeof(n_dims)) || n_dims != n_expected) {
        return false;
    }
    for (int32_t i = 0; i < n_dims; ++i) {
        if (!mnist_read_bytes(fin, &ne[i], sizeof(ne[i])) || ne[i] <= 0) {
            return false;
        }
    }
    return true;
}

// Loads the four tensors of the MNIST model (fc1 weight/bias, fc2 weight/bias) from the
// binary file `fname` into `model`.
//
// The file is expected to start with GGML_FILE_MAGIC, followed for each tensor by an int32
// dimension count, the int32 extents, and the float32 data. model.hparams.n_input,
// n_hidden and n_classes are overwritten with the extents found in the file.
//
// Returns true on success. Returns false (after printing a message to stderr) when the
// file cannot be opened, the magic does not match, ggml_init() fails, or a tensor header
// or payload is malformed or truncated.
// NOTE(review): on a failure after ggml_init() the context stays in model.ctx; presumably
// the caller is responsible for releasing it - confirm.
bool mnist_model_load(const std::string & fname, mnist_model & model) {
    printf("%s: loading model from '%s'\n", __func__, fname.c_str());

    auto fin = std::ifstream(fname, std::ios::binary);
    if (!fin) {
        fprintf(stderr, "%s: failed to open '%s'\n", __func__, fname.c_str());
        return false;
    }

    // Shared reporting for malformed / truncated tensor sections.
    // (__func__ inside a lambda would name the lambda, so the name is spelled out.)
    const auto fail = [&fname](const char * what) {
        fprintf(stderr, "%s: failed to read %s from '%s'\n", "mnist_model_load", what, fname.c_str());
        return false;
    };

    // verify magic
    {
        uint32_t magic = 0; // initialised so a failed read can never compare garbage
        if (!mnist_read_bytes(fin, &magic, sizeof(magic)) || magic != GGML_FILE_MAGIC) {
            fprintf(stderr, "%s: invalid model file '%s' (bad magic)\n", __func__, fname.c_str());
            return false;
        }
    }

    auto & ctx = model.ctx;

    size_t ctx_size = 0;

    // Estimate the tensor memory from the hparams currently stored in the model.
    // NOTE(review): these are the values present before the file is parsed; if the file
    // holds larger tensors the context may be too small - confirm the defaults match.
    {
        const auto & hparams = model.hparams;

        const int n_input   = hparams.n_input;
        const int n_hidden  = hparams.n_hidden;
        const int n_classes = hparams.n_classes;

        ctx_size += n_input * n_hidden * ggml_type_sizef(GGML_TYPE_F32);   // fc1 weight
        ctx_size +=           n_hidden * ggml_type_sizef(GGML_TYPE_F32);   // fc1 bias

        ctx_size += n_hidden * n_classes * ggml_type_sizef(GGML_TYPE_F32); // fc2 weight
        ctx_size +=            n_classes * ggml_type_sizef(GGML_TYPE_F32); // fc2 bias

        printf("%s: ggml ctx size = %6.2f MB\n", __func__, ctx_size/(1024.0*1024.0));
    }

    // create the ggml context
    {
        // Only the first member is given; the remaining members are value-initialised.
        struct ggml_init_params params = {
            ctx_size + 1024*1024, // tensor data plus 1 MB of headroom
        };

        model.ctx = ggml_init(params);
        if (!model.ctx) {
            fprintf(stderr, "%s: ggml_init() failed\n", __func__);
            return false;
        }
    }

    // fc1 layer
    {
        int32_t ne_weight[2] = { 1, 1 };
        if (!mnist_read_shape(fin, 2, ne_weight)) {
            return fail("fc1_weight shape");
        }

        // fc1 dimensions are taken from the file
        model.hparams.n_input  = ne_weight[0];
        model.hparams.n_hidden = ne_weight[1];

        model.fc1_weight = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, model.hparams.n_input, model.hparams.n_hidden);
        if (!mnist_read_bytes(fin, model.fc1_weight->data, ggml_nbytes(model.fc1_weight))) {
            return fail("fc1_weight data");
        }
        ggml_set_name(model.fc1_weight, "fc1_weight");

        // The bias carries its own header; its single extent must match the weight.
        int32_t ne_bias[1] = { 1 };
        if (!mnist_read_shape(fin, 1, ne_bias) || ne_bias[0] != model.hparams.n_hidden) {
            return fail("fc1_bias shape");
        }

        model.fc1_bias = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, model.hparams.n_hidden);
        if (!mnist_read_bytes(fin, model.fc1_bias->data, ggml_nbytes(model.fc1_bias))) {
            return fail("fc1_bias data");
        }
        ggml_set_name(model.fc1_bias, "fc1_bias");

        // NOTE(review): marker value with no consumer visible here; presumably a
        // debugging/testing aid - confirm before removing.
        model.fc1_bias->op_params[0] = 0xdeadbeef;
    }

    // fc2 layer
    {
        int32_t ne_weight[2] = { 1, 1 };
        if (!mnist_read_shape(fin, 2, ne_weight) || ne_weight[0] != model.hparams.n_hidden) {
            return fail("fc2_weight shape");
        }

        // fc2 output size is taken from the file; its input size must equal n_hidden
        model.hparams.n_classes = ne_weight[1];

        model.fc2_weight = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, model.hparams.n_hidden, model.hparams.n_classes);
        if (!mnist_read_bytes(fin, model.fc2_weight->data, ggml_nbytes(model.fc2_weight))) {
            return fail("fc2_weight data");
        }
        ggml_set_name(model.fc2_weight, "fc2_weight");

        int32_t ne_bias[1] = { 1 };
        if (!mnist_read_shape(fin, 1, ne_bias) || ne_bias[0] != model.hparams.n_classes) {
            return fail("fc2_bias shape");
        }

        model.fc2_bias = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, model.hparams.n_classes);
        if (!mnist_read_bytes(fin, model.fc2_bias->data, ggml_nbytes(model.fc2_bias))) {
            return fail("fc2_bias data");
        }
        ggml_set_name(model.fc2_bias, "fc2_bias");
    }

    return true;
}