动机
最近为了学习 Golang,发现了一个很好的项目 seaweedfs(github link)。它是一个分布式的小文件存储系统,项目主页已经介绍得比较详细了,其架构主要来自 Facebook 一篇关于图片存储的论文(Haystack)。
Volume
Seaweedfs 有 master server 和 volume server 两种角色,volume 是具体存储文件的地方。一个 volume server 持有一个指向 Store 对象的引用(volume_server.go#L20):
// Store is the storage state held by a volume server.
//
// One VolumeServer contains one Store.
// One Store contains a disk location list.
type Store struct {
// NOTE(review): Ip, Port and PublicUrl presumably describe how this
// volume server is reached — confirm against the code that fills them in.
Ip string
Port int
PublicUrl string
Locations []*DiskLocation // the disk location list of this store
dataCenter string //optional information, overwriting master setting if exists
rack string //optional information, overwriting master setting if exists
connected bool
volumeSizeLimit uint64 //read from the master
masterNodes *MasterNodes
}
// DiskLocation is one entry of a Store's location list.
//
// One location contains a volume map.
type DiskLocation struct {
Directory string
MaxVolumeCount int
volumes map[VolumeId]*Volume // volumes of this location, keyed by volume id
}
// Volume is the in-memory state of a single volume, identified by Id.
//
// NOTE(review): on disk a volume is described as a [volumeId].idx index file
// plus a [volumeId].dat data file; dataFile presumably refers to the .dat
// file and nm to the loaded index — confirm against the loading code.
type Volume struct {
Id VolumeId
dir string
Collection string
dataFile *os.File
nm NeedleMapper
needleMapKind NeedleMapType
readOnly bool
SuperBlock // embedded, so its fields and methods are promoted onto Volume
dataFileAccessLock sync.Mutex // NOTE(review): presumably guards access to dataFile — confirm
lastModifiedTime uint64 // Unix time in seconds
}
在文件系统上,weedfs 会为每个 volume 创建 [volumeId].idx 和 [volumeId].dat 两个文件。从后缀名就可以看出来:前者存储的是索引(index)信息,相当于 inode;后者存储的是实际的数据。
因为 server 启动加载时会把索引数据全部读入内存,所以 idx 文件应该尽量小。idx 文件实际上由一串顺序排列的 NeedleValue 组成,每条记录固定 16 字节(Key 8 字节、Offset 4 字节、Size 4 字节):
// NeedleValue is one index entry: a key together with the offset and size
// of the data it refers to.
type NeedleValue struct {
Key Key
Offset uint32 `comment:"Volume offset"` // since data is aligned to 8 bytes, the addressable range is 4G*8=32G
Size uint32 `comment:"Size of the data portion"`
}
下面这个小程序可以用来把 idx 文件加载进内存:
import (
"encoding/binary"
"fmt"
"io"
"os"
)
// Key is the unsigned 64-bit key of an index entry (see NeedleValue.Key).
type Key uint64
// NeedleValue comment
type NeedleValue struct {
Key Key
Offset uint32 comment:"Volume offset"
//since aligned to 8 bytes, range is 4G*8=32G
Size uint32 comment:"Size of the data portion"
}
// LoadIndexFile reads the index file at fileName and decodes it into a slice
// of NeedleValue entries, in file order.
//
// The file is treated as a sequence of fixed-size 16-byte records, each
// decoded by readAsNeedleValue. Trailing bytes that do not form a complete
// record are ignored, so a file whose last record is only partially written
// still loads.
//
// If the file cannot be opened, the error from os.Open is returned unchanged
// (so os.IsNotExist and friends keep working). If reading fails for any
// reason other than reaching the end of the file, a wrapped error is
// returned. In both cases the returned slice is nil.
func LoadIndexFile(fileName string) ([]*NeedleValue, error) {
	const (
		entrySize = 16   // bytes per index record
		batchSize = 1024 // records fetched per read call
	)

	file, err := os.Open(fileName)
	if err != nil {
		return nil, err
	}
	defer file.Close()

	needleValues := make([]*NeedleValue, 0, batchSize)
	// Read many records per call instead of issuing one 16-byte read each.
	buf := make([]byte, entrySize*batchSize)
	for {
		n, readErr := io.ReadFull(file, buf)
		// Decode every complete record that was read, even when the read
		// ended early; a partial record at the tail is skipped.
		for off := 0; off+entrySize <= n; off += entrySize {
			needleValues = append(needleValues, readAsNeedleValue(buf[off:off+entrySize]))
		}
		switch readErr {
		case nil:
			// Full batch read; keep going.
		case io.EOF, io.ErrUnexpectedEOF:
			// io.ReadFull reports end of input as EOF (nothing read) or
			// ErrUnexpectedEOF (short final batch); both mean we are done.
			return needleValues, nil
		default:
			// A genuine I/O failure must not be mistaken for end of file.
			return nil, fmt.Errorf("read index file %q: %w", fileName, readErr)
		}
	}
}
// readAsNeedleValue decodes the first 16 bytes of buf as one index record.
//
// Layout (all big-endian): bytes 0-7 hold the key, bytes 8-11 the offset and
// bytes 12-15 the size. When buf holds fewer than 16 bytes a zero-valued
// NeedleValue is returned instead. The result is always a freshly allocated,
// non-nil pointer that does not alias buf.
func readAsNeedleValue(buf []byte) *NeedleValue {
	const entrySize = 16

	nv := new(NeedleValue)
	if len(buf) >= entrySize {
		order := binary.BigEndian
		nv.Key = Key(order.Uint64(buf[:8]))
		nv.Offset = order.Uint32(buf[8:12])
		nv.Size = order.Uint32(buf[12:entrySize])
	}
	return nv
}