Swift
字典用来存储无序的相同类型数据的集合,字典会强制检测元素的类型,如果类型不同则会报错。Swift
字典每个值(value
)都关联唯一的键(key
),键作为字典中的这个值数据的标识符。- 和数组中的元素不同,字典中的元素并没有具体顺序,需要通过
key
访问到元素。- 字典的
key
类型必须遵循Hashable
协议,例如Int
或String
,并且每个key必须是唯一的。- 如果创建一个字典,并赋值给一个变量,则创建的字典是可以修改的。这意味着在创建字典后,可以通过添加、删除、修改的方式改变字典里的元素。
- 如果将一个字典赋值给常量,字典是不可修改的,并且字典的大小和内容都不可以修改。
基本定义
Dictionary
的键(Key
)类型必须遵循Hashable
协议,因为它底层使用的就是哈希表
哈希表
哈希表(
Hash table
)也叫散列表,是根据关键字(Key value
)⽽直接访问在内存存储位置的数据结构。也就是说,它通过计算⼀个关于键值的函数,将所需查询的数据映射到表中⼀个位置来访问记录,这加快了查找速度。这个映射函数称做散列函数,存放记录的数组称做散列表。
散列函数
散列函数又称散列算法、哈希函数。它的目标是计算
key
在数组中的下标。几种常用的散列函数构造方法:
- 直接寻址法
- 数字分析法
- 平⽅取中法
- 折叠法
- 随机数法
- 除留余数法
哈希冲突
再优秀的哈希算法也无法完全避免哈希冲突。哈希冲突指的是两个不同的
key
经过哈希计算后得到的数组下标是相同的。哈希冲突几种常用的解决方法:
- 开放定址法
- 拉链法
负载因⼦
负载因子 = 填⼊表中的元素个数 / 散列表的⻓度
一个非空散列表的
负载因子
是填⼊表中的元素个数 / 散列表的⻓度
。这是面临再次散列或扩容时的决策参数,有助于确定散列函数的效率。也就是说,该参数指出了散列函数是否将关键字均匀分布。
内存布局
Dictionary
是否是连续的内存地址空间?如果不是,元素与元素之间的关系如何确定?如何计算⼀个元素的地址?
通过字⾯量方式创建⼀个字典:
var dic = ["1": "Kody", "2": "Hank", "3": "Cooci", "4" : "Cat"]
源码分析
上述代码,通过字⾯量方式创建
Dictionary
,在源码中可以直接搜索literal
打开
Dictionary.swift
文件,找到init(dictionaryLiteral elements:)
的定义:
/// Creates a dictionary from the key-value pairs of a dictionary literal.
///
/// Allocates native storage sized for `elements.count`, then inserts every
/// pair at the bucket reported by `find`. The `_precondition` fails with
/// "Dictionary literal contains duplicate keys" if a key occurs twice.
///
/// - Parameter elements: The key-value pairs written in the literal.
@inlinable
@_effects(readonly)
@_semantics("optimize.sil.specialize.generic.size.never")
public init(dictionaryLiteral elements: (Key, Value)...) {
  // The generic arguments are spelled out explicitly; this statement alone
  // gives the compiler nothing to infer `Key` and `Value` from.
  let native = _NativeDictionary<Key, Value>(capacity: elements.count)
  for (key, value) in elements {
    // `bucket` is the slot where `key` already lives or should be inserted.
    let (bucket, found) = native.find(key)
    _precondition(!found, "Dictionary literal contains duplicate keys")
    native._insert(at: bucket, key: key, value: value)
  }
  self.init(_native: native)
}
- 创建了一个
_NativeDictionary
的实例对象- 遍历
elements
,通过实例对象的find
方法,找到key
值对应的bucket
bucket
相当于要插入的位置- 将
key
、value
循环插入到bucket
中- 最后调用
init
方法found
:布尔类型,如果字典初始化时,字面量里包含重复key
值,运行时报错Dictionary literal contains duplicate keys
打开
NativeDictionary.swift
文件,找到_NativeDictionary
的定义:
/// A value-type wrapper around a dictionary storage object.
///
/// The struct itself only holds a reference to an untyped
/// `__RawDictionaryStorage`; `Key` and `Value` are carried by the wrapper's
/// generic parameters.
@usableFromInline
@frozen
internal struct _NativeDictionary<Key: Hashable, Value> {
  @usableFromInline
  internal typealias Element = (key: Key, value: Value)

  /// See the comments on __RawDictionaryStorage and its subclasses to
  /// understand why we store an untyped storage here.
  @usableFromInline
  internal var _storage: __RawDictionaryStorage

  /// Constructs an instance from the empty singleton.
  @inlinable
  internal init() {
    self._storage = __RawDictionaryStorage.empty
  }

  /// Constructs a dictionary adopting the given storage.
  @inlinable
  internal init(_ storage: __owned __RawDictionaryStorage) {
    self._storage = storage
  }

  /// Constructs an instance for the requested capacity.
  ///
  /// A capacity of zero reuses the shared empty storage; any other value
  /// allocates a fresh `_DictionaryStorage<Key, Value>`.
  @inlinable
  internal init(capacity: Int) {
    if capacity == 0 {
      self._storage = __RawDictionaryStorage.empty
    } else {
      self._storage = _DictionaryStorage<Key, Value>.allocate(capacity: capacity)
    }
  }

#if _runtime(_ObjC)
  /// Constructs an instance from a Cocoa dictionary, sized to its count.
  @inlinable
  internal init(_ cocoa: __owned __CocoaDictionary) {
    self.init(cocoa, capacity: cocoa.count)
  }

  /// Constructs an instance from a Cocoa dictionary with the given capacity,
  /// force-bridging every key and value to `Key` and `Value`.
  @inlinable
  internal init(_ cocoa: __owned __CocoaDictionary, capacity: Int) {
    if capacity == 0 {
      self._storage = __RawDictionaryStorage.empty
    } else {
      _internalInvariant(cocoa.count <= capacity)
      self._storage =
        _DictionaryStorage<Key, Value>.convert(cocoa, capacity: capacity)
      for (key, value) in cocoa {
        insertNew(
          key: _forceBridgeFromObjectiveC(key, Key.self),
          value: _forceBridgeFromObjectiveC(value, Value.self))
      }
    }
  }
#endif
}
_NativeDictionary
是一个结构体- 使用
init(capacity:)
方法初始化,如果capacity
容量等于0
,使用__RawDictionaryStorage.empty
。否则使用_DictionaryStorage
,通过allocate
方法,按容量大小分配内存空间
打开
DictionaryStorage.swift
文件,找到_DictionaryStorage
的定义:
/// Typed dictionary storage: a `__RawDictionaryStorage` subclass that knows
/// the `Key` and `Value` types and conforms to `_NSDictionaryCore`.
@usableFromInline
final internal class _DictionaryStorage<Key: Hashable, Value>
  : __RawDictionaryStorage, _NSDictionaryCore {
  // This type is made with allocWithTailElems, so no init is ever called.
  // But we still need to have an init to satisfy the compiler.
  @nonobjc
  override internal init(_doNotCallMe: ()) {
    _internalInvariantFailure("This class cannot be directly initialized")
  }

  /// Deinitializes the stored keys and values.
  ///
  /// Nothing is done for an empty storage, and each buffer is skipped when
  /// its element type is POD.
  deinit {
    guard _count > 0 else { return }
    // NOTE(review): iterating `_hashTable` presumably visits only occupied
    // buckets — confirm against `_HashTable`'s Sequence conformance.
    if !_isPOD(Key.self) {
      let keys = self._keys
      for bucket in _hashTable {
        (keys + bucket.offset).deinitialize(count: 1)
      }
    }
    if !_isPOD(Value.self) {
      let values = self._values
      for bucket in _hashTable {
        (values + bucket.offset).deinitialize(count: 1)
      }
    }
    _count = 0
    _fixLifetime(self)
  }

  /// The key buffer, viewed as typed memory.
  @inlinable
  final internal var _keys: UnsafeMutablePointer<Key> {
    @inline(__always)
    get {
      return self._rawKeys.assumingMemoryBound(to: Key.self)
    }
  }

  /// The value buffer, viewed as typed memory.
  @inlinable
  final internal var _values: UnsafeMutablePointer<Value> {
    @inline(__always)
    get {
      return self._rawValues.assumingMemoryBound(to: Value.self)
    }
  }

  /// This storage wrapped in a `_NativeDictionary`.
  internal var asNative: _NativeDictionary<Key, Value> {
    return _NativeDictionary(self)
  }

#if _runtime(_ObjC)
  @objc
  internal required init(
    objects: UnsafePointer<AnyObject?>,
    forKeys: UnsafeRawPointer,
    count: Int
  ) {
    _internalInvariantFailure("This class cannot be directly initialized")
  }

  @objc(copyWithZone:)
  internal func copy(with zone: _SwiftNSZone?) -> AnyObject {
    return self
  }

  @objc
  internal var count: Int {
    return _count
  }

  @objc(keyEnumerator)
  internal func keyEnumerator() -> _NSEnumerator {
    return _SwiftDictionaryNSEnumerator<Key, Value>(asNative)
  }

  /// Fast enumeration over the keys: writes up to `count` bridged keys into
  /// `objects` and returns how many were written.
  @objc(countByEnumeratingWithState:objects:count:)
  internal func countByEnumerating(
    with state: UnsafeMutablePointer<_SwiftNSFastEnumerationState>,
    objects: UnsafeMutablePointer<AnyObject>?, count: Int
  ) -> Int {
    defer { _fixLifetime(self) }
    let hashTable = _hashTable

    var theState = state.pointee
    if theState.state == 0 {
      theState.state = 1 // Arbitrary non-zero value.
      theState.itemsPtr = AutoreleasingUnsafeMutablePointer(objects)
      theState.mutationsPtr = _fastEnumerationStorageMutationsPtr
      theState.extra.0 = CUnsignedLong(hashTable.startBucket.offset)
    }

    // Test 'objects' rather than 'count' because (a) this is very rare anyway,
    // and (b) the optimizer should then be able to optimize away the
    // unwrapping check below.
    if _slowPath(objects == nil) {
      return 0
    }

    let unmanagedObjects = _UnmanagedAnyObjectArray(objects!)
    // `extra.0` carries the bucket offset to resume from between calls.
    var bucket = _HashTable.Bucket(offset: Int(theState.extra.0))
    let endBucket = hashTable.endBucket
    _precondition(bucket == endBucket || hashTable.isOccupied(bucket),
      "Invalid fast enumeration state")
    var stored = 0
    // NOTE(review): the loop below and the `object(forKey:)` header were
    // garbled in the original excerpt and have been restored from the
    // upstream Swift standard library — verify against your toolchain.
    for i in 0..<count {
      if bucket == endBucket { break }

      let key = _keys[bucket.offset]
      unmanagedObjects[i] = _bridgeAnythingToObjectiveC(key)

      stored += 1
      bucket = hashTable.occupiedBucket(after: bucket)
    }
    theState.extra.0 = CUnsignedLong(bucket.offset)
    state.pointee = theState
    return stored
  }

  /// Returns the bridged value for `aKey`, or `nil` when the key cannot be
  /// bridged to `Key` or is not present.
  @objc(objectForKey:)
  internal func object(forKey aKey: AnyObject) -> AnyObject? {
    guard let nativeKey = _conditionallyBridgeFromObjectiveC(aKey, Key.self)
    else { return nil }

    let (bucket, found) = asNative.find(nativeKey)
    guard found else { return nil }
    let value = asNative.uncheckedValue(at: bucket)
    return _bridgeAnythingToObjectiveC(value)
  }

  /// Copies up to `count` bridged values and/or keys into the given buffers;
  /// a `nil` buffer is skipped.
  @objc(getObjects:andKeys:count:)
  internal func getObjects(
    _ objects: UnsafeMutablePointer<AnyObject>?,
    andKeys keys: UnsafeMutablePointer<AnyObject>?,
    count: Int) {
    _precondition(count >= 0, "Invalid count")
    guard count > 0 else { return }
    var i = 0 // Current position in the output buffers
    switch (_UnmanagedAnyObjectArray(keys), _UnmanagedAnyObjectArray(objects)) {
    case (let unmanagedKeys?, let unmanagedObjects?):
      for (key, value) in asNative {
        unmanagedObjects[i] = _bridgeAnythingToObjectiveC(value)
        unmanagedKeys[i] = _bridgeAnythingToObjectiveC(key)
        i += 1
        guard i < count else { break }
      }
    case (let unmanagedKeys?, nil):
      for (key, _) in asNative {
        unmanagedKeys[i] = _bridgeAnythingToObjectiveC(key)
        i += 1
        guard i < count else { break }
      }
    case (nil, let unmanagedObjects?):
      for (_, value) in asNative {
        unmanagedObjects[i] = _bridgeAnythingToObjectiveC(value)
        i += 1
        guard i < count else { break }
      }
    case (nil, nil):
      // Do nothing.
      break
    }
  }
#endif
}
_DictionaryStorage
是一个类,继承自__RawDictionaryStorage
,遵循_NSDictionaryCore
协议_DictionaryStorage
使用final
关键字修饰,表示该类不能被继承(不存在子类)
找到
__RawDictionaryStorage
的定义:
/// Base class for dictionary storage. It declares the bookkeeping fields
/// (count, capacity, scale, age, seed) and the raw pointers to the key and
/// value buffers, without naming concrete key or value types.
@_fixed_layout
@usableFromInline
@_objc_non_lazy_realization
internal class __RawDictionaryStorage: __SwiftNativeNSDictionary {
// NOTE: The precise layout of this type is relied on in the runtime to
// provide a statically allocated empty singleton. See
// stdlib/public/stubs/GlobalObjects.cpp for details.
/// The current number of occupied entries in this dictionary.
@usableFromInline
@nonobjc
internal final var _count: Int
/// The maximum number of elements that can be inserted into this dictionary
/// without exceeding the hash table's maximum load factor.
@usableFromInline
@nonobjc
internal final var _capacity: Int
/// The scale of this dictionary. The number of buckets is 2 raised to the
/// power of `scale`.
@usableFromInline
@nonobjc
internal final var _scale: Int8
/// The scale corresponding to the highest `reserveCapacity(_:)` call so far,
/// or 0 if there were none. This may be used later to allow removals to
/// resize storage.
///
/// FIXME: Shrink storage on deletion
@usableFromInline
@nonobjc
internal final var _reservedScale: Int8
// Currently unused, set to zero.
@nonobjc
internal final var _extra: Int16
/// A mutation count, enabling stricter index validation.
@usableFromInline
@nonobjc
internal final var _age: Int32
/// The hash seed used to hash elements in this dictionary instance.
@usableFromInline
internal final var _seed: Int
/// A raw pointer to the start of the tail-allocated hash buffer holding keys.
@usableFromInline
@nonobjc
internal final var _rawKeys: UnsafeMutableRawPointer
/// A raw pointer to the start of the tail-allocated hash buffer holding
/// values.
@usableFromInline
@nonobjc
internal final var _rawValues: UnsafeMutableRawPointer
// This type is made with allocWithTailElems, so no init is ever called.
// But we still need to have an init to satisfy the compiler.
@nonobjc
internal init(_doNotCallMe: ()) {
_internalInvariantFailure("This class cannot be directly initialized")
}
/// The number of buckets, computed as `1 &<< _scale`.
@inlinable
@nonobjc
internal final var _bucketCount: Int {
@inline(__always) get { return 1 &<< _scale }
}
/// A pointer to the first tail-allocated `_HashTable.Word` of this object.
@inlinable
@nonobjc
internal final var _metadata: UnsafeMutablePointer<_HashTable.Word> {
@inline(__always) get {
let address = Builtin.projectTailElems(self, _HashTable.Word.self)
return UnsafeMutablePointer(address)
}
}
// The _HashTable struct contains pointers into tail-allocated storage, so
// this is unsafe and needs `_fixLifetime` calls in the caller.
/// A `_HashTable` built from `_metadata` and `_bucketCount`.
@inlinable
@nonobjc
internal final var _hashTable: _HashTable {
@inline(__always) get {
return _HashTable(words: _metadata, bucketCount: _bucketCount)
}
}
}
__RawDictionaryStorage
类,定义基础数据结构,例如_count
、_capacity
- 其中
_scale
是2
的指数,即_bucketCount = 2^_scale(源码中写作1 &<< _scale
),参与计算_bucketCount
_seed
为哈希种子
找到
allocate
的定义:
/// Allocates storage for the requested capacity.
///
/// Converts `capacity` to a hash-table scale with
/// `_HashTable.scale(forCapacity:)` and forwards to
/// `allocate(scale:age:seed:)`, passing `nil` for both age and seed.
@usableFromInline
@_effects(releasenone)
static internal func allocate(capacity: Int) -> _DictionaryStorage {
let scale = _HashTable.scale(forCapacity: capacity)
return allocate(scale: scale, age: nil, seed: nil)
}
- 调用
_HashTable
的scale
方法,计算scale
- 调用
allocate
方法,传入scale
打开
HashTable.swift
文件,找到_HashTable
的定义:
/// A lightweight view over a hash table's bitmap words.
///
/// It stores only a pointer to the bitmap words and the bucket mask; keys
/// and values are not held here.
@usableFromInline
@frozen
internal struct _HashTable {
  @usableFromInline
  internal typealias Word = _UnsafeBitset.Word

  /// Pointer to the first bitmap word.
  @usableFromInline
  internal var words: UnsafeMutablePointer<Word>

  /// `bucketCount - 1`. Because the bucket count is a power of two, this
  /// value can be used as a bit mask.
  @usableFromInline
  internal let bucketMask: Int

  /// Creates a view over `words` for a table with `bucketCount` buckets.
  ///
  /// - Parameters:
  ///   - words: Pointer to the bitmap words.
  ///   - bucketCount: Number of buckets; must be a positive power of two.
  @inlinable
  @inline(__always)
  internal init(words: UnsafeMutablePointer<Word>, bucketCount: Int) {
    _internalInvariant(bucketCount > 0 && bucketCount & (bucketCount - 1) == 0,
      "bucketCount must be a power of two")
    self.words = words
    // The bucket count is a power of two, so subtracting 1 will never overflow
    // and get us a nice mask.
    self.bucketMask = bucketCount &- 1
  }

  /// The number of buckets, recovered from the mask.
  @inlinable
  internal var bucketCount: Int {
    @inline(__always) get {
      return bucketMask &+ 1
    }
  }

  /// The number of bitmap words needed for `bucketCount` buckets.
  @inlinable
  internal var wordCount: Int {
    @inline(__always) get {
      return _UnsafeBitset.wordCount(forCapacity: bucketCount)
    }
  }
}
words
可以理解为二进制位,记录当前位置是否插入元素bucketMask
等于bucketCount-1
,而bucketCount
是2
的n
次方数,所以bucketMask
相当于掩码_HashTable
并不直接存储数据,而是存储二进制位
回到
DictionaryStorage.swift
文件,找到allocate
的定义:
/// Allocates and initializes storage with `1 &<< scale` buckets.
///
/// The object is created with three tail-allocated regions: `wordCount`
/// hash-table words, `bucketCount` keys and `bucketCount` values.
///
/// - Parameters:
///   - scale: Base-2 exponent of the bucket count.
///   - age: Initial mutation count; when `nil`, a value derived from the
///     storage object's identity is used.
///   - seed: Hash seed; when `nil`, `_HashTable.hashSeed(for:scale:)` is used.
/// - Returns: The new storage with a count of zero and cleared metadata.
static internal func allocate(
scale: Int8,
age: Int32?,
seed: Int?
) -> _DictionaryStorage {
// The entry count must be representable by an Int value; hence the scale's
// peculiar upper bound.
_internalInvariant(scale >= 0 && scale < Int.bitWidth - 1)
let bucketCount = (1 as Int) &<< scale
let wordCount = _UnsafeBitset.wordCount(forCapacity: bucketCount)
let storage = Builtin.allocWithTailElems_3(
_DictionaryStorage.self,
wordCount._builtinWordValue, _HashTable.Word.self,
bucketCount._builtinWordValue, Key.self,
bucketCount._builtinWordValue, Value.self)
// Compute where each tail region starts: words first, then keys, then values.
let metadataAddr = Builtin.projectTailElems(storage, _HashTable.Word.self)
let keysAddr = Builtin.getTailAddr_Word(
metadataAddr, wordCount._builtinWordValue, _HashTable.Word.self,
Key.self)
let valuesAddr = Builtin.getTailAddr_Word(
keysAddr, bucketCount._builtinWordValue, Key.self,
Value.self)
storage._count = 0
storage._capacity = _HashTable.capacity(forScale: scale)
storage._scale = scale
storage._reservedScale = 0
storage._extra = 0
if let age = age {
storage._age = age
} else {
// The default mutation count is simply a scrambled version of the storage
// address.
storage._age = Int32(
truncatingIfNeeded: ObjectIdentifier(storage).hashValue)
}
storage._seed = seed ?? _HashTable.hashSeed(for: storage, scale: scale)
storage._rawKeys = UnsafeMutableRawPointer(keysAddr)
storage._rawValues = UnsafeMutableRawPointer(valuesAddr)
// Initialize hash table metadata.
storage._hashTable.clear()
return storage
}
- 通过
&<<
运算符(将1 左移scale 位)计算bucketCount
wordCount
通过bucketCount
计算要记录的二进制位allocWithTailElems_3
在类的后面分配连续的内存空间,分别存储Key
和Value
Dictionary内存布局
Dictionary
包含DictionaryStorage
类DictionaryStorage
类包含metaData
、refCount
、_count
、_capacity
- 中间
64位
存储_scale
、_reservedScale
、_extra
、_age
- 接下来存储的
_seed
、_rawKeys
、_rawValues
、metaAddr
- 其中
_rawKeys
、_rawValues
存储的是数组地址
通过
LLDB
调试来验证⼀下:通过字⾯量方式创建⼀个字典:
var dic = ["1": "Kody", "2": "Hank", "3": "Cooci", "4" : "Cat"]
通过
withUnsafePointer
打印dic
内存地址
通过
x/8g
查看dic
的内存地址,里面包含了一个堆上的地址0x000000010067aed0
通过
x/8g
查看内存地址0x000000010067aed0
0x00007fff99540800
:metaData
元类型0x0000000000000002
:refCount
引用计数0x0000000000000004
:_count
0x0000000000000006
:_capacity
0xcf1ba4a800000003
:64位
连续内存,包含_scale
、_reservedScale
、_extra
、_age
0x000000010067aed0
:_seed
种子,以对象创建的地址作为随机数的开始0x000000010067af18
:_rawKeys
0x000000010067af98
:_rawValues
通过
x/16g
查看_rawKeys
内存地址
- 连续内存中并不是连续存储的,
0x0000000000000032
转为十进制
是50
,对应的是key
值2
的ASCII码
- 后面的
0xe100000000000000
是key
值2
的小字符串(small string)标记:最高字节0xe1 中,高4 位e 表示ASCII 小字符串,低4 位1 表示字符串长度- 同理后面的
0x0000000000000033
、0x0000000000000031
、0x0000000000000034
分别对应的key
值是3
、1
、4
通过
x/16g
查看_rawValues
内存地址
- 和
_rawKeys
同理,前面的0x000000006b6e6148
、0x00000069636f6f43
、0x0000000079646f4b
、0x0000000000746143
表示value
值- 后面的
0xe400000000000000
、0xe500000000000000
、0xe400000000000000
、0xe300000000000000
表示value
值的字符串长度
Dictionary插⼊⼀个值
var dic = ["1": "Kody", "2": "Hank", "3": "Cooci", "4" : "Cat"]
dic["5"] = "Swift"
打开
Dictionary.swift
文件,找到subscript
的定义:
/// Accesses the value for `key`, falling back to `defaultValue` when the key
/// is not found.
///
/// The getter returns the looked-up value, or evaluates `defaultValue` when
/// the lookup yields `nil`. The `_modify` accessor first inserts
/// `defaultValue()` under `key` if the key is not found, then yields the
/// value slot at the key's bucket for in-place mutation.
@inlinable
public subscript(
key: Key, default defaultValue: @autoclosure () -> Value
) -> Value {
@inline(__always)
get {
return _variant.lookup(key) ?? defaultValue()
}
@inline(__always)
_modify {
let (bucket, found) = _variant.mutatingFind(key)
let native = _variant.asNative
if !found {
// The autoclosure is only evaluated when the key is missing.
let value = defaultValue()
native._insert(at: bucket, key: key, value: value)
}
// Address of the value slot that belongs to `bucket`.
let address = native._values + bucket.offset
defer { _fixLifetime(self) }
yield &address.pointee
}
}
get
方法里,_variant
是一个关联值的枚举类型- 通过
_variant
的lookup
方法,找到key
值是否存在
打开
NativeDictionary.swift
文件,找到lookup
的定义:
/// Returns the value stored for `key`, or `nil` when the dictionary is empty
/// or `find` reports that the key was not found.
@inlinable
@inline(__always)
func lookup(_ key: Key) -> Value? {
if count == 0 {
// Fast path that avoids computing the hash of the key.
return nil
}
let (bucket, found) = self.find(key)
guard found else { return nil }
return self.uncheckedValue(at: bucket)
}
- 通过
find
方法,传入key
,找到bucket
和found
找到
find
的定义:
/// Forwards the search for `key` to `_storage.find(_:)`.
///
/// - Returns: The bucket reported by the storage and whether the key was
///   found.
@inlinable
@inline(__always)
internal func find(_ key: Key) -> (bucket: Bucket, found: Bool) {
return _storage.find(key)
}
调用
_storage
的find
方法,传入key
值
打开
DictionaryStorage.swift
文件,找到find
的定义:
/// Finds the bucket for `key`, hashing it with this storage's `_seed`.
///
/// NOTE(review): the `<Key: Hashable>` clause was restored on the assumption
/// that this method lives in an extension of the non-generic
/// `__RawDictionaryStorage` — confirm against the upstream source.
///
/// - Returns: The bucket and found flag produced by `find(_:hashValue:)`.
@_alwaysEmitIntoClient
@inline(never)
internal final func find<Key: Hashable>(_ key: Key) -> (bucket: _HashTable.Bucket, found: Bool) {
  return find(key, hashValue: key._rawHashValue(seed: _seed))
}
- 调用
key
的_rawHashValue
方法,传入_seed
种子- 本质上就是通过
key
值得到hashValue
- 调用
find
方法,传入key
和hashValue
找到
find(_ key:, hashValue:)
的定义:
/// Searches for `key`, starting at the ideal bucket for `hashValue` and
/// stepping through the following buckets while they are occupied.
///
/// NOTE(review): the `<Key: Hashable>` clause was restored on the assumption
/// that this method lives in an extension of the non-generic
/// `__RawDictionaryStorage` — confirm against the upstream source.
///
/// - Returns: `(bucket, true)` for the bucket whose key equals `key`, or
///   `(bucket, false)` with the first unoccupied bucket that was reached.
@_alwaysEmitIntoClient
@inline(never)
internal final func find<Key: Hashable>(_ key: Key, hashValue: Int) -> (bucket: _HashTable.Bucket, found: Bool) {
  let hashTable = _hashTable
  var bucket = hashTable.idealBucket(forHashValue: hashValue)
  while hashTable._isOccupied(bucket) {
    if uncheckedKey(at: bucket) == key {
      return (bucket, true)
    }
    // Occupied by a different key: move on to the next bucket, wrapping
    // around at the end of the table.
    bucket = hashTable.bucket(wrappedAfter: bucket)
  }
  return (bucket, false)
}
- 调用
hashTable
的idealBucket
方法,传入hashValue
,返回bucket
- 通过
_isOccupied
方法判断bucket
是否被占用
打开
HashTable.swift
文件,找到idealBucket
的定义:
/// Returns the bucket whose offset is `hashValue & bucketMask`.
@inlinable
@inline(__always)
internal func idealBucket(forHashValue hashValue: Int) -> Bucket {
return Bucket(offset: hashValue & bucketMask)
}
hashValue
和bucketMask
进行&
运算,计算index
保存到bucket
中
找到
_isOccupied
的定义:
/// Returns whether the bitmap word for `bucket` contains the bucket's bit.
///
/// `_internalInvariant` checks that `bucket` is valid before the lookup.
@inlinable
@inline(__always)
internal func _isOccupied(_ bucket: Bucket) -> Bool {
_internalInvariant(isValid(bucket))
return words[bucket.word].uncheckedContains(bucket.bit)
}
- 与原有的二进制位进行计算,使用
uncheckedContains
返回0
或1
,查看是否包含当前bucket
找到
bucket(wrappedAfter bucket:)
的定义:
/// Returns the bucket after `bucket`; masking the incremented offset with
/// `bucketMask` wraps it back to the start of the table.
@inlinable
@inline(__always)
internal func bucket(wrappedAfter bucket: Bucket) -> Bucket {
// The bucket is less than bucketCount, which is power of two less than
// Int.max. Therefore adding 1 does not overflow.
return Bucket(offset: (bucket.offset &+ 1) & bucketMask)
}
bucket.offset
进行+1
操作,再和bucketMask
进行&
运算,然后重新构建bucket
打开
NativeDictionary.swift
文件,找到uncheckedValue
的定义:
/// Returns the element of `_values` at `bucket.offset`.
///
/// `_internalInvariant` asserts that the bucket is occupied.
@inlinable
@inline(__always)
internal func uncheckedValue(at bucket: Bucket) -> Value {
defer { _fixLifetime(self) }
_internalInvariant(hashTable.isOccupied(bucket))
return _values[bucket.offset]
}
_values
是连续存储的数组,直接通过bucket.offset
作为index
获取指定位置的元素
找到
setValue
的定义:
/// Stores `value` under `key`.
///
/// If `mutatingFind` reports the key as found, the value slot at that bucket
/// is overwritten; otherwise the pair is inserted at the returned bucket.
///
/// - Parameters:
///   - value: The value to store.
///   - key: The key to store it under.
///   - isUnique: Passed through to `mutatingFind(_:isUnique:)`.
@inlinable
internal mutating func setValue(
_ value: __owned Value,
forKey key: Key,
isUnique: Bool
) {
let (bucket, found) = mutatingFind(key, isUnique: isUnique)
if found {
(_values + bucket.offset).pointee = value
} else {
_insert(at: bucket, key: key, value: value)
}
}
- 通过
mutatingFind
找到bucket
和found
- 如果存在,使用
_values
加上bucket.offset
,将value
覆盖- 如果不存在,使用
_insert
方法,在当前bucket
位置插入key
和value
Dictionary
插⼊⼀个值的过程:
- 通过
key
计算key.hashValue
- 通过
key.hashValue
和bucketMask
进行&
运算,计算出index
- 通过
index
找到对应的bucket
:如果该bucket
已被占用且其中的key
与目标key
不相等(发生哈希冲突),则采用开放定址法(线性探测)检查下一个bucket
,直到找到相同的key
(found
为true
)或遇到空bucket
(found
为false
,该位置即为插入位置)- 对于
setValue
也是一样,通过key
值先查找,再赋值
使用Swift代码实现简单的HashTable
/// A minimal hash table that resolves collisions by separate chaining:
/// every bucket is an array holding the key-value pairs whose keys map to it.
struct HashTable<Key: Hashable, Value> {
  typealias Element = (key: Key, value: Value)
  typealias Bucket = [Element]

  /// The chains. The number of buckets is fixed at initialization.
  var buckets: [Bucket]
  /// The number of key-value pairs currently stored.
  var count = 0

  /// Creates an empty table.
  ///
  /// - Parameter capacity: The number of buckets. Values below 1 are raised
  ///   to 1 so that `index(key:)` never divides by zero.
  init(capacity: Int) {
    buckets = Array(repeating: [], count: max(capacity, 1))
  }

  /// Returns the index of the bucket responsible for `key`.
  func index(key: Key) -> Int {
    // `abs(Int.min)` traps on overflow, so reduce the hash value's unsigned
    // magnitude instead; the result is always in `0..<buckets.count`.
    return Int(key.hashValue.magnitude % UInt(buckets.count))
  }

  /// Accesses the value stored for `key`. Assigning `nil` removes the key.
  subscript(key: Key) -> Value? {
    get {
      return getValue(key: key)
    }
    set {
      if let value = newValue {
        updateValue(value, key)
      } else {
        removeValue(key)
      }
    }
  }

  /// Returns the value stored for `key`, or `nil` if the key is absent.
  func getValue(key: Key) -> Value? {
    let chain = buckets[index(key: key)]
    return chain.first(where: { $0.key == key })?.value
  }

  /// Stores `value` under `key`.
  ///
  /// - Returns: The value that was replaced, or `nil` if the key was new.
  @discardableResult
  mutating func updateValue(_ value: Value, _ key: Key) -> Value? {
    let index = self.index(key: key)
    if let position = buckets[index].firstIndex(where: { $0.key == key }) {
      let originValue = buckets[index][position].value
      buckets[index][position].value = value
      return originValue
    }
    buckets[index].append((key: key, value: value))
    count += 1
    return nil
  }

  /// Removes the pair stored under `key`; does nothing if the key is absent.
  mutating func removeValue(_ key: Key) {
    let index = self.index(key: key)
    guard let position = buckets[index].firstIndex(where: { $0.key == key }) else {
      return
    }
    buckets[index].remove(at: position)
    count -= 1
  }
}
// The generic arguments are spelled out because `init(capacity:)` alone gives
// the compiler nothing to infer `Key` and `Value` from.
var ht = HashTable<String, String>(capacity: 5)
ht["1"] = "Kody"
// The subscript returns an optional; `as Any` makes the conversion explicit
// and silences the implicit-coercion warning without changing the output.
print(ht["1"] as Any)
// Prints:
// Optional("Kody")