最新IP地址数据库 来自 qqzeng.com
利用二分逼近法(bisection method) ,解析800多万IP 只需几十秒, 比较高效!
原来的顺序查找算法 效率比较低
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
|
readonly
string
ipBinaryFilePath =
"qqzengipdb.dat"
;
readonly
byte
[] dataBuffer, indexBuffer;
readonly
uint
[] index =
new
uint
[256];
readonly
int
dataLength;
public
IpLocation()
{
try
{
FileInfo file =
new
FileInfo(ipBinaryFilePath);
dataBuffer =
new
byte
[file.Length];
using
(var fin =
new
FileStream(file.FullName, FileMode.Open, FileAccess.Read))
{
fin.Read(dataBuffer, 0, dataBuffer.Length);
}
var offset_len = BytesToLong(dataBuffer[0], dataBuffer[1], dataBuffer[2], dataBuffer[3]);
//Big Endian
indexBuffer =
new
byte
[offset_len];
Array.Copy(dataBuffer, 4, indexBuffer, 0, offset_len);
dataLength = (
int
)offset_len;
for
(
int
loop = 0; loop < 256; loop++)
{
//索引 四字节 LITTLE_ENDIAN
index[loop] = BytesToLong(indexBuffer[loop * 4 + 3], indexBuffer[loop * 4 + 2], indexBuffer[loop * 4 + 1], indexBuffer[loop * 4]);
}
}
catch
{ }
}
public
string
[] Find(
string
ip)
{
var ips = ip.Split(
'.'
);
uint
ip_prefix =
uint
.Parse(ips[0]);
uint
find_uint32 = BytesToLong(
byte
.Parse(ips[0]),
byte
.Parse(ips[1]),
byte
.Parse(ips[2]),
byte
.Parse(ips[3]));
//BIG_ENDIAN
// LITTLE_ENDIAN
int
max_len = 0;
int
resultOffset =-1;
int
resultLegth =-1;
uint
start = index[ip_prefix] * 8 + 1024;
if
(ip_prefix != 255)
{
max_len = (
int
)index[ip_prefix + 1] * 8 + 1024;
}
else
{
max_len = (
int
)index[255] * 8 + 1024+1;
}
for
(; start < max_len; start += 8)
{
// 前四位 结束ip 后三位 偏移 最后一位 内容长度
uint
endipNum = BytesToLong(indexBuffer[start + 0], indexBuffer[start + 1], indexBuffer[start + 2], indexBuffer[start + 3]);
//BIG_ENDIAN
if
(endipNum >= find_uint32)
{
resultOffset =(
int
) BytesToLong((
byte
)0, indexBuffer[start + 6], indexBuffer[start + 5], indexBuffer[start + 4]);
//LITTLE_ENDIAN
resultLegth = 0xFF & indexBuffer[start + 7];
// 长度
break
;
}
}
if
(resultOffset==-1||resultLegth==-1)
{
return
new
string
[] {
"N/A"
};
}
var areaBytes =
new
byte
[resultLegth];
Array.Copy(dataBuffer, dataLength + resultOffset - 1024, areaBytes, 0, resultLegth);
return
Encoding.UTF8.GetString(areaBytes).Split(
' '
);
}
private
static
uint
BytesToLong(
byte
a,
byte
b,
byte
c,
byte
d)
{
return
((
uint
)a << 24) | ((
uint
)b << 16) | ((
uint
)c << 8) | (
uint
)d;
}
public
static
string
long2IP(
long
longIP)
{
StringBuilder sb =
new
StringBuilder(
""
);
sb.Append(longIP >> 24);
sb.Append(
"."
);
//将高8位置0,然后右移16为
sb.Append((longIP & 0x00FFFFFF) >> 16);
sb.Append(
"."
);
sb.Append((longIP & 0x0000FFFF) >> 8);
sb.Append(
"."
);
sb.Append((longIP & 0x000000FF));
return
sb.ToString();
}
}
|
改进版 采用二分逼近 算法(类似二分查找,但又不同) 性能提升很大
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
|
public
string
[] Find(
string
ip)
{
var ips = ip.Split(
'.'
);
uint
ip_prefix =
uint
.Parse(ips[0]);
uint
find_uint32 = BytesToLong(
byte
.Parse(ips[0]),
byte
.Parse(ips[1]),
byte
.Parse(ips[2]),
byte
.Parse(ips[3]));
//BIG_ENDIAN
uint
max_len = 0;
int
resultOffset = -1;
int
resultLegth = -1;
uint
start = index[ip_prefix];
if
(ip_prefix != 255)
{
max_len = index[ip_prefix + 1];
}
else
{
max_len = index[255];
}
uint
num = max_len - start;
uint
my_index = BinarySearch(start, max_len, find_uint32);
start = my_index * 8 + 1024;
resultOffset = (
int
)BytesToLong((
byte
)0, indexBuffer[start + 6], indexBuffer[start + 5], indexBuffer[start + 4]);
//LITTLE_ENDIAN
resultLegth = 0xFF & indexBuffer[start + 7];
// 长度
if
(resultOffset == -1 || resultLegth == -1)
{
return
new
string
[] {
"N/A"
};
}
var areaBytes =
new
byte
[resultLegth];
Array.Copy(dataBuffer, dataLength + resultOffset - 1024, areaBytes, 0, resultLegth);
return
Encoding.UTF8.GetString(areaBytes).Split(
' '
);
}
/// <summary>
/// 二分逼近
/// </summary>
public
uint
BinarySearch(
uint
low,
uint
high,
uint
k)
{
uint
M = 0;
while
(low <= high)
{
uint
mid = (low + high) / 2;
uint
endipNum = GetStartIp(mid);
if
(endipNum >= k)
{
M = mid;
//mid有可能是解
high = mid - 1;
}
else
low = mid + 1;
}
return
M;
}
|
有了上面高效算法 解析出来800多万数据 也很快 再用一个简单的ling 统计一下即可
1
2
3
|
var cn_result= from r
in
list
group r by r.cn into g
select
new
{ key = g.Key, cnt = g.Count() };
|