练习使用更传统的系统 - 不要使用 graph-specific solutions, 比如 GraphQL 或者 图数据库 Neo4j
便利转换索引:
没有上百万 User 和 数十亿 friend 关系的限制,我们可以解决最短路径问题通过使用 BFS 算法
class Graph(Graph):
def shortest_path(self, source, dest):
if source is None or dest is None:
return None
if source is dest:
return [source.key]
prev_node_keys = self._shortest_path(source, dest)
if prev_node_keys is None:
return None
else:
path_ids = [dest.key]
prev_node_key = prev_node_keys[dest.key]
while prev_node_key is not None:
path_ids.append(prev_node_key)
prev_node_key = prev_node_keys[prev_node_key]
return path_ids[::-1]
def _shortest_path(self, source, dest):
queue = deque()
queue.append(source)
prev_node_keys = {source.key: None}
source.visit_state = State.visited
while queue:
node = queue.popleft()
if node is dest:
return prev_node_keys
prev_node = node
for adj_node in node.adj_nodes.values():
if adj_node.visit_state == State.unvisted:
queue.append(adj_node)
prev_node_keys[adj_node.key] = prev_node.key
adj_node.visit_state = State.visited
return Node
我们不能把所有的 User 都放在同一个机器里面,我们需要 分片的 User (通过 Person Server)
而且使用 Lookup Service 去访问他们
adjacent_node
adjecent_node
数据(匹配给定的 id)Lookup Service 实现:
class LookupService(object):
def __init__(self):
self.lookup = self._init_lookup() # key: person_id, value: person_server
def _init_lookup(self):
...
def lookup_person_server(self, person_id):
return self.lookup[person_id]
Person Server 实现:
class PersonServer(object):
def __init__(self):
self.people = {} # key: person_id, value: person
def add_person(self, person):
...
def people(self, ids):
results = []
for id in ids:
if id in self.people:
results.append(self.people[id])
return results
Person 实现:
class Person(object):
def __init__(self, id, name, friend_ids):
self.id = id
self.name = name
self.friend_ids = friend_ids
User Graph Service 实现:
class UserGraphService(object):
def __init__(self, lookup_service):
self.lookup_service = lookup_service
def person(self, person_id):
person_server = self.lookup_service.lookup_person_server(person_id)
return person_server.people([person_id])
def shortest_path(self, source_key, dest_key):
if source_key is None or dest_key is None:
return None
if source_key is dest_key:
return [source_key]
prev_node_keys = self._shortest_path(source_key, dest_key)
if prev_node_keys is None:
return None
else:
# Iterate through the path_ids backwards, starting at dest_key
path_ids = [dest_key]
prev_node_key = prev_node_keys[dest_key]
while prev_node_key is not None:
path_ids.append(prev_node_key)
prev_node_key = prev_node_keys[prev_node_key]
# Reverse the list since we iterated backwards
return path_ids[::-1]
def _shortest_path(self, source_key, dest_key, path):
# Use the id to get the Person
source = self.person(source_key)
# Update our bfs queue
queue = deque()
queue.append(source)
# prev_node_keys keeps track of each hop from
# the source_key to the dest_key
prev_node_keys = {source_key: None}
# We'll use visited_ids to keep track of which nodes we've
# visited, which can be different from a typical bfs where
# this can be stored in the node itself
visited_ids = set()
visited_ids.add(source.id)
while queue:
node = queue.popleft()
if node.key is dest_key:
return prev_node_keys
prev_node = node
for friend_id in node.friend_ids:
if friend_id not in visited_ids:
friend_node = self.person(friend_id)
queue.append(friend_node)
prev_node_keys[friend_id] = prev_node.key
visited_ids.add(friend_id)
return None
我们可以使用 public REST API:
$ curl https://social.com/api/v1/friend_search?person_id=1234
Response:
{
"person_id": "100",
"name": "foo",
"link": "https://social.com/foo",
},
{
"person_id": "53",
"name": "bar",
"link": "https://social.com/bar",
},
{
"person_id": "1234",
"name": "baz",
"link": "https://social.com/baz",
}
我们可以有以下优化点: