注1:以下所有代码块均可直接在https://dbpedia.org/sparql中运行。
注2:SPARQL的关键字(如SELECT、WHERE、FILTER)对大小写不敏感,但变量名(例如?class_name与?Class_name是两个不同的变量)、IRI以及前缀名对大小写敏感。
前缀汇总:
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX dbo: <http://dbpedia.org/ontology/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
查询DBpedia中Class数量。
# Count the distinct resources that are typed as owl:Class.
# SPARQL 1.1 requires an aliased projection expression to be wrapped in
# parentheses: (expr AS ?var). The bare form is a Virtuoso-only extension.
SELECT (COUNT(DISTINCT ?class_name) AS ?total_num)
WHERE {
  ?class_name rdf:type owl:Class .
}
查询所有存在的Class。
# List every resource that is typed as owl:Class.
# "a" is the SPARQL shorthand for rdf:type; ?class_name is the only variable
# in the pattern, so projecting it explicitly returns the same rows as "*".
SELECT ?class_name
WHERE {
  ?class_name a owl:Class .
}
查询存在实体的Class类别数量。
# Count the distinct owl:Class resources that have at least one instance.
SELECT (COUNT(DISTINCT ?class) AS ?Class_num)
WHERE {
  ?class a owl:Class .
  # Joining on an instance drops classes that nothing is typed with.
  ?instance a ?class .
}
查询所有存在实体的主类(rdfs:subClassOf为owl:Thing的Class)及其实体数量,并按实体数量降序排序。
# For every owl:Class asserted as rdfs:subClassOf owl:Thing, report the
# number of distinct instances, largest count first.
SELECT ?main_class (COUNT(DISTINCT ?instance) AS ?entity_num)
WHERE {
  ?main_class a owl:Class ;
              rdfs:subClassOf owl:Thing .
  ?instance a ?main_class .
}
GROUP BY ?main_class
ORDER BY DESC(?entity_num)
查询每个Class及其实体数量并按照实体数量降序排序(此查询返回的Class类别不包括不含有实体的Class类别)。
# For every owl:Class that has instances, report the number of distinct
# instances, largest count first. Classes without instances produce no row
# because the instance pattern is mandatory.
# DISTINCT keeps this count consistent with the other per-class queries and
# avoids counting an instance twice when the same rdf:type triple is visible
# through more than one graph.
SELECT ?Class_name (COUNT(DISTINCT ?entity_name) AS ?entity_num)
WHERE {
  ?Class_name rdf:type owl:Class .
  ?entity_name rdf:type ?Class_name .
}
GROUP BY ?Class_name
ORDER BY DESC(?entity_num)
查询Work类中的实体数量。
# Count the distinct resources typed as dbo:Work (prefixed-name form).
# Returns a single number rather than one row per entity.
SELECT (COUNT(DISTINCT ?entity_name) AS ?entity_num)
WHERE {
  ?entity_name rdf:type dbo:Work .
}
或(其余前缀均可按照类似方式使用。)
# Count the distinct resources typed as dbo:Work, writing the class as a full
# IRI instead of a prefixed name.
# The aggregate is parenthesised and aliased as SPARQL 1.1 requires.
SELECT (COUNT(DISTINCT ?entity_name) AS ?entity_num)
WHERE {
  ?entity_name rdf:type <http://dbpedia.org/ontology/Work> .
}
查询Work类中具备英文摘要的实体数量。
# Count the distinct dbo:Work resources that have a dbo:abstract whose
# language tag matches "en".
# The aggregate is parenthesised and aliased as SPARQL 1.1 requires.
SELECT (COUNT(DISTINCT ?entity_name) AS ?entity_num)
WHERE {
  ?entity_name rdf:type <http://dbpedia.org/ontology/Work> .
  ?entity_name dbo:abstract ?abstract .
  FILTER(LANGMATCHES(LANG(?abstract), "en"))
}
查询Work类中不具备英文摘要的实体数量(注意语言匹配中用"en"或"EN"均可)。
# Count the distinct dbo:Work resources that have no dbo:abstract whose
# language tag matches "en".
# The aggregate is parenthesised and aliased as SPARQL 1.1 requires.
SELECT (COUNT(DISTINCT ?entity_name) AS ?entity_num)
WHERE {
  ?entity_name rdf:type <http://dbpedia.org/ontology/Work> .
  # The OPTIONAL block binds ?abstract only when an English abstract exists.
  OPTIONAL {
    ?entity_name dbo:abstract ?abstract .
    FILTER(LANGMATCHES(LANG(?abstract), "en"))
  }
  # Keep only the works for which the OPTIONAL block found nothing.
  FILTER(!BOUND(?abstract))
}
查询每个Class中具有英文摘要的实体数量(此查询所需时间较长,建议将SPARQL嵌入代码,每次查询仅对一个Class执行)。
# For every owl:Class, report how many distinct instances carry a
# dbo:abstract whose language tag matches "en", largest count first.
SELECT ?Class_name (COUNT(DISTINCT ?instance) AS ?entity_num_with_en_abstract)
WHERE {
  ?Class_name a owl:Class .
  ?instance a ?Class_name ;
            dbo:abstract ?text .
  FILTER(LANGMATCHES(LANG(?text), "en"))
}
GROUP BY ?Class_name
ORDER BY DESC(?entity_num_with_en_abstract)
综合应用实例
from SPARQLWrapper import SPARQLWrapper, JSON
import json
# Ask the DBpedia endpoint for every owl:Class asserted as rdfs:subClassOf
# owl:Thing, together with its number of distinct instances (descending).
# The prefixes are declared explicitly so the query is self-contained.
sparql = SPARQLWrapper("http://dbpedia.org/sparql")
sparql.setQuery("""
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
SELECT ?main_class (COUNT(DISTINCT ?entity_name) AS ?entity_num)
WHERE{
?main_class rdf:type owl:Class.
?main_class rdfs:subClassOf owl:Thing.
?entity_name rdf:type ?main_class.
}
GROUP BY ?main_class
ORDER BY desc (?entity_num)
""")
sparql.setReturnFormat(JSON)
# NOTE(review): very large timeout, presumably meant as "never time out"
# for this slow query - confirm the unit against the SPARQLWrapper docs.
sparql.setTimeout(9223372036)
results = sparql.query().convert()

# Collect the class URIs that live in the DBpedia ontology namespace.
# -1 is a placeholder value for each class.
class_dict = {}
for result in results["results"]["bindings"]:
    class_uri = result['main_class']['value']
    # startswith() only accepts URIs that are actually inside the namespace,
    # not ones that merely contain the namespace string somewhere.
    if class_uri.startswith('http://dbpedia.org/ontology/'):
        class_dict[class_uri] = -1
# Print every collected class URI (left-justified to 50 columns) and write the
# same lines to a text file. The with-block closes the file even if printing
# raises.
with open('/home/njucs/Desktop/class_dict_keys.txt', 'w') as f:
    for item in class_dict:
        padded = item.ljust(50)
        print(padded)
        print(padded, file=f)
# For each class in class_dict, ask DBpedia how many distinct instances have
# an English dbo:abstract, store the number in class_dict, and log it to the
# console and to a text file. One query is sent per class.
# "%s" is replaced with the class URI; prefixes are declared explicitly so the
# query is self-contained.
en_abstract_query = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX dbo: <http://dbpedia.org/ontology/>
SELECT (COUNT(DISTINCT ?entity_name) AS ?num)
WHERE {
?entity_name rdf:type <%s>.
?entity_name dbo:abstract ?abstract.
FILTER(LANGMATCHES(LANG(?abstract), "en"))
}
"""

# The endpoint wrapper is configured once and reused for every class.
sparql = SPARQLWrapper("http://dbpedia.org/sparql")
sparql.setReturnFormat(JSON)
# NOTE(review): very large timeout, presumably meant as "never time out" -
# confirm the unit against the SPARQLWrapper docs.
sparql.setTimeout(9223372036)

seq = 0
# The with-block closes (and flushes) the log file even if a query fails
# part-way through, so the counts gathered so far are not lost.
with open('/home/njucs/Desktop/class_dict.txt', 'w') as f:
    # Only existing keys are reassigned, so iterating the dict while updating
    # its values is safe.
    for seq, key in enumerate(class_dict, start=1):
        sparql.setQuery(en_abstract_query % key)
        results = sparql.query().convert()
        class_dict[key] = int(results["results"]["bindings"][0]["num"]["value"])
        print(seq, "\t", key.ljust(50), class_dict[key])
        print(key, class_dict[key], file=f)
# Save the class -> count mapping as JSON. The with-block closes the file even
# if serialisation or writing raises.
with open('/home/njucs/Desktop/class_dict.json', 'w') as f:
    json.dump(class_dict, f)
# Save the (class, count) pairs sorted by count, largest first, as JSON.
# sorted() is stable, so classes with equal counts keep their dict order.
ordered_list = sorted(class_dict.items(), key=lambda pair: pair[1], reverse=True)
# The with-block closes the file even if serialisation or writing raises.
with open('/home/njucs/Desktop/ordered_list.json', 'w') as f:
    json.dump(ordered_list, f)