Magnetico 磁力信息收集器,Mysql数据库版

一位土耳其程序员正在开发一个名为“magnetico”的匿名、自托管的BitTorrent搜索引擎。它让任何能够连接互联网的用户都可以访问DHT网络中的大量种子(torrent),而不需要依赖任何中心化的实体。随着进一步的改进,magnetico可能有助于BitTorrent生态系统的完全去中心化。

什么是magnetico?

    magnetico利用BitTorrent的DHT来查找内容和重要的对等体(peer)。它旨在创建个人torrent搜索引擎,并保持其创建的索引完全私有。

magnetico是第一个为最终用户设计的自主BitTorrent DHT搜索引擎套件。套件由两个包组成:

magneticod:自动BitTorrent DHT抓取器和元数据抓取器。

magneticow:轻薄的web界面。

magneticod在后台运行,通过DHT网络从对等体获取数据;magneticow是用于搜索和查看种子的轻量级Web界面。

直接通过pip安装的magneticod使用的是sqlite数据库,不利于分布式收集和大型数据库的使用,这里介绍如何使用MySQL来存储收集到的数据。

代码

# magneticod - Autonomous BitTorrent DHT crawler and metadata fetcher.
# Copyright (C) 2017  Mert Bora ALPER 
# Dedicated to Cemile Binay, in whose hands I thrived.
#
# This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General
# Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any
# later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public License for more
# details.
#
# You should have received a copy of the GNU Affero General Public License along with this program.  If not, see
# <http://www.gnu.org/licenses/>.
import logging
import pymysql
import time
import typing
import os

from magneticod import bencode
from .constants import PENDING_INFO_HASHES


class Database:
    """MySQL-backed persistence layer for magneticod.

    Parsed torrent metadata is buffered in memory and written to the ``torrents`` and ``files`` tables once
    PENDING_INFO_HASHES torrents are pending (and once more on close()).

    The tables are NOT created automatically; see the schema in the comments of __connect().
    """

    def __init__(self, database) -> None:
        """Open the MySQL connection and initialise the empty write buffers.

        :param database: forwarded to __connect(), which ignores it (kept for interface compatibility with the
                         SQLite implementation this class replaces).
        """
        self.__db_conn = self.__connect(database)

        # We buffer metadata to flush many entries at once, for performance reasons.
        # list of tuple (hex info_hash, name, total_size, discovered_on)
        self.__pending_metadata = []  # type: typing.List[typing.Tuple[str, str, int, int]]
        # list of tuple (hex info_hash, size, path)
        self.__pending_files = []  # type: typing.List[typing.Tuple[str, int, str]]

    @staticmethod
    def __connect(database) -> "pymysql.Connection":
        """Connect to the MySQL server and return the connection.

        Connection settings default to the values below and may be overridden through the environment variables
        MAGNETICOD_MYSQL_HOST, MAGNETICOD_MYSQL_PORT, MAGNETICOD_MYSQL_USER, MAGNETICOD_MYSQL_PASSWORD and
        MAGNETICOD_MYSQL_DATABASE, so that credentials do not have to be edited into this file.

        :param database: unused.
        """
        # You need to create the tables yourself:

        # CREATE TABLE `files` (
        #   `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT,
        #   `torrent_id` int(11) unsigned NOT NULL,
        #   `size` bigint(20) NOT NULL,
        #   `path` text NOT NULL,
        #   PRIMARY KEY (`id`)
        # ) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8mb4

        # CREATE TABLE `torrents` (
        #   `id` bigint(11) unsigned NOT NULL AUTO_INCREMENT,
        #   `info_hash` varchar(64) NOT NULL,
        #   `name` text NOT NULL,
        #   `total_size` bigint(20) NOT NULL DEFAULT '0',
        #   `discovered_on` int(11) NOT NULL DEFAULT '0',
        #   `created_at` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP,
        #   `updated_at` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
        #   `actived_at` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP,
        #   `visited_at` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP,
        #   PRIMARY KEY (`id`),
        #   KEY `info_hash_index` (`info_hash`)
        # ) ENGINE=InnoDB AUTO_INCREMENT=0 DEFAULT CHARSET=utf8mb4

        # Keyword arguments are required: PyMySQL 1.0+ no longer accepts positional connection arguments.
        # The charset must be utf8mb4 to match the tables above; MySQL's "utf8" is limited to 3-byte characters and
        # rejects torrent names/paths that contain e.g. emoji.
        return pymysql.connect(
            host=os.environ.get("MAGNETICOD_MYSQL_HOST", "localhost"),
            port=int(os.environ.get("MAGNETICOD_MYSQL_PORT", "3306")),
            user=os.environ.get("MAGNETICOD_MYSQL_USER", "root"),
            password=os.environ.get("MAGNETICOD_MYSQL_PASSWORD", "password"),
            database=os.environ.get("MAGNETICOD_MYSQL_DATABASE", "dht"),
            charset="utf8mb4"
        )

    @staticmethod
    def __parse_info(info_hash: str, info: dict) -> typing.Tuple[str, typing.List[typing.Tuple[str, int, str]]]:
        """Validate a decoded info dictionary and extract the torrent name and its file list.

        Validation uses explicit checks instead of ``assert`` so that it is still enforced under ``python -O``.

        :param info_hash: hex-encoded info hash, stored in every returned file tuple.
        :param info: the bdecoded info dictionary.
        :return: ``(name, files)`` where files is a list of ``(info_hash, size, path)`` tuples.
        :raises ValueError: if the name or a path item contains a slash, or a length is not an integer.
        :raises KeyError, TypeError, AttributeError, UnicodeDecodeError: if the dictionary is malformed.
        """
        raw_name = info[b"name"]
        if b"/" in raw_name:
            raise ValueError("torrent name contains a slash")
        name = raw_name.decode("utf-8")

        if b"files" in info:  # Multiple File torrent:
            files = []
            for entry in info[b"files"]:
                length = entry[b"length"]
                if not isinstance(length, int):
                    raise ValueError("file length is not an integer")
                # Refuse slashes in any of the path items
                if any(b"/" in item for item in entry[b"path"]):
                    raise ValueError("path item contains a slash")
                path = "/".join(item.decode("utf-8") for item in entry[b"path"])
                files.append((info_hash, length, path))
        else:  # Single File torrent:
            length = info[b"length"]
            if not isinstance(length, int):
                raise ValueError("file length is not an integer")
            files = [(info_hash, length, name)]

        return name, files

    def add_metadata(self, info_hash: bytes, metadata: bytes) -> bool:
        """Parse the bencoded metadata of a torrent and queue it for insertion.

        The buffer is flushed to the database automatically once PENDING_INFO_HASHES torrents are pending.

        :param info_hash: raw info hash of the torrent.
        :param metadata: bencoded info dictionary of the torrent.
        :return: True if the metadata was valid and has been queued, False if it was rejected.
        """
        discovered_on = int(time.time())
        hex_hash = info_hash.hex()
        try:
            info = bencode.loads(metadata)  # type: dict
            name, files = self.__parse_info(hex_hash, info)
        # TODO: Make sure this catches ALL, AND ONLY operational errors
        except (bencode.BencodeDecodingError, ValueError, AssertionError, KeyError, AttributeError,
                UnicodeDecodeError, TypeError):
            return False

        total_size = sum(size for _, size, _ in files)
        self.__pending_metadata.append((hex_hash, name, total_size, discovered_on))
        self.__pending_files.extend(files)

        logging.info("Added: `%s`", name)

        # Automatically check if the buffer is full, and commit to the MySQL database if so.
        if len(self.__pending_metadata) >= PENDING_INFO_HASHES:
            self.__commit_metadata()

        return True

    def is_infohash_new(self, info_hash: bytes) -> bool:
        """Tell whether a torrent is unknown, i.e. neither pending in the buffer nor stored in the database.

        As a side effect, ``actived_at`` of a torrent that is already stored is set to the current time.

        :param info_hash: raw info hash of the torrent.
        :return: True if the info hash has not been seen before, False otherwise.
        """
        hex_hash = info_hash.hex()
        if any(pending[0] == hex_hash for pending in self.__pending_metadata):
            return False

        cur = self.__db_conn.cursor()
        try:
            cur.execute("SELECT count(info_hash) FROM torrents where info_hash = %s;", [hex_hash])
            count, = cur.fetchone()
            if count == 0:
                return True

            # Already known: record that the torrent has been seen on the network again.
            cur.execute("UPDATE torrents SET actived_at = now() where info_hash = %s;", [hex_hash])
            self.__db_conn.commit()
            return False
        finally:
            cur.close()

    def __commit_metadata(self) -> None:
        """Flush the buffered torrents and their files to the database.

        Every torrent is written together with its own files in a single transaction, and the files are linked
        through the id generated by the torrent insert. A torrent that cannot be written is rolled back, logged and
        dropped, so it can neither leave orphaned rows behind nor block the rest of the batch. The buffers are
        always emptied afterwards, which guarantees that torrents that were already committed are never inserted a
        second time by a later flush.
        """
        files_by_hash = {}  # type: typing.Dict[str, typing.List[typing.Tuple[int, str]]]
        for info_hash, size, path in self.__pending_files:
            files_by_hash.setdefault(info_hash, []).append((size, path))

        n_torrents = 0
        n_files = 0
        cur = self.__db_conn.cursor()
        try:
            for metadata in self.__pending_metadata:
                files = files_by_hash.pop(metadata[0], [])
                try:
                    cur.execute(
                        "INSERT INTO torrents (info_hash, name, total_size, discovered_on) VALUES (%s, %s, %s, %s);",
                        metadata
                    )
                    # Use the generated id directly; looking it up by info_hash is ambiguous if the hash is stored
                    # more than once (the info_hash index is not unique).
                    torrent_id = cur.lastrowid
                    if files:
                        cur.executemany(
                            "INSERT INTO files (torrent_id, size, path) VALUES (%s, %s, %s);",
                            [(torrent_id, size, path) for size, path in files]
                        )
                    self.__db_conn.commit()
                except Exception:
                    self.__db_conn.rollback()
                    logging.exception("%s metadata is could NOT commit metadata to the database.", metadata[0])
                else:
                    n_torrents += 1
                    n_files += len(files)
        finally:
            cur.close()
            self.__pending_metadata.clear()
            self.__pending_files.clear()

        logging.info("%d metadata (%d files) are committed to the database.", n_torrents, n_files)

    def close(self) -> None:
        """Flush whatever is still pending and close the database connection."""
        try:
            if self.__pending_metadata:
                self.__commit_metadata()
        finally:
            # Close the connection even if the final flush fails.
            self.__db_conn.close()

在此之前,你需要先通过pip安装Magnetico,并安装MySQL服务器,然后通过pip安装PyMySQL这个库。修改数据库连接信息后,用上面的代码替换Magnetico自带的persistence.py文件,该文件一般位于/usr/local/lib/python3.5/dist-packages/magneticod目录。

按照代码注释中的建表语句建立两张表,并建立好索引。

你可能感兴趣的:(Magnetico 磁力信息收集器,Mysql数据库版)