简单画了个图:

分享自己的页游运维架构_第1张图片


首先,后端程序及客户端都是分成三个版本:内部测试版,线上测试版,线上稳定版。线上测试版是小范围更新,经过一天测试没问题,然后再推到线上稳定版,更新其他服,一般游戏也都是按这个流程来更新的。


运维管理后台,记录了区服信息,提供各种简单API接口给各脚本使用。


批量维护脚本方面:create_list.py调用运维管理后台提供的API,根据输入的参数(平台、区服范围)生成一份cqbyupdate.py需要使用的iplist文件,然后cqbyupdate.py根据这份iplist文件执行相应的操作。


saltstack,用于全服批量修改配置,例如批量修改zabbix的配置、批量修改nginx的配置等等。


rsync,用于数据同步,例如给游戏服拉取最新版本。


游戏服最关键的只有一个control.py脚本,该脚本集成了管理单个游戏区服的所有操作,根据传进去的版本参数及动作参数执行对应的操作。


整套架构的优点是全服维护可用cqbyupdate.py脚本操作,如果临时游戏服上想做些什么更新,可用单服脚本control.py操作,比较灵活;缺点是对中心机依赖比较高,万一中心机宕了,就麻烦大了,所以搞了一台备份中心机。这套架构已经上线开服3000+。


control.py单服维护脚本:

#!/usr/bin/python
#coding=utf-8

import subprocess
import shutil
import os
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
import optparse
import ConfigParser
import time
import jinja2
import urllib2
import json
import socket
try:
    import fcntl
except:
    pass
import struct
import MySQLdb

def get_ip_address(ifname):
    """Return the IPv4 address bound to the network interface *ifname*.

    Issues the SIOCGIFADDR ioctl on a throwaway UDP socket and decodes
    bytes 20..23 of the returned buffer with ``socket.inet_ntoa``.

    Args:
        ifname: Interface name such as ``'eth0'``; only its first 15
            characters are passed to the ioctl.

    Returns:
        The address as a dotted-quad string.

    Raises:
        IOError/OSError: if the ioctl fails, e.g. for an unknown interface.
        NameError: if the optional ``fcntl`` import at the top of the file
            did not succeed.
    """
    s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
        ifreq = fcntl.ioctl(
            s.fileno(),
            0x8915,  # SIOCGIFADDR
            struct.pack('256s', ifname[:15])
        )
    finally:
        # The socket only serves as a handle for the ioctl; close it so
        # repeated lookups do not leak file descriptors.
        s.close()
    return socket.inet_ntoa(ifreq[20:24])


class Cqby:
    """Manage one game zone (a single server instance) on the local host.

    Each public method is a maintenance action (create, drop, start, stop,
    clear, updategame, ...) that the command-line entry point looks up by
    name. Constructing an instance already has side effects: it creates the
    base directories, chowns the game-log tree and asks the ops admin API
    for the zone's merge list.
    """

    # Seconds to wait on the ops admin API before a request counts as failed.
    API_TIMEOUT = 30

    # File-name suffix that marks a jinja2 configuration template.
    TEMPLATE_SUFFIX = '.jinja2'

    def __init__(self, version, platform, platformid, id):
        """Record the zone identity and derive paths, port and DB name.

        Args:
            version: Release directory name under the working directory.
            platform: Platform code, used in log paths and API URLs.
            platformid: Platform id handed to the config templates.
            id: Zone id; the game directory, database name and port are
                derived from it.
        """
        self.version = version
        self.platform = platform
        self.platformid = platformid
        self.id = id

        # Working directory that holds the per-version release trees.
        self.workdir = '/data/init'

        # Directory the game program of this zone is installed into.
        self.gamedir = '/data/game/game%s' % self.id
        self._ensureDir('/data/game', "目录已存在")

        # Chat-monitor directory of this zone.
        self.chatdir = '/data/game/chat%s' % self.id

        # Game port: ids above 50000 are used as the port as-is, smaller
        # ids are offset by 20000. Always stored as a string.
        zone = int(self.id)
        if zone > 50000:
            self.gameport = str(self.id)
        else:
            self.gameport = str(20000 + zone)

        # Try eth0 first, then em1.
        try:
            self.localip = get_ip_address('eth0')
        except Exception:
            self.localip = get_ip_address('em1')

        # Database name of this zone.
        self.dbname = 'game%s' % self.id

        # Administrative database account used by the mysql client calls.
        self.admindbuser = 'root'
        self.admindbpass = '123456'

        # Directory that receives database dumps.
        self.backup = '/data/backup'
        self._ensureDir(self.backup, "目录已经存在")

        # Game-log directory of this zone; the whole log tree is handed to www.
        self.gamelogdir = '/data/gamelogs/chuanqi/%s/S%s' % (self.platform, self.id)
        if not os.path.isdir(self.gamelogdir):
            os.makedirs(self.gamelogdir)
        subprocess.call('chown www:www -R /data/gamelogs', shell=True)

        # Config templates, grouped by the directory they are rendered in.
        self.binConfigDir = '%s/bin' % self.gamedir
        self.binConfigFiles = ['socket.jinja2']

        self.confConfigDir = '%s/conf' % self.gamedir
        self.confConfigFiles = ['jade.cfg.jinja2']

        self.independentConfigDir = '%s/conf/independent' % self.gamedir
        self.independentConfigFiles = [
            'auth.properties.jinja2',
            'debug.properties.jinja2',
            'fcm.properties.jinja2',
            'gm.properties.jinja2',
            'net.properties.jinja2',
            'server.properties.jinja2',
            'whiteList.properties.jinja2',
            'onlineLimit.properties.jinja2',
        ]

        self.miscConfigDir = '%s/conf/config/common' % self.gamedir
        self.miscConfigFiles = [
            'misc.properties.jinja2',
        ]

        # Hosts that get database grants: localhost plus the platform's
        # payment hosts. Unknown platforms fall back to the 'lianyun' list.
        baselist = ['127.0.0.1',]
        payIPListAll = {
            '37wan': [],
            'liebao': [],
            '2345': [],
            'yilewan': [],
            'renrenwang': [],
            '6711': [],
            '1360': [],
            'duowan': [],
            'baidu': [],
            'lianyun': [],
            'tencent': []
        }
        self.platformPayList = payIPListAll.get(self.platform, payIPListAll['lianyun'])

        self.payList = baselist + self.platformPayList

        self.mergelist = self.__getMerge()

    @staticmethod
    def _ensureDir(path, existsmsg):
        """Create *path* (with parents); print *existsmsg* if it already exists."""
        try:
            os.makedirs(path)
        except OSError as e:
            if os.path.isdir(path):
                print(existsmsg)
            else:
                # A real failure (permissions, file in the way, ...) must
                # not be reported as "already exists".
                print("创建目录失败:%s" % e)

    @staticmethod
    def _configTarget(template):
        """Return the output file name for *template*.

        Removes exactly one trailing '.jinja2'. ``str.rstrip`` must not be
        used for this: it strips a *set of characters*, so a name such as
        'main.jinja2' would be cut down to 'm'.
        """
        suffix = Cqby.TEMPLATE_SUFFIX
        if template.endswith(suffix):
            return template[:-len(suffix)]
        return template

    def __getMerge(self):
        """Fetch the merge list of this zone from the ops admin API.

        Returns:
            The response body split on ','.

        Exits the process with status 2 after three failed attempts.
        """
        url = 'http://yw.admin.xxx.com/yunwei/api/getmergetarget/%s/%s/' % (self.platform, self.id)
        for _ in range(3):
            try:
                # The timeout keeps a hung API from blocking the caller forever.
                return urllib2.urlopen(url, timeout=self.API_TIMEOUT).read().split(',')
            except Exception as e:
                print("请求合服信息失败:" + str(e))
                print("正在重试。。。")
        print("请求超时!!!!!!")
        sys.exit(2)

    def createDatabase(self):
        """Create the zone database (utf8 / utf8_general_ci).

        Exits with status 2 if the mysql client returns non-zero or cannot
        be launched.
        """
        try:
            print("正在创建数据库:%s" % self.dbname)
            cmd = ''' /usr/local/mysql/bin/mysql -u'%s' -p'%s' -e "create database %s DEFAULT CHARACTER SET utf8 COLLATE utf8_general_ci" ''' % (self.admindbuser, self.admindbpass, self.dbname)
            ret = subprocess.call(cmd, shell=True)
            print("执行状态:%s" % ret)
            if ret:
                print("创建数据库失败,请确认!")
                sys.exit(2)
        except Exception as e:
            print("捕捉到异常: %s" % e)
            sys.exit(2)

    def updateDB(self, filename):
        """Import the SQL file *filename* into the zone database.

        The client's exit status is printed but not acted upon; only a
        failure to launch the command exits (status 2).
        """
        try:
            print("正在导入SQL文件:%s" % filename)
            cmd = ''' /usr/local/mysql/bin/mysql -u'%s' -p'%s' %s < %s ''' % (self.admindbuser, self.admindbpass, self.dbname, filename)
            ret = subprocess.call(cmd, shell=True)
            print("执行状态:%s" % ret)
        except Exception as e:
            print("捕捉到异常: %s" % e)
            sys.exit(2)

    def dumpDatabase(self):
        """Dump the zone database to <backup>/<timestamp>-<dbname>.sql.

        Failures are printed only; the method never exits the process.
        """
        try:
            print("正在备份数据库:%s" % self.dbname)
            curTime = time.strftime('%Y%m%d%H%M%S', time.localtime(time.time()))
            cmd = ''' /usr/local/mysql/bin/mysqldump -u'%s' -p'%s' %s > %s ''' % (self.admindbuser, self.admindbpass, self.dbname, '%s/%s-%s.sql' % (self.backup, curTime, self.dbname))
            ret = subprocess.call(cmd, shell=True)
            print("执行状态:%s" % ret)
        except Exception as e:
            print("捕捉到异常: %s" % e)

    def dropDatabase(self):
        """Drop the zone database. Failures are printed only."""
        try:
            print("正在删除数据库:%s" % self.dbname)
            cmd = ''' /usr/local/mysql/bin/mysql -u'%s' -p'%s' -e "drop database %s" ''' % (self.admindbuser, self.admindbpass, self.dbname)
            ret = subprocess.call(cmd, shell=True)
            print("执行状态:%s" % ret)
        except Exception as e:
            print("捕捉到异常: %s" % e)

    def createGameDir(self):
        """Copy the release's server tree into the (not yet existing) game directory.

        Exits with status 2 if the directory already exists or the copy fails.
        """
        try:
            print("正在检测目录是否存在:%s" % self.gamedir)
            if os.path.isdir(self.gamedir):
                print("目录已存在,请检查参数!")
                sys.exit(2)
            print("正在复制程序文件至:%s" % self.gamedir)
            shutil.copytree('%s/%s/server' % (self.workdir, self.version), self.gamedir)
        except Exception as e:
            print("捕捉到异常: %s" % e)
            sys.exit(2)

    def dropGameDir(self):
        """Remove the game directory if present. Failures are printed only."""
        try:
            print("正在删除游戏目录:%s" % self.gamedir)
            if os.path.isdir(self.gamedir):
                shutil.rmtree(self.gamedir)
        except Exception as e:
            print("遇到错误: %s" % e)

    def dropGameLogDir(self):
        """Remove the game-log directory if present. Failures are printed only."""
        try:
            print("正在删除日志目录:%s" % self.gamelogdir)
            if os.path.isdir(self.gamelogdir):
                shutil.rmtree(self.gamelogdir)
        except Exception as e:
            print("遇到错误: %s" % e)

    def createConfig(self, configdir, configlist):
        """Render every template of *configlist* inside *configdir*.

        The memory setting is fetched from the ops admin API once and passed
        to all templates. Each result is written next to its template, under
        the template name minus the '.jinja2' suffix.

        Args:
            configdir: Directory containing the templates (also the output
                directory).
            configlist: Template file names to render.

        Exits with status 2 on any error.
        """
        try:
            print("正在生成配置文件:%s" % configdir)
            url = 'http://yw.admin.xxx.com/yunwei/api/getmem/%s/%s' % (self.platform, self.id)
            mem = urllib2.urlopen(url, timeout=self.API_TIMEOUT).read()
            env = jinja2.Environment(loader=jinja2.FileSystemLoader(configdir))
            for gateconfig in configlist:
                print(gateconfig)
                template = env.get_template(gateconfig)
                # NOTE(review): dbpass here is 'game123456' while mysqlgrant()
                # grants the 'game' account with password 'cqbygame' - confirm
                # which one is intended.
                rendered = template.render(
                    version=self.version,
                    platformid=self.platformid,
                    platform=self.platform,
                    gameid=self.id,
                    gameport=self.gameport,
                    gamedir=self.gamedir,
                    dbuser='game',
                    dbpass='game123456',
                    dbname=self.dbname,
                    paylist=self.platformPayList,
                    mem=mem,
                    mergelist=self.mergelist,
                )
                # Render first, then open: a template error no longer leaves
                # a truncated, empty config file behind.
                with open('%s/%s' % (configdir, self._configTarget(gateconfig)), 'w') as f:
                    f.write(rendered)
        except Exception as e:
            print("生成配置文件遇到错误: %s" % e)
            sys.exit(2)

    def updateconfig(self):
        """Regenerate the bin, conf and independent config files of the zone."""
        self.createConfig(self.binConfigDir, self.binConfigFiles)
        # The rendered start/stop script must be executable.
        os.chmod('%s/bin/socket' % self.gamedir, 0o755)
        self.createConfig(self.confConfigDir, self.confConfigFiles)
        self.createConfig(self.independentConfigDir, self.independentConfigFiles)
        # The misc templates are currently not rendered (call left disabled):
        #self.createConfig(self.miscConfigDir, self.miscConfigFiles)

    def updategame(self):
        """Rsync the release's server tree over the game directory.

        The generated config files, the start script and the log directory
        are excluded so local settings survive the update.

        Returns:
            The exit status of rsync.
        """
        print("正在更新游戏程序。。。")
        cmd = ''' rsync -avzP --exclude="socket" --exclude="log"  --exclude="onlineLimit.properties" --exclude="jade.cfg" --exclude="auth.properties" --exclude="debug.properties" --exclude="fcm.properties" --exclude="gm.properties" --exclude="net.properties" --exclude="server.properties" --exclude="whiteList.properties"  %s/%s/server/  %s/ ''' % (self.workdir, self.version, self.gamedir)
        print(cmd)
        return subprocess.call(cmd, shell=True)

    def start(self):
        """Make jsvc executable and start the game service.

        Returns:
            The exit status of ``./socket start``.
        """
        print("给JSVC添加执行权限:")
        os.chmod('%s/bin/jsvc' % self.gamedir, 0o755)
        print("正在启动服务:")
        cmd = '''cd %s/bin ; ./socket  start ''' % self.gamedir
        return subprocess.call(cmd, shell=True)

    def stop(self):
        """Stop the game service.

        Returns:
            The exit status of ``./socket stop``.
        """
        print("正在关闭服务:")
        cmd = '''cd %s/bin ; ./socket  stop ''' % self.gamedir
        return subprocess.call(cmd, shell=True)

    def clearnow(self):
        """Wipe the zone data without any safety check.

        Dumps the database, re-imports the release's database.sql and
        removes the game-log directory.
        """
        # NOTE(review): the dump's exit status is not checked before the
        # re-import - confirm that continuing without a backup is acceptable.
        self.dumpDatabase()
        self.updateDB('%s/%s/server/sql/database.sql' % (self.workdir, self.version))
        self.dropGameLogDir()

    def clear(self):
        """Wipe the zone data, but only if the Player table has at most 30 rows.

        Exits with status 2 when the row count cannot be read or exceeds 30;
        otherwise stops the service, wipes the data and starts it again.
        """
        try:
            conn = MySQLdb.connect(user=self.admindbuser, passwd=self.admindbpass, host='localhost', db=self.dbname, unix_socket='/tmp/mysql.sock')
            try:
                cursor = conn.cursor()
                # Let the server count instead of fetching every row.
                cursor.execute(''' select count(*) from Player ''')
                total = int(cursor.fetchone()[0])
                cursor.close()
            finally:
                conn.close()
        except Exception as e:
            print("连接数据库错误:%s" % e)
            sys.exit(2)

        print("数据库Player表有:%s" % total)
        if total > 30:
            print("Player表记录总数大于30!请确认后再执行清档操作!!!")
            sys.exit(2)

        print("Player表记录总数小于30,可以执行清档操作!")
        self.stop()
        self.clearnow()
        self.start()

    def create(self):
        """Set up a new zone: database, schema, grants, program files, configs."""
        self.createDatabase()
        self.updateDB('%s/%s/server/sql/database.sql' % (self.workdir, self.version))
        self.mysqlgrant()
        self.createGameDir()
        self.updateconfig()
        # NOTE(review): createchat() and nginxlogs() are not defined in this
        # class as shown - presumably omitted from this listing; confirm.
        self.createchat()
        self.nginxlogs()

    def drop(self):
        """Tear the zone down: dump and drop the database, remove directories."""
        # NOTE(review): the dump's exit status is not checked before the
        # database is dropped - confirm that this is acceptable.
        self.dumpDatabase()
        self.dropDatabase()
        self.dropGameDir()
        self.dropGameLogDir()
        # NOTE(review): dropchat() is not defined in this class as shown.
        self.dropchat()

    def onekey(self):
        """Update in one go: stop, wait 10 seconds, sync the program, start."""
        self.stop()
        time.sleep(10)
        self.updategame()
        self.start()

    def mysqlgrant(self):
        """Grant database access to every host in the pay list, plus the
        read-only 'db' account."""
        print("正在添加数据库授权:")

        for ip in self.payList:
            print("正在添加%s权限" % ip)
            cmd = ''' /usr/local/mysql/bin/mysql -u'%s' -p'%s' -e "grant all privileges on *.* to game@'%s' Identified by 'cqbygame'" ''' % (self.admindbuser, self.admindbpass, ip)
            subprocess.call(cmd, shell=True)

        cmd = ''' /usr/local/mysql/bin/mysql -u'%s' -p'%s' -e "grant select on *.* to db@'119.131.244.178' identified by 'lizhenjie';" ''' % (self.admindbuser, self.admindbpass)
        subprocess.call(cmd, shell=True)

if __name__ == "__main__":
    # Command-line entry point of control.py. Two modes:
    #   -p PLATFORM            store the host's platform name/id in platform.ini
    #   -v VER -i ID -a ACTION run one Cqby action for a single zone
    # Every action name is also the name of the Cqby method it dispatches to.
    active_list = ['create', 'drop', 'updateconfig', 'start', 'stop', 'clear', 'updategame', 'updateDB','onekey','mysqlgrant','clearnow']
    gamever_list = ['test','37dev','37stable']

    usage = ''' usage: %prog -p platform
                       %prog -v version -i id -a action
                       %prog -v version -i id -a updateDB -s sqlfile
    '''
    parser = optparse.OptionParser(
        usage   = usage,
        version = "%prog 2.0"
    )

    setplat_opts = optparse.OptionGroup(
        parser, '设置服务器平台标识',
        '一台硬件服务器设置一次即可。'
    )

    setplat_opts.add_option(
        '-p','--platform',
        dest="platform",
        help="平台名称"
    )

    parser.add_option_group(setplat_opts)

    tools_opts = optparse.OptionGroup(
        parser, '服务器日常功能',
    )

    tools_opts.add_option(
        '-v','--ver',
        dest="ver",
        help="版本目录",
        type="choice" ,
        choices=gamever_list,
        default=gamever_list[1]
    )
    tools_opts.add_option(
        '-i','--id',
        dest='id',
        help="服务器ID"
    )
    tools_opts.add_option(
        '-a','--action',
        dest='action',
        help="执行动作",
        type="choice" ,
        choices=active_list
    )
    tools_opts.add_option(
        '-s','--sql',
        dest='sql',
        help="SQL文件(可选,配合updateDB使用)"
    )

    parser.add_option_group(tools_opts)

    options, args = parser.parse_args()

    err_msg = '参数不对,请输--help查看详细说明!'

    # File (relative to the current directory) that stores the platform identity.
    ini = 'platform.ini'

    if options.platform:
        # Mode 1: look the platform code up in the admin API and persist it.
        apiurl = 'http://yw.admin.xxx.com/yunwei/api/getplatforminfo/'
        result = urllib2.urlopen(apiurl)
        response = json.loads(result.read())
        for code, pid in response.items():
            if options.platform == code:
                platformid = pid
                print("正在设置服务器标识为:%s-%s" % (platformid, options.platform))
                conf = ConfigParser.ConfigParser()
                conf.add_section('platforminfo')
                conf.set('platforminfo','name',options.platform)
                conf.set('platforminfo','id',platformid)
                with open(ini, 'w') as cfd:
                    conf.write(cfd)
                break
        else:
            # Unknown platform: fail loudly instead of exiting 0 with
            # nothing written, so batch callers see the error.
            print("平台标识不存在:%s" % options.platform)
            sys.exit(2)
        sys.exit(0)

    if options.id and options.ver and options.action:
        # Mode 2: run a single action for one zone.
        if options.action == 'updateDB' and not options.sql:
            # Without -s the SQL path would end in the literal "None".
            parser.error(err_msg)

        cf = ConfigParser.ConfigParser()
        if not cf.read(ini):
            print("未找到%s,请先使用 -p 设置平台标识" % ini)
            sys.exit(2)
        platform = cf.get('platforminfo','name')
        platformid = cf.get('platforminfo','id')

        cqby = Cqby(options.ver, platform, platformid, options.id)
        run_function = getattr(cqby,options.action)

        if options.action in ['updateDB',]:
            # The SQL file is looked up inside the release tree, relative
            # to the current directory.
            run_function('%s/server/sql/%s' % (options.ver,options.sql))
        else:
            run_function()
    else:
        parser.error(err_msg)


cqbyupdate.py批量维护脚本:

#!/usr/bin/python
#coding:utf-8

import threading
import Queue
import subprocess
import optparse
import logging
import logging.config
import datetime
import os
import sys

reload(sys)
sys.setdefaultencoding('utf-8')

#test:
import time


# Console-only alternative to the file-based logging configuration below:
#logging.basicConfig(level = logging.DEBUG,format='(%(threadName)-10s) %(message)s',)
# Logging setup is read from logger.conf, resolved against the current directory.
logging.config.fileConfig("logger.conf")
logger = logging.getLogger("root")
# Logger named "file"; the worker thread logs commands that exited 0 to it.
logger2 = logging.getLogger("file")
# Work queue shared by all worker threads; the main block puts one
# server-list line per item.
queue = Queue.Queue()

# Result lines of tasks whose remote command returned a non-zero exit code;
# appended by the worker threads and printed by the main block at the end.
Failed_List = []

class Ahdts(threading.Thread):
    """Worker thread that runs the requested action on game servers via ssh.

    Each queue item is one line of the server list in the form
    ``platform,id,ip,...``. The module-level names ``action``, ``ver`` and
    ``sqlfile`` (set by the main block) select what is executed. The remote
    output goes to a per-task log file under ``updatelog/<today>/``.
    """

    def __init__(self, queue):
        """Store the work queue and make sure today's log directory exists.

        Exits the process with status 2 if the directory cannot be created.
        """
        super(Ahdts,self).__init__()
        self.queue = queue
        self.workdir = '/data/init'

        # Per-day log directory, relative to the current directory.
        log_path = 'updatelog'
        today = datetime.date.today()
        self.log_path_today = '%s/%s' % (log_path,today)
        if not os.path.isdir(self.log_path_today):
            try:
                os.makedirs(self.log_path_today)
            except Exception as e:
                print(e)
                sys.exit(2)

    def run(self):
        """Process queue items forever (the thread is expected to be a daemon)."""
        while True:
            item = self.queue.get()
            try:
                self._process(item)
            except Exception as e:
                # A broken item must be reported, not kill the worker.
                rettxt = 'ThreadingError! %r %s' % (item, e)
                Failed_List.append(rettxt)
                logging.debug(rettxt)
            finally:
                # Always acknowledge the item; otherwise queue.join() in the
                # main thread would wait forever after a worker error.
                self.queue.task_done()

    def _process(self, item):
        """Build the remote command for one server-list line and run it over ssh."""
        value = item.strip().split(',')
        platform, zoneid, ip = value[0], value[1], value[2]

        logging.debug("%10s %6s %15s %15s %10s ThreadingStart!" % (platform,zoneid,ip,action,ver))

        if action == 'rsync':
            cmd = ''' cd %s ; ./rsync ''' % self.workdir
        elif action == 'ntp':
            cmd = ''' cd %s ; ./TimeClient.py ''' % self.workdir
        elif action in ['updateDB',]:
            cmd = '''  cd %s ; ./control.py -i %s -a %s -v %s -s %s ''' % (self.workdir, zoneid, action, ver, sqlfile)
        elif action == 'platform':
            cmd = '''  cd %s ; ./control.py -p %s ''' % (self.workdir, platform)
        else:
            cmd = '''  cd %s ; ./control.py -i %s -a %s -v %s ''' % (self.workdir, zoneid, action, ver)
        sshcmd = ''' ssh root@%s -n "%s" ''' % (ip, cmd)

        # stdout and stderr of the remote command are appended to the task log.
        with open('%s/%s-%s-%s-%s.log' % (self.log_path_today, platform, zoneid, ver, action), 'a') as logfile:
            exitcode = subprocess.call(sshcmd,shell=True,stdout=logfile, stderr=subprocess.STDOUT)

        if exitcode == 0:
            logger2.debug('%10s %6s %15s %15s %10s %s' % (platform, zoneid, ip, action, ver, cmd))

        rettxt = '%10s %6s %15s %15s %10s ThreadingEnd! ExitCode:%s' % (platform,zoneid,ip,action,ver,exitcode)

        if exitcode:
            Failed_List.append(rettxt)

        logging.debug(rettxt)


if __name__ == "__main__":
    # Command-line entry point of cqbyupdate.py: read the server list, fan
    # the chosen action out to worker threads and print the failed tasks.
    action_list = ['rsync','create','drop','start','stop','clear','updateconfig','updategame','updateDB','onekey']
    gamever_list = ['test','37dev','37stable']
    # NOTE(review): the worker also handles 'ntp' and 'platform', which are
    # not selectable here - confirm whether they should be added.

    usage = ''' usage: %prog --file  --action 
    Forexample: %prog -f game-test.ini -a create
                %prog -f game-test.ini -a onekey
                %prog -f game-test.ini -a updateDB -s test.sql
    '''
    parser = optparse.OptionParser(
        usage   = usage,
        version = "%prog 1.4"
    )

    parser.add_option('-f','--file',dest="file",help="IP文件列表")
    parser.add_option('-a','--action',dest="action",help="执行动作",type="choice",choices=action_list)
    parser.add_option('-v','--ver', dest='ver',help="版本目录标识",type="choice",choices=gamever_list)
    parser.add_option('-s','--sql', dest='sql',help="待更新的SQL文件")
    options, args = parser.parse_args()

    err_msg = '参数不对,请输--help查看详细说明!'

    if options.action and options.ver and options.file:
        if options.action == 'updateDB' and not options.sql:
            # Without -s every server would be told to import "None".
            parser.error(err_msg)

        # Blank lines (e.g. a trailing newline) are not server entries.
        with open(options.file) as listfile:
            content = [line for line in listfile if line.strip()]

        # Module-level names read by the worker threads.
        action = options.action
        ver = options.ver
        sqlfile = options.sql

        # Never start more workers than there are entries, capped at 200.
        maxThreadNum = min(200, len(content))

        for i in range(maxThreadNum):
            t = Ahdts(queue)
            t.setDaemon(True)
            t.start()

        logging.debug("%10s %6s %15s %15s %10s" % ('PlatForm','ID','IP','Action','Version'))

        # Host-level actions run only once per IP even if several zones share it.
        seen_ips = set()
        for line in content:
            fields = line.strip().split(',')
            if len(fields) < 5:
                # Expected: platform,id,ip,port,opentime. Report the line
                # instead of crashing here or inside a worker.
                Failed_List.append('Malformed line skipped: %s' % line.strip())
                continue
            ip = fields[2]
            if action in ['rsync','platform'] and ip in seen_ips:
                continue
            queue.put(line)
            seen_ips.add(ip)
        queue.join()

        # Print the list of failed tasks.
        print('=' * 20 + '执行失败列表' + '=' * 20)
        if Failed_List:
            for failed in Failed_List:
                print(failed)
        else:
            print("None")
        print('=' * 52)

        logging.debug("Done")
    else:
        print(err_msg)


批量维护脚本其实就是ssh远程过去游戏服执行control.py脚本,后面看能不能改成用socket的方式去连接,把socket的东西练练手,整套东西感觉还是比较简单。