在 OpenStack 中实现计算节点高可用的方案时,需要探测物理节点是否宕机。程序中使用了原始套接字(socket.SOCK_RAW)来发送 ICMP 报文,而创建原始套接字必须具备 root 权限,但 OpenStack 的服务进程是以 nova 用户运行的,于是有了下面的处理办法,希望能帮助到大家。
1, 程序调用的错误
2014-03-13 09:33:08.408 1916 TRACE nova.openstack.common.periodic_task Traceback (most recent call last):
2014-03-13 09:33:08.408 1916 TRACE nova.openstack.common.periodic_task   File "/usr/lib/python2.6/site-packages/nova/openstack/common/periodic_task.py", line 180, in run_periodic_tasks
2014-03-13 09:33:08.408 1916 TRACE nova.openstack.common.periodic_task     task(self, context)
2014-03-13 09:33:08.408 1916 TRACE nova.openstack.common.periodic_task   File "/usr/lib/python2.6/site-packages/nova/extend/manager.py", line 91, in health_check_host
2014-03-13 09:33:08.408 1916 TRACE nova.openstack.common.periodic_task     if not icmp_ping.icmp_ping(node_resource[s]['host_ip']) and\
2014-03-13 09:33:08.408 1916 TRACE nova.openstack.common.periodic_task   File "/usr/lib/python2.6/site-packages/nova/extend/icmp_ping.py", line 157, in icmp_ping
2014-03-13 09:33:08.408 1916 TRACE nova.openstack.common.periodic_task     delay = do_one(dest_addr, CONF.timeout)
2014-03-13 09:33:08.408 1916 TRACE nova.openstack.common.periodic_task   File "/usr/lib/python2.6/site-packages/nova/extend/icmp_ping.py", line 136, in do_one
2014-03-13 09:33:08.408 1916 TRACE nova.openstack.common.periodic_task     ping_socket = socket.socket(socket.AF_INET, socket.SOCK_RAW, icmp)
2014-03-13 09:33:08.408 1916 TRACE nova.openstack.common.periodic_task   File "/usr/lib/python2.6/site-packages/eventlet/greenio.py", line 116, in __init__
2014-03-13 09:33:08.408 1916 TRACE nova.openstack.common.periodic_task     fd = _original_socket(family_or_realsock, *args, **kwargs)
2014-03-13 09:33:08.408 1916 TRACE nova.openstack.common.periodic_task   File "/usr/lib64/python2.6/socket.py", line 184, in __init__
2014-03-13 09:33:08.408 1916 TRACE nova.openstack.common.periodic_task     _sock = _realsocket(family, type, proto)
2014-03-13 09:33:08.408 1916 TRACE nova.openstack.common.periodic_task error: [Errno 1] Operation not permitted
2,icmp_ping 的实现。这是网上一位朋友写的 ICMP ping 实现(出处见下面的 URL),我拿过来做了一些修改,希望原作者不要见怪。
URL: https://mail.python.org/pipermail/tutor/2009-November/072706.html
# vim: tabstop=4 shiftwidth=4 softtabstop=4

# Copyright 2010 United States Government as represented by the
# Administrator of the National Aeronautics and Space Administration.
# All Rights Reserved.
# Copyright (c) 2010 Citrix Systems, Inc.
# Copyright 2011 Ken Pepple
#
#    Licensed under the Apache License, Version 2.0 (the "License"); you may
#    not use this file except in compliance with the License. You may obtain
#    a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
#    License for the specific language governing permissions and limitations
#    under the License.

"""Detect whether a physical node is alive by sending ICMP echo requests.

The echo requests are sent over a raw ICMP socket, so the calling process
needs the privileges required to create one (creating the socket fails with
"[Errno 1] Operation not permitted" otherwise).
"""

import errno
import os
import select
import struct
import sys
import time
import traceback

# The eventlet green socket is used instead of the stdlib socket module.
from eventlet.green import socket
from oslo.config import cfg

from nova.openstack.common import log as logging

# Both options are consumed as numbers (select() timeout, loop bound), so
# they are declared as integers; a value read from the configuration file
# would otherwise arrive as a string.
icmp_opts = [
    cfg.IntOpt('timeout',
               default=3,
               help='Seconds to wait for the reply to one ICMP echo '
                    'request'),
    cfg.IntOpt('count',
               default=9,
               help='Maximum number of ICMP echo requests sent per '
                    'health check'),
]

CONF = cfg.CONF
CONF.register_opts(icmp_opts)
LOG = logging.getLogger(__name__)

# From /usr/include/linux/icmp.h; your mileage may vary.
ICMP_ECHO_REQUEST = 8  # Seems to be the same on Solaris.


def checksum(source_string):
    """Return the 16-bit checksum of a packet, as computed by ping.c.

    :param source_string: raw packet bytes (header + payload)
    :returns: the one's-complement checksum with its two bytes swapped;
              the caller passes it through ``socket.htons`` before packing
    """
    # bytearray yields integers when indexed on both Python 2 and 3.
    data = bytearray(source_string)
    count_to = (len(data) // 2) * 2

    total = 0
    for index in range(0, count_to, 2):
        total += data[index + 1] * 256 + data[index]
        total &= 0xffffffff

    # Odd length: fold in the trailing byte on its own.
    if count_to < len(data):
        total += data[-1]
        total &= 0xffffffff

    total = (total >> 16) + (total & 0xffff)
    total += total >> 16
    answer = ~total & 0xffff

    # Swap the two bytes of the result.
    return answer >> 8 | (answer << 8 & 0xff00)


def receive_one_ping(ping_socket, ID, timeout):
    """Wait for the echo reply that carries our identifier.

    :param ping_socket: raw ICMP socket the request was sent on
    :param ID: identifier packed into the request header
    :param timeout: maximum number of seconds to wait
    :returns: delay in seconds between send and receive, or None if no
              matching packet arrived before the timeout
    """
    time_left = timeout
    while True:
        # Wall-clock time is required here: time.clock() measures CPU time
        # on Linux and barely advances while blocked in select().
        started_select = time.time()
        ready = select.select([ping_socket], [], [], time_left)
        time_in_select = time.time() - started_select
        if not ready[0]:  # Timeout
            return None

        time_received = time.time()
        packet, _addr = ping_socket.recvfrom(1024)
        # NOTE(review): assumes a 20-byte IP header (no IP options) in
        # front of the ICMP header - confirm.
        icmp_header = packet[20:28]
        _type, _code, _checksum, packet_id, _sequence = struct.unpack(
            "bbHHh", icmp_header)
        if packet_id == ID:
            bytes_in_double = struct.calcsize("d")
            time_sent = struct.unpack(
                "d", packet[28:28 + bytes_in_double])[0]
            return time_received - time_sent

        # Not our packet: keep waiting for whatever time is left.
        time_left -= time_in_select
        if time_left <= 0:
            return None


def send_one_ping(ping_socket, dest_addr, ID):
    """Send one ICMP echo request to ``dest_addr``.

    :param ping_socket: raw ICMP socket to send on
    :param dest_addr: host name or IP address of the target
    :param ID: identifier to pack into the ICMP header
    :raises socket.gaierror: if ``dest_addr`` cannot be resolved
    """
    dest_addr = socket.gethostbyname(dest_addr)

    # Header is type (8), code (8), checksum (16), id (16), sequence (16).
    # Make a dummy header with a 0 checksum first.
    header = struct.pack("bbHHh", ICMP_ECHO_REQUEST, 0, 0, ID, 1)

    # Payload: the send timestamp followed by padding up to 192 bytes.
    # The timestamp must use the same clock as receive_one_ping().
    bytes_in_double = struct.calcsize("d")
    padding = (192 - bytes_in_double) * b"Q"
    data = struct.pack("d", time.time()) + padding

    # Calculate the checksum on the data and the dummy header, then
    # rebuild the header with the real checksum.
    my_checksum = checksum(header + data)
    header = struct.pack(
        "bbHHh", ICMP_ECHO_REQUEST, 0, socket.htons(my_checksum), ID, 1)

    # NOTE(review): the port value 1 is a placeholder carried over from
    # the original implementation ("Don't know about the 1").
    ping_socket.sendto(header + data, (dest_addr, 1))


def do_one(dest_addr, timeout):
    """Ping ``dest_addr`` once.

    :param dest_addr: host name or IP address of the target
    :param timeout: seconds to wait for the reply
    :returns: the delay in seconds, or None on timeout
    :raises socket.error: if the raw socket cannot be created, e.g. when
                          the process lacks the required privileges
    :raises socket.gaierror: if ``dest_addr`` cannot be resolved
    """
    icmp = socket.getprotobyname("icmp")
    try:
        ping_socket = socket.socket(socket.AF_INET, socket.SOCK_RAW, icmp)
    except socket.error as exc:
        if exc.errno == errno.EPERM:
            # Operation not permitted
            LOG.error("%s - Note that ICMP messages can only be sent from "
                      "processes running as root.", exc)
        raise  # raise the original error

    # Always release the socket, even if sending or receiving fails.
    try:
        my_id = os.getpid() & 0xFFFF
        send_one_ping(ping_socket, dest_addr, my_id)
        return receive_one_ping(ping_socket, my_id, timeout)
    finally:
        ping_socket.close()


def icmp_ping(dest_addr):
    """Report whether ``dest_addr`` answers ICMP echo requests.

    Up to ``CONF.count`` requests are sent, each waiting ``CONF.timeout``
    seconds. The host is reported alive as soon as one reply arrives, so
    a single lost packet does not mark a healthy node as down.

    :param dest_addr: host name or IP address of the target
    :returns: True if a reply was received; False if every request timed
              out or the address could not be resolved
    """
    for _attempt in range(CONF.count):
        try:
            delay = do_one(dest_addr, CONF.timeout)
        except socket.gaierror:
            # Name resolution failed; retrying will not help.
            return False
        if delay is not None:
            return True
    return False
3,我单独写了一个 OpenStack 的组件(nova-extend)来实现高可用,用它来调用 icmp_ping。下面是它的启动脚本:
"""Starter script for Nova Extend.""" importsys fromoslo.config importcfg fromnova importconfig fromnova importobjects fromnova.openstack.common importlog as logging fromnova importservice fromnova importutils CONF =cfg.CONF CONF.import_opt('topic', 'nova.extend.api', group='extend') defmain(): objects.register_all() config.parse_args(sys.argv) logging.setup("nova") utils.monkey_patch()
4, nova-extend 启动服务的时候会调用nova.extend.manager.ExtendManager
ifnot icmp_ping.icmp_ping(node_resource[s]['host_ip']) and\ utils.is_false(CONF.extend_high_availability): LOG.info("The compute node [%s] is down "%s) instances_uuid =db.instances_uuid_by_host(context,s)
5, 程序就会报错,
2014-03-1309:33:08.4081916TRACE nova.openstack.common.periodic_task Traceback (most recent call last): 2014-03-1309:33:08.4081916TRACE nova.openstack.common.periodic_task File"/usr/lib/python2.6/site-packages/nova/openstack/common/periodic_task.py", line 180, inrun_periodic_tasks 2014-03-1309:33:08.4081916TRACE nova.openstack.common.periodic_task task(self, context) 2014-03-1309:33:08.4081916TRACE nova.openstack.common.periodic_task File"/usr/lib/python2.6/site-packages/nova/extend/manager.py", line 91, inhealth_check_host 2014-03-1309:33:08.4081916TRACE nova.openstack.common.periodic_task ifnot icmp_ping.icmp_ping(node_resource[s]['host_ip']) and\ 2014-03-1309:33:08.4081916TRACE nova.openstack.common.periodic_task File"/usr/lib/python2.6/site-packages/nova/extend/icmp_ping.py", line 157, inicmp_ping 2014-03-1309:33:08.4081916TRACE nova.openstack.common.periodic_task delay = do_one(dest_addr, CONF.timeout) 2014-03-1309:33:08.4081916TRACE nova.openstack.common.periodic_task File"/usr/lib/python2.6/site-packages/nova/extend/icmp_ping.py", line 136, indo_one 2014-03-1309:33:08.4081916TRACE nova.openstack.common.periodic_task ping_socket =socket.socket(socket.AF_INET, socket.SOCK_RAW, icmp) 2014-03-1309:33:08.4081916TRACE nova.openstack.common.periodic_task File"/usr/lib/python2.6/site-packages/eventlet/greenio.py", line 116, in__init__ 2014-03-1309:33:08.4081916TRACE nova.openstack.common.periodic_task fd =_original_socket(family_or_realsock, *args, **kwargs) 2014-03-1309:33:08.4081916TRACE nova.openstack.common.periodic_task File"/usr/lib64/python2.6/socket.py", line 184, in__init__ 2014-03-1309:33:08.4081916TRACE nova.openstack.common.periodic_task _sock =_realsocket(family, type, proto) 2
6,我直接用 python 调用是 OK 的,但是通过 OpenStack 服务调用依然报错。我起初以为是协程(eventlet)的限制导致的,其实不是,而是运行用户的权限问题。我是这样处理的:
1, cp /usr/bin/python /usr/bin/python-extend
2, chown root:root /usr/bin/python-extend
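3, chmod ug+s /usr/bin/python-extend
(注意:带 setuid root 位的 Python 解释器会让任何能执行它的用户以 root 身份运行任意脚本,存在严重的安全风险。更安全的做法是不设置 setuid 位,只给这个解释器副本授予创建原始套接字所需的能力:setcap cap_net_raw+ep /usr/bin/python-extend)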
4, 修改/usr/bin/nova-extend
#!/usr/bin/python-extend
5,重启服务就OK 了