1.在pg库主机上部署,每5分钟执行一次,插入到我的测试pg库内
[root@mysqltest tina_shell]# cat jk_pg.sh
#!/bin/bash
# Collects a health snapshot of the local PostgreSQL host and inserts it as one
# row into table jk_pg of database "tina" on 192.168.12.31:1922.
# Applies to the relay databases 192.168.12.8 and 12.2.

# "5432" when any socket is LISTENing on local port 5432, empty otherwise.
# Matching on the local-address column works for IPv4 and IPv6 listeners alike
# and does not match other ports that merely contain "5432".
running_port=$(netstat -nat | awk '$6 == "LISTEN" && $4 ~ /:5432$/ { print "5432"; exit }')
jk_host=$(ifconfig | grep "inet addr:192.168" | awk '{print $2}' | awk -F : '{print $2}')
record_time=$(date +"%Y-%m-%d %H:%M:%S")
# Number of postgres processes (startup process excluded) whose ps line contains "waiting"
waiting_count=$(ps -ef | grep postgres | grep -v startup | grep -c waiting)
# Field 15 of the ps line of the streaming wal process
streaming=$(ps -ef | grep wal | grep streaming | awk '{print $15}')
#tbjk=`ps -ef|grep postgres|grep startup|grep waiting|wc -l`
# df output without tmpfs and boot file systems
cipan=$(df -ah | grep % | grep -v tmpfs | grep -v boot)
# Number of postgres processes belonging to the users engine, fenxi or sqluser
usersum=$(ps -ef | grep postgres | grep -cE "engine|fenxi|sqluser")
#echo $jk_host $record_time $waiting_count $streaming $tbjk >>/tmp/pg_check_state.log

# Values are handed to psql as variables and quoted with :'name', so quotes or
# newlines in the collected text cannot break the statement. psql only
# interpolates variables in scripts read from stdin/-f, not in -c, hence the
# here-document. -X keeps ~/.psqlrc out of the run.
# nullif() turns an empty running_port into NULL: when PostgreSQL is down the
# row must still be inserted instead of failing on '' for an integer column.
psql -X -h 192.168.12.31 -U postgres -p 1922 -d tina \
  -v jk_host="$jk_host" \
  -v record_time="$record_time" \
  -v waiting_count="$waiting_count" \
  -v streaming="$streaming" \
  -v running_port="$running_port" \
  -v cipan="$cipan" \
  -v usersum="$usersum" <<'SQL'
insert into jk_pg(jk_host,record_time,waiting_count,streaming,running_port,cipan,usersum)
values(:'jk_host', :'record_time', :'waiting_count', :'streaming',
       nullif(:'running_port', '')::integer, :'cipan', :'usersum');
SQL
2.部署crontab
cat /etc/crontab
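# Every day at 20:00: run backup.sh.
0 20 * * * root sh /tina_shell/backup.sh
# At minute 4 of every hour: run the archived-log cleanup script.
# NOTE(review): the cleanup script shown later in this document is named
# pg_delete_archivedlog.sh -- confirm which file name is the real one.
4 * * * * root sh /tina_shell/pg_delete_archivelog.sh
# Every 5 minutes: run the monitoring script jk_pg.sh.
*/5 * * * * root sh /tina_shell/jk_pg.sh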
3.建表
-- Home-made monitoring table filled by jk_pg.sh.
CREATE TABLE jk_pg (
    id            serial NOT NULL,
    jk_host       varchar,    -- IP address of the monitored host
    record_time   timestamp,  -- time the sample was taken
    waiting_count integer,    -- number of processes in "waiting" state: ps -ef|grep postgres|grep -v startup |grep waiting|wc -l
    streaming     varchar,    -- WAL currently being streamed: ps -ef|grep wal|grep streaming |awk '{print $13}'
    usersum       integer,    -- total number of current user connections (sqluser, engine, fenxi)
    tbjk          integer,    -- ps -ef|grep postgres|grep startup|grep waiting|wc -l
    running_port  integer,    -- liveness check: if port 5432 is not shown, PostgreSQL is down
    cipan         varchar,    -- disk usage
    locks         varchar,    -- table lock information
    beizhu        varchar     -- free-form remarks about anomalies
) WITH (OIDS = FALSE);
COMMENT ON TABLE jk_pg IS '自制监控表-tina';
查看监控数据
tina=# select * from jk_pg order by record_time desc,jk_host desc limit 4;
id | jk_host | record_time | waiting_count | streaming | usersum | tbjk | running_port | cipan | locks | beizhu
------+----------------+---------------------+---------------+--------------+---------+------+--------------+------------------------------------------------------+-------+--------
7654 | 192.168.12.2 | 2016-01-13 11:00:01 | 0 | F2B/CE5349B0 | 161 | | 5432 | Filesystem Size Used Avail Use% Mounted on +| |
| | | | | | | | /dev/sda2 104G 21G 78G 22% / +| |
| | | | | | | | /dev/sdc1 917G 540G 331G 63% /opt/db_backup+| |
| | | | | | | | /dev/sdb 939G 370G 522G 42% /home/pgsql | |
7655 | 192.168.12.1 | 2016-01-13 11:00:01 | 0 | F2B/CEE173E8 | 26 | 0 | 5432 | Filesystem Size Used Avail Use% Mounted on +| |
| | | | | | | | /dev/sda3 103G 6.1G 92G 7% / +| |
| | | | | | | | /dev/sdb1 939G 285G 606G 32% /home/pgsql | |
7653 | 192.168.12.8 | 2016-01-13 11:00:01 | 0 | | 30 | | 5432 | Filesystem Size Used Avail Use% Mounted on +| |
| | | | | | | | /dev/sda3 27G 1.9G 24G 8% / +| |
| | | | | | | | /dev/sda2 29G 4.1G 24G 15% /var +| |
| | | | | | | | /dev/sdb1 252G 118G 122G 50% /home | |
2)pg统计库所有表的行数
[root@pg-ro tmp]# cat tinadb.sh
#!/bin/bash
#2015-11-3 tina
# Appends the row count of every table in schema "public" of database tinadb
# to /tmp/tongji.log. Each table is logged as three lines: the table name,
# its count, and an empty line.
date=$(date +"%Y-%m-%d %H:%M:%S")
echo "begin time is: $date" >>/tmp/tongji.log
# -A -t makes psql print bare values only, so no header/footer has to be
# filtered away with grep; the former filters silently dropped every table
# whose name contained "-", "rows" or "tablename". quote_ident() keeps
# mixed-case or otherwise special names valid when reused in SQL below.
psql -U postgres -d tinadb -At \
  -c "select quote_ident(tablename) from pg_tables where schemaname='public' order by tablename;" |
while IFS= read -r table
do
  [ -n "$table" ] || continue
  echo "$table" >>/tmp/tongji.log
  # stdin is redirected so the inner psql can never consume the table list
  psql -U postgres -d tinadb -At -c "select count(*) from $table;" </dev/null >>/tmp/tongji.log
  # empty separator line, kept so existing log post-processing still works
  echo >>/tmp/tongji.log
done
#echo "ok!" >>/tmp/tongji.log
查看--并直接粘贴到Excel表格中
[root@pg-ro tmp]# cat /tmp/tongji.log |awk 'NF==1{printf "%s ", $1;next}1'
begin time is: 2015-11-03 14:12:12
t1 11024
t2 8267537
t3 1684
t4 2
统计其他库,直接用vi替换功能替换db名即可:
替换 :%s/tinadb/dbname/g
3)pg 定期vacuum和reindex脚本
[root@pg tina_shell]# cat pg_tinadb_vacuum.sh
#!/bin/bash
#2014-10-22 tina
# Runs VACUUM FULL on every table and REINDEX on every index whose name does
# not end in "pkey", for schema "public" of database tinadb. Progress and
# command output are appended to /tmp/pg_tinadb_vacuum.log.
log=/tmp/pg_tinadb_vacuum.log
date=$(date +"%Y-%m-%d %H:%M:%S")
echo "begin time is: $date" >>"$log"

# -A -t prints bare names only, so no grep filtering is needed; the former
# filters silently skipped objects whose names contained "-", "row(s)",
# "tablename" or "indexname". quote_ident() keeps special names valid in SQL.
mapfile -t tables < <(psql -U postgres -d tinadb -At \
  -c "select quote_ident(tablename) from pg_tables where schemaname='public';")
echo "${tables[*]}" >>"$log"
mapfile -t indexes < <(psql -U postgres -d tinadb -At \
  -c "select quote_ident(indexname) from pg_indexes where schemaname='public' and indexname not like '%pkey';")

for table in "${tables[@]}"
do
  # Report success only when psql really succeeded; errors go to the log too.
  if psql -U postgres -d tinadb -c "vacuum full $table;" >>"$log" 2>&1
  then
    echo "table $table has finished vacuum." >>"$log"
  else
    echo "table $table vacuum failed." >>"$log"
  fi
done

for index in "${indexes[@]}"
do
  if psql -U postgres -d tinadb -c "reindex index $index;" >>"$log" 2>&1
  then
    echo "index $index has finished reindex." >>"$log"
  else
    echo "index $index reindex failed." >>"$log"
  fi
done
查看后台日志:
[root@pg tmp]# tail -f pg_tinadb_vacuum.log
begin time is: 2016-01-13 11:38:26
VACUUM
table t1 has finished vacuum.
VACUUM
table t2 has finished vacuum.
VACUUM
table t3 has finished vacuum.
VACUUM
table t4 has finished vacuum.
REINDEX
index t1_rin_idx has finished reindex.
建议:如果库中存在大表,就单独手动操作,不然可能会导致执行时长时间锁表,影响其他业务。
4)pg日常备份脚本
[root@mysqltest tina_shell]# cat backup.sh
#!/bin/bash
# Daily PostgreSQL backup: dumps every database listed in $DB with pg_dump
# (custom format), records md5 checksums in pg.md5, uploads the dumps and the
# checksum file with lftp to backup.work, then deletes local files older than
# two days. Exits 1 as soon as one dump fails (nothing is uploaded then).

# Local directory where backups are kept
bkdir=/home/bk_pg
day=$(date +"%Y%m%d")
# Databases to back up, listed explicitly; they could also be discovered by
# querying pg_database for all non-template, non-system databases
DB="tinadb testdb"
cd "$bkdir" || { echo "cannot cd to $bkdir" >&2; exit 1; }
#result=0
# Start every run with a fresh checksum file and a fresh dump log
rm -f "$bkdir/pg.md5"
: > "$bkdir/bk.log"
for db in $DB
do
  # Append, so the log of an earlier database is not overwritten by the next
  pg_dump --host localhost --port 5432 --username "postgres" --format custom --blobs --encoding UTF8 --verbose "$db" --file "$bkdir/$db.$day.backup" >> "$bkdir/bk.log" 2>&1
  pgret=$?
  if [ "$pgret" -ne 0 ]
  then
    pgtime=$(date +"%Y-%m-%d %H:%M:%S")
    echo "$pgtime $db backup fail" >> "$bkdir/pg.md5"
    exit 1
  else
    md5sum "$bkdir/$db.$day.backup" >> "$bkdir/pg.md5"
  fi
done
# Upload to FTP so that a copy of the backup is kept off-site.
# The put list is generated from $DB so it cannot drift from the dump list.
{
  echo "lcd $bkdir"
  echo "cd 12.8_pg"
  for db in $DB
  do
    echo "put $db.$day.backup"
  done
  echo "put pg.md5"
  echo "exit"
} | lftp backup.work
# Delete backups older than two days
find "$bkdir/" -type f -mtime +2 -exec rm -f {} \;
5)简易的pg主从同步检测脚本1
[root@mysqltest tina_shell]# cat pg_check_sync.sh
#!/bin/bash
# Appends a status report for the local PostgreSQL instance to
# /tmp/pg_check_state.log: whether something listens on port 5432, whether
# this host looks like a replication master or slave (judged from the wal
# sender / wal receiver processes), and whether the first row of
# pg_stat_replication reports "streaming".
log=/tmp/pg_check_state.log

#check pg database whether is running
# "5432" when any socket is LISTENing on local port 5432, empty otherwise.
# Matching the local-address column works for IPv4-only and IPv6 listeners
# and ignores other ports that merely contain "5432".
pg_port=$(netstat -nat | awk '$6 == "LISTEN" && $4 ~ /:5432$/ { print "5432"; exit }')
host_ip=$(ifconfig | grep "inet addr:192.168" | awk '{print $2}' | awk -F : '{print $2}')
date=$(date +"%Y-%m-%d %H:%M:%S")
echo "$date" >>"$log"
if [[ "$pg_port" == "5432" ]]
then
  echo "$host_ip postgresql is running" >>"$log"
else
  echo "Warnning -$host_ip postgresql is not running!" >>"$log"
fi

#check the role of the host
pg_role1=$(ps -ef | grep wal | awk '{print $10}' | grep "sender")
pg_role2=$(ps -ef | grep wal | awk '{print $10}' | grep "receiver")
# Field 13 of the wal sender ps line, cut at "(" -- used below as the slave address
pg_slave_ip=$(ps -ef | grep wal | grep sender | awk '{print $13}' | awk -F "(" '{print $1}')
if [[ "$pg_role1" == "sender" && -z "$pg_role2" ]]
then
  echo "$host_ip is master host and $pg_slave_ip is slave host" >>"$log"
elif [[ -z "$pg_role1" && "$pg_role2" == "receiver" ]]
then
  echo "$host_ip is postgresql slave host.Please execute the shell in the master host!" >>"$log"
else
  echo "check whether the database has slave host" >>"$log"
fi

#check whether the slave is synchronous
# Line 3 of the aligned psql output is the first data row; it carries one
# leading space, which is why the comparison string starts with a space.
pg_sync_status=$(su - postgres -c "psql -c 'select state from pg_stat_replication;'|sed -n 3p")
if [[ "$pg_sync_status" == " streaming" ]]
then
  echo "the slave is synchronous" >>"$log"
else
  echo "warnning - please check the sync status of slave database " >>"$log"
fi
执行结果:
1.单节点
[root@mysqltest tina_shell]# cat /tmp/pg_check_state.log
2016-01-13 15:04:53
192.168.12.8 postgresql is running
check whether the database has slave host ----请检查该pg库是否有从库
2.主节点
[root@pg tina_shell]# cat /tmp/pg_check_state.log
2016-01-13 15:03:31
192.168.12.2 postgresql is running
192.168.12.2 is master host and 192.168.12.1 is slave host
the slave is synchronous ----主从同步
3.从节点
[root@pg tina_shell]# cat /tmp/pg_check_state.log
2016-01-13 15:00:44
192.168.12.1 postgresql is running
192.168.12.1 is postgresql slave host.Please execute the shell in the master host! ---此ip上pg是从库,请在主库上执行脚本
6)简易的pg主从同步检测脚本2
[root@pg /usr/lib64/nagios/plugins]# cat check_pgsync.sh
#!/bin/bash
# nrpe command: check pg sql and sync state.
#
# Exit status / message:
#   0  pg is listening and pg_stat_replication shows a "streaming" row
#   1  pg is listening but no "streaming" row was found
#   2  nothing is listening on $pgport
#   4  pgport is empty (cannot happen while the default below is applied)

# customer config
pgport=
pgdbname=
pgdbuser=

# default value.
pgport=${pgport:-5432}
pgdbname=${pgdbname:-postgres}
pgdbuser=${pgdbuser:-postgres}

if [ -z "$pgport" ]; then
  echo "error: pgport no defined"
  exit 4
fi

msg_ok="OK - pg is running and slave is synchronous."
msg_warn="WARNING - pg is running but slave synchronous fail."
msg_crit="CRITICAL - pg is not running on port: $pgport"

# check pg running
# The trailing [[:space:]] anchors the port number, so e.g. pgport=5432 does
# not accidentally match a listener on :54321.
if netstat -ntple | grep -Eq "[:]${pgport}[[:space:]]"; then
  # check slave db host.
  # Only a hint is printed here; the replication check below still decides
  # the exit status.
  if ps -ef | grep -q "[w]al receiver process"; then
    echo "error: it seems you are running me in slave db host."
  fi

  # check slave synchronous
  if psql -d "$pgdbname" -U "$pgdbuser" \
      -c 'select state from pg_stat_replication;' \
    | grep -q "[s]treaming"
  then
    echo "$msg_ok"
    exit 0
  else
    echo "$msg_warn"
    exit 1
  fi
else
  echo "$msg_crit"
  exit 2
fi

# Not reached: every branch above exits. Kept as a safety net.
exit 5
1.单节点
[root@mysqltest tina_shell]# ./check_pgsync.sh
WARNING - pg is running but slave synchronous fail.
2.主节点
[root@pg tina_shell]# ./check_pgsync.sh
OK - pg is running and slave is synchronous.
3.从节点
[root@pg-ro tina_shell]# ./check_pgsync.sh
error: it seems you are running me in slave db host.
WARNING - pg is running but slave synchronous fail.
7)pg主从切换shell脚本(闲来无事写的,不建议部署生产)
主库:192.168.10.232
从库:192.168.10.233
环境:主从同步,主库突然挂掉
脚本都部署好之后,只需要在主库执行第一个脚本,就会触发后面脚本的操作,一步到位。
(部分参数需要提前设置好)
1、检测主库是否正常启动,如果不是正常启动,就去执行从库的切换脚本
[postgres@localhost tmp]$ cat pg_check_master.sh
#!/bin/bash
# Checks whether PostgreSQL is listening on port 5432 on this (master) host
# and logs the result to /tmp/pg_check_master.log. When it is not listening,
# the switch script on the slave 192.168.10.233 is started over ssh.
log=/tmp/pg_check_master.log

#check the master pg whether is running
# "5432" when any socket is LISTENing on local port 5432, empty otherwise.
# The former "second LISTEN line" parsing reported "not running" whenever the
# IPv6 listener line was missing or ordered differently, which would have
# triggered an unwanted failover.
pg_port=$(netstat -nat | awk '$6 == "LISTEN" && $4 ~ /:5432$/ { print "5432"; exit }')
host_ip=$(ifconfig | grep "inet addr:192.168" | awk '{print $2}' | awk -F : '{print $2}')
date=$(date +"%Y-%m-%d %H:%M:%S")
echo "$date" >>"$log"
if [[ "$pg_port" == "5432" ]]
then
  echo "$host_ip postgresql is running" >>"$log"
else
  echo "Warnning -$host_ip postgresql is not running!" >>"$log"
  echo "the slave is switching to the master ...please waiting" >>"$log"
  ssh 192.168.10.233 "sh /tmp/pg_switch.sh"
fi
2、创建从库的触发文件,将从库启动成主库(触发文件,主库和从库的名字最好不要设置成一样的,以免不好区分)
[postgres@localhost tmp]$ cat pg_switch.sh
#!/bin/bash
#swtch slave to master
# Promotes this standby by creating the trigger file, waits 20 seconds, and
# treats the appearance of /pg/data/recovery.done as proof of the promotion.
# On success the newest timeline history file is copied to the old master
# (192.168.10.232), where start_new_slave.sh is then started over ssh.
log=/tmp/pg_switch.log
date=$(date +"%Y-%m-%d %H:%M:%S")
echo "$date" >>"$log"

# Remove a stale recovery.done first so the check below only sees a file
# created by this promotion. The absolute path avoids deleting in whatever
# directory the script happens to run in.
rm -fr /pg/data/recovery.done
touch /tmp/pg.trigger.456
sleep 20s

if [ -f '/pg/data/recovery.done' ]
then
  echo "the slave has switched to the master successful!" >>"$log"
  echo "the old master is going to switch to the new slave!" >>"$log"
  # Most recently modified timeline history file
  his_file=$(ls -t /pg/data/pg_xlog/0000000*.history | head -n 1)
  # NOTE(review): remote user "postgres" is assumed from the shell prompt the
  # script is shown with -- confirm.
  scp "$his_file" postgres@192.168.10.232:/pg/data/pg_xlog
  ssh 192.168.10.232 "sh /tmp/start_new_slave.sh"
else
  echo "warnning:the slave has switched fail!" >>"$log"
fi
3、注意recovery.conf会随着主从的变化而消失,因此我们可以先将内容写好的文件备份到上一级目录
内容包含如下:
vi /pg/recovery.conf.bak
recovery_target_timeline = 'latest'
standby_mode = 'on'
primary_conninfo = 'host=192.168.10.233 port=5432 user=postgres password=tina'
trigger_file = '/tmp/pg.trigger.456'
4、有了时间线文件、有了recovery.conf,检查一下pg_hba.conf,就可以直接启动pg新从库了,并做一个主从同步的检查。
[root@localhost tmp]# cat start_new_slave.sh
#!/bin/bash
# Turns the old master into the new slave: fixes ownership of the copied
# timeline history files, restores recovery.conf from /pg/recovery.conf.bak,
# starts PostgreSQL as user postgres and logs whether a wal receiver process
# is present. Everything is logged to /tmp/start_new_slave.log.
log=/tmp/start_new_slave.log
date=$(date +"%Y-%m-%d %H:%M:%S")
echo "$date" >>"$log"

chown postgres:postgres /pg/data/pg_xlog/*.history
cp /pg/recovery.conf.bak /pg/data/recovery.conf
# Absolute path: the script is not run from /pg/data, so a bare
# "recovery.conf" would not be the file that was just copied.
chown postgres:postgres /pg/data/recovery.conf
# 2>&1 sends stderr to the log as well (the former "2&>1" wrote the output to
# a file named "1" and passed a stray argument "2").
su - postgres -c "pg_ctl -D /pg/data start" >>"$log" 2>&1

pg_slave_status=$(ps -ef | grep wal | awk '{print $10}' | grep "receiver")
if [[ "$pg_slave_status" == "receiver" ]]
then
  echo "the slave sync is ok!" >>"$log"
else
  echo "error:please check the slave whether is running or not!" >>"$log"
fi
8)pg删除归档日志
[root@pg tina_shell]# cat pg_delete_archivedlog.sh
#!/bin/bash
# Deletes regular files under the archived-log directory that match
# find's "-mtime +2" age test (last modified more than two days ago).
archive_dir=/home/pgsql/backup_new/archived_log/
find "$archive_dir" \
  -type f \
  -mtime +2 \
  -exec rm {} \;
9)常用拼接sql
-- Generates one "select count(*)" statement per table in schema public.
-- quote_ident() keeps the generated SQL valid for mixed-case or otherwise
-- special table names.
select 'select count(*) from '||quote_ident(tablename)||';' from pg_tables where schemaname='public';
-- Generates one "add unique constraint on sample_h" statement per table whose
-- name matches 't_wh20%'. NOTE(review): "_" is a single-character wildcard in
-- LIKE, so this also matches names such as 'txwh20...'; confirm that is intended.
select 'alter table '||quote_ident(tablename)||' add constraint '||quote_ident('u_'||tablename)||' unique(sample_h);' from pg_tables where tablename like 't_wh20%';