故事的开始
## 发现错误日志信息
LOG:process 12345 still waiting for ShareLock on transaction 541005530 after 1000.146 ms
DETAIL:Process holding the lock: 12045. Wait queue: 12345.
CONTEXT:while updating tuple (809991,21) in relation "table_name"
STATEMENT:UPDATE/*...*/ table_name set ... where ...
LOG:process 12445 still waiting for ExclusiveLock on tuple (809991,21) of relation 19673 of database 14007 after 1000.102 ms
DETAIL: Process holding the lock: 12345. Wait queue: 12445.
STATEMENT:UPDATE/*...*/ table_name2 set ... where ...
相关SQL查询
-- List every lock request that has not been granted yet, i.e. the requests that are still waiting.
select *
from pg_locks
where granted is false;
-- List all sessions; look for rows whose state column is "idle in transaction".
select *
from pg_stat_activity;
## 服务器查看
ps -ef | grep -v grep | grep idle
原因排查
-- Find out why a session is blocked: for every lock object that somebody is waiting on, show the
-- waiting requests together with the sessions that already hold a lock on the same object.
with
-- Lock requests that have not been granted yet, joined with the session that issued them.
t_wait as
(
    select
        l.mode, l.locktype, l.database, l.relation, l.page, l.tuple, l.classid, l.granted, l.objid, l.objsubid, l.pid,
        l.virtualtransaction, l.virtualxid, l.transactionid, l.fastpath,
        s.state, s.query, s.xact_start, s.query_start, s.usename, s.datname, s.client_addr, s.client_port, s.application_name
    from pg_locks as l
    inner join pg_stat_activity as s on s.pid = l.pid
    where not l.granted
),
-- Locks that have been granted, joined with the session that holds them.
-- The column list and order must stay identical to t_wait because both feed the union all below.
t_run as
(
    select
        l.mode, l.locktype, l.database, l.relation, l.page, l.tuple, l.classid, l.granted, l.objid, l.objsubid, l.pid,
        l.virtualtransaction, l.virtualxid, l.transactionid, l.fastpath,
        s.state, s.query, s.xact_start, s.query_start, s.usename, s.datname, s.client_addr, s.client_port, s.application_name
    from pg_locks as l
    inner join pg_stat_activity as s on s.pid = l.pid
    where l.granted
),
-- Granted locks on exactly the same lock object as some waiting request, held by a different backend.
-- "is not distinct from" is used because most lock-identifying columns are NULL for any given lock type.
-- A semi-join (exists) is used instead of a plain join so that a holder is listed only once
-- even when several sessions are queued behind the same lock.
t_overlap as
(
    select r.*
    from t_run as r
    where exists
    (
        select 1
        from t_wait as w
        where r.locktype is not distinct from w.locktype
          and r.database is not distinct from w.database
          and r.relation is not distinct from w.relation
          and r.page is not distinct from w.page
          and r.tuple is not distinct from w.tuple
          and r.virtualxid is not distinct from w.virtualxid
          and r.transactionid is not distinct from w.transactionid
          and r.classid is not distinct from w.classid
          and r.objid is not distinct from w.objid
          and r.objsubid is not distinct from w.objsubid
          and r.pid <> w.pid
    )
),
-- Holders and waiters combined into one row set.
t_unionall as
(
    select r.* from t_overlap as r
    union all
    select w.* from t_wait as w
)
-- Output: one row per lock object. lock_conflict is a multi-line text listing every involved session,
-- strongest lock mode first and, within the same mode, granted locks before waiting ones.
select
    locktype,
    datname,
    relation::regclass,
    page,
    tuple,
    virtualxid,
    transactionid::text,
    classid::regclass,
    objid,
    objsubid,
    string_agg(
        'Pid: ' || coalesce(pid::text, 'NULL') || chr(10) ||
        'Lock_Granted: ' || coalesce(granted::text, 'NULL') ||
        ' , Mode: ' || coalesce(mode::text, 'NULL') ||
        ' , FastPath: ' || coalesce(fastpath::text, 'NULL') ||
        ' , VirtualTransaction: ' || coalesce(virtualtransaction::text, 'NULL') ||
        ' , Session_State: ' || coalesce(state::text, 'NULL') || chr(10) ||
        'Username: ' || coalesce(usename::text, 'NULL') ||
        ' , Database: ' || coalesce(datname::text, 'NULL') ||
        ' , Client_Addr: ' || coalesce(client_addr::text, 'NULL') ||
        ' , Client_Port: ' || coalesce(client_port::text, 'NULL') ||
        ' , Application_Name: ' || coalesce(application_name::text, 'NULL') || chr(10) ||
        'Xact_Start: ' || coalesce(xact_start::text, 'NULL') ||
        ' , Query_Start: ' || coalesce(query_start::text, 'NULL') ||
        ' , Xact_Elapse: ' || coalesce((now() - xact_start)::text, 'NULL') ||
        ' , Query_Elapse: ' || coalesce((now() - query_start)::text, 'NULL') || chr(10) ||
        'SQL (Current SQL in Transaction): ' || chr(10) ||
        coalesce(query::text, 'NULL'),
        chr(10) || '--------' || chr(10)
        order by
            -- Rank lock modes from weakest (1) to strongest (8); unknown modes rank 0.
            ( case mode
                when 'INVALID' then 0
                when 'AccessShareLock' then 1
                when 'RowShareLock' then 2
                when 'RowExclusiveLock' then 3
                when 'ShareUpdateExclusiveLock' then 4
                when 'ShareLock' then 5
                when 'ShareRowExclusiveLock' then 6
                when 'ExclusiveLock' then 7
                when 'AccessExclusiveLock' then 8
                else 0
              end ) desc,
            (case when granted then 0 else 1 end),
            -- Tie-breaker so that repeated runs list sessions in a stable order.
            pid
    ) as lock_conflict
from t_unionall
group by
    -- transactionid is grouped by its text form, matching the select list.
    locktype, datname, relation, page, tuple, virtualxid, transactionid::text, classid, objid, objsubid;
-- 文档来源:https://blog.csdn.net/weixin_34126215/article/details/89569410
场景重现
-- Session 1: open a transaction and update one row; no commit follows, so the transaction stays open.
begin;
update aa
set city = '珠海'
where id = 3;
-- Session 2: inspect the pg_locks view for lock requests that have not been granted.
select *
from pg_locks
where granted is false;
locktype | database | relation | page | tuple | virtualxid | transactionid | classid | objid | objsubid | virtualtransaction | pid | mode |granted | fastpath
---------------+----------+----------+------+-------+------------+---------------+---------+-------+----------+--------------------+------+-----------+---------+----------
transactionid | | | | | | 730 | | | | 6/17 | 4760 | ShareLock |f | f
(1 row)
-- Session 3: delete the same row; this statement waits until session 1 commits before it can run.
delete from aa
where id = 3;
-- Session 2: list the transaction-id locks of all other backends together with the query of each backend.
-- NOTE(review): the lock_satus labels read inverted -- a request that is not granted (still waiting)
-- is shown as 'get_lock' and a granted (held) lock as 'wait_lock'; the alias is also a typo for
-- lock_status. Both are kept as-is so the query still matches the sample output shown below.
select
    locktype,
    pg_locks.pid,
    virtualtransaction,
    transactionid,
    nspname,
    relname,
    mode,
    granted,
    case
        when not granted then 'get_lock'
        when granted then 'wait_lock'
    end as lock_satus,
    cast(date_trunc('second', query_start) as timestamp) as query_start,
    query
from pg_locks
inner join pg_stat_activity
    on pg_stat_activity.pid = pg_locks.pid
left join pg_class
    on pg_class.oid = pg_locks.relation
left join pg_namespace
    on pg_namespace.oid = pg_class.relnamespace
where pg_locks.pid <> pg_backend_pid() -- skip the session running this query
  and transactionid is not null        -- keep only locks on a transaction id
order by query_start;
locktype | pid | virtualtransaction | transactionid | nspname | relname | mode | granted | lock_satus | query_start |
query
---------------+------+--------------------+---------------+---------+---------+---------------+---------+------------+---------------------+-----------------------------
transactionid | 4351 | 3/44 | 730 | | | ExclusiveLock | t | wait_lock | 2022-01-10 15:13:10 | select * from aa
transactionid | 4760 | 6/17 | 731 | | | ExclusiveLock | t | wait_lock | 2022-01-10 15:17:53 | delete from aa where id =3;
transactionid | 4760 | 6/17 | 730 | | | ShareLock | f | get_lock | 2022-01-10 15:17:53 | delete from aa where id =3;
(3 rows)
-- Show the locks whose fastpath flag is false.
select *
from pg_locks
where not fastpath;
locktype | database | relation | page | tuple | virtualxid | transactionid | classid | objid | objsubid | virtualtransaction | pid | mode
| granted | fastpath
---------------+----------+----------+------+-------+------------+---------------+---------+-------+----------+--------------------+------+---------------------+---------+----------
transactionid | | | | | | 731 | | | | 6/17 | 4760 | ExclusiveLock | t | f
transactionid | | | | | | 730 | | | | 3/44 | 4351 | ExclusiveLock | t | f
tuple | 13287 | 16384 | 0 | 9 | | | | | | 6/17 | 4760 | AccessExclusiveLock | t | f
transactionid | | | | | | 730 | | | | 6/17 | 4760 | ShareLock | f | f
(4 rows)
-- 或者使用上面的 WITH 查询(见"原因排查"一节),可以得到更直观的结果
locktype | datname | relation | page | tuple | virtualxid | transactionid | classid | objid | objsubid |
---------------+----------+----------+------+-------+------------+---------------+---------+-------+----------+------------------------------------------------------------------------------------------
transactionid | postgres | | | | | 730 | | | | Pid: 4351
+
| | | | | | | | | | Lock_Granted: true , Mode: ExclusiveLocke: idle in transaction +
| | | | | | | | | | Username: postgres , Database: postgresation_Name: DBeaver 6.2.2 - Main +
| | | | | | | | | | Xact_Start: 2022-01-10 15:12:58.620982+0: 00:05:04.82749 , Query_Elapse: 00:04:53.219844 +
| | | | | | | | | | SQL (Current SQL in Transaction):
+
| | | | | | | | | | select * from aa
+
| | | | | | | | | | --------
+
| | | | | | | | | | Pid: 4760
+
| | | | | | | | | | Lock_Granted: false , Mode: ShareLock ,active +
| | | | | | | | | | Username: postgres , Database: postgresql +
| | | | | | | | | | Xact_Start: 2022-01-10 15:17:53.551801+0: 00:00:09.896671 , Query_Elapse: 00:00:09.896671+
| | | | | | | | | | SQL (Current SQL in Transaction): +
| | | | | | | | | | delete from aa where id =3;
(1 row)
-- How to resolve the blocking:
-- 1) commit the transaction in session 1, or
-- 2) terminate the backend process of session 1.
-- Terminate the blocking backend from inside the database (4351 is the blocking Pid reported above);
-- do not use the operating-system command kill -9 for this.
select pg_terminate_backend(4351);
pg_terminate_backend
----------------------
t
(1 row)
-- After the blocking backend has been terminated, the delete in session 3 completes normally.
delete from aa
where id = 3;
DELETE 1
参考文档:
1)https://blog.csdn.net/weixin_34126215/article/details/89569410
2)https://blog.csdn.net/rudygao/article/details/49251807?utm_medium=distribute.pc_relevant.none-task-blog-2~default~baidujs_title~default-1.no_search_link&spm=1001.2101.3001.4242.2&utm_relevant_index=4
3)https://blog.csdn.net/weixin_33872566/article/details/89613817