什么?有个 SQL 执行了 8 秒!
哪里出了问题?臣妾不知道啊,得找 DBA 啊。
DBA 人呢?离职了!!擦!!!
程序员在无处寻求帮助时,就得想办法自救,努力让自己变成 "伪 DBA"。
SELECT r.session_id ,t.os_thread_id ,r.command FROM sys.dm_exec_requests AS r JOIN sys.dm_os_workers AS w ON r.task_address = w.task_address JOIN sys.dm_os_threads AS t ON t.thread_address = w.thread_address ORDER BY session_id;
SELECT t1.resource_type AS [lock type] ,DB_NAME(resource_database_id) AS [database] ,t1.resource_associated_entity_id AS [blk object] ,t1.request_mode AS [lock req] ,t1.request_session_id AS [waiter sid] ,t2.wait_duration_ms AS [wait time] ,( SELECT [text] FROM sys.dm_exec_requests AS r WITH (NOLOCK) CROSS APPLY sys.dm_exec_sql_text(r.[sql_handle]) WHERE r.session_id = t1.request_session_id ) AS [waiter_batch] ,( SELECT SUBSTRING(qt.[text], r.statement_start_offset / 2, ( CASE WHEN r.statement_end_offset = - 1 THEN LEN(CONVERT(NVARCHAR(max), qt.[text])) * 2 ELSE r.statement_end_offset END - r.statement_start_offset ) / 2) FROM sys.dm_exec_requests AS r WITH (NOLOCK) CROSS APPLY sys.dm_exec_sql_text(r.[sql_handle]) AS qt WHERE r.session_id = t1.request_session_id ) AS [waiter_stmt] ,t2.blocking_session_id AS [blocker sid] ,( SELECT [text] FROM sys.sysprocesses AS p CROSS APPLY sys.dm_exec_sql_text(p.[sql_handle]) WHERE p.spid = t2.blocking_session_id ) AS [blocker_stmt] FROM sys.dm_tran_locks AS t1 WITH (NOLOCK) INNER JOIN sys.dm_os_waiting_tasks AS t2 WITH (NOLOCK) ON t1.lock_owner_address = t2.resource_address OPTION (RECOMPILE);
这个查询需要执行多次,即使这样可能什么也查不到,为什么呢?因为没有死锁发生啊!
SELECT L.request_session_id AS SPID ,DB_NAME(L.resource_database_id) AS DatabaseName ,O.NAME AS LockedObjectName ,P.object_id AS LockedObjectId ,L.resource_type AS LockedResource ,L.request_mode AS LockType ,ST.TEXT AS SqlStatementText ,ES.login_name AS LoginName ,ES.host_name AS HostName ,TST.is_user_transaction AS IsUserTransaction ,AT.[name] AS TransactionName ,CN.auth_scheme AS AuthenticationMethod FROM sys.dm_tran_locks L JOIN sys.partitions P ON P.hobt_id = L.resource_associated_entity_id JOIN sys.objects O ON O.object_id = P.object_id JOIN sys.dm_exec_sessions ES ON ES.session_id = L.request_session_id JOIN sys.dm_tran_session_transactions TST ON ES.session_id = TST.session_id JOIN sys.dm_tran_active_transactions AT ON TST.transaction_id = AT.transaction_id JOIN sys.dm_exec_connections CN ON CN.session_id = ES.session_id CROSS APPLY sys.dm_exec_sql_text(CN.most_recent_sql_handle) AS ST WHERE resource_database_id = db_id() ORDER BY L.request_session_id
上面是另一个侦测脚本。
如果需要清理已存在的 Wait 信息,则可执行:
DBCC SQLPERF('sys.dm_os_wait_stats', CLEAR);
然后,可以从清理后的时间点开始统计 Wait 信息。
WITH [Waits] AS ( SELECT wait_type ,wait_time_ms / 1000.0 AS [WaitS] ,(wait_time_ms - signal_wait_time_ms) / 1000.0 AS [ResourceS] ,signal_wait_time_ms / 1000.0 AS [SignalS] ,waiting_tasks_count AS [WaitCount] ,100.0 * wait_time_ms / SUM(wait_time_ms) OVER () AS [Percentage] ,ROW_NUMBER() OVER ( ORDER BY wait_time_ms DESC ) AS [RowNum] FROM sys.dm_os_wait_stats WITH (NOLOCK) WHERE [wait_type] NOT IN ( N'BROKER_EVENTHANDLER' ,N'BROKER_RECEIVE_WAITFOR' ,N'BROKER_TASK_STOP' ,N'BROKER_TO_FLUSH' ,N'BROKER_TRANSMITTER' ,N'CHECKPOINT_QUEUE' ,N'CHKPT' ,N'CLR_AUTO_EVENT' ,N'CLR_MANUAL_EVENT' ,N'CLR_SEMAPHORE' ,N'DBMIRROR_DBM_EVENT' ,N'DBMIRROR_EVENTS_QUEUE' ,N'DBMIRROR_WORKER_QUEUE' ,N'DBMIRRORING_CMD' ,N'DIRTY_PAGE_POLL' ,N'DISPATCHER_QUEUE_SEMAPHORE' ,N'EXECSYNC' ,N'FSAGENT' ,N'FT_IFTS_SCHEDULER_IDLE_WAIT' ,N'FT_IFTSHC_MUTEX' ,N'HADR_CLUSAPI_CALL' ,N'HADR_FILESTREAM_IOMGR_IOCOMPLETION' ,N'HADR_LOGCAPTURE_WAIT' ,N'HADR_NOTIFICATION_DEQUEUE' ,N'HADR_TIMER_TASK' ,N'HADR_WORK_QUEUE' ,N'KSOURCE_WAKEUP' ,N'LAZYWRITER_SLEEP' ,N'LOGMGR_QUEUE' ,N'ONDEMAND_TASK_QUEUE' ,N'PWAIT_ALL_COMPONENTS_INITIALIZED' ,N'QDS_PERSIST_TASK_MAIN_LOOP_SLEEP' ,N'QDS_CLEANUP_STALE_QUERIES_TASK_MAIN_LOOP_SLEEP' ,N'REQUEST_FOR_DEADLOCK_SEARCH' ,N'RESOURCE_QUEUE' ,N'SERVER_IDLE_CHECK' ,N'SLEEP_BPOOL_FLUSH' ,N'SLEEP_DBSTARTUP' ,N'SLEEP_DCOMSTARTUP' ,N'SLEEP_MASTERDBREADY' ,N'SLEEP_MASTERMDREADY' ,N'SLEEP_MASTERUPGRADED' ,N'SLEEP_MSDBSTARTUP' ,N'SLEEP_SYSTEMTASK' ,N'SLEEP_TASK' ,N'SLEEP_TEMPDBSTARTUP' ,N'SNI_HTTP_ACCEPT' ,N'SP_SERVER_DIAGNOSTICS_SLEEP' ,N'SQLTRACE_BUFFER_FLUSH' ,N'SQLTRACE_INCREMENTAL_FLUSH_SLEEP' ,N'SQLTRACE_WAIT_ENTRIES' ,N'WAIT_FOR_RESULTS' ,N'WAITFOR' ,N'WAITFOR_TASKSHUTDOWN' ,N'WAIT_XTP_HOST_WAIT' ,N'WAIT_XTP_OFFLINE_CKPT_NEW_LOG' ,N'WAIT_XTP_CKPT_CLOSE' ,N'XE_DISPATCHER_JOIN' ,N'XE_DISPATCHER_WAIT' ,N'XE_TIMER_EVENT' ) AND waiting_tasks_count > 0 ) SELECT MAX(W1.wait_type) AS [WaitType] ,CAST(MAX(W1.WaitS) AS DECIMAL(16, 2)) AS [Wait_Sec] ,CAST(MAX(W1.ResourceS) AS DECIMAL(16, 2)) AS [Resource_Sec] ,CAST(MAX(W1.SignalS) AS DECIMAL(16, 2)) AS [Signal_Sec] ,MAX(W1.WaitCount) AS [Wait Count] ,CAST(MAX(W1.Percentage) AS DECIMAL(5, 2)) AS [Wait Percentage] ,CAST((MAX(W1.WaitS) / MAX(W1.WaitCount)) AS DECIMAL(16, 4)) AS [AvgWait_Sec] ,CAST((MAX(W1.ResourceS) / MAX(W1.WaitCount)) AS DECIMAL(16, 4)) AS [AvgRes_Sec] ,CAST((MAX(W1.SignalS) / MAX(W1.WaitCount)) AS DECIMAL(16, 4)) AS [AvgSig_Sec] FROM Waits AS W1 INNER JOIN Waits AS W2 ON W2.RowNum <= W1.RowNum GROUP BY W1.RowNum HAVING SUM(W2.Percentage) - MAX(W1.Percentage) < 99 -- percentage threshold OPTION (RECOMPILE);
关于 Wait Type 的描述可参考:
SELECT AVG(current_tasks_count) AS [Avg Task Count] ,AVG(runnable_tasks_count) AS [Avg Runnable Task Count] ,AVG(pending_disk_io_count) AS [Avg Pending DiskIO Count] FROM sys.dm_os_schedulers WITH (NOLOCK) WHERE scheduler_id < 255 OPTION (RECOMPILE);
上面的 SQL 需要运行多次来查看结果,如果某个值持续的大于 10 则说明可能有问题,需要进一步调查。
SELECT o.[name] AS [table_name] ,i.[name] AS [index_name] ,ios.index_id ,ios.partition_number ,SUM(ios.row_lock_wait_count) AS [total_row_lock_waits] ,SUM(ios.row_lock_wait_in_ms) AS [total_row_lock_wait_in_ms] ,SUM(ios.page_lock_wait_count) AS [total_page_lock_waits] ,SUM(ios.page_lock_wait_in_ms) AS [total_page_lock_wait_in_ms] ,SUM(ios.page_lock_wait_in_ms) + SUM(row_lock_wait_in_ms) AS [total_lock_wait_in_ms] FROM sys.dm_db_index_operational_stats(DB_ID(), NULL, NULL, NULL) AS ios INNER JOIN sys.objects AS o WITH (NOLOCK) ON ios.[object_id] = o.[object_id] INNER JOIN sys.indexes AS i WITH (NOLOCK) ON ios.[object_id] = i.[object_id] AND ios.index_id = i.index_id WHERE o.[object_id] > 100 GROUP BY o.[name] ,i.[name] ,ios.index_id ,ios.partition_number HAVING SUM(ios.page_lock_wait_in_ms) + SUM(row_lock_wait_in_ms) > 0 ORDER BY total_lock_wait_in_ms DESC OPTION (RECOMPILE);
SELECT * FROM sys.dm_os_latch_stats WHERE wait_time_ms > 10 ORDER BY 2 DESC; SELECT * FROM sys.dm_os_wait_stats WHERE waiting_tasks_count > 20 AND wait_type LIKE '%LATCH%' ORDER BY 3 DESC; SELECT * FROM sys.dm_os_spinlock_stats ORDER BY [spins] DESC; WITH [Latches] AS ( SELECT [latch_class] ,[wait_time_ms] / 1000.0 AS [WaitS] ,[waiting_requests_count] AS [WaitCount] ,100.0 * [wait_time_ms] / SUM([wait_time_ms]) OVER () AS [Percentage] ,ROW_NUMBER() OVER ( ORDER BY [wait_time_ms] DESC ) AS [RowNum] FROM sys.dm_os_latch_stats WHERE [latch_class] NOT IN (N'BUFFER') --AND [wait_time_ms] > 0 ) SELECT MAX([W1].[latch_class]) AS [LatchClass] ,CAST(MAX([W1].[WaitS]) AS DECIMAL(14, 2)) AS [Wait_S] ,MAX([W1].[WaitCount]) AS [WaitCount] ,CAST(MAX([W1].[Percentage]) AS DECIMAL(14, 2)) AS [Percentage] ,CAST((MAX([W1].[WaitS]) / MAX([W1].[WaitCount])) AS DECIMAL(14, 4)) AS [AvgWait_S] FROM [Latches] AS [W1] INNER JOIN [Latches] AS [W2] ON [W2].[RowNum] <= [W1].[RowNum] GROUP BY [W1].[RowNum] HAVING SUM([W2].[Percentage]) - MAX([W1].[Percentage]) < 95;-- percentage threshold
/* SQL Server Wait Information from sys.dm_os_wait_stats Copyright (C) 2014, Brent Ozar Unlimited. See http://BrentOzar.com/go/eula for the End User Licensing Agreement. */ /********************************* Let's build a list of waits we can safely ignore. *********************************/ IF OBJECT_ID('tempdb..#ignorable_waits') IS NOT NULL DROP TABLE #ignorable_waits; GO create table #ignorable_waits (wait_type nvarchar(256) PRIMARY KEY); GO /* We aren't using row constructors to be SQL 2005 compatible */ set nocount on; insert #ignorable_waits (wait_type) VALUES ('REQUEST_FOR_DEADLOCK_SEARCH'); insert #ignorable_waits (wait_type) VALUES ('SQLTRACE_INCREMENTAL_FLUSH_SLEEP'); insert #ignorable_waits (wait_type) VALUES ('SQLTRACE_BUFFER_FLUSH'); insert #ignorable_waits (wait_type) VALUES ('LAZYWRITER_SLEEP'); insert #ignorable_waits (wait_type) VALUES ('XE_TIMER_EVENT'); insert #ignorable_waits (wait_type) VALUES ('XE_DISPATCHER_WAIT'); insert #ignorable_waits (wait_type) VALUES ('FT_IFTS_SCHEDULER_IDLE_WAIT'); insert #ignorable_waits (wait_type) VALUES ('LOGMGR_QUEUE'); insert #ignorable_waits (wait_type) VALUES ('CHECKPOINT_QUEUE'); insert #ignorable_waits (wait_type) VALUES ('BROKER_TO_FLUSH'); insert #ignorable_waits (wait_type) VALUES ('BROKER_TASK_STOP'); insert #ignorable_waits (wait_type) VALUES ('BROKER_EVENTHANDLER'); insert #ignorable_waits (wait_type) VALUES ('SLEEP_TASK'); insert #ignorable_waits (wait_type) VALUES ('WAITFOR'); insert #ignorable_waits (wait_type) VALUES ('DBMIRROR_DBM_MUTEX') insert #ignorable_waits (wait_type) VALUES ('DBMIRROR_EVENTS_QUEUE') insert #ignorable_waits (wait_type) VALUES ('DBMIRRORING_CMD'); insert #ignorable_waits (wait_type) VALUES ('DISPATCHER_QUEUE_SEMAPHORE'); insert #ignorable_waits (wait_type) VALUES ('BROKER_RECEIVE_WAITFOR'); insert #ignorable_waits (wait_type) VALUES ('CLR_AUTO_EVENT'); insert #ignorable_waits (wait_type) VALUES ('DIRTY_PAGE_POLL'); insert #ignorable_waits (wait_type) VALUES ('HADR_FILESTREAM_IOMGR_IOCOMPLETION'); insert #ignorable_waits (wait_type) VALUES ('ONDEMAND_TASK_QUEUE'); insert #ignorable_waits (wait_type) VALUES ('FT_IFTSHC_MUTEX'); insert #ignorable_waits (wait_type) VALUES ('CLR_MANUAL_EVENT'); insert #ignorable_waits (wait_type) VALUES ('SP_SERVER_DIAGNOSTICS_SLEEP'); insert #ignorable_waits (wait_type) VALUES ('QDS_CLEANUP_STALE_QUERIES_TASK_MAIN_LOOP_SLEEP'); insert #ignorable_waits (wait_type) VALUES ('QDS_PERSIST_TASK_MAIN_LOOP_SLEEP'); GO /* Want to manually exclude an event and recalculate?*/ /* insert #ignorable_waits (wait_type) VALUES (''); */ /********************************* What are the highest overall waits since startup? *********************************/ SELECT TOP 25 os.wait_type, SUM(os.wait_time_ms) OVER (PARTITION BY os.wait_type) as sum_wait_time_ms, CAST( 100.* SUM(os.wait_time_ms) OVER (PARTITION BY os.wait_type) / (1. * SUM(os.wait_time_ms) OVER () ) AS NUMERIC(12,1)) as pct_wait_time, SUM(os.waiting_tasks_count) OVER (PARTITION BY os.wait_type) AS sum_waiting_tasks, CASE WHEN SUM(os.waiting_tasks_count) OVER (PARTITION BY os.wait_type) > 0 THEN CAST( SUM(os.wait_time_ms) OVER (PARTITION BY os.wait_type) / (1. * SUM(os.waiting_tasks_count) OVER (PARTITION BY os.wait_type)) AS NUMERIC(12,1)) ELSE 0 END AS avg_wait_time_ms, CURRENT_TIMESTAMP as sample_time FROM sys.dm_os_wait_stats os LEFT JOIN #ignorable_waits iw on os.wait_type=iw.wait_type WHERE iw.wait_type is null ORDER BY sum_wait_time_ms DESC; GO /********************************* What are the higest waits *right now*? *********************************/ /* Note: this is dependent on the #ignorable_waits table created earlier. */ if OBJECT_ID('tempdb..#wait_batches') is not null drop table #wait_batches; if OBJECT_ID('tempdb..#wait_data') is not null drop table #wait_data; GO CREATE TABLE #wait_batches ( batch_id int identity primary key, sample_time datetime not null ); CREATE TABLE #wait_data ( batch_id INT NOT NULL , wait_type NVARCHAR(256) NOT NULL , wait_time_ms BIGINT NOT NULL , waiting_tasks BIGINT NOT NULL ); CREATE CLUSTERED INDEX cx_wait_data on #wait_data(batch_id); GO /* This temporary procedure records wait data to a temp table. */ if OBJECT_ID('tempdb..#get_wait_data') IS NOT NULL DROP procedure #get_wait_data; GO CREATE PROCEDURE #get_wait_data @intervals tinyint = 2, @delay char(12)='00:00:30.000' /* 30 seconds*/ AS DECLARE @batch_id int, @current_interval tinyint, @msg nvarchar(max); SET NOCOUNT ON; SET @current_interval=1; WHILE @current_interval <= @intervals BEGIN INSERT #wait_batches(sample_time) SELECT CURRENT_TIMESTAMP; SELECT @batch_id=SCOPE_IDENTITY(); INSERT #wait_data (batch_id, wait_type, wait_time_ms, waiting_tasks) SELECT @batch_id, os.wait_type, SUM(os.wait_time_ms) OVER (PARTITION BY os.wait_type) as sum_wait_time_ms, SUM(os.waiting_tasks_count) OVER (PARTITION BY os.wait_type) AS sum_waiting_tasks FROM sys.dm_os_wait_stats os LEFT JOIN #ignorable_waits iw on os.wait_type=iw.wait_type WHERE iw.wait_type is null ORDER BY sum_wait_time_ms DESC; set @msg= CONVERT(char(23),CURRENT_TIMESTAMP,121)+ N': Completed sample ' + cast(@current_interval as nvarchar(4)) + N' of ' + cast(@intervals as nvarchar(4)) + '.' RAISERROR (@msg,0,1) WITH NOWAIT; SET @current_interval=@current_interval+1; if @current_interval <= @intervals WAITFOR DELAY @delay; END GO /* Let's take two samples 30 seconds apart */ exec #get_wait_data @intervals=2, @delay='00:00:30.000'; GO /* What were we waiting on? This query compares the most recent two samples. */ with max_batch as ( select top 1 batch_id, sample_time from #wait_batches order by batch_id desc ) SELECT b.sample_time as [Second Sample Time], datediff(ss,wb1.sample_time, b.sample_time) as [Sample Duration in Seconds], wd1.wait_type, cast((wd2.wait_time_ms-wd1.wait_time_ms)/1000. as numeric(12,1)) as [Wait Time (Seconds)], (wd2.waiting_tasks-wd1.waiting_tasks) AS [Number of Waits], CASE WHEN (wd2.waiting_tasks-wd1.waiting_tasks) > 0 THEN cast((wd2.wait_time_ms-wd1.wait_time_ms)/ (1.0*(wd2.waiting_tasks-wd1.waiting_tasks)) as numeric(12,1)) ELSE 0 END AS [Avg ms Per Wait] FROM max_batch b JOIN #wait_data wd2 on wd2.batch_id=b.batch_id JOIN #wait_data wd1 on wd1.wait_type=wd2.wait_type AND wd2.batch_id - 1 = wd1.batch_id join #wait_batches wb1 on wd1.batch_id=wb1.batch_id WHERE (wd2.waiting_tasks-wd1.waiting_tasks) > 0 ORDER BY [Wait Time (Seconds)] DESC; GO
SELECT SUBSTRING(st.TEXT, (qs.statement_start_offset / 2) + 1, ( ( ( CASE qs.statement_end_offset WHEN - 1 THEN DATALENGTH(st.[text]) ELSE qs.statement_end_offset END ) - qs.statement_start_offset ) / 2 ) + 1) AS [sql] ,DB_NAME(er.database_id) AS [db_name] ,es.login_name ,ec.client_net_address ,er.command AS request_command ,er.start_time AS request_start_time ,wt.wait_duration_ms AS waiting_duration ,wt.wait_type AS waiting_type ,ot.task_state ,ec.connection_id ,ec.session_id ,es.[status] AS session_status ,CASE es.transaction_isolation_level WHEN 0 THEN 'Unspecified' WHEN 1 THEN 'Read Uncomitted' WHEN 2 THEN 'Read Committed' WHEN 3 THEN 'Repeatable' WHEN 4 THEN 'Serializable' WHEN 5 THEN 'Snapshot' END AS transaction_isolation_level ,er.request_id ,er.[status] AS request_status ,USER_NAME(er.[user_id]) AS [request_user] ,er.wait_type AS request_blocked_type ,er.wait_time AS request_blocked_time ,ec.connect_time ,es.login_time ,er.cpu_time ,os.cpu_id ,os.scheduler_id ,th.os_thread_id ,er.task_address ,ot.worker_address ,th.thread_address ,CASE WHEN er.[sql_handle] IS NULL THEN ec.most_recent_sql_handle ELSE er.[sql_handle] END AS [sql_handle] FROM sys.dm_exec_connections AS ec INNER JOIN sys.dm_exec_sessions AS es ON es.session_id = ec.session_id LEFT OUTER JOIN sys.dm_exec_requests AS er ON er.connection_id = ec.connection_id INNER JOIN sys.dm_os_tasks ot ON er.task_address = ot.task_address OR er.task_address = ot.parent_task_address LEFT OUTER JOIN sys.dm_os_workers AS ow ON ow.worker_address = ot.worker_address LEFT OUTER JOIN sys.dm_os_threads AS th ON th.thread_address = ow.thread_address LEFT OUTER JOIN sys.dm_os_schedulers AS os ON os.scheduler_id = ot.scheduler_id LEFT OUTER JOIN sys.dm_os_waiting_tasks AS wt ON wt.waiting_task_address = ot.task_address LEFT OUTER JOIN sys.dm_exec_query_stats AS qs ON qs.[sql_handle] = ( CASE WHEN er.[sql_handle] IS NULL THEN ec.most_recent_sql_handle ELSE er.[sql_handle] END ) CROSS APPLY sys.dm_exec_sql_text(CASE WHEN er.[sql_handle] IS NULL THEN ec.most_recent_sql_handle ELSE er.[sql_handle] END) AS st WHERE er.database_id = DB_ID() AND er.request_id IS NOT NULL AND wt.wait_duration_ms > 0 AND st.[text] NOT LIKE '%statement_start_offset%' ORDER BY wt.wait_duration_ms DESC;
《人人都是 DBA》系列文章索引:
本系列文章《人人都是 DBA》由 Dennis Gao 发表自博客园,未经作者本人同意禁止任何形式的转载,任何自动或人为的爬虫转载行为均为耍流氓。