set linesize 999 pagesize 999
col event format a50
select sql_id,event,count(*) from v$active_session_history where SAMPLE_TIME>SYSDATE-1/48 group by sql_id,event order by 3 desc;
select sql_id,inst_id,event,count(*),machine from gv$active_session_history where SAMPLE_TIME>SYSDATE-1/96 group by sql_id,inst_id,event,machine order by 4 desc;
发现事件出现cpu争用和行锁等待
resmgr:cpu quantum
enq: TX - row lock contention
- 检查产生行锁的会话,找到产生阻塞的会话ID(machine字段为数据库主机名的会话为内部进程,可以忽略不计)
select inst_id,sid, sql_id, program, logon_time,machine, terminal,username, event,p1,p2, blocking_instance,blocking_session,final_blocking_instance,final_blocking_session
from gv$session
where wait_class <> 'Idle'
or blocking_session is not null
order by 1,2;
命令行中显示太多放不下
set linesize 999 pagesize 999
select inst_id,sid, logon_time,machine, terminal,username, event
from gv$session
where wait_class <> 'Idle'
or blocking_session is not null
order by 1,2;
- 检查被2583,2148阻塞的会话
select inst_id,sid, sql_id, program, logon_time,machine, terminal,username, event,p1,p2, blocking_instance,blocking_session,final_blocking_instance,final_blocking_session
from gv$session
where sid in (2583,2148)
order by 1,2;
- 发现产生阻塞的会话状态为inacitve,说明该会话一直持有数据库行锁,并未执行操作,产生了空闲等待。怀疑是程序上的释放锁机制存在问题。
select sid,username,machine,status from
gv$session where sid in (2583,2148)
通过操作系统进程也能确定该问题。
查询该会话所在的spid
select spid from v$process where addr in (select paddr from gv$session where sid= &1);
&1:(2583,2148)
找到spid为:12809|181982
ps -ef l| egrep ‘12809|181982’
发现进程状态为S ,表示该进程处于sleep状态。
- 最终定位到程序框架中seata代理的for update 导致锁表问题,业务系统超时导致报错,然后锁未释放,业务中午执行工单nacos配置修改后问题解决该问题。
- cpu资源争用依然存在,因此需要查询造成争用的sql语句交给开发进行分析。
Select sql_id,event,count(*) from v$active_session_history group by sql_id,event order by 3 desc;
查询结果
'50q7m6dm47ycg' 未造成大量等待的sql语句。
- 查看该语句的执行计划
select dbms_sqltune.report_sql_monitor (sql_id=>'50q7m6dm47ycg',type=>'html') from dual;
发现有一项IS_DELETED=2造成了全表扫描,执行了3百万次,是执行慢的主要原因。
查询产生全表扫描的字段信息:IS_DELETED
SELECT * FROM DBA_IND_COLUMNS WHERE TABLE_OWNER=‘ZHCZ_CAFWE’ AND TABLE_NAME=’WORKFLOW_TASK_TO’;
SELECT IS_DELETED ,COUNT(1) FROM ZHCZ_CAFWE.WORKFLOW_TASK_TODO GROUP BY IS_DELETED ORDER BY 2 DESC;
通过awr分析,发现该语句执行时间占比超过89.7%,cpu时间占用22%
- cpu资源争用依然存在,查询单次采用中高峰292个活动会话,其中169个全表扫描,占用cpu资源存在瓶颈,所以需要优化sql语句,同时将部分业务放到其他节点上。
Select count(*) from v$active_session_history where sample_id=17972085
Count(*)
--------------
292
Select count(*) from v$active_session_history where sample_id=17972085 and sql_plan_options=’FULL’
Count(*)
--------------
169
- 监控执行时间较长的sql语句
- 杀会话
select 'alter system kill session '''||s.sid||', '||s.serial#||''';' as standalone_kill_session
,'alter system kill session '''||s.sid||', '||s.serial#||', @'||s.inst_id||''';' as rac_kill_session
,s.*
from gv$session s
where 1=1
and s.schemaname <> 'SYS'
and s.status <> 'INACTIVE'
;
查看被锁的表
select t2.username,
t2.sid,
t2.serial#,
t3.object_name,
t2.OSUSER,
t2.MACHINE,
t2.PROGRAM,
t2.LOGON_TIME,
t2.COMMAND,
t2.LOCKWAIT,
t2.SADDR,
t2.PADDR,
t2.TADDR,
t2.SQL_ADDRESS,
t1.LOCKED_MODE
from v$locked_object t1, v$session t2, dba_objects t3
where t1.session_id = t2.sid
and t1.object_id = t3.object_id
order by t2.logon_time;
产生锁的语句
SELECT /*+ ORDERED */
sql_text
FROM v$sqltext a
WHERE (a.hash_value, a.address) IN
(SELECT DECODE(sql_hash_value, 0, prev_hash_value, sql_hash_value),
DECODE(sql_hash_value, 0, prev_sql_addr, sql_address)
FROM v$session b
WHERE b.sid = '5636') /* 此处67 为SID*/
ORDER BY piece ASC;
select 'alter system kill session '''||sid||','||serial#||''' immediate;' from v$session where wait_class <> 'Idle'
and blocking_session is not null AND event='enq: TX - row lock contention' AND usernamE IN ('BYDMESIND' , 'BYDMES2') ;
select 'alter system kill session '''||sid||','||serial#||''' immediate;' from v$session where sid in (select final_blocking_session
from gv$session
where wait_class <> 'Idle'
and blocking_session is not null and event='enq: TX - row lock contention' and username='BYDMESIND' AND SQL_ID='7b6apjykkrh2q')
select 'alter system kill session '''||sid||','||serial#||''' immediate;' from v$session where sid in (select sid
from gv$session
where wait_class <> 'Idle' and SQL_ID='7b6apjykkrh2q' and event='enq: TX - row lock contention' )
select 'alter system kill session ''' || ta.sid || ',' || ta.serial# ||''' immediate;' from v$session ta where ta.username='CDJP';
netstat -tn | awk '$4 ~ /:1521$/ && $6 == "ESTABLISHED" {print $5}' | cut -d: -f1 | sort | uniq -c | sort -nr
select machine ,count(*) from v$session group by machine order by 2 desc,1 ;
with lk as
(
select
/*+materialize*/
inst_id ,
decode(request ,
0 ,'holder',
'waiter') role,
sid ,
type ,
request ,
lmode ,
block ,
ctime ,
id1 ,
id2
from
gv$lock
where
(
id1,id2,type) in
(
select
id1,
id2,
type
from
gv$lock
where
request>0))
select
/*+rule*/
inst_id ,
sid ,
program,
event ,
status ,
state ,
sql_id ,
case
when
status='ACTIVE'
then
(
select
substr(sql_text,1,80)
from
v$open_cursor
where
sql_id=s.sql_id)
end sql_text ,
last_call_et ,
row_wait_obj#,
case
when
row_wait_obj# <>-1
then
dbms_rowid.rowid_create(1,(
select
data_object_id
from
dba_objects
where
object_id=row_wait_obj#), row_wait_file#, row_wait_block#, row_wait_row#)
end row_id ,
blocking_instance,
blocking_session
from
gv$session s
where
type <>'BACKGROUND'
and (
inst_id,sid) in
(
select
inst_id,
sid
from
lk);
生产awr快照
SQL> begin
dbms_workload_repository.create_snapshot();
end;
/复制