暂无图片
暂无图片
暂无图片
暂无图片
暂无图片

PostgreSQL SQL log duration time 源码分析

digoal 2016-04-26
1037

作者

digoal

日期

2016-04-26

标签

PostgreSQL , 时长 , duration , query


背景

PostgreSQL 可以通过参数设置是否要记录SQL的执行时间,以及执行时间超过多少的SQL。

注意这里的执行时间实际上包含了网络的传输时间。

所以在遇到慢查询时,除了要排查数据库的问题,实际上还需要排查网络以及客户端的问题,因为客户端接收数据堵塞也会造成慢查询,就像我前天写的文章。

PostgreSQL IDE pgadmin , edb postgres enterprise manager 查询慢的问题分析

https://yq.aliyun.com/articles/32438

另外需要指出的是,PostgreSQL的内核在这方面有改进的空间,最好是把网络传输的时间另外计算。

这样更容易排查问题。

如果要将网络时间另外计算,需要hack一下内核的postgres.c中的几个函数,文章后面会分析。

测试

在数据库中创建表和测试数据

postgres=> create table tbl(id int); CREATE TABLE postgres=> insert into tbl select generate_series(1,200000); INSERT 0 200000 postgres=> \dt+ tbl List of relations Schema | Name | Type | Owner | Size | Description --------+------+-------+-------+---------+------------- public | tbl | table | test | 7104 kB | (1 row)

确保打开了慢查询的审计日志

```
postgres=> show log_min_duration_statement ;
log_min_duration_statement


1s
(1 row)
```

在数据库所在服务器的本地执行如下查询,很快就返回

digoal@localhost-> date; psql -c "select * from tbl" >/dev/null ; date; Fri Apr 22 11:59:52 CST 2016 Fri Apr 22 11:59:53 CST 2016

开启一个比较慢的网络,例如手机的2G网络,然后通过手机上网连接到数据库,执行同样的SQL,耗时变长了,因为网络不稳定,时快时慢。

```
digoal@digoal01-> date ; psql -h remote_host -p 1921 -U test postgres -c "select * from tbl" >/dev/null ; date;
Fri Apr 22 12:31:08 CST 2016
Fri Apr 22 12:31:18 CST 2016

digoal@digoal01-> date ; psql -h remote_host -p 1921 -U test postgres -c "select * from tbl limit 20000" >/dev/null ; date;
Fri Apr 22 12:34:30 CST 2016
Fri Apr 22 12:34:47 CST 2016
```

在数据库的日志中,可以看到慢查询的审计日志,耗时包含了SQL数据库将数据传输到客户端的时间。

2016-04-22 12:33:13.112 CST,"test","postgres",2680,"36.16.129.195:11812",5719a97f.a78,3,"SELECT",2016-04-22 12:33:03 CST,7/0,0,LOG,00000,"duration: 8300.129 ms statement: select * from tbl limit 20000",,,,,,,,"exec_simple_query, postgres.c:1149","psql"

对应的代码

src/backend/tcop/postgres.c

check_log_duration

检查是否需要输出duration。

同时计算从语句时间开始到当前时间的一个时间差,也就是SQL的duration。

有4个接口会记录时间

exec_execute_message 使用绑定变量时, execute sql的时间 exec_bind_message 使用绑定变量时, bind的时间 exec_parse_message 使用绑定变量时, parse的时间 exec_simple_query 未使用绑定变量时,执行SQL的时间

这几个函数的代码如下

```
/
* check_log_duration
* Determine whether current command's duration should be logged

* Returns:
* 0 if no logging is needed, 不需要记录SQL时间
* 1 if just the duration should be logged, 需要记录SQL时间, 但是不需要记录SQL详情
* 2 if duration and query details should be logged, 需要记录SQL时间,同时需要记录SQL 详情

* If logging is needed, the duration in msec is formatted into msec_str[],
* which must be a 32-byte buffer.

* was_logged should be TRUE if caller already logged query details (this
* essentially prevents 2 from being returned).
/
int
check_log_duration(char
msec_str, bool was_logged)
{
if (log_duration || log_min_duration_statement >= 0)
{
long secs;
int usecs;
int msecs;
bool exceeded;

    TimestampDifference(GetCurrentStatementStartTimestamp(),  
                        GetCurrentTimestamp(),  
                        &secs, &usecs);  // 语句开始到当前的时间  
    msecs = usecs / 1000;

    /*  
     * This odd-looking test for log_min_duration_statement being exceeded  
     * is designed to avoid integer overflow with very long durations:  
     * don't compute secs * 1000 until we've verified it will fit in int.  
     */  
    exceeded = (log_min_duration_statement == 0 ||  
                (log_min_duration_statement > 0 &&  
                 (secs > log_min_duration_statement / 1000 ||  
                  secs * 1000 + msecs >= log_min_duration_statement)));

    if (exceeded || log_duration)  
    {  
        snprintf(msec_str, 32, "%ld.%03d",  
                 secs * 1000 + msecs, usecs % 1000);  
        if (exceeded && !was_logged)  
            return 2;  
        else  
            return 1;  
    }  
}

return 0;

}
```

simple exec

```
/
* exec_simple_query

* Execute a "simple Query" protocol message.
/
static void
exec_simple_query(const char
query_string)
{
CommandDest dest = whereToSendOutput;
MemoryContext oldcontext;
List parsetree_list;
ListCell
parsetree_item;
bool save_log_statement_stats = log_statement_stats;
bool was_logged = false;
bool isTopLevel;
char msec_str[32];
...
/
* Create unnamed portal to run the query or queries in. If there
* already is one, silently drop it.
/
portal = CreatePortal("", true, true);
/ Don't display the portal in pg_cursors /
portal->visible = false;

    /*  
     * We don't have to copy anything into the portal, because everything  
     * we are passing here is in MessageContext, which will outlive the  
     * portal anyway.  
     */  
    PortalDefineQuery(portal,  
                      NULL,  
                      query_string,  
                      commandTag,  
                      plantree_list,  
                      NULL);

    /*  
     * Start the portal.  No parameters here.  
     */  
    PortalStart(portal, NULL, 0, InvalidSnapshot);

    /*  
     * Select the appropriate output format: text unless we are doing a  
     * FETCH from a binary cursor.  (Pretty grotty to have to do this here  
     * --- but it avoids grottiness in other places.  Ah, the joys of  
     * backward compatibility...)  
     */  
    format = 0;             /* TEXT is default */  
    if (IsA(parsetree, FetchStmt))  
    {  
        FetchStmt  *stmt = (FetchStmt *) parsetree;

        if (!stmt->ismove)  
        {  
            Portal      fportal = GetPortalByName(stmt->portalname);

            if (PortalIsValid(fportal) &&  
                (fportal->cursorOptions & CURSOR_OPT_BINARY))  
                format = 1; /* BINARY */  
        }  
    }  
    PortalSetResultFormat(portal, 1, &format);

    /*  
     * Now we can create the destination receiver object.  
     */  
    receiver = CreateDestReceiver(dest);  
    if (dest == DestRemote)  
        SetRemoteDestReceiverParams(receiver, portal);

    /*  
     * Switch back to transaction context for execution.  
     */  
    MemoryContextSwitchTo(oldcontext);

    /*  
     * Run the portal to completion, and then drop it (and the receiver).  
     */  
    (void) PortalRun(portal,  
                     FETCH_ALL,  
                     isTopLevel,  
                     receiver,  
                     receiver,  
                     completionTag);

    (*receiver->rDestroy) (receiver);

    PortalDrop(portal, false);

    if (IsA(parsetree, TransactionStmt))  
    {  
        /*  
         * If this was a transaction control statement, commit it. We will  
         * start a new xact command for the next command (if any).  
         */  
        finish_xact_command();  
    }

...
/
* Close down transaction statement, if one is open.
/
finish_xact_command();

/*  
 * If there were no parsetrees, return EmptyQueryResponse message.  
 */  
if (!parsetree_list)  
    NullCommand(dest);

/*  
 * Emit duration logging if appropriate.  
 */  
switch (check_log_duration(msec_str, was_logged))  
{  
    case 1:  
        ereport(LOG,  
                (errmsg("duration: %s ms", msec_str),  
                 errhidestmt(true)));  
        break;  
    case 2:  
        ereport(LOG,  
                (errmsg("duration: %s ms  statement: %s",  
                        msec_str, query_string),  
                 errhidestmt(true),  
                 errdetail_execute(parsetree_list)));  
        break;  
}

if (save_log_statement_stats)  
    ShowUsage("QUERY STATISTICS");

TRACE_POSTGRESQL_QUERY_DONE(query_string);

debug_query_string = NULL;

}
```

parse

```
/
* exec_parse_message

* Execute a "Parse" protocol message.
/
static void
exec_parse_message(const char
query_string, / string to execute /
const char stmt_name, / name for prepared stmt /
Oid
paramTypes, / parameter types /
int numParams) / number of parameters /
{
MemoryContext unnamed_stmt_context = NULL;
MemoryContext oldcontext;
List parsetree_list;
Node
raw_parse_tree;
const char commandTag;
List
querytree_list;
CachedPlanSource psrc;
bool is_named;
bool save_log_statement_stats = log_statement_stats;
char msec_str[32];
...
/

* Send ParseComplete.
*/
if (whereToSendOutput == DestRemote)
pq_putemptymessage('1');

/*  
 * Emit duration logging if appropriate.  
 */  
switch (check_log_duration(msec_str, false))  
{  
    case 1:  
        ereport(LOG,  
                (errmsg("duration: %s ms", msec_str),  
                 errhidestmt(true)));  
        break;  
    case 2:  
        ereport(LOG,  
                (errmsg("duration: %s ms  parse %s: %s",  
                        msec_str,  
                        *stmt_name ? stmt_name : "<unnamed>",  
                        query_string),  
                 errhidestmt(true)));  
        break;  
}

if (save_log_statement_stats)  
    ShowUsage("PARSE MESSAGE STATISTICS");

debug_query_string = NULL;

}

```

bind

```
/
* exec_bind_message

* Process a "Bind" message to create a portal from a prepared statement
/
static void
exec_bind_message(StringInfo input_message)
{
const char
portal_name;
const char stmt_name;
int numPFormats;
int16
pformats = NULL;
int numParams;
int numRFormats;
int16 rformats = NULL;
CachedPlanSource
psrc;
CachedPlan cplan;
Portal portal;
char
query_string;
char saved_stmt_name;
ParamListInfo params;
MemoryContext oldContext;
bool save_log_statement_stats = log_statement_stats;
bool snapshot_set = false;
char msec_str[32];
...
/

* Now we can define the portal.

* DO NOT put any code that could possibly throw an error between the
* above GetCachedPlan call and here.
/
PortalDefineQuery(portal,
saved_stmt_name,
query_string,
psrc->commandTag,
cplan->stmt_list,
cplan);

/* Done with the snapshot used for parameter I/O and parsing/planning */  
if (snapshot_set)  
    PopActiveSnapshot();

/*  
 * And we're ready to start portal execution.  
 */  
PortalStart(portal, params, 0, InvalidSnapshot);

/*  
 * Apply the result format requests to the portal.  
 */  
PortalSetResultFormat(portal, numRFormats, rformats);

/*  
 * Send BindComplete.  
 */  
if (whereToSendOutput == DestRemote)  
    pq_putemptymessage('2');

/*  
 * Emit duration logging if appropriate.  
 */  
switch (check_log_duration(msec_str, false))  
{  
    case 1:  
        ereport(LOG,  
                (errmsg("duration: %s ms", msec_str),  
                 errhidestmt(true)));  
        break;  
    case 2:  
        ereport(LOG,  
                (errmsg("duration: %s ms  bind %s%s%s: %s",  
                        msec_str,  
                        *stmt_name ? stmt_name : "<unnamed>",  
                        *portal_name ? "/" : "",  
                        *portal_name ? portal_name : "",  
                        psrc->query_string),  
                 errhidestmt(true),  
                 errdetail_params(params)));  
        break;  
}

if (save_log_statement_stats)  
    ShowUsage("BIND MESSAGE STATISTICS");

debug_query_string = NULL;

}
```

execute

```
/
* exec_execute_message

* Process an "Execute" message for a portal
/
static void
exec_execute_message(const char
portal_name, long max_rows)
{
CommandDest dest;
DestReceiver receiver;
Portal portal;
bool completed;
char completionTag[COMPLETION_TAG_BUFSIZE];
const char
sourceText;
const char *prepStmtName;
ParamListInfo portalParams;
bool save_log_statement_stats = log_statement_stats;
bool is_xact_command;
bool execute_is_fetch;
bool was_logged = false;
char msec_str[32];

...
/
* Okay to run the portal.
/
if (max_rows <= 0)
max_rows = FETCH_ALL;

completed = PortalRun(portal,  
                      max_rows,  
                      true, /* always top level */  
                      receiver,  
                      receiver,  
                      completionTag);

(*receiver->rDestroy) (receiver);

if (completed)  
{  
    if (is_xact_command)  
    {  
        /*  
         * If this was a transaction control statement, commit it.  We  
         * will start a new xact command for the next command (if any).  
         */  
        finish_xact_command();  
    }  
    else  
    {  
        /*  
         * We need a CommandCounterIncrement after every query, except  
         * those that start or end a transaction block.  
         */  
        CommandCounterIncrement();  
    }

    /* Send appropriate CommandComplete to client */  
    EndCommand(completionTag, dest);  
}  
else  
{  
    /* Portal run not complete, so send PortalSuspended */  
    if (whereToSendOutput == DestRemote)  
        pq_putemptymessage('s');  
}

/*  
 * Emit duration logging if appropriate.  
 */  
switch (check_log_duration(msec_str, was_logged))  
{  
    case 1:  
        ereport(LOG,  
                (errmsg("duration: %s ms", msec_str),  
                 errhidestmt(true)));  
        break;  
    case 2:  
        ereport(LOG,  
                (errmsg("duration: %s ms  %s %s%s%s: %s",  
                        msec_str,  
                        execute_is_fetch ?  
                        _("execute fetch from") :  
                        _("execute"),  
                        prepStmtName,  
                        *portal_name ? "/" : "",  
                        *portal_name ? portal_name : "",  
                        sourceText),  
                 errhidestmt(true),  
                 errdetail_params(portalParams)));  
        break;  
}

if (save_log_statement_stats)  
    ShowUsage("EXECUTE MESSAGE STATISTICS");

debug_query_string = NULL;

}
```

PostgreSQL 许愿链接

您的愿望将传达给PG kernel hacker、数据库厂商等, 帮助提高数据库产品质量和功能, 说不定下一个PG版本就有您提出的功能点. 针对非常好的提议,奖励限量版PG文化衫、纪念品、贴纸、PG热门书籍等,奖品丰富,快来许愿。开不开森.

9.9元购买3个月阿里云RDS PostgreSQL实例

PostgreSQL 解决方案集合

德哥 / digoal's github - 公益是一辈子的事.

digoal's wechat

文章转载自digoal,如果涉嫌侵权,请发送邮件至:contact@modb.pro进行举报,并提供相关证据,一经查实,墨天轮将立刻删除相关内容。

评论