Python自动化报告系统

原创 red_hope 2023-10-28

361

一、代码主要框架说明

本次公开：

第一个：巡检主函数

第二个：word模板自动生成代码。注：这个word模板生成系统完全是作者自写，国内写这个的比较少，是完全用python一点点写出一个word模板出来，不是先用word或者wps做个模板，然后用python去填空。当然，两者各有优缺点。

主函数调用word模板自动生成自动化报告系统，是巡检系统最核心的内容。

二、主函数代码

代码的注释很详细，请大家自行阅读，就不做过多解读了。

# power by yugang 13909590025
# Ningxia Chengda Data Technology Co., Ltd
# 主函数为通用函数，可以巡检各种类型不同的数据库、主机及网络设备，没有类型限制，不同在于要采用不同的知识库
import re
from docx import Document
import xlwt
import time
import xlrd
# 以下是自定义方法
from basc_method import num_to_char, mydocx_table_format, get_db_name_input, get_all_fengefu, get_xj_file_name, \
    set_buff_replace_one, set_buff_replace_two, set_buff_replace_three,get_title,list_dir,get_hostname
from excel_creat import excel_create_get_program
from get_knowledge import get_knowledge, get_auto_check_title, get_how_todo
from get_word_format import get_word_format_title_one, get_word_format_add_table, set_word_excel_qy
from show_db_time2 import show_db_time
from show_me_a_table import show_table_all_program
from word_template import word_template_my_favorite

if __name__ == '__main__':
    # Driver script: analyse every inspection log found in ``file_dir`` and
    # write one combined Word report plus one Excel workbook into that folder.

    # ---------------- state shared across all inspection logs ----------------
    nxcdtcl_db_list = []  # names of all inspected databases
    nxcdtcl_wt_all = []  # every finding, as dicts, for the final summary table
    # 'THIN' writes the condensed report; set to 'FULL' to also write the raw
    # result and the analysis of every check item into the Word document.
    nxcdtcl_report_type = 'THIN'
    nxcdtcl_excel_col_num = 0  # running index handed to the Word/Excel template
    col_number = 0  # running index in the "check items" sheet

    # ---------------- Word / Excel output objects ----------------
    # A single Document created here means all logs end up in one report.
    my_doc = Document()
    workbook = xlwt.Workbook(encoding='utf-8', style_compression=0)
    sheet = set_word_excel_qy(my_doc, workbook)  # header part of the Word doc and the sheet
    sheet_check_program = workbook.add_sheet('巡检项目', cell_overwrite_ok=True)

    file_dir = 'D:\\oracle巡检\\2022-9-13'
    file_all = list_dir(file_dir)

    # ---------------- analyse the inspection logs one by one ----------------
    for file_count, file in enumerate(file_all):
        # Per-file state, reset for every log.
        nxcdtcl_jb_xunjian_title = ''  # current check item (used as Word heading)
        nxcdtcl_jb_title_list = []  # distinct check-item titles seen in this log
        nxcdtcl_jb_cost_list = []  # DB time series
        nxcdtcl_jb_logicread_list = []  # logical-read series
        nxcdtcl_jb_physicread_list = []  # physical-read series
        nxcdtcl_jb_parsehards_list = []  # hard-parse series
        nxcdtcl_jb_wt_host_list = []  # host (OS) findings of this log
        nxcdtcl_jb_wt_db_list = []  # database findings of this log
        nxcdtcl_host_str = ''  # host name of the node currently being checked

        file_str = get_xj_file_name(file, file_count)  # name of the inspection file

        # First pass: collect the delimiters that separate the check items.
        with open(file, 'r', encoding='UTF-8') as file_open_first:
            nxcdtcl_jb_fenge_list = get_all_fengefu(file_open_first)

        # Second pass: read the whole log. The context manager closes the
        # handle for every file, not only for the last one.
        with open(file, 'r', encoding='UTF-8') as f:
            buff = f.read()
        buff = set_buff_replace_one(buff)  # strip redundant "SQL>" prompts / line breaks
        db_name = get_db_name_input(buff)
        nxcdtcl_db_list.append(db_name)
        # The database name becomes the level-1 heading of this log's chapter.
        get_word_format_title_one(my_doc, f'{num_to_char(file_count + 1)}、{db_name}')

        tile_num = 1  # heading number of the next check item
        # Walk over neighbouring delimiter pairs; the last delimiter has no
        # successor and therefore never starts a section.
        delimiter_pairs = zip(nxcdtcl_jb_fenge_list, nxcdtcl_jb_fenge_list[1:])
        for k, (fenge1, fenge2) in enumerate(delimiter_pairs, start=1):
            fenge1 = set_buff_replace_two(fenge1)  # current check-item delimiter
            fenge2 = set_buff_replace_two(fenge2)  # next check-item delimiter
            print(f'{k}:开始分析:{fenge1.lstrip()}相关的内容')
            nxcdtcl_jb_xunjian_title, check_type = get_title(fenge1)
            # 'PLNOCHECK' ("please do not check") marks items that are skipped.
            if nxcdtcl_jb_xunjian_title == 'PLNOCHECK':
                print(f'  {fenge1.replace("#","")}设置为不检查，跳过......跳过......跳过......跳过......跳过......跳过......')
                continue
            big_number = num_to_char(tile_num)
            tile_num = tile_num + 1

            # Remember which host the following OS checks belong to.
            if 'Check file system' in fenge1:
                nxcdtcl_host_str = get_hostname(fenge1)
            if nxcdtcl_jb_xunjian_title not in nxcdtcl_jb_title_list:
                nxcdtcl_jb_title_list.append(nxcdtcl_jb_xunjian_title)

            # Extract the text between the two delimiters.
            # NOTE(review): the delimiters are inserted into the pattern
            # unescaped; confirm set_buff_replace_two() yields regex-safe text,
            # otherwise wrap both in re.escape().
            pat = re.compile(fenge1 + '(.*)' + fenge2, re.S)
            result = pat.findall(buff)

            # Pre-render the trend charts that the template inserts later.
            # NOTE(review): these are substring tests (title contained in the
            # literal), so an empty title matches all four; confirm whether
            # equality was intended.
            if nxcdtcl_jb_xunjian_title in '数据库繁忙情况':
                nxcdtcl_jb_cost_list = show_db_time(result, db_name, 'dbtime')
            if nxcdtcl_jb_xunjian_title in '数据库逻辑读':
                nxcdtcl_jb_logicread_list = show_db_time(result, db_name, 'logicalreads')
            if nxcdtcl_jb_xunjian_title in '数据库物理读':
                nxcdtcl_jb_physicread_list = show_db_time(result, db_name, 'physicalreads')
            if nxcdtcl_jb_xunjian_title in '硬解析情况':
                nxcdtcl_jb_parsehards_list = show_db_time(result, db_name, 'parsehards')

            # Without a match there is nothing to analyse. Falling through
            # would reuse the text of the previous check item (or raise
            # NameError on the very first one).
            if not result:
                print(f'  {nxcdtcl_jb_xunjian_title}:未匹配到巡检内容，跳过分析')
                continue

            for match1 in result:
                match1 = set_buff_replace_three(match1)
                # FULL report: print the raw inspection output of the item.
                if nxcdtcl_report_type == 'FULL':
                    title = f'巡检{big_number}:{nxcdtcl_jb_xunjian_title}'
                    my_doc.add_heading(title, level=2)
                    get_word_format_add_table(my_doc, '1、巡检结果：', match1)

            # Key step: look the (last) matched text up in the knowledge base.
            str_output_now = get_knowledge(nxcdtcl_jb_xunjian_title, match1)
            if nxcdtcl_report_type == 'FULL':
                get_word_format_add_table(my_doc, '2、分析结果：', str_output_now)
                my_doc.add_heading("此项目由ORA_CHECK智能分析", level=5)

            # '11111111' flags a finding; '00000000' means the item is fine.
            if str_output_now[1] != '11111111':
                continue
            if check_type == 'OS':
                # OS findings carry the host name so the node is identifiable.
                progrom_str = f'{nxcdtcl_jb_xunjian_title}:{nxcdtcl_host_str}:{str_output_now[0]}'
                nxcdtcl_jb_wt_host_list.append(progrom_str)
                nxcdtcl_wt_all.append({"数据库名称：": db_name,
                                       "巡检内容：": f'{nxcdtcl_jb_xunjian_title}({nxcdtcl_host_str})',
                                       "优化项:": str_output_now[0],
                                       "优化措施：": get_how_todo(progrom_str)})
            else:
                progrom_str = f'{nxcdtcl_jb_xunjian_title}:{str_output_now[0]}。'
                nxcdtcl_jb_wt_db_list.append(progrom_str)
                nxcdtcl_wt_all.append({"数据库名称：": db_name,
                                       "巡检内容：": nxcdtcl_jb_xunjian_title,
                                       "优化项:": str_output_now[0],
                                       "优化措施：": get_how_todo(progrom_str)})

        # ---------------- render this log through the Word template ----------------
        # A different layout can be produced by swapping in another template function.
        excel_col_number = word_template_my_favorite(my_doc, db_name, nxcdtcl_jb_wt_host_list, nxcdtcl_jb_wt_db_list,
                                                     buff, sheet, nxcdtcl_jb_cost_list, nxcdtcl_jb_physicread_list,
                                                     nxcdtcl_jb_logicread_list, nxcdtcl_jb_parsehards_list,
                                                     nxcdtcl_excel_col_num)
        nxcdtcl_excel_col_num = excel_col_number + 1

        # ---------------- record the check items of this database ----------------
        for title_str in get_auto_check_title():
            excel_create_get_program(sheet_check_program, db_name, title_str, col_number)
            col_number = col_number + 1

        print(f'#####分析第{file_count + 1}个文件完成#####')

    # ---------------- summary of all findings ----------------
    # len(file_all) + 1 is the chapter after the last database and is also
    # well defined when no inspection file was found.
    get_word_format_title_one(my_doc, f'{num_to_char(len(file_all) + 1)}、巡检问题汇总')
    show_table_all_program(nxcdtcl_wt_all, my_doc)

    # ---------------- save Word and Excel ----------------
    # Moving this part into the loop would give one report per log instead.
    time_flag = time.time()  # shared suffix so both files of a run can be paired
    try:
        print('正在生成word报告...')
        my_doc.save(f'{file_dir}\\ORACLE巡检报告_{nxcdtcl_report_type}_{time_flag}.docx')
    except Exception as e:
        print(f'CHKERR-100:保存巡检word文档失败：{e}')
    try:
        print('正在生成excel报告...')
        workbook.save(f'{file_dir}\\ORACLE巡检报告_{time_flag}.xls')
    except Exception as e:
        print(f'CHKERR-100:保存巡检excel文档失败：{e}')
        print(time.localtime())
    # bye-bye power by yugang 13909590025 all right reserved########################################################
复制

三、word自动生成部分。

word文档模板代码：

from docx.shared import Cm
from docx.shared import Inches
import numpy as np
from basc_method import mydocx_table_format, tabBgColor, check_out_list
from excel_creat import excel_create_content
from get_knowledge import get_how_todo
from get_word_format import get_word_format_title_two, input_word_text, get_word_format_title_three, \
    get_word_format_add_table_plus, get_word_format_add_table
from show_me_a_table import show_table_hostinfo, show_table_dbinfo, show_table_oracle_event


# 模板一
def word_template_my_favorite(my_doc,db_name, wt_all_host, wt_all_db, buff, sheet, cost_list, physicalreads_list,
                       logicalreads_list, nxcdtcl_jb_parsehards_list,excel_col_number_input):
    """Write the report chapter of one database into the Word document.

    The chapter has five parts: host health check, database health check,
    performance analysis, parameter analysis and a summary of the database
    findings. Every listed host/database finding is also written to ``sheet``
    through ``excel_create_content``.

    Args:
        my_doc: Word document object the chapter is appended to.
        db_name: database name, used in headings and generated texts.
        wt_all_host: host finding strings; the text before the first ':' is
            treated as the check title.
        wt_all_db: database finding strings, same layout.
        buff: text of the inspection log, passed on to the table helpers.
        sheet: Excel sheet object that receives one entry per finding.
        cost_list: DB time samples.
        physicalreads_list: physical-read samples.
        logicalreads_list: logical-read samples.
        nxcdtcl_jb_parsehards_list: hard-parse samples.
        excel_col_number_input: index used by the previous Excel entry; this
            function continues counting from it.

    Returns:
        The index passed to ``excel_create_content`` for the last finding
        written (``excel_col_number_input`` when nothing was written).
    """
    excel_col_number = excel_col_number_input

    # ================= 1. host health check =================
    get_word_format_title_two(my_doc, f'1、{db_name}主机系统健康检查')
    input_word_text(my_doc, '以下主要是对本数据库系统所在的宿主机做一个简要的健康检查和评估，如果发现问题，请主机方维护工程师做进一步的检查与评估。')
    get_word_format_title_three(my_doc, '1.1 主机基本信息')
    input_word_text(my_doc, '主机基本信息如下表所示：')
    # Renders the host info table. Element 0 of the return value is used as
    # the CPU count below; elements 2..6 are forwarded to the Excel export.
    table_return = show_table_hostinfo(buff, my_doc, db_name)
    cpu_count = table_return[0]
    get_word_format_title_three(my_doc, '1.2 磁盘分区空间检查')
    _write_matching_findings(my_doc, wt_all_host, '主机文件系统巡检')

    # ----- all remaining host findings, written to Word and Excel -----
    get_word_format_title_three(my_doc, '1.3 主机其它检问题汇总')
    excel_rowid = 0  # second running index for Excel; continues into part 5
    host_problem = 1  # display number of the next host finding
    for k, wt_now in enumerate(wt_all_host):
        if check_out_list(wt_now):
            continue
        # Writes the finding into a Word table and returns the remedy text.
        how_to_do = get_word_format_add_table_plus(my_doc, wt_all_host, k, host_problem)
        host_problem = host_problem + 1
        wt_display = f'{wt_now}'
        excel_col_number = excel_col_number + 1
        title_current = wt_display.split(":")[0]
        excel_rowid = excel_rowid + 1
        excel_create_content(sheet, excel_col_number, excel_rowid, db_name, title_current, wt_display, how_to_do,
                             table_return[2], table_return[3], table_return[4], table_return[5], table_return[6])

    if host_problem == 1:
        input_word_text(my_doc, f'{db_name}主机巡检未发现其它问题，正常。')

    # ================= 2. database health check =================
    get_word_format_title_two(my_doc, f'2、{db_name}数据库系统健康检查')
    input_word_text(my_doc, '以下章节主要是对数据库使用情况做下健康检查分析。')
    get_word_format_title_three(my_doc, '2.1 数据库基本信息')
    input_word_text(my_doc, '数据库基本信息如下表所示：')
    show_table_dbinfo(buff, my_doc, db_name)
    # (heading, keywords): under each heading, list the findings containing
    # the first keyword, then those containing the second, and so on.
    db_sections = (
        ('2.2 表空间使用状况', ('表空间使用率检查',)),
        ('2.3 重做日志状况', ('在线日志大小检查', '日志组切换频率检查')),
        ('2.4 失效对象情况', ('失效对象检查',)),
        ('2.5 数据库SCN信息', ('SCN天花板时间检查',)),
        ('2.6 不可用索引检查', ('失效索引检查',)),
        ('2.7 ASM磁盘组检查', ('ASM磁盘组检查',)),
    )
    for heading, keywords in db_sections:
        get_word_format_title_three(my_doc, heading)
        for keyword in keywords:
            _write_matching_findings(my_doc, wt_all_db, keyword)

    # ================= 3. performance analysis =================
    performance_title_number = 1
    get_word_format_title_two(my_doc, f'3、{db_name}库性能分析')
    input_word_text(my_doc, '以下章节主要是数据库性能进行分析，结合数据库繁忙程度、逻辑读、物理读趋势图，使管理员对数据库压力情况'
                            '有直观了解。')

    def describe_db_time(peak):
        # Compare the untruncated peak with the CPU count so that a peak
        # between cpu and cpu + 1 is reported as well; equality is covered.
        cpu_total = int(cpu_count)
        intro = f'{db_name}库计算高峰期数据库压力，用DB Time/Elapsed值与CPU数相比较，在上图高峰期时段，'
        if peak > cpu_total:
            return intro + (f'数据库cost最大值为{peak},大于cpu数量：{cpu_count},'
                            f'数据库高峰时段cpu算力不足，需合理分配每个节点压力。')
        relation = '少于' if peak < cpu_total else '等于'
        return intro + f'数据库cost最大值为{peak},{relation}cpu数量：{cpu_count},数据库总体压力正常。'

    # (samples, heading suffix, image file prefix, label in messages, text builder)
    trend_sections = (
        (cost_list, '繁忙趋势', 'dbtime', 'DBCOST', describe_db_time),
        (logicalreads_list, '逻辑读趋势', 'logicalreads', '逻辑读',
         lambda peak: f'{db_name}库计算高峰期数据库逻辑读最大值为：{peak}M/S,正常。'),
        (physicalreads_list, '物理读趋势', 'physicalreads', '物理读',
         lambda peak: f'{db_name}库计算高峰期数据库物理读最大值为：{peak}M/S,正常。'),
        (nxcdtcl_jb_parsehards_list, '硬解析趋势图', 'parsehards', '硬解析',
         lambda peak: f'{db_name}库硬解析最大值为：{peak}次/S,请注意观察硬解析趋势，防范sharepool碎片。'),
    )
    for samples, heading, image_prefix, label, describe in trend_sections:
        # A section is only written when more than two samples were collected.
        if len(samples) > 2:
            _add_trend_section(my_doc, db_name, performance_title_number, heading, image_prefix, label,
                               samples, describe)
            performance_title_number = performance_title_number + 1

    # ----- wait events -----
    get_word_format_title_three(my_doc, f'3.{performance_title_number}{db_name}等待事件')
    performance_title_number = performance_title_number + 1
    for finding in wt_all_db:
        if '等待事件检查' in finding:
            input_word_text(my_doc, finding.replace('。', ''))
            show_table_oracle_event(buff, my_doc, db_name)
            input_word_text(my_doc, get_how_todo(finding))

    # ----- expensive SQL -----
    get_word_format_title_three(my_doc, f'3.{performance_title_number}{db_name} 性能较差SQL')
    performance_title_number = performance_title_number + 1
    for finding in wt_all_db:
        if '高消耗SQL' in finding:
            if len(finding) >= 5 and '无问题' not in finding:
                get_word_format_add_table(my_doc, '', finding)
            else:
                input_word_text(my_doc,'数据库性能良好，采样期间未抓取到高消耗SQL')

    # ================= 4. parameter analysis =================
    get_word_format_title_two(my_doc, f'4、{db_name}参数分析')
    input_word_text(my_doc, '以下章节主要针对主要参数进行分析，提供ORACLE数据库的参数调整建议：')
    for finding in wt_all_db:
        if '数据库参数检查' in finding:
            get_word_format_add_table(my_doc, '', finding)

    # ================= 5. summary of database findings =================
    get_word_format_title_two(my_doc, f"5 、{db_name}数据库巡检问题总结")
    db_pro_count = 1  # display number of the next database finding
    for wt_now in wt_all_db:
        if check_out_list(wt_now):
            continue
        excel_col_number = excel_col_number + 1
        table_hz = my_doc.add_table(rows=2, cols=1, style='Table Grid')
        tabBgColor(table_hz, 1, '8DB3E2')
        wt_display = f'{wt_now}'
        table_hz.rows[0].cells[0].text = f'{db_pro_count}、{wt_display}'
        how_to_do = get_how_todo(wt_display)  # remedy text from the knowledge base
        db_pro_count = db_pro_count + 1
        # A remedy of at most five characters is replaced by a generic hint.
        if len(how_to_do) <= 5:
            how_to_do = '知识不断完善中，请联系维护人员进一步跟进处理。'
        table_hz.rows[1].cells[0].text = f'处理措施：\n {how_to_do}'
        mydocx_table_format(table_hz, 8)
        title_current = wt_display.split(":")[0]
        excel_rowid = excel_rowid + 1
        excel_create_content(sheet, excel_col_number, excel_rowid, db_name, title_current, wt_display, how_to_do,
                             table_return[2], table_return[3], table_return[4], table_return[5], table_return[6])

    if db_pro_count == 1:
        paragraph1 = my_doc.add_paragraph()
        paragraph1.add_run(f'{db_name}数据库本次巡检正常,未发现问题。')
        paragraph1.paragraph_format.first_line_indent = Cm(0.74)

    return excel_col_number


def _write_matching_findings(my_doc, findings, keyword):
    """Write every finding that contains ``keyword`` as body text, in order."""
    for finding in findings:
        if keyword in finding:
            input_word_text(my_doc, finding)


def _add_trend_section(my_doc, db_name, section_no, heading, image_prefix, label, samples, describe):
    """Add one trend sub-section: heading, chart image and a sentence on the peak.

    ``describe`` receives the peak of ``samples`` rounded to two decimals and
    returns the paragraph text. Any failure (missing image, bad sample data)
    is logged and replaced by a short notice so the report is still produced.
    """
    get_word_format_title_three(my_doc, f'3.{section_no}{db_name}{heading}')
    try:
        # Backslashes are escaped explicitly; the resulting path is unchanged.
        my_doc.add_picture(f'D:\\巡检自动化\\dbtime_list\\{image_prefix}_line_{db_name}.gif', width=Inches(6.0),
                           height=Inches(2.2))
        peak = round(np.max(samples), 2)
        my_doc.add_paragraph(describe(peak))
    except Exception as e:
        print(f'数据采集失败，绘制{label}图形失败：{e}')
        my_doc.add_paragraph(f'采集{label}数据失败，不影响巡检结果，可忽略。')
复制

四、其它

因涉及大量小函数，功能较为简单，但数量太多，这里就不再一一贴出，感兴趣的可以查看我之前在墨天轮上的其它文章，对主要过程均做了实现：例如画图、知识库等。

「喜欢这篇文章，您的关注和赞赏是给作者最好的鼓励」

关注作者

Python自动化报告系统

一、代码主要框架说明

二、主函数代码

三、word自动生成部分。

四、其它

评论

相关阅读