fxcop warnings data scraping script

We decided to use fxcop for static analysis of C#. I searched the web to find out which warnings it checks for, but the full data set was not provided anywhere in a convenient form such as Excel. So I put together a simple Python script that scrapes the warning information. It uses the BeautifulSoup4 module to parse the HTML and the xlwt module to save the results to Excel.
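
Both modules are third-party packages available on PyPI; assuming a typical pip setup, they can be installed with pip install beautifulsoup4 xlwt.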

I got held up by a BeautifulSoup encoding problem, but all in all it took about four hours. See the linked post for how the BeautifulSoup encoding problem was resolved.
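
For context, one common way around this kind of problem (a minimal sketch, not necessarily identical to the fix in the linked post) is to tell BeautifulSoup the page encoding explicitly instead of letting it guess:

import bs4

# raw bytes, as returned by urllib.request.urlopen(url).read()
page_content = b'<html><body>...</body></html>'

# Naming the parser and passing from_encoding skips bs4's encoding
# detection, which is what typically misfires when a page's declared
# charset is missing or wrong.
soup = bs4.BeautifulSoup(page_content, "html.parser", from_encoding="utf-8")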

As for the HTML tags left in fields like Cause and HowToFix... I just removed them with Excel's Find & Replace. lol (too lazy!)
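
Stripping them in the script would also have worked: bs4's get_text() drops markup directly. A minimal sketch, using a made-up fragment:

import bs4

fragment = "<p>Do not pass literals as <b>localized</b> parameters.</p>"

# get_text() discards every tag and keeps only the text content,
# which is what the Find & Replace in Excel was approximating.
plain = bs4.BeautifulSoup(fragment, "html.parser").get_text()
print(plain)  # Do not pass literals as localized parameters.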


Running the script below produces an Excel file containing the full list of warnings that fxcop manages.



# -*- coding: utf-8 -*-
"""
Created on Mon Apr 21 13:19:12 2014

@author: Joohyun Lee
"""
import sys
import urllib.request  # urlopen lives in urllib.request on Python 3

import bs4
import xlwt

urls = ['http://msdn.microsoft.com/en-US/library/ms182125(v=vs.80).aspx',
        'http://msdn.microsoft.com/en-US/library/ms182184(v=vs.80).aspx',
        'http://msdn.microsoft.com/en-US/library/ms182193(v=vs.80).aspx',
        'http://msdn.microsoft.com/en-US/library/ms182218(v=vs.80).aspx',
        'http://msdn.microsoft.com/en-US/library/ms182232(v=vs.80).aspx',
        'http://msdn.microsoft.com/en-US/library/ms182260(v=vs.80).aspx',
        'http://msdn.microsoft.com/en-US/library/ms182282(v=vs.80).aspx',
        'http://msdn.microsoft.com/en-US/library/ms182296(v=vs.80).aspx',
        'http://msdn.microsoft.com/en-US/library/ms182324(v=vs.80).aspx']

base_url = 'http://msdn.microsoft.com'

def read_main_warning(t_url, warning_list):
    """Read one category page and collect every warning it links to."""
    print("Try to connect %s ==> " % t_url, end="")
    try:
        f = urllib.request.urlopen(t_url)
    except Exception:
        print("Error!")
        return False

    print("Success!")

    page_content = f.read()

    # Name the parser explicitly; bs4 warns (and may pick a different
    # parser per machine) when it has to guess.
    soup = bs4.BeautifulSoup(page_content, "html.parser")

    # Each entry in the page's TOC at level 2 is one warning.
    sub_warnings = soup.find_all(attrs={"class": "toclevel2"})

    for i, sub_warning in enumerate(sub_warnings, start=1):
        sub_warning_des = sub_warning.a.string
        sub_warning_url = base_url + sub_warning.a['href']

        print("%d : %s" % (i, sub_warning_des))

        td = read_sub_warning(sub_warning_url)
        if td is None:
            continue

        td["RuleDesc1"] = sub_warning_des

        warning_list.append(td)

    return True

def read_sub_warning(t_url):
    """Read a single warning page and return its fields as a dict."""
    sub_warning_data = {}
    print("\tTry to connect %s ==> " % t_url, end="")
    try:
        f = urllib.request.urlopen(t_url)
    except Exception:
        print("Error!")
        return None

    print("Success!")

    page_content = f.read()

    soup = bs4.BeautifulSoup(page_content, "html.parser")

    soup_table_section = soup.find(attrs={"class": "tableSection"})

    # The summary table lists, row by row: TypeName, CheckId, Category
    # and BreakingChange; the value sits in the second cell of each row.
    rows = soup_table_section.table.find_all("tr")
    for key, row in zip(("TypeName", "CheckId", "Category", "BreakingChange"), rows):
        sub_warning_data[key] = row.find_all("td")[1].p.string

    # The Cause / Description / How-to-Fix sections each follow an
    # anchor named sectionToggle0..2; keep their HTML as a string.
    for key, anchor in (("Cause", "sectionToggle0"),
                        ("RuleDesc2", "sectionToggle1"),
                        ("HowToFix", "sectionToggle2")):
        soup_detail = soup.find(attrs={"name": anchor})
        sub_warning_data[key] = str(soup_detail.next_sibling.next_sibling)

    # set URL
    sub_warning_data["URL"] = t_url

    return sub_warning_data

def write_to_excel(file_name, warning_list):
    wbk = xlwt.Workbook(encoding='utf-8')
    ws = wbk.add_sheet("Data")

    columns = ('Category', 'TypeName', 'CheckId', 'BreakingChange',
               'RuleDesc1', 'RuleDesc2', 'Cause', 'HowToFix', 'URL')

    # Header row.
    for col, name in enumerate(columns):
        ws.write(0, col, name)

    # One row per warning, in the same column order as the header.
    for row, item in enumerate(warning_list, start=1):
        for col, name in enumerate(columns):
            try:
                ws.write(row, col, item[name])
            except Exception:
                # xlwt rejects values it cannot store (e.g. over-long
                # strings), which happens for some RuleDesc2 fields.
                ws.write(row, col, "Error!")

    wbk.save(file_name)

if __name__ == "__main__":
    if len(sys.argv) != 2:
        print("fxcop_reader [filename]")
        os._exit(-1)
        
    file_name = sys.argv[1]
    
    warning_list = []
    
    for t_url in urls:
        read_main_warning(t_url, warning_list)
        
    write_to_excel(file_name, warning_list)
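
Assuming the script is saved as fxcop_reader.py (matching the usage message), a run looks like python fxcop_reader.py fxcop_warnings.xls. Note that xlwt writes the legacy .xls format, so the output file name should keep an .xls extension.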