Python实现一个代码行数统计工具(易拓展到其他语言版)
前情提要及单种语言的代码行统计思路,详见我另一篇文章:Python实现一个代码行数统计工具(以C/C++为例)
关于如何做到容易拓展到其他语言,我想到的方法是用列表把不同语言的续行符、注释符、字符串符分别存起来,再根据文件后缀确定该用哪一组符号。这只是我自己的一种思路,感觉比较一般,有想法的读者可以进一步优化。
具体代码如下:
from queue import Empty
import sys
import os
from enum import Enum
import time
from unittest.mock import patch
import threading


class Status(Enum):
    """Scanner state while walking through the characters of a source line."""

    Init = 1           # kept from the original enumeration; not used by the scanner
    Common = 2         # ordinary code
    CharString = 3     # inside a string literal
    LineComment = 4    # inside a line comment
    BlockComments = 5  # inside a block comment


class Counter:
    """Line-count statistics (total / code / comment / blank) for source files.

    The class attributes ``Line_numbers``, ``Code``, ``total_comment_numbers``
    and ``Blanks`` accumulate the totals over every file passed to
    :meth:`CodeCounter`.
    """

    Line_numbers = 0
    Code = 0
    total_comment_numbers = 0
    Blanks = 0

    # ---------------------------------------------------------------------
    # Language table: maintain this section to support another language.
    # Every list is indexed in parallel with ``extendList``.
    extendList = [".h", ".cpp", ".hpp", ".rb"]          # file extensions
    LineCommentSymbol = ["//", "//", "//", "#"]         # starts a line comment
    BlockCommentSymbol = ["/*", "/*", "/*", "=begin"]   # opens a block comment
    ExitBlockCommentSymbol = ["*/", "*/", "*/", "=end"]  # closes a block comment
    CharStringSymbol = [['"'], ['"'], ['"'], ['"', "'"]]  # string delimiters
    CombinationSymbol = ["\\", "\\", "\\", "\\"]        # line-continuation symbol
    # Whether a trailing continuation symbol extends a *line comment* onto the
    # next line (true for C/C++, where splicing happens before comments are
    # recognised; not true for Ruby).
    CommentContinues = [True, True, True, False]
    # ---------------------------------------------------------------------

    # Serialises updates of the shared totals made by worker threads.
    _totals_lock = threading.Lock()

    @staticmethod
    def get_filelist(dir, Filelist):
        """Collect every supported source file at or below ``dir``.

        Args:
            dir: Path of a file or directory.
            Filelist: List that matching file paths are appended to.

        Returns:
            The same ``Filelist`` object, for convenience.
        """
        if os.path.isfile(dir):
            # Only keep files whose extension is in the language table.
            if os.path.splitext(dir)[1] in Counter.extendList:
                Filelist.append(dir)
        elif os.path.isdir(dir):
            # Sorted so the resulting order does not depend on the file system.
            for entry in sorted(os.listdir(dir)):
                Counter.get_filelist(os.path.join(dir, entry), Filelist)
        return Filelist

    @staticmethod
    def _read_lines(filename):
        """Return the lines of ``filename``, trying UTF-8 first and then GBK."""
        try:
            with open(filename, encoding="utf-8") as fp:
                return fp.readlines()
        except UnicodeDecodeError:
            # Fall back to the original behaviour for legacy GBK sources.
            with open(filename, encoding="gbk", errors="ignore") as fp:
                return fp.readlines()

    @staticmethod
    def _scan_lines(lines, extendIdx):
        """Classify ``lines`` and return ``(total, empty, code, comment)``.

        Each physical line is classified on its own; a line may count as both
        code and comment. ``extendIdx`` selects the language-table column.
        """
        quotes = Counter.CharStringSymbol[extendIdx]
        line_comment = Counter.LineCommentSymbol[extendIdx]
        block_open = Counter.BlockCommentSymbol[extendIdx]
        block_close = Counter.ExitBlockCommentSymbol[extendIdx]
        continuation = Counter.CombinationSymbol[extendIdx]
        comment_continues = Counter.CommentContinues[extendIdx]

        codes_numbers = 0
        empty = 0
        comment_numbers = 0
        state = Status.Common
        string_quote = ""  # delimiter that opened the current string literal

        for raw in lines:
            line = raw.strip()
            if not line:
                empty += 1
                # A blank line ends any continued line comment or string.
                if state != Status.BlockComments:
                    state = Status.Common
                continue

            continued = bool(continuation) and raw.rstrip("\r\n").endswith(continuation)
            # A line that starts inside a continued string is code; one that
            # starts inside a comment is a comment line.
            is_effective_code = state == Status.CharString
            has_comment = state in (Status.LineComment, Status.BlockComments)
            skip = 0  # characters still to skip (tail of a multi-char symbol)

            for i, ch in enumerate(line):
                if skip:
                    skip -= 1
                    continue
                if state == Status.Common:
                    opener = next((q for q in quotes if q and line.startswith(q, i)), None)
                    if opener is not None:
                        is_effective_code = True
                        state = Status.CharString
                        string_quote = opener
                        skip = len(opener) - 1
                    elif line_comment and line.startswith(line_comment, i):
                        state = Status.LineComment
                        has_comment = True
                        break  # the rest of the line is comment text
                    elif block_open and line.startswith(block_open, i):
                        state = Status.BlockComments
                        has_comment = True
                        skip = len(block_open) - 1
                    elif not ch.isspace():
                        is_effective_code = True
                elif state == Status.CharString:
                    if ch == "\\":
                        skip = 1  # escaped character cannot close the string
                    elif line.startswith(string_quote, i):
                        state = Status.Common
                        skip = len(string_quote) - 1
                elif state == Status.BlockComments:
                    if line.startswith(block_close, i):
                        state = Status.Common
                        skip = len(block_close) - 1
                else:
                    break  # continued line comment: nothing left to inspect

            if is_effective_code:
                codes_numbers += 1
            if has_comment:
                comment_numbers += 1

            # Block comments always span lines; line comments and strings only
            # do so when the physical line ends with the continuation symbol.
            if state == Status.BlockComments:
                pass
            elif state == Status.LineComment and continued and comment_continues:
                pass
            elif state == Status.CharString and continued:
                pass
            else:
                state = Status.Common

        total = len(lines)
        # Convention of this tool: a file ending with a newline is treated as
        # having one more, empty, final line.
        if lines and lines[-1].endswith("\n"):
            total += 1
            empty += 1
        return total, empty, codes_numbers, comment_numbers

    @staticmethod
    def CodeCounter(filename, path):
        """Count the lines of one file, print a report and update the totals.

        Args:
            filename: Path of the source file; its extension must be listed in
                ``Counter.extendList``.
            path: Root path given by the user; stripped from ``filename`` in
                the printed report.

        Raises:
            ValueError: If the extension of ``filename`` is not supported.
        """
        extendIdx = Counter.extendList.index(os.path.splitext(filename)[1])
        lines = Counter._read_lines(filename)
        total, empty, codes_numbers, comment_numbers = Counter._scan_lines(lines, extendIdx)

        shown_name = filename.replace(path + os.sep, "")
        # The totals are shared by all worker threads, so update them atomically.
        with Counter._totals_lock:
            print("file:{0} total:{1} empty:{2} effective:{3} comment:{4}".format(
                shown_name, total, empty, codes_numbers, comment_numbers))
            Counter.Line_numbers += total
            Counter.Blanks += empty
            Counter.Code += codes_numbers
            Counter.total_comment_numbers += comment_numbers


def main(argv=None):
    """Scan the path given on the command line and print the summary table.

    Args:
        argv: Argument list without the program name; defaults to
            ``sys.argv[1:]``.

    Returns:
        Process exit status: 0 on success, 2 when no path was given.
    """
    args = sys.argv[1:] if argv is None else argv
    if not args:
        print("usage: {0} <file-or-directory>".format(os.path.basename(sys.argv[0])),
              file=sys.stderr)
        return 2

    path = os.path.abspath(args[0])
    files = Counter.get_filelist(path, [])

    threads = [threading.Thread(target=Counter.CodeCounter, args=(name, path))
               for name in files]
    for worker in threads:
        worker.start()
    # Wait for every worker, not just the last one, before printing totals.
    for worker in threads:
        worker.join()

    print("-"*56)
    print("- {0:<10} {1:<10} {2:<10} {3:<10} {4:<10}".format("Files", "Lines", "Code", "Comments", "Blanks"))
    print("-"*56)
    print(" {0:<10} {1:<10} {2:<10} {3:<10} {4:<10}".format(len(files), Counter.Line_numbers, Counter.Code, Counter.total_comment_numbers, Counter.Blanks))
    print("-"*56)
    return 0


if __name__ == "__main__":
    sys.exit(main())
若文中有错误,还请大佬在评论区指正,我会好好学习和改进,谢谢大佬们🙇
本文来自互联网用户投稿,文章观点仅代表作者本人,不代表本站立场,不承担相关法律责任。如若转载,请注明出处。 如若内容造成侵权/违法违规/事实不符,请点击【内容举报】进行投诉反馈!
