御剑字典合并筛选去重工具集合

合并筛选

将wordlists目录下所有文件中的内容整理筛选后，写入到script目录中对应后缀的文件中。

如：若wordlists目录下2.txt的第三行后缀是.php，则该行将被写入到script目录的php.txt中。

#!/usr/bin/env python
#-*- coding:utf-8 -*-

##########################################################################
######					路径扫描字典分类工具					 #############
######					Author: Pino_HD					     #############
######									By T00ls.Net		 #############
##########################################################################

import os
import time

# Category names; each one corresponds to a "<name>.txt" output file.
# NOTE(review): this list is not referenced anywhere in the visible script.
ext = ['php', 'jsp', 'jspx', 'asp', 'aspx', 'dir', 'mdb']

def open_files():
	"""Open the seven category output files in append/read mode.

	The files are opened relative to the current working directory and are
	created if they do not exist yet.

	Returns:
		A 7-tuple of open file objects, in this fixed order:
		asp, aspx, php, jsp, jspx, dir, other (backed by ``mdb.txt``).
	"""
	# The order matters: callers unpack the result positionally.
	targets = (
		'asp.txt',
		'aspx.txt',
		'php.txt',
		'jsp.txt',
		'jspx.txt',
		'dir.txt',
		'mdb.txt',
	)
	opened = []
	for target in targets:
		opened.append(open(target, 'a+'))
	return tuple(opened)

def close_files(f_asp, f_aspx, f_php, f_jsp, f_jspx, f_dir, f_other):
	"""Close every category output file.

	Args:
		f_asp, f_aspx, f_php, f_jsp, f_jspx, f_dir, f_other: open file
			objects, one per category.

	All seven handles are closed, including ``f_jspx``; leaving a handle
	open leaks it and may leave buffered writes unflushed.
	"""
	for handle in (f_asp, f_aspx, f_php, f_jsp, f_jspx, f_dir, f_other):
		handle.close()

if __name__ == '__main__':
	# Read every regular file in ../wordlists/ and append each line to the
	# category file chosen by the extension substring the line contains.
	# Output files are opened by open_files() in the current working directory.
	starttime = time.time()
	path = "../wordlists/"
	count = 0
	handles = open_files()
	f_asp, f_aspx, f_php, f_jsp, f_jspx, f_dir, f_other = handles
	try:
		for file in os.listdir(path):
			full_path = os.path.join(path, file)
			if not os.path.isfile(full_path):
				continue
			print (file)
			with open(full_path) as fp:
				# Stream line by line instead of loading the whole file.
				for line in fp:
					count = count + 1
					# The last line of a file may lack a newline; add one so
					# it is not fused with the next entry written to the
					# same output file.
					if not line.endswith('\n'):
						line = line + '\n'
					# '.aspx' / '.jspx' are tested before '.asp' / '.jsp'
					# because the shorter extension is a substring of the
					# longer one. '.php' keeps priority over the jsp family.
					if '.aspx' in line:
						f_aspx.write(line)
					elif '.asp' in line:
						f_asp.write(line)
					elif '.php' in line:
						f_php.write(line)
					elif '.jspx' in line:
						f_jspx.write(line)
					elif '.jsp' in line:
						f_jsp.write(line)
					elif '.' not in line:
						# No dot at all: treated as a directory name.
						f_dir.write(line)
					else:
						f_other.write(line)
	finally:
		# Always release the output files, even if reading an input fails.
		close_files(*handles)
	endtime = time.time()
	costtime = endtime - starttime
	print ('[OK]Job finishes !!!')
	print ('[OK]Total ' + str(count) +' lines')
	print ('[OK]Costs ' + str(costtime) + ' \'s')

去重复

#coding=utf-8
 
import sys, re, os
 
def getDictList(dict):
    """Read a wordlist file and return every token found in it.

    A token is a maximal run of word characters and the punctuation listed
    in the pattern below; any other character (whitespace, quotes, ``|``,
    backslash, ...) acts as a separator.

    Args:
        dict: path of the wordlist file to read. (The name shadows the
            builtin but is kept so keyword callers keep working.)

    Returns:
        A list of token strings in file order, duplicates included.
    """
    # The previous pattern contained the HTML entities "&amp;", "&lt;" and
    # "&gt;" instead of "&", "<" and ">". As a result "<" and ">" were not
    # part of the character class and split tokens. A raw string keeps the
    # backslashes from being read as string escapes.
    regx = r'''[\w\~`\!\@\#\$\%\^\&\*\(\)\_\-\+\=\[\]\{\}\:\;\,\.\/\<\>\?]+'''
    with open(dict) as f:
        data = f.read()
    return re.findall(regx, data)
 
def rmdp(dictList):
    """Remove duplicate entries, keeping the first occurrence of each.

    Args:
        dictList: iterable of hashable entries (strings in this tool).

    Returns:
        A new list with each distinct entry once, in the order it first
        appeared. Unlike ``list(set(...))``, the result is deterministic and
        preserves the input ordering.
    """
    seen = set()
    unique = []
    for entry in dictList:
        if entry not in seen:
            seen.add(entry)
            unique.append(entry)
    return unique
 
def fileSave(dictRmdp, out):
    """Append each entry to the output file, one entry per line.

    Args:
        dictRmdp: iterable of strings to write.
        out: path of the output file; it is opened in append mode, so any
            existing content is kept and new entries are added after it.
    """
    with open(out, 'a') as sink:
        sink.writelines(entry + '\n' for entry in dictRmdp)
 
def main():
    """Command-line entry point: de-duplicate a wordlist file.

    Expects two positional arguments in ``sys.argv``: the input wordlist
    path and the output path. If either is missing, prints the error and a
    usage message, then exits with status 1.

    Raises:
        SystemExit: when fewer than two arguments are supplied.
    """
    try:
        src = sys.argv[1].strip()
        out = sys.argv[2].strip()
    except IndexError as e:
        # A missing argument is the only failure expected here. The
        # single-argument print(...) form runs on both Python 2 and 3.
        print('error: %s' % e)
        me = os.path.basename(__file__)
        print('usage: %s <input> <output>' % me)
        print('example: %s dict.txt dict_rmdp.txt' % me)
        sys.exit(1)

    dictList = getDictList(src)
    dictRmdp = rmdp(dictList)
    fileSave(dictRmdp, out)
     
# Run the de-duplication tool only when executed as a script, not on import.
if __name__ == '__main__':
    main()

本文作者为Mr.Wu，转载请注明，遵守博主劳动成果！

由于经常折腾代码，可能会导致个别文章内容显示错位或者别的 BUG 影响阅读;
如发现请在该文章下留言告知于我，thank you !

御剑字典合并筛选去重工具集合