I have mostly been using subDomainsBrute.py to brute-force subdomains lately, but its support for third-level domains is not great. A friend pointed out that http://i.links.cn/subdomain/ can be queried for them, so after a quick test I wrote a small script to make the lookups easier.
#! /usr/bin/env python
# -*- coding: utf-8 -*-
import requests
import re
import sys

def get_domain(domain):
    headers = {
        "Content-Type": "application/x-www-form-urlencoded",
        "Referer": "http://i.links.cn/subdomain/",
    }
    # the b2/b3/b4 flags match the checkbox fields on the query form
    payload = "domain={domain}&b2=1&b3=1&b4=1".format(domain=domain)
    # send the form as the request body, not as query-string parameters
    r = requests.post("http://i.links.cn/subdomain/", data=payload, headers=headers)
    html = r.text.encode('ISO-8859-1')
    regex = re.compile('value="(.+?)"><input')
    result = regex.findall(html)
    print '\n'.join(result)

if __name__ == "__main__":
    commandargs = sys.argv[1:]
    args = "".join(commandargs)
    get_domain(args)

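Assuming the script above is saved as, say, links_sub.py (the filename is arbitrary), a lookup is a single command:

python links_sub.py wooyun.org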
Comparing the output, it really does turn up third-level domains. To widen the coverage, I then put together a bigger script that also pulls results from Bing, Google, Baidu, and 360:

#!/usr/bin/env python
# encoding: utf-8
import re
import sys
import json
import time
import socket
import random
import urllib
import urllib2
import traceback
from bs4 import BeautifulSoup

# pick a random User-Agent for each request
USER_AGENTS = [
    "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)",
]

def random_useragent():
    return random.choice(USER_AGENTS)
def getUrlRespHtml(url):
    respHtml = ''
    try:
        heads = {'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
                 'Accept-Charset': 'GB2312,utf-8;q=0.7,*;q=0.7',
                 'Accept-Language': 'zh-cn,zh;q=0.5',
                 'Cache-Control': 'max-age=0',
                 'Connection': 'keep-alive',
                 'Keep-Alive': '115',
                 'User-Agent': random_useragent()}
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor())
        urllib2.install_opener(opener)
        req = urllib2.Request(url)
        opener.addheaders = heads.items()
        respHtml = opener.open(req).read()
    except Exception:
        pass
    return respHtml
def links_get(domain):
    # the data from links is not very complete, so accuracy is not guaranteed
    trytime = 0
    domainslinks = []
    while True:
        try:
            req = urllib2.Request('http://i.links.cn/subdomain/?b2=1&b3=1&b4=1&domain=' + domain)
            req.add_header('User-Agent', random_useragent())
            res = urllib2.urlopen(req, timeout=30)
            src = res.read()
            TempD = re.findall('value="http.*?">', src, re.S)
            for item in TempD:
                item = item[item.find('//') + 2:-2]
                #result=socket.getaddrinfo(item,None)
                #print result[0][4]
                domainslinks.append(item)
            domainslinks = {}.fromkeys(domainslinks).keys()
            return domainslinks
        except Exception:
            # retry a few times, then give up and return whatever we have
            trytime += 1
            if trytime > 3:
                return domainslinks
def bing_get(domain):
    # the data pulled from Bing is not complete either
    trytime = 0
    f = 1
    domainsbing = []
    while True:
        try:
            req = urllib2.Request('http://cn.bing.com/search?count=50&q=site:' + domain + '&first=' + str(f))
            req.add_header('User-Agent', random_useragent())
            res = urllib2.urlopen(req, timeout=30)
            src = res.read()
            TempD = re.findall('<cite>(.*?)<\/cite>', src)
            for item in TempD:
                item = item.split('<strong>')[0]
                item += domain
                try:
                    if not (item.startswith('http://') or item.startswith('https://')):
                        item = "http://" + item
                    proto, rest = urllib2.splittype(item)
                    host, rest = urllib2.splithost(rest)
                    host, port = urllib2.splitport(host)
                    if port is None:
                        item = host
                    else:
                        item = host + ":" + port
                except:
                    print traceback.format_exc()
                domainsbing.append(item)
            if f < 500 and re.search('class="sb_pagN"', src) is not None:
                # there is a "next page" link, so keep paging
                f = int(f) + 50
            else:
                subdomainbing = {}.fromkeys(domainsbing).keys()
                return subdomainbing
        except Exception:
            trytime += 1
            if trytime > 3:
                return domainsbing
def google_get(domain):
    # Google has to be reachable (e.g. via a hosts entry) for this to work
    trytime = 0
    s = 1
    domainsgoogle = []
    while True:
        try:
            req = urllib2.Request('http://ajax.googleapis.com/ajax/services/search/web?v=1.0&q=site:' + domain + '&rsz=8&start=' + str(s))
            req.add_header('User-Agent', random_useragent())
            res = urllib2.urlopen(req, timeout=30)
            src = res.read()
            results = json.loads(src)
            TempD = results['responseData']['results']
            for item in TempD:
                item = item['visibleUrl']
                item = item.encode('utf-8')
                domainsgoogle.append(item)
            s = int(s) + 8
        except Exception:
            # stop after a few failures (or once the result pages run out)
            trytime += 1
            if trytime >= 3:
                domainsgoogle = {}.fromkeys(domainsgoogle).keys()
                return domainsgoogle
def Baidu_get(domain):
    domainsbaidu = []
    try:
        pg = 10
        for x in xrange(1, pg):
            rn = 50
            pn = (x - 1) * rn
            url = 'http://www.baidu.com/baidu?cl=3&tn=baidutop10&wd=site:' + domain.strip() + '&rn=' + str(rn) + '&pn=' + str(pn)
            src = getUrlRespHtml(url)
            soup = BeautifulSoup(src, "html.parser")
            html = soup.find('div', id="content_left")
            if html:
                html_doc = html.find_all('h3', class_="t")
                if html_doc:
                    for doc in html_doc:
                        href = doc.find('a')
                        link = href.get('href')
                        # Baidu links are redirects, so a second request is needed
                        # to follow the 302 and get the real URL (this is slow)
                        rurl = urllib.unquote(urllib2.urlopen(link.strip()).geturl()).strip()
                        reg = 'http:\/\/[^\.]+' + '.' + domain
                        match_url = re.search(reg, rurl)
                        if match_url:
                            item = match_url.group(0).replace('http://', '')
                            domainsbaidu.append(item)
    except Exception:
        pass
    domainsbaidu = {}.fromkeys(domainsbaidu).keys()
    return domainsbaidu
def get_360(domain):
    # 360 WebScan data is usually added by the site admins themselves,
    # so its accuracy tends to be higher
    domains360 = []
    try:
        url = 'http://webscan.360.cn/sub/index/?url=' + domain.strip()
        src = getUrlRespHtml(url)
        item = re.findall(r'\)">(.*?)</strong>', src)
        if len(item) > 0:
            for i in xrange(1, len(item)):
                domains360.append(item[i])
    except Exception:
        pass
    domains360 = {}.fromkeys(domains360).keys()
    return domains360
def get_subdomain_run(domain):
    mydomains = []
    mydomains.extend(links_get(domain))
    mydomains.extend(bing_get(domain))
    mydomains.extend(Baidu_get(domain))
    mydomains.extend(google_get(domain))
    mydomains.extend(get_360(domain))
    mydomains = list(set(mydomains))
    return mydomains

if __name__ == "__main__":
    if len(sys.argv) == 2:
        print get_subdomain_run(sys.argv[1])
        sys.exit(0)
    else:
        print ("usage: %s domain" % sys.argv[0])
        sys.exit(-1)
python mysubdomain.py wooyun.org
['www.wooyun.org', 'zone.wooyun.org', 'summit.wooyun.org', 'ce.wooyun.org', 'drops.wooyun.org', 'wooyun.org', 'wiki.wooyun.org', 'z.wooyun.org', 'job.wooyun.org', 'zhuanlan.wooyun.org', 'www2d00.wooyun.org', 'test.wooyun.org', 'en.wooyun.org', 'api.wooyun.org', 'paper.wooyun.org', 'edu.wooyun.org']
2016.1.28: added scraping of Baidu and 360 search results.
python mysubdomain.py jd.cn
['temp1.jd.cn', 'ngb.jd.cn', 'www.fy.jd.cn', 'dangan.jd.cn', 'rd.jd.cn', 'bb.jd.cn', 'www.jd.cn', 'bjxc.jd.cn', 'www.xnz.jd.cn', 'jw.jd.cn', 'www.gsj.jd.cn', 'www.wuqiao.jd.cn', 'nlj.jd.cn', 'czj.jd.cn', 'www.smj.jd.cn', 'zfrx.jd.cn', 'www.jjjc.jd.cn', 'gtj.jd.cn', 'bbs.jd.cn', 'hbcy.jd.cn', 'lcsq.xnz.jd.cn', 'jtj.jd.cn', 'www.nkj.jd.cn', 'zx.jd.cn', 'www.daj.jd.cn', 'www.hbcy.jd.cn', 'slj.jd.cn', 'kfq.jd.cn', 'www.jxw.jd.cn', 'jwxxw.jd.cn', 'www.kx.jd.cn', 'qxj.jd.cn', 'www.sjj.jd.cn', 'www.jfw.jd.cn', 'www.dqz.jd.cn', 'yl.jd.cn', 'www.tw.jd.cn', 'www.qxj.jd.cn', 'www.dwzw.jd.cn', 'www.czj.jd.cn', 'www.ajj.jd.cn', 'www.gxs.jd.cn', 'www.dx.jd.cn', 'sjj.jd.cn', 'www.jtj.jd.cn', 'www.wjj.jd.cn', 'www.mzj.jd.cn', 'www.cgj.jd.cn', 'jsj.jd.cn', 'www.dangan.jd.cn', 'www.wlj.jd.cn', 'www.mj.jd.cn', 'www.zwz.jd.cn', 'www.sf.jd.cn', 'www.sbz.jd.cn', 'www.cl.jd.cn', 'fzb.jd.cn', 'ajj.jd.cn', 'www.rsj.jd.cn', 'www.jdz.jd.cn', 'www.xh.jd.cn', 'qzlxjysj.jd.cn', 'www.wjmj.jd.cn', 'www.sbdw.jd.cn', 'www.flower.jd.cn', 'www.kjj.jd.cn', 'www.yjj.jd.cn', 'wjj.jd.cn', 'jdz.jd.cn', 'www.cb.jd.cn', 'www.ptz.jd.cn', 'nkj.jd.cn', '333.jd.cn', 'www.dxs.jd.cn', 'www.cxy.jd.cn', 'www.wjz.jd.cn', 'www.fzb.jd.cn', 'login.jd.cn', 'ldj.jd.cn', 'jfw.jd.cn', 'www.zfcg.jd.cn', 'www.kfq.jd.cn', 'www.dhz.jd.cn', 'www.zfrx.jd.cn', 'www.rd.jd.cn', 'dxs.jd.cn', 'jggw.jd.cn', 'www.yilin.jd.cn', 'www.tjj.jd.cn', 'www.zfw.jd.cn', 'g.jd.cn', 'www.rc.jd.cn', 'yfsq.xnz.jd.cn', 'www.wqz.jd.cn', 'zfcg.jd.cn', 'fgj.jd.cn', 'hbj.jd.cn', 'fgw.jd.cn', 'www.acd.jd.cn', 'sfj.jd.cn', 'www.zx.jd.cn', 'kx.jd.cn', 'www.ylz.jd.cn', 'www.zhenwu.jd.cn', 'fcz.jd.cn', 'tjj.jd.cn', 'kjj.jd.cn', 'gjj.jd.cn', 'cl.jd.cn', 'www.njj.jd.cn', 'www.slj.jd.cn', 'www.ldj.jd.cn', 'www.jsj.jd.cn', 'zfw.jd.cn', 'news.jd.cn', 'tw.jd.cn', 'www.dgz.jd.cn', 'yjj.jd.cn', 'njj.jd.cn', 'www.jggw.jd.cn', 'www.gjj.jd.cn', 'www.kp.jd.cn', 'www.qx.jd.cn', 'lsj.jd.cn', 'www.hbj.jd.cn', 'www.gcz.jd.cn', 'rc.jd.cn', 'jd.cn', 'jgj.jd.cn', 'jjjc.jd.cn', 'www.wsj.jd.cn', 'rsj.jd.cn', 'www.syb.jd.cn', 'files.jd.cn', 'www.jgj.jd.cn', 'www.xjz.jd.cn', 'fkb.jd.cn', 'qx.jd.cn', 'gsl.jd.cn', 'ptz.jd.cn', 'zzb.jd.cn', 'www.zjj.jd.cn', 'www.rfb.jd.cn', 'cb.jd.cn', 'www.fgj.jd.cn', 'www.da.jd.cn', 'www.lsj.jd.cn', 'www.fcz.jd.cn', 'www.ngb.jd.cn', 'www.sbzs.jd.cn', 'sf.jd.cn', 'www.jsw.jd.cn']
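Since these lists come from search engines and third-party indexes, some entries may no longer exist. The commented-out socket.getaddrinfo call in links_get hints at an easy follow-up check; here is a minimal sketch (resolve_filter is a hypothetical helper, not part of the script above) that keeps only the subdomains that still resolve:

import socket

def resolve_filter(domains):
    # hypothetical helper: keep only subdomains that still resolve to an address
    alive = []
    for d in domains:
        try:
            socket.getaddrinfo(d, None)
            alive.append(d)
        except socket.gaierror:
            pass
    return alive

# e.g. print resolve_filter(get_subdomain_run('wooyun.org'))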