使用Selenium破解新浪微博的四宫格验证码

2020-02-15 23:17:08

字体：大中小

来源：转载

供稿：网友

在编写爬虫时经常会遇到验证码，新浪微博的验证码是四宫格形式。

可以采用模板匹配的方式破解：先把验证码的所有情况全部列出来并保存为模板图片，登录时将验证码截图与这些模板图片逐一对比，找出匹配的模板从而得到滑动顺序，再通过Selenium自动按该顺序拖拽，完成破解。

我们将验证码的四个点标注为1、2、3、4，那么所有的情况就是这四个点的全排列，共 4! = 24 种。

每组数字表示箭头依次经过四个点的顺序：

1234	2134	3124	4321
1243	2143	3142	4312
1342	2314	3214	4123
1324	2341	3241	4132
1423	2413	3412	4213
1432	2431	3421	4231

所有的情况就是以上24种。我们将这24种验证码的情况放在一个文件夹内，每张模板图片以对应的数字顺序命名（代码会从文件名中解析出拖动顺序）。登录时用获取的验证码截图去和所有的情况一一对比，找到相似度超过阈值的那一张模板，再按其对应的顺序进行拖动即可。代码如下：

import time
from io import BytesIO
from os import listdir

from PIL import Image
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Weibo account credentials; fill these in before running the script.
USERNAME = ''
PASSWORD = ''


class CrackWeiboSlide():
    """Solve the Weibo four-point pattern captcha by template matching.

    The login page is opened in Chrome, the captcha region is cropped out of
    a page screenshot, and the crop is compared against template images whose
    file names encode the swipe order (for example ``3124.png``). The matching
    order is then replayed as a mouse drag across the four circles.
    """

    # Directory containing the template images, one per swipe order.
    template_dir = 'templates/'

    def __init__(self):
        self.url = 'https://passport.weibo.cn/signin/login'
        self.browser = webdriver.Chrome()
        self.wait = WebDriverWait(self.browser, 20)
        self.username = USERNAME
        self.password = PASSWORD

    def __del__(self):
        # ``browser`` is missing when webdriver.Chrome() raised in __init__.
        browser = getattr(self, 'browser', None)
        if browser is not None:
            # quit() ends the whole driver session; close() only closes the
            # current window.
            browser.quit()

    def open(self):
        """Open the login page, fill in the credentials and submit.

        :return: None
        """
        self.browser.get(self.url)
        username = self.wait.until(
            EC.presence_of_element_located((By.ID, 'loginName')))
        password = self.wait.until(
            EC.presence_of_element_located((By.ID, 'loginPassword')))
        submit = self.wait.until(
            EC.element_to_be_clickable((By.ID, 'loginAction')))
        username.send_keys(self.username)
        password.send_keys(self.password)
        submit.click()

    def get_position(self, retries=3):
        """Locate the captcha element on the page.

        If the captcha does not show up within the wait timeout, the login
        form is submitted again and the wait is repeated.

        :param retries: maximum number of times to wait for the captcha
        :return: ``(top, bottom, left, right)`` of the captcha element
        :raises TimeoutException: if the captcha never appears
        """
        img = None
        for attempt in range(retries):
            try:
                img = self.wait.until(
                    EC.presence_of_element_located(
                        (By.CLASS_NAME, 'patt-shadow')))
                break
            except TimeoutException:
                print('未出现验证码')
                # Only log in again when another wait will follow.
                if attempt < retries - 1:
                    self.open()
        if img is None:
            raise TimeoutException(
                'captcha did not appear after %d attempts' % retries)
        # Give the captcha time to finish rendering before measuring it.
        time.sleep(2)
        location = img.location
        size = img.size
        top = location['y']
        bottom = location['y'] + size['height']
        left = location['x']
        right = location['x'] + size['width']
        return (top, bottom, left, right)

    def get_screenshot(self):
        """Take a screenshot of the current page.

        :return: the screenshot as a PIL image
        """
        png_bytes = self.browser.get_screenshot_as_png()
        # Wrap the raw PNG bytes in a file-like object so PIL can read them.
        return Image.open(BytesIO(png_bytes))

    def get_image(self, name):
        """Crop the captcha out of a page screenshot and save it.

        :param name: file name to save the cropped captcha under
        :return: the cropped captcha as a PIL image
        """
        top, bottom, left, right = self.get_position()
        print('验证码位置', top, bottom, left, right)
        screenshot = self.get_screenshot()
        # crop() takes a single (left, upper, right, lower) box.
        captcha = screenshot.crop((left, top, right, bottom))
        captcha.save(name)
        return captcha

    def detect_image(self, image):
        """Find the template that matches the captcha.

        Only files whose name (before the first dot) is a permutation of
        ``1234`` are treated as templates; anything else in the directory is
        skipped.

        :param image: the captcha image to recognise
        :return: the swipe order as a list of ints, or None if nothing matches
        """
        for template_name in sorted(listdir(self.template_dir)):
            stem = template_name.split('.')[0]
            if sorted(stem) != ['1', '2', '3', '4']:
                continue
            print('正在匹配', template_name)
            with Image.open(self.template_dir + template_name) as template:
                matched = self.same_img(image, template)
            if matched:
                # The file name encodes the order, e.g. "3124" -> [3, 1, 2, 4].
                numbers = [int(digit) for digit in stem]
                print('拖动顺序', numbers)
                return numbers
        return None

    @staticmethod
    def _pixels_close(pixel1, pixel2, threshold=60):
        """Return True if every RGB channel differs by less than threshold."""
        return all(
            abs(a - b) < threshold
            for a, b in zip(pixel1[:3], pixel2[:3]))

    @staticmethod
    def _step_offsets(total, steps):
        """Split ``total`` pixels into ``steps`` integers that sum to it."""
        total = int(round(total))
        return [
            total * (i + 1) // steps - total * i // steps
            for i in range(steps)]

    def is_pixel_equal(self, image1, image2, x, y):
        """Compare one pixel of two images.

        :param image1: first image
        :param image2: second image
        :param x: x coordinate of the pixel
        :param y: y coordinate of the pixel
        :return: True if the two pixels are similar enough
        """
        return self._pixels_close(image1.load()[x, y], image2.load()[x, y])

    def same_img(self, image, template):
        """Decide whether the captcha matches a template.

        :param image: the captcha image to recognise
        :param template: the template image
        :return: True if more than 99% of the captcha's pixels are similar
        """
        threshold = 0.99
        total = image.width * image.height
        if total == 0:
            return False
        # Load the pixel data once instead of once per pixel, and normalise
        # both images to RGB so each pixel is a 3-tuple.
        source = image.convert('RGB').load()
        target = template.convert('RGB').load()
        # Compare only the overlapping area; pixels the template lacks count
        # as mismatches because the ratio is taken over the whole captcha.
        width = min(image.width, template.width)
        height = min(image.height, template.height)
        count = sum(
            1
            for x in range(width)
            for y in range(height)
            if self._pixels_close(source[x, y], target[x, y]))
        if float(count) / total > threshold:
            print('成功匹配')
            return True
        return False

    def move(self, numbers):
        """Drag the mouse across the circles in the given order.

        :param numbers: swipe order, 1-based indexes of the circles
        :return: None
        """
        circles = self.browser.find_elements(
            By.CSS_SELECTOR, '.patt-wrap .patt-circ')
        print('-----------------', circles)
        points = [circles[number - 1] for number in numbers]
        if not points:
            return
        # Press the mouse down on the centre of the first circle.
        ActionChains(self.browser).move_to_element(
            points[0]).click_and_hold().perform()
        steps = 30
        for current, target in zip(points, points[1:]):
            dx = target.location['x'] - current.location['x']
            dy = target.location['y'] - current.location['y']
            # Move slowly, in whole-pixel steps that sum exactly to the
            # target offset so that rounding cannot accumulate into drift.
            for step_x, step_y in zip(self._step_offsets(dx, steps),
                                      self._step_offsets(dy, steps)):
                ActionChains(self.browser).move_by_offset(
                    step_x, step_y).perform()
                time.sleep(1 / steps)
        # Release the mouse on the last circle.
        ActionChains(self.browser).release().perform()

    def crack(self):
        """Entry point: log in, recognise the captcha and replay the swipe.

        :return: None
        """
        self.open()
        image = self.get_image('captcha.png')
        numbers = self.detect_image(image)
        if numbers is None:
            # No template matched, so there is no order to replay.
            print('未匹配到模板')
            return
        self.move(numbers)
        time.sleep(10)
        print('识别结束')


if __name__ == '__main__':
    crack = CrackWeiboSlide()
    crack.crack()

上一篇：python try except 捕获所有异常的实例

下一篇：详解Python发送email的三种方式