【验证码挑战】天眼查文字点选验证100%免费,思路可拓展其他文字点选
评论
收藏

【验证码挑战】天眼查文字点选验证100%免费,思路可拓展其他文字点选

经验分享
【耐家军】DC
2026-04-22 12:05·浏览量:187
【耐家军】DC
影刀专家
影刀认证工程师
发布于 2026-04-22 12:05 · 187浏览

效果展示

流程参考

py代码

import xbot
from xbot import print, sleep
from . import package
from .package import variables as glv
import ddddocr
import cv2
import numpy as np
from PIL import Image
import requests
from io import BytesIO
import os
import base64
from difflib import SequenceMatcher
# Initialise the two ddddocr engines once at import time so every call reuses them.
# `det` is configured for detection only (used via det.detection to get text boxes);
# `ocr` is configured for recognition only (used via ocr.classification to read text).
# show_ad=False is passed to both constructors.
det = ddddocr.DdddOcr(det=True, ocr=False, show_ad=False)
ocr = ddddocr.DdddOcr(det=False, ocr=True, show_ad=False)

# Similarity score between a target character and a recognised string
def char_sim(c1, c2):
    """Return the difflib similarity ratio between *c1* and *c2*.

    The score is ``SequenceMatcher.ratio()``, a float in ``[0.0, 1.0]``.
    For two single characters it is 1.0 when they are equal and 0.0
    otherwise; for longer strings it reflects the share of matching
    characters.
    """
    matcher = SequenceMatcher(a=c1, b=c2)
    return matcher.ratio()

def _load_image_bytes(image_input, input_type="auto"):
    """Return the raw image bytes for a URL, a base64 string, or a file path.

    ``input_type`` may be ``"auto"`` (guess from the string), ``"url"``,
    ``"base64"`` or ``"file"``. Raises ``ValueError`` for any other value.
    """
    mode = input_type
    if mode == "auto":
        if image_input.startswith(("http://", "https://")):
            mode = "url"
        elif len(image_input) > 500 or "base64" in image_input:
            # Long strings and data URIs are treated as base64 payloads.
            mode = "base64"
        else:
            mode = "file"

    if mode == "url":
        response = requests.get(image_input, timeout=10)
        # Fail loudly instead of feeding an HTTP error page to the detector.
        response.raise_for_status()
        return response.content
    if mode == "base64":
        # Drop an optional "data:image/...;base64," prefix.
        return base64.b64decode(image_input.split(",")[-1])
    if mode == "file":
        with open(image_input, "rb") as f:
            return f.read()
    raise ValueError(
        f"unsupported input_type {input_type!r}; "
        "expected 'auto', 'url', 'base64' or 'file'"
    )


def get_3char_unique_coords(image_input, target_3chars, input_type="auto"):
    """Locate each target character in a click-captcha image.

    The image is loaded, text boxes are found with ``det.detection``, each
    box is cropped and read with ``ocr.classification``, and every target
    character is then paired with one detected box.

    Args:
        image_input: URL (``http://`` / ``https://``), base64 string
            (optionally with a ``data:...;base64,`` prefix) or local file path.
        target_3chars: Iterable of characters to locate, in click order.
            Despite the name, any number of targets is accepted.
        input_type: ``"auto"`` (default) guesses the kind of ``image_input``;
            ``"url"``, ``"base64"`` and ``"file"`` force it.

    Returns:
        dict mapping each target character to ``[center_x, center_y]`` of its
        matched box, inserted in the order of ``target_3chars``. No two
        targets share a coordinate. Targets for which no unused box remains
        are omitted. A character repeated in ``target_3chars`` keeps only the
        last coordinate assigned to it, because the result is keyed by
        character.

    Raises:
        ValueError: if ``input_type`` is unknown or the image bytes cannot be
            decoded.
        requests.HTTPError: if the image URL returns an error status.
    """
    # 1. Load and decode the image.
    img_bytes = _load_image_bytes(image_input, input_type)
    img = cv2.imdecode(np.frombuffer(img_bytes, np.uint8), cv2.IMREAD_COLOR)
    if img is None:
        raise ValueError("image data could not be decoded")
    pil_img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

    # 2. Detect text boxes and order them left to right (by x1).
    boxes = sorted(det.detection(img_bytes), key=lambda b: b[0])

    # (recognised text, [center_x, center_y]) per box, in left-to-right order.
    detected_list = []
    for x1, y1, x2, y2 in boxes:
        if x2 <= x1 or y2 <= y1:
            continue  # an empty crop cannot be encoded or recognised
        buf = BytesIO()
        pil_img.crop((x1, y1, x2, y2)).save(buf, format="JPEG")
        word = ocr.classification(buf.getvalue()).strip()
        detected_list.append((word, [(x1 + x2) // 2, (y1 + y2) // 2]))

    # 3. One-to-one matching. Pairs are taken from the highest similarity
    #    down, so a target the OCR failed to read cannot steal a box that
    #    another target matches exactly. Ties fall back to target order and
    #    then left-to-right box order.
    targets = list(target_3chars)
    candidates = sorted(
        (-char_sim(target_char, word), t_idx, b_idx)
        for t_idx, target_char in enumerate(targets)
        for b_idx, (word, _) in enumerate(detected_list)
    )

    assigned = {}      # target index -> matched coordinate
    used_pos = set()   # coordinates already handed out, never reused
    for _, t_idx, b_idx in candidates:
        pos = detected_list[b_idx][1]
        pos_key = tuple(pos)
        if t_idx in assigned or pos_key in used_pos:
            continue
        assigned[t_idx] = pos
        used_pos.add(pos_key)

    # Build the result in target order so callers can click in sequence.
    result = {}
    for t_idx, target_char in enumerate(targets):
        if t_idx in assigned:
            result[target_char] = assigned[t_idx]
    return result

# Original helper retained: plain OCR over a whole image
def sbwz(image_input):
    """Recognise the text in an image and return it.

    Args:
        image_input: URL (``http://`` / ``https://``), base64 string
            (longer than 500 characters or containing ``"base64"``, with an
            optional ``data:...;base64,`` prefix), or otherwise a local file
            path.

    Returns:
        Whatever ``ocr.classification`` returns for the image bytes.

    Raises:
        requests.HTTPError: if the image URL returns an error status.
    """
    if image_input.startswith(("http://", "https://")):
        response = requests.get(image_input, timeout=10)
        # Do not pass an HTTP error page to the recogniser as if it were an image.
        response.raise_for_status()
        img_bytes = response.content
    elif len(image_input) > 500 or "base64" in image_input:
        # Keep only the payload after an optional data-URI prefix.
        img_bytes = base64.b64decode(image_input.split(",")[-1])
    else:
        with open(image_input, "rb") as f:
            img_bytes = f.read()
    return ocr.classification(img_bytes)

def main(args):
    """Entry point placeholder; accepts *args* and does nothing."""
    return None

所需库

猜你喜欢

手搓系列之天眼查单缺口滑块验证——思路可拓展其他单缺口滑块完全免费通过率99%

【验证码挑战】手搓之文字点选系列,百分百免费借助ddddocr开源库

【验证码挑战】ICP/IP地址/域名备案查询 单滑块验证专用通过率90%

手搓系列之ddddocr解决计算验证码100%免费通过率百分之90以上

DCpage:搞定反验证码网站就是如此简单,py纯免费无需云码接口

【验证码挑战】手搓系列之躲避障碍验证码,方法你绝对想不到

手搓系列之oppo滑块验证通过率90%——思路可拓展其他滑块验证

收藏6
全部评论1
最新
发布评论
评论