提取boss直聘薪酬
评论
收藏

提取boss直聘薪酬

经验分享
袁佳伟
2025-06-20 01:00·浏览量:1402
袁佳伟
影刀专家
影刀认证工程师
发布于 2025-06-20 01:00 · 1402浏览
import requests
import json

from typing import *
try:
    from xbot.app.logging import trace as print
except:
    from xbot import print

from xbot_ai import *

def get_job_salary_info(browser, salary_element, api_url: str) -> str:
    """
    title: 获取招聘网站薪资信息
    description: 当页面显示的%salary_element%薪资信息为乱码时,通过调用%api_url%接口获取正确的薪资数据

    inputs:
        - browser (WebBrowser): 目标网页
        - salary_element (Selector): `薪资显示元素`,uuid: sk5u60hd
        - api_url (str-textbox): API接口地址,用于获取正确的薪资信息,eg: "https://www.zhipin.com/wapi/zpgeek/search/joblist.json?page=1&pageSize=15&city=101270100&expectInfo=&query=Python&multiSubway=&multiBusinessDistrict=&position=&jobType=&salary=&experience=&degree=&industry=&scale=&stage=&scene=1&_=1750227433998"
    outputs:
        - salary_info (str): 正确的薪资信息
    """
    # NOTE: the docstring above follows a structured title/description/inputs/
    # outputs layout that is presumably consumed by the xbot tooling, so its
    # keys and wording are kept in the original language -- TODO confirm.
    #
    # Summary: read the salary text of the first element matched by
    # `salary_element`; if that text passes the check in step 1 it is returned
    # as-is. Otherwise `api_url` is fetched with the page's cookies and the
    # `salaryDesc` of the first entry in `zpData.jobList` is returned.
    #
    # Raises:
    #     Exception: if the HTTP request fails / returns a non-200 status, or
    #         if the response cannot be parsed into the expected structure.

    print("1.检查页面薪资元素显示状态")
    try:
        salary_elements = browser.find_all_by_xpath(salary_element, timeout=3)
        if salary_elements:
            current_text = salary_elements[0].get_text()
            print(f"    a.当前薪资显示文本: {current_text}")
            # NOTE(review): the text is returned only when it contains NONE of
            # '-', 'K', '薪', yet the log line calls that case "normal". This
            # looks inverted relative to the message; behavior is kept as-is
            # until the intended garble detection is confirmed.
            if current_text and not any(char in current_text for char in ('-', 'K', '薪')):
                print("    b.薪资信息显示正常,直接返回")
                return current_text
        else:
            print("    a.未找到薪资元素")
    except Exception as e:
        # Best effort: a failure to read the page falls through to the API call.
        print(f"    a.获取页面薪资元素失败: {str(e)}")

    print("2.调用API接口获取薪资数据")
    try:
        # Read the current page's cookies so the request resembles the browser's.
        cookies_js = """
        function(element, args) {
            return document.cookie;
        }
        """
        cookies_str = browser.execute_javascript(cookies_js)

        # Request headers imitating a desktop browser.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Accept': 'application/json, text/plain, */*',
            'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
            'Referer': 'https://www.zhipin.com/',
            'Cookie': cookies_str
        }

        print("    a.发送API请求")
        response = requests.get(api_url, headers=headers, timeout=10)

        if response.status_code != 200:
            raise Exception(f"API请求失败,状态码: {response.status_code}")

    except Exception as e:
        print(f"    a.API请求异常: {str(e)}")
        raise Exception(f"无法获取薪资数据: {str(e)}") from e

    print("3.解析API响应数据")
    try:
        data = response.json()

        # `zpData` may be missing or null; treat both like an absent job list
        # instead of failing with an opaque AttributeError.
        job_list = (data.get('zpData') or {}).get('jobList')
        if not job_list:
            raise Exception("API响应数据格式异常,未找到职位列表")

        # Only the first job's salary is used.
        first_job = job_list[0]
        if 'salaryDesc' not in first_job:
            raise Exception("职位数据中未找到薪资字段")

        salary_info = first_job['salaryDesc']
        print(f"    a.成功获取薪资信息: {salary_info}")
        return salary_info

    except json.JSONDecodeError as e:
        print("    a.JSON解析失败")
        raise Exception("API响应数据格式错误,无法解析JSON") from e
    except Exception as e:
        print(f"    a.数据解析异常: {str(e)}")
        raise Exception(f"解析薪资数据失败: {str(e)}") from e


收藏3
全部评论1
最新
发布评论
评论