WPS嵌入单元格图片(DISPIMG)提取
评论
收藏

WPS嵌入单元格图片(DISPIMG)提取

经验分享
皮凯
2026-02-02 17:34·浏览量:536
皮凯
影刀高级开发者
发布于 2026-02-02 17:34 · 536浏览

今天看到社区有人问这个问题,然后正好我之前遇到过类似情景,就去找了一下之前的代码,直接发个帖子分享给大家

原理:xlsx 文件本质上是一个 zip 压缩包,其中既包含图片文件本身,也包含记录图片 ID 与图片文件之间映射关系的文件(xl/cellimages.xml 及 xl/_rels/cellimages.xml.rels)。只要正确解析这些文件,就能拿到单元格对应的图片文件。



import zipfile
import xml.etree.ElementTree as ET
import os
import openpyxl

def extract_embedded_image_from_cell(xlsx_file_path, sheet_name, cell_address, output_image_path):
    """
    title: Extract a WPS cell-embedded image
    description: Extract the embedded image of cell % cell_address % from the given xlsx file and save it to % output_image_path %.
    inputs:
        - xlsx_file_path (str): Path of the Excel file, eg: "C:/documents/data.xlsx"
        - sheet_name (str): Worksheet name
        - cell_address (str): Cell address, eg: "A1"
        - output_image_path (str): Path the image is saved to, eg: "C:/output/image.png"
    outputs:
        - result (str): Message describing the outcome, eg: "图片已成功提取并保存到指定路径"
    """
    # NOTE(review): the title/description/inputs/outputs layout and the
    # "% name %" placeholders above look like a schema parsed by the host
    # tool, so the structure is kept intact -- confirm before reformatting.

    # Local import: only needed here, to resolve zip member paths (which
    # always use "/" regardless of the host OS).
    import posixpath

    # Step 1: read the cell formula and pull the image ID out of DISPIMG("ID", ...).
    wb = openpyxl.load_workbook(xlsx_file_path, data_only=False)
    try:
        formula = wb[sheet_name][cell_address].value
    finally:
        wb.close()

    if not (isinstance(formula, str) and 'DISPIMG(' in formula):
        return f"单元格 {cell_address} 不包含有效的 DISPIMG 公式"

    # Search for the quotes *after* the DISPIMG( call so that a quoted string
    # appearing earlier in the formula is not mistaken for the image ID.
    call_pos = formula.find('DISPIMG(')
    start = formula.find('"', call_pos) + 1
    end = formula.find('"', start)
    if start == 0 or end == -1:
        # No quoted argument at all: there is no ID to look up.
        return f"单元格 {cell_address} 不包含有效的 DISPIMG 公式"
    image_id = formula[start:end]

    ns = {
        'xdr': 'http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing',
        'a': 'http://schemas.openxmlformats.org/drawingml/2006/main',
        'r': 'http://schemas.openxmlformats.org/package/2006/relationships'
    }
    embed_attr = '{http://schemas.openxmlformats.org/officeDocument/2006/relationships}embed'

    # Step 2: open the archive once (and close it reliably) to resolve
    # image ID -> relationship ID -> media file, then read the image bytes.
    with zipfile.ZipFile(xlsx_file_path, 'r') as zf:
        try:
            cellimages_xml = zf.read('xl/cellimages.xml')
            rels_xml = zf.read('xl/_rels/cellimages.xml.rels')
        except KeyError:
            return "文件中未找到嵌入图片数据(可能不是WPS生成的xlsx)"

        # Map picture name (the DISPIMG ID) -> relationship ID of its blip.
        name_to_embed = {}
        for pic in ET.fromstring(cellimages_xml).findall('.//xdr:pic', ns):
            props = pic.find('.//xdr:cNvPr', ns)
            blip = pic.find('.//a:blip', ns)
            if props is None or blip is None:
                # Incomplete picture entry; skip it instead of crashing.
                continue
            name = props.get('name')
            if name:
                name_to_embed[name] = blip.get(embed_attr)

        # Map relationship ID -> target path as written in the .rels file.
        embed_to_target = {
            rel.get('Id'): rel.get('Target')
            for rel in ET.fromstring(rels_xml).findall('.//r:Relationship', ns)
        }

        embed_id = name_to_embed.get(image_id)
        target = embed_to_target.get(embed_id) if embed_id else None
        if not target:
            return f"未找到图片ID {image_id} 对应的资源"

        # Relationship targets are relative to the directory of the source
        # part (xl/) unless they start with "/", in which case they are
        # relative to the package root.
        if target.startswith('/'):
            media_path = target.lstrip('/')
        else:
            media_path = posixpath.normpath(posixpath.join('xl', target))

        try:
            image_bytes = zf.read(media_path)
        except KeyError:
            return f"图片资源 {media_path} 不存在于文件中"

    # Step 3: write the image. A bare filename has an empty dirname, and
    # os.makedirs("") raises, so only create the directory when there is one.
    output_dir = os.path.dirname(output_image_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    with open(output_image_path, 'wb') as out_file:
        out_file.write(image_bytes)

    return f"图片已成功提取并保存到 {output_image_path}"


收藏5
全部评论1
最新
发布评论
评论