今天看到社区有人问这个问题,然后正好我之前遇到过类似情景,就去找了一下之前的代码,直接发个帖子分享给大家
原理:xlsx 文件本质上是一个 zip 压缩包,其中既包含图片文件本身,也包含记录图片 ID 与图片文件映射关系的 XML 文件(xl/cellimages.xml 及 xl/_rels/cellimages.xml.rels);只要正确解析这些文件,就能取出单元格对应的图片。
import zipfile
import xml.etree.ElementTree as ET
import os
import openpyxl
def extract_embedded_image_from_cell(
    xlsx_file_path: str,
    sheet_name: str,
    cell_address: str,
    output_image_path: str,
) -> str:
    """
    title: Extract a WPS cell-embedded image
    description: Extract the embedded image of cell % cell_address % from the given xlsx file and save it to % output_image_path %.
    inputs:
        - xlsx_file_path (str): Path of the Excel file, eg: "C:/documents/data.xlsx"
        - sheet_name (str): Name of the worksheet
        - cell_address (str): Address of the cell, eg: "A1"
        - output_image_path (str): Path the image is saved to, eg: "C:/output/image.png"
    outputs:
        - result (str): Message describing the outcome (success, or the reason nothing was extracted)
    """
    # Expected failures (no DISPIMG formula, no cellimages part, unknown image
    # ID, missing media file) are reported through the returned message rather
    # than by raising. A missing worksheet is not handled here: whatever
    # openpyxl raises for ``wb[sheet_name]`` propagates to the caller.

    # Step 1: read the cell formula and pull the image ID out of
    # DISPIMG("<id>", ...). data_only=False keeps the formula text instead of
    # a cached value.
    wb = openpyxl.load_workbook(xlsx_file_path, data_only=False)
    ws = wb[sheet_name]
    formula = ws[cell_address].value
    invalid_formula_msg = f"单元格 {cell_address} 不包含有效的 DISPIMG 公式"
    if not (isinstance(formula, str) and 'DISPIMG(' in formula):
        return invalid_formula_msg

    # Search for the quoted ID only *after* "DISPIMG(" so that another string
    # literal earlier in the formula is not mistaken for the image ID.
    after_call = formula.partition('DISPIMG(')[2]
    quoted = after_call.split('"', 2)
    if len(quoted) < 3:
        # No complete "..." pair follows DISPIMG( -> nothing usable as an ID.
        return invalid_formula_msg
    image_id = quoted[1]

    ns = {
        'xdr': 'http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing',
        'a': 'http://schemas.openxmlformats.org/drawingml/2006/main',
        'r': 'http://schemas.openxmlformats.org/package/2006/relationships',
    }
    embed_attr = '{http://schemas.openxmlformats.org/officeDocument/2006/relationships}embed'

    # Step 2: parse cellimages.xml and its relationship file to resolve
    # image ID -> relationship ID -> media path. The archive is opened once
    # and closed by the ``with`` block on every exit path.
    with zipfile.ZipFile(xlsx_file_path, 'r') as zf:
        try:
            cellimages_xml = zf.read('xl/cellimages.xml')
            rels_xml = zf.read('xl/_rels/cellimages.xml.rels')
        except KeyError:
            return "文件中未找到嵌入图片数据(可能不是WPS生成的xlsx)"

        # Picture name (the DISPIMG ID) -> relationship ID of its blip.
        name_to_embed = {}
        for pic in ET.fromstring(cellimages_xml).findall('.//xdr:pic', ns):
            props = pic.find('.//xdr:cNvPr', ns)
            blip = pic.find('.//a:blip', ns)
            if props is None or blip is None:
                # Skip incomplete picture entries instead of crashing on them.
                continue
            name = props.get('name')
            if name:
                name_to_embed[name] = blip.get(embed_attr)

        # Relationship ID -> target path as written in the .rels file.
        embed_to_target = {
            rel.get('Id'): rel.get('Target')
            for rel in ET.fromstring(rels_xml).findall('.//r:Relationship', ns)
        }

        embed_id = name_to_embed.get(image_id)
        target = embed_to_target.get(embed_id) if embed_id else None
        if not target:
            return f"未找到图片ID {image_id} 对应的资源"

        # A target starting with "/" is relative to the package root; any
        # other target is relative to the folder of cellimages.xml ("xl/").
        if target.startswith('/'):
            media_path = target.lstrip('/')
        else:
            media_path = f"xl/{target}"

        try:
            image_bytes = zf.read(media_path)
        except KeyError:
            return f"图片资源 {media_path} 不存在于文件中"

    # Step 3: write the image to disk. dirname is "" for a bare file name, and
    # os.makedirs("") raises, so only create the directory when there is one.
    out_dir = os.path.dirname(output_image_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(output_image_path, 'wb') as out_file:
        out_file.write(image_bytes)
    return f"图片已成功提取并保存到 {output_image_path}"