diff --git a/runtime/ops/histoqc_op/_init_.py b/runtime/ops/histoqc_op/_init_.py new file mode 100644 index 00000000..0534bbb9 --- /dev/null +++ b/runtime/ops/histoqc_op/_init_.py @@ -0,0 +1,6 @@ +# -*- coding: utf-8 -*- +from datamate.core.base_op import OPERATORS +OPERATORS.register_module( + module_name='HistoQCMapper', + module_path='ops.user.histoqc_op.process' +) \ No newline at end of file diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC b/runtime/ops/histoqc_op/histoqc_src/HistoQC new file mode 160000 index 00000000..b7956129 --- /dev/null +++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC @@ -0,0 +1 @@ +Subproject commit b79561292e49d153e2e4c1401c9c48d3d5f925e1 diff --git a/runtime/ops/histoqc_op/metadata.yml b/runtime/ops/histoqc_op/metadata.yml new file mode 100644 index 00000000..c133462d --- /dev/null +++ b/runtime/ops/histoqc_op/metadata.yml @@ -0,0 +1,25 @@ +name: 'HistoQC' +description: '集成HistoQC输出处理 : 提取伪影坐标(GeoJSON)、汇总质量指标(TSV)并关联全套掩码图像(PNG)。' +language: 'python' +vendor: 'huawei' +raw_id: 'HistoQCMapper' +version: '1.0.0' +modal: 'multimodal' # 多模态,因为涉及文本(TSV/JSON)和图像(PNG) +inputs: 'image' +outputs: 'multimodal' +settings: + scaleFactor: + name: '缩放倍率' + description: '掩码相对于原图的下采样倍率(HistoQC log_factor)。' + type: 'inputNumber' + defaultVal: 1.0 + exportFullImages: + name: '导出全套图像' + description: '是否将所有生成的掩码图(Equalized/Mask/Blur)关联至输出。' + type: 'switch' + defaultVal: 'true' + includeStats: + name: '包含指标统计' + description: '是否从results.tsv中提取该切片的具体质量.' + type: 'switch' + defaultVal: 'true' \ No newline at end of file diff --git a/runtime/ops/histoqc_op/process.py b/runtime/ops/histoqc_op/process.py new file mode 100644 index 00000000..2a57c981 --- /dev/null +++ b/runtime/ops/histoqc_op/process.py @@ -0,0 +1,114 @@ +# -*- coding: utf-8 -*- +import cv2 +import numpy as np +import json +import os +import pandas as pd +import subprocess +from pathlib import Path +from typing import Dict, Any +from datamate.core.base_op import Mapper + +class HistoQCMapper(Mapper): + def execute(self, sample: Dict[str, Any]) -> Dict[str, Any]: + # 动态路径与环境变量初始化 + current_dir = os.path.dirname(os.path.abspath(__file__)) + # 定位算子包内部的配置文件 + config_path = os.path.join(current_dir, "histoqc_src", "histoqc", "config", "config_v2.1.ini") + # 定位 HistoQC 源代码目录 + histoqc_src_dir = os.path.join(current_dir, "histoqc_src") + + file_path = sample.get("filePath", "") + out_dir = sample.get("export_path", os.path.dirname(file_path)) + if not os.path.exists(out_dir): + os.makedirs(out_dir, exist_ok=True) + + # HistoQC 在 TSV 和图片命名中通常保留完整文件名(含后缀) + base_name = os.path.basename(file_path) + + # 注入环境变量,确保能找到包内 HistoQC 模块 + custom_env = os.environ.copy() + custom_env["PYTHONPATH"] = f"{histoqc_src_dir}{os.pathsep}{custom_env.get('PYTHONPATH', '')}" + + #启动 HistoQC 算法 + cmd = [ + "python3", "-m", "histoqc", + "-o", out_dir, + "-c", config_path, + "-n", "4", + file_path + ] + + try: + + # 执行算法,设置 30 分钟超时 + result = subprocess.run(cmd, capture_output=True, text=True, env=custom_env, timeout=1800) + + # 如果被系统 Kill (返回码 -9) 或其他错误,记录在 text 字段 + if result.returncode != 0: + sample["text"] = json.dumps({"error": f"HistoQC failed with return code {result.returncode}", "stderr": result.stderr}) + return sample + + except Exception as e: + sample["text"] = json.dumps({"error": f"Subprocess exception: {str(e)}"}) + return sample + + #结果解析与归档 + final_report = { + "slide_name": base_name, + "metrics": {}, + "annotations": [], + "generated_images": [] + } + + # 解析 TSV 质量指标 + + tsv_path = Path(out_dir) / "results.tsv" + + if tsv_path.exists(): + try: + # comment='#' 跳过 HistoQC 的元数据行,on_bad_lines='skip' 增强容错 + df = pd.read_csv(tsv_path, sep='\t', comment='#', on_bad_lines='skip') + # 匹配第一列文件名 + row = df[df.iloc[:, 0].astype(str) == base_name] + if not row.empty: + final_report["metrics"] = row.iloc[-1].to_dict() + except Exception as e: + final_report["metrics_error"] = str(e) + + # 提取伪影坐标并转换为 GeoJSON (适应多种路径结构) + scale_factor = float(getattr(self, 'scaleFactor', 1.0)) + artifact_types = { + "pen_markings": {"name": "Pen Marking", "color": -65536}, + "coverslip_edge": {"name": "Coverslip Edge", "color": -16711936} + } + + # 考虑到 HistoQC 可能会创建子文件夹,使用 glob 进行深度搜索 + search_dir = Path(out_dir) / base_name + if not search_dir.exists(): + search_dir = Path(out_dir) + + for key, info in artifact_types.items(): + # 搜索包含文件名和伪影类型的图片 + found_files = list(search_dir.glob(f"*{base_name}*{key}*.png")) + if found_files: + mask_path = found_files[0] + mask = cv2.imread(str(mask_path), cv2.IMREAD_GRAYSCALE) + if mask is not None and np.max(mask) > 0: + _, thresh = cv2.threshold(mask, 1, 255, cv2.THRESH_BINARY) + contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + for contour in contours: + pts = (contour.reshape(-1, 2) * scale_factor).tolist() + if len(pts) < 3: continue + pts.append(pts[0]) + final_report["annotations"].append({ + "type": "Feature", + "properties": {"classification": {"name": info["name"], "colorRGB": info["color"]}}, + "geometry": {"type": "Polygon", "coordinates": [pts]} + }) + final_report["generated_images"].append(mask_path.name) + # 写回 DataMate 样本数据 + sample["text"] = json.dumps(final_report,ensure_ascii=False,indent=2) + if final_report["metrics"]: + sample["extra_info"] = final_report["metrics"] + return sample \ No newline at end of file