/**
 * Social-sharing metadata describing a web page.
 *
 * Every field is optional: a field is simply absent when no value is known
 * for it.
 */
interface SocialMetadata {
  // --- What the page is ---

  /** Title of the page. */
  title?: string;
  /** Short summary of the page. */
  description?: string;
  /** Content type of the page as a free-form string. */
  type?: string;

  // --- Where it lives ---

  /** URL associated with the page. */
  url?: string;
  /** Name of the site the page belongs to. */
  siteName?: string;

  // --- Preview ---

  /** URL of an image representing the page. */
  image?: string;
}
/**
 * Open Graph tags understood by `extractSocialMetadata`, keyed by the full
 * `property` attribute value. Tags that are not listed (for example
 * `og:image:width` or `og:locale`) are ignored.
 */
const OG_PROPERTY_FIELDS = new Map<string, keyof SocialMetadata>([
  ["og:title", "title"],
  ["og:description", "description"],
  ["og:image", "image"],
  ["og:url", "url"],
  ["og:site_name", "siteName"],
  ["og:type", "type"],
]);

/**
 * Twitter Card tags understood by `extractSocialMetadata`, keyed by the full
 * `name` attribute value. Tags that are not listed (for example
 * `twitter:card` or `twitter:site`) are ignored.
 */
const TWITTER_NAME_FIELDS = new Map<string, keyof SocialMetadata>([
  ["twitter:title", "title"],
  ["twitter:description", "description"],
  ["twitter:image", "image"],
  ["twitter:image:src", "image"],
  ["twitter:url", "url"],
]);

/** Matches values that already are absolute http(s) URLs. */
const ABSOLUTE_HTTP_URL = /^https?:\/\//i;

/**
 * Stores the trimmed `rawValue` in `target[field]` unless the field is
 * unknown, the value is missing or blank, or the field already holds a value
 * (the first occurrence wins).
 */
function setMetadataIfAbsent(
  target: SocialMetadata,
  field: keyof SocialMetadata | undefined,
  rawValue: string | null | undefined,
): void {
  if (field === undefined || rawValue === null || rawValue === undefined) {
    return;
  }
  const value = rawValue.trim();
  if (value !== "" && target[field] === undefined) {
    target[field] = value;
  }
}

/**
 * Fetches `url` and extracts social-sharing metadata from the returned HTML.
 *
 * Values are gathered from three sources and merged with this precedence,
 * independent of the order in which the tags appear in the document:
 *
 * 1. Open Graph tags (`<meta property="og:...">`)
 * 2. Twitter Card tags (`<meta name="twitter:...">`)
 * 3. Generic page data (`<meta name="description">` and the first `<title>`)
 *
 * Within one source the first occurrence of a tag wins. Only tags that map
 * onto a `SocialMetadata` field are kept; values are whitespace-trimmed and
 * blank values are skipped.
 *
 * A relative `image` is resolved against the final response URL (falling back
 * to `url` when the response does not report one). The HTTP status is not
 * inspected, so an error page is parsed like any other document.
 *
 * @param url - Address of the page to fetch.
 * @returns The metadata that could be found; fields without a value are absent.
 * @throws Whatever `fetch` or the HTML rewriter rejects with.
 */
async function extractSocialMetadata(url: string): Promise<SocialMetadata> {
  // Each source is collected separately so precedence can be applied after
  // the whole document has been seen.
  const openGraph: SocialMetadata = {};
  const twitterCard: SocialMetadata = {};
  const pageFallback: SocialMetadata = {};

  // Text arrives in chunks, so the title is accumulated rather than taken from
  // a single callback. Only the first <title> element counts; later ones
  // (e.g. inside inline SVG) are ignored.
  let titleElementsSeen = 0;
  let titleText = "";

  const response = await fetch(url);
  const rewriter = new HTMLRewriter()
    // Open Graph meta tags
    .on('meta[property^="og:"]', {
      element(el) {
        const property = el.getAttribute("property");
        if (property) {
          setMetadataIfAbsent(
            openGraph,
            OG_PROPERTY_FIELDS.get(property),
            el.getAttribute("content"),
          );
        }
      },
    })
    // Twitter Card meta tags, used when Open Graph has no value
    .on('meta[name^="twitter:"]', {
      element(el) {
        const name = el.getAttribute("name");
        if (name) {
          setMetadataIfAbsent(
            twitterCard,
            TWITTER_NAME_FIELDS.get(name),
            el.getAttribute("content"),
          );
        }
      },
    })
    // Generic description meta tag as a last resort
    .on('meta[name="description"]', {
      element(el) {
        setMetadataIfAbsent(pageFallback, "description", el.getAttribute("content"));
      },
    })
    // <title> element as a last resort
    .on("title", {
      element() {
        titleElementsSeen += 1;
      },
      text(chunk) {
        if (titleElementsSeen === 1) {
          titleText += chunk.text;
        }
      },
    });

  // Consuming the transformed body is what drives the handlers above.
  await rewriter.transform(response).blob();

  setMetadataIfAbsent(pageFallback, "title", titleText);

  // Later spreads override earlier ones: fallback < Twitter < Open Graph.
  const metadata: SocialMetadata = {
    ...pageFallback,
    ...twitterCard,
    ...openGraph,
  };

  // Turn a relative image reference into an absolute URL.
  if (metadata.image && !ABSOLUTE_HTTP_URL.test(metadata.image)) {
    try {
      // Prefer the final URL so relative paths survive redirects.
      metadata.image = new URL(metadata.image, response.url || url).href;
    } catch {
      // Keep the original value if it cannot be resolved.
    }
  }
  return metadata;
}