diff --git a/src/components/shortcodes/LinkCard.astro b/src/components/shortcodes/LinkCard.astro
index b8097d4..7cdb361 100644
--- a/src/components/shortcodes/LinkCard.astro
+++ b/src/components/shortcodes/LinkCard.astro
@@ -1,4 +1,6 @@
---
+import { getMetadata, getWaybackMetadata } from '../../plugins/get-metadata';
+
interface Props {
url: string;
showArchive?: boolean;
@@ -29,71 +31,12 @@ function formatDateToNumber(date: Date | string | undefined): string {
return `${year}${month}${day}`;
}
-// Get metadata from the URL
-async function fetchMetadata(url: string) {
- try {
- const response = await fetch(url, {
- headers: {
- 'User-Agent': 'Mozilla/5.0 (compatible; LinkCard/1.0)'
- }
- });
-
- if (!response.ok) {
- throw new Error(`HTTP ${response.status}`);
- }
-
- const html = await response.text();
-
- // 提取元数据
- const titleMatch = html.match(/<title[^>]*>([^<]+)<\/title>/i);
- const descriptionMatch = html.match(/<meta[^>]+name=["']description["'][^>]+content=["']([^"']+)["']/i) ||
- html.match(/<meta[^>]+property=["']og:description["'][^>]+content=["']([^"']+)["']/i);
- const imageMatch = html.match(/<meta[^>]+property=["']og:image["'][^>]+content=["']([^"']+)["']/i) ||
- html.match(/<meta[^>]+name=["']twitter:image["'][^>]+content=["']([^"']+)["']/i);
- const siteNameMatch = html.match(/<meta[^>]+property=["']og:site_name["'][^>]+content=["']([^"']+)["']/i);
-
- return {
- title: titleMatch?.[1]?.trim() || new URL(url).hostname,
- description: descriptionMatch?.[1]?.trim() || '',
- image: imageMatch?.[1]?.trim() || '',
- siteName: siteNameMatch?.[1]?.trim() || new URL(url).hostname,
- domain: new URL(url).hostname
- };
- } catch (error) {
- console.warn(`Failed to fetch metadata for ${url}:`, error);
- const domain = new URL(url).hostname;
- return {
- title: domain,
- description: '',
- image: '',
- siteName: domain,
- domain
- };
- }
-}
-
-// Check if the URL is archived on the Wayback Machine at the updated/build time
-// TODO: bringing user's own archive service link
-async function checkArchive(url: string) {
- try {
- // Determine which date to use (prefer updatedDate if available, or fallback to the build time)
- const timestamp = (updatedDate ? formatDateToNumber(updatedDate) : formatDateToNumber(pubDate)) || formatDateToNumber(new Date());
- const archiveUrl = `https://archive.org/wayback/available?url=${encodeURIComponent(url)}&timestamp=${timestamp}`;
- const response = await fetch(archiveUrl);
- const data = await response.json();
-
- if (data.archived_snapshots?.closest?.available) {
- return data.archived_snapshots.closest.url;
- }
- } catch (error) {
- console.warn(`Failed to check archive for ${url}:`, error);
- }
- return null;
-}
+// Date sent to the Wayback lookup: updatedDate when set, otherwise pubDate; the current time (new Date()) is used only if that formats to a falsy (empty) value
+const timestamp = (updatedDate ? formatDateToNumber(updatedDate) : formatDateToNumber(pubDate)) || formatDateToNumber(new Date());
// extract metadata and archive URL
-const metadata = Astro.props.title ? siteMetadata : await fetchMetadata(url);
-const archiveUrl = showArchive ? await checkArchive(url) : null;
+const metadata = Astro.props.title ? siteMetadata : await getMetadata(url); // a truthy title prop uses siteMetadata and skips the network fetch
+const archiveUrl = showArchive ? await getWaybackMetadata(url, timestamp) : null; // the archive lookup runs only when showArchive is truthy
---
diff --git a/src/plugins/get-metadata.js b/src/plugins/get-metadata.js
new file mode 100644
index 0000000..1e1da23
--- /dev/null
+++ b/src/plugins/get-metadata.js
@@ -0,0 +1,103 @@
+import { parse } from "ultrahtml";
+import "ultrahtml/selector";
+import {querySelector} from "ultrahtml/selector";
+
+// In-memory cache shared by both exports: getMetadata keys it by page URL, getWaybackMetadata by its availability-query URL; values are { data } wrappers
+const metadataCache = new Map();
+
+export async function getMetadata(url) {
+ if (metadataCache.has(url)) {
+ const cached = metadataCache.get(url);
+ return cached.data;
+ }
+
+ try {
+ const response = await fetch(url, {
+ headers: {
+ 'User-Agent': 'Mozilla/5.0 (compatible; LinkCard/1.1)',
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9',
+ }
+ });
+
+ if (!response.ok) {
+ throw new Error(`Request not succeed: HTTP ${response.status}`);
+ }
+
+ const html = await response.text();
+
+ const document = parse(html);
+
+ const metadata = {
+ title: '',
+ description: '',
+ image: '',
+ siteName: '',
+ domain: new URL(url).hostname
+ };
+
+ // Extract title
+ const titleElement = querySelector(document,'title');
+
+ if (titleElement) {
+ metadata.title = titleElement.children[0].value.trim();
+ }
+ // Extract other metadata
+ const descriptionElement = querySelector(document, 'meta[name="description"]');
+ if (descriptionElement) {
+ metadata.description = descriptionElement.attributes.content || '';
+ }
+ const imageElement = querySelector(document,'meta[property="og:image"]') || querySelector(document,'meta[name="twitter:image"]');
+ if (imageElement) {
+ metadata.image = imageElement.attributes.content || '';
+ }
+ const siteNameElement = querySelector(document,'meta[property="og:site_name"]')
+ if (siteNameElement) {
+ metadata.siteName = siteNameElement.attributes.content || '';
+ } else {
+ metadata.siteName = metadata.domain; // Fallback to domain if no site name found
+ }
+
+ // Store in cache
+ metadataCache.set(url, {
+ data: metadata
+ });
+
+ return metadata;
+ } catch (error) {
+ console.warn(`Failed to fetch metadata for ${url}:`, error);
+ const domain = new URL(url).hostname;
+ return {
+ title: domain,
+ description: '',
+ image: '',
+ siteName: domain,
+ domain
+ };
+ }
+}
+
+export async function getWaybackMetadata(url, timestamp){
+ try {
+ const archiveUrl = `https://archive.org/wayback/available?url=${encodeURIComponent(url)}×tamp=${timestamp}`;
+
+ if (metadataCache.has(archiveUrl)) {
+ const cached = metadataCache.get(archiveUrl);
+ return cached.data;
+ }
+
+ const response = await fetch(archiveUrl);
+ const data = await response.json();
+
+ if (data.archived_snapshots?.closest?.available) {
+ // Store in cache
+ metadataCache.set(archiveUrl, {
+ data: data.archived_snapshots.closest.url
+ });
+
+ return data.archived_snapshots.closest.url;
+ }
+ } catch (error) {
+ console.warn(`Failed to check archive for ${url}:`, error);
+ }
+ return null;
+}
\ No newline at end of file