Move the event parser and dedup functions to Rust (#206)
* feat: improve js parser
* feat: move parser and dedup to rust
* fix: parser
* fix: get event function
* feat: improve parser performance (#207)
* feat: improve parser performance
* feat: add test for video parsing
* feat: finish new parser

---------

Co-authored-by: XIAO YU <xyzmhx@gmail.com>
This commit is contained in:
@@ -1,12 +1,31 @@
|
||||
import { IMAGES, VIDEOS } from "./constants";
|
||||
import { Meta } from "@lume/types";
|
||||
import { IMAGES, NOSTR_EVENTS, NOSTR_MENTIONS, VIDEOS } from "./constants";
|
||||
import { fetch } from "@tauri-apps/plugin-http";
|
||||
|
||||
export function parser(content: string) {
|
||||
// Get clean content
|
||||
export async function parser(
|
||||
content: string,
|
||||
abortController?: AbortController,
|
||||
) {
|
||||
const words = content.split(/( |\n)/);
|
||||
const urls = content.match(/(https?:\/\/\S+)/gi);
|
||||
|
||||
// Extract hashtags
|
||||
const hashtags = words.filter((word) => word.startsWith("#"));
|
||||
|
||||
// Extract nostr events
|
||||
const events = words.filter((word) =>
|
||||
NOSTR_EVENTS.some((el) => word.startsWith(el)),
|
||||
);
|
||||
|
||||
// Extract nostr mentions
|
||||
const mentions = words.filter((word) =>
|
||||
NOSTR_MENTIONS.some((el) => word.startsWith(el)),
|
||||
);
|
||||
|
||||
// Extract images and videos from content
|
||||
const images: string[] = [];
|
||||
const videos: string[] = [];
|
||||
|
||||
let text: string = content;
|
||||
|
||||
if (urls) {
|
||||
@@ -16,20 +35,44 @@ export function parser(content: string) {
|
||||
if (IMAGES.includes(ext)) {
|
||||
text = text.replace(url, "");
|
||||
images.push(url);
|
||||
break;
|
||||
}
|
||||
|
||||
if (VIDEOS.includes(ext)) {
|
||||
text = text.replace(url, "");
|
||||
videos.push(url);
|
||||
break;
|
||||
}
|
||||
|
||||
if (urls.length <= 3) {
|
||||
try {
|
||||
const res = await fetch(url, {
|
||||
method: "HEAD",
|
||||
priority: "high",
|
||||
signal: abortController.signal,
|
||||
// proxy: settings.proxy;
|
||||
});
|
||||
|
||||
if (res.headers.get("Content-Type").startsWith("image")) {
|
||||
text = text.replace(url, "");
|
||||
images.push(url);
|
||||
break;
|
||||
}
|
||||
} catch {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const trimContent = text.trim();
|
||||
|
||||
return {
|
||||
content: trimContent,
|
||||
const meta: Meta = {
|
||||
content: text.trim(),
|
||||
images,
|
||||
videos,
|
||||
events,
|
||||
mentions,
|
||||
hashtags,
|
||||
};
|
||||
|
||||
return meta;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user