Move the event parser and dedup functions to Rust (#206)

* feat: improve js parser

* feat: move parser and dedup to rust

* fix: parser

* fix: get event function

* feat: improve parser performance (#207)

* feat: improve parser performance

* feat: add test for video parsing

* feat: finish new parser

---------

Co-authored-by: XIAO YU <xyzmhx@gmail.com>
This commit is contained in:
雨宮蓮
2024-06-12 08:27:53 +07:00
committed by GitHub
parent 1c20512ecc
commit 71be59b2e9
21 changed files with 610 additions and 250 deletions

13
src-tauri/Cargo.lock generated
View File

@@ -2707,6 +2707,15 @@ version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd1bc4d24ad230d21fb898d1116b1801d7adfc449d42026475862ab48b11e70e"
[[package]]
name = "linkify"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1dfa36d52c581e9ec783a7ce2a5e0143da6237be5811a0b3153fedfdbe9f780"
dependencies = [
"memchr",
]
[[package]]
name = "linux-keyutils"
version = "0.2.4"
@@ -2786,12 +2795,15 @@ name = "lume"
version = "4.0.0"
dependencies = [
"cocoa",
"futures",
"keyring",
"keyring-search",
"linkify",
"monitor",
"nostr-sdk",
"objc",
"rand 0.8.5",
"reqwest",
"serde",
"serde_json",
"specta",
@@ -2812,6 +2824,7 @@ dependencies = [
"tauri-plugin-upload",
"tauri-specta",
"tokio",
"url",
]
[[package]]

View File

@@ -17,11 +17,11 @@ serde_json = "1.0"
serde = { version = "1.0", features = ["derive"] }
monitor = { git = "https://github.com/ahkohd/tauri-toolkit", branch = "v2" }
tauri = { version = "2.0.0-beta", features = [
"unstable",
"tray-icon",
"macos-private-api",
"native-tls-vendored",
"protocol-asset",
"unstable",
"tray-icon",
"macos-private-api",
"native-tls-vendored",
"protocol-asset",
] }
tauri-plugin-clipboard-manager = { git = "https://github.com/tauri-apps/plugins-workspace", branch = "v2" }
tauri-plugin-dialog = { git = "https://github.com/tauri-apps/plugins-workspace", branch = "v2" }
@@ -40,6 +40,10 @@ tauri-plugin-decorum = "0.1.0"
specta = "^2.0.0-rc.12"
keyring = "2"
keyring-search = "0.2.0"
reqwest = "0.12.4"
url = "2.5.0"
futures = "0.3.30"
linkify = "0.10.0"
[target.'cfg(target_os = "macos")'.dependencies]
cocoa = "0.25.0"

View File

@@ -59,6 +59,7 @@
"fs:allow-read-file",
"theme:allow-set-theme",
"theme:allow-get-theme",
"http:default",
"shell:allow-open",
{
"identifier": "http:default",

View File

@@ -1 +1 @@
{"desktop-capability":{"identifier":"desktop-capability","description":"Capability for the desktop","local":true,"windows":["main","panel","splash","settings","search","nwc","activity","zap-*","event-*","user-*","editor-*","column-*"],"permissions":["path:default","event:default","window:default","app:default","resources:default","menu:default","tray:default","notification:allow-is-permission-granted","notification:allow-request-permission","notification:default","os:allow-locale","os:allow-platform","os:allow-os-type","updater:default","updater:allow-check","updater:allow-download-and-install","window:allow-start-dragging","window:allow-create","window:allow-close","window:allow-set-focus","window:allow-center","window:allow-minimize","window:allow-maximize","window:allow-set-size","window:allow-set-focus","window:allow-start-dragging","decorum:allow-show-snap-overlay","clipboard-manager:allow-write-text","clipboard-manager:allow-read-text","webview:allow-create-webview-window","webview:allow-create-webview","webview:allow-set-webview-size","webview:allow-set-webview-position","webview:allow-webview-close","dialog:allow-open","dialog:allow-ask","dialog:allow-message","process:allow-restart","fs:allow-read-file","theme:allow-set-theme","theme:allow-get-theme","shell:allow-open",{"identifier":"http:default","allow":[{"url":"http://**/"},{"url":"https://**/"}]},{"identifier":"fs:allow-read-text-file","allow":[{"path":"$RESOURCE/locales/*"},{"path":"$RESOURCE/resources/*"}]}],"platforms":["linux","macOS","windows"]}}
{"desktop-capability":{"identifier":"desktop-capability","description":"Capability for the desktop","local":true,"windows":["main","panel","splash","settings","search","nwc","activity","zap-*","event-*","user-*","editor-*","column-*"],"permissions":["path:default","event:default","window:default","app:default","resources:default","menu:default","tray:default","notification:allow-is-permission-granted","notification:allow-request-permission","notification:default","os:allow-locale","os:allow-platform","os:allow-os-type","updater:default","updater:allow-check","updater:allow-download-and-install","window:allow-start-dragging","window:allow-create","window:allow-close","window:allow-set-focus","window:allow-center","window:allow-minimize","window:allow-maximize","window:allow-set-size","window:allow-set-focus","window:allow-start-dragging","decorum:allow-show-snap-overlay","clipboard-manager:allow-write-text","clipboard-manager:allow-read-text","webview:allow-create-webview-window","webview:allow-create-webview","webview:allow-set-webview-size","webview:allow-set-webview-position","webview:allow-webview-close","dialog:allow-open","dialog:allow-ask","dialog:allow-message","process:allow-restart","fs:allow-read-file","theme:allow-set-theme","theme:allow-get-theme","http:default","shell:allow-open",{"identifier":"http:default","allow":[{"url":"http://**/"},{"url":"https://**/"}]},{"identifier":"fs:allow-read-text-file","allow":[{"path":"$RESOURCE/locales/*"},{"path":"$RESOURCE/resources/*"}]}],"platforms":["linux","macOS","windows"]}}

View File

@@ -1,11 +1,23 @@
use crate::Nostr;
use nostr_sdk::prelude::*;
use std::{str::FromStr, time::Duration};
use futures::future::join_all;
use nostr_sdk::prelude::*;
use serde::Serialize;
use specta::Type;
use tauri::State;
use crate::Nostr;
use crate::nostr::utils::{dedup_event, Meta, parse_event};
/// An event as returned to the frontend by the commands in this module:
/// the serialized event plus optional metadata extracted from its content.
#[derive(Debug, Serialize, Type)]
pub struct RichEvent {
/// The event serialized with `as_json()`.
pub raw: String,
/// Result of `parse_event` on the event content; the commands in this
/// module fill it only for `Kind::TextNote` events and use `None` otherwise.
pub parsed: Option<Meta>,
}
#[tauri::command]
#[specta::specta]
pub async fn get_event(id: &str, state: State<'_, Nostr>) -> Result<String, String> {
pub async fn get_event(id: &str, state: State<'_, Nostr>) -> Result<RichEvent, String> {
let client = &state.client;
let event_id: Option<EventId> = match Nip19::from_bech32(id) {
Ok(val) => match val {
@@ -36,7 +48,14 @@ pub async fn get_event(id: &str, state: State<'_, Nostr>) -> Result<String, Stri
{
Ok(events) => {
if let Some(event) = events.first() {
Ok(event.as_json())
let raw = event.as_json();
let parsed = if event.kind == Kind::TextNote {
Some(parse_event(&event.content).await)
} else {
None
};
Ok(RichEvent { raw, parsed })
} else {
Err("Cannot found this event with current relay list".into())
}
@@ -50,7 +69,7 @@ pub async fn get_event(id: &str, state: State<'_, Nostr>) -> Result<String, Stri
#[tauri::command]
#[specta::specta]
pub async fn get_replies(id: &str, state: State<'_, Nostr>) -> Result<Vec<String>, String> {
pub async fn get_replies(id: &str, state: State<'_, Nostr>) -> Result<Vec<RichEvent>, String> {
let client = &state.client;
match EventId::from_hex(id) {
@@ -58,7 +77,21 @@ pub async fn get_replies(id: &str, state: State<'_, Nostr>) -> Result<Vec<String
let filter = Filter::new().kinds(vec![Kind::TextNote]).event(event_id);
match client.get_events_of(vec![filter], None).await {
Ok(events) => Ok(events.into_iter().map(|ev| ev.as_json()).collect()),
Ok(events) => {
let futures = events.into_iter().map(|ev| async move {
let raw = ev.as_json();
let parsed = if ev.kind == Kind::TextNote {
Some(parse_event(&ev.content).await)
} else {
None
};
RichEvent { raw, parsed }
});
let rich_events = join_all(futures).await;
Ok(rich_events)
}
Err(err) => Err(err.to_string()),
}
}
@@ -72,7 +105,7 @@ pub async fn get_events_by(
public_key: &str,
as_of: Option<&str>,
state: State<'_, Nostr>,
) -> Result<Vec<String>, String> {
) -> Result<Vec<RichEvent>, String> {
let client = &state.client;
match PublicKey::from_str(public_key) {
@@ -88,7 +121,21 @@ pub async fn get_events_by(
.until(until);
match client.get_events_of(vec![filter], None).await {
Ok(events) => Ok(events.into_iter().map(|ev| ev.as_json()).collect()),
Ok(events) => {
let futures = events.into_iter().map(|ev| async move {
let raw = ev.as_json();
let parsed = if ev.kind == Kind::TextNote {
Some(parse_event(&ev.content).await)
} else {
None
};
RichEvent { raw, parsed }
});
let rich_events = join_all(futures).await;
Ok(rich_events)
}
Err(err) => Err(err.to_string()),
}
}
@@ -102,7 +149,7 @@ pub async fn get_local_events(
pubkeys: Vec<String>,
until: Option<&str>,
state: State<'_, Nostr>,
) -> Result<Vec<String>, String> {
) -> Result<Vec<RichEvent>, String> {
let client = &state.client;
let as_of = match until {
Some(until) => Timestamp::from_str(until).unwrap(),
@@ -128,7 +175,22 @@ pub async fn get_local_events(
.get_events_of(vec![filter], Some(Duration::from_secs(10)))
.await
{
Ok(events) => Ok(events.into_iter().map(|ev| ev.as_json()).collect()),
Ok(events) => {
let dedup = dedup_event(&events, false);
let futures = dedup.into_iter().map(|ev| async move {
let raw = ev.as_json();
let parsed = if ev.kind == Kind::TextNote {
Some(parse_event(&ev.content).await)
} else {
None
};
RichEvent { raw, parsed }
});
let rich_events = join_all(futures).await;
Ok(rich_events)
}
Err(err) => Err(err.to_string()),
}
}
@@ -138,7 +200,7 @@ pub async fn get_local_events(
pub async fn get_global_events(
until: Option<&str>,
state: State<'_, Nostr>,
) -> Result<Vec<String>, String> {
) -> Result<Vec<RichEvent>, String> {
let client = &state.client;
let as_of = match until {
Some(until) => Timestamp::from_str(until).unwrap(),
@@ -154,7 +216,22 @@ pub async fn get_global_events(
.get_events_of(vec![filter], Some(Duration::from_secs(8)))
.await
{
Ok(events) => Ok(events.into_iter().map(|ev| ev.as_json()).collect()),
Ok(events) => {
let dedup = dedup_event(&events, false);
let futures = dedup.into_iter().map(|ev| async move {
let raw = ev.as_json();
let parsed = if ev.kind == Kind::TextNote {
Some(parse_event(&ev.content).await)
} else {
None
};
RichEvent { raw, parsed }
});
let rich_events = join_all(futures).await;
Ok(rich_events)
}
Err(err) => Err(err.to_string()),
}
}
@@ -165,7 +242,7 @@ pub async fn get_hashtag_events(
hashtags: Vec<&str>,
until: Option<&str>,
state: State<'_, Nostr>,
) -> Result<Vec<String>, String> {
) -> Result<Vec<RichEvent>, String> {
let client = &state.client;
let as_of = match until {
Some(until) => Timestamp::from_str(until).unwrap(),
@@ -178,7 +255,22 @@ pub async fn get_hashtag_events(
.hashtags(hashtags);
match client.get_events_of(vec![filter], None).await {
Ok(events) => Ok(events.into_iter().map(|ev| ev.as_json()).collect()),
Ok(events) => {
let dedup = dedup_event(&events, false);
let futures = dedup.into_iter().map(|ev| async move {
let raw = ev.as_json();
let parsed = if ev.kind == Kind::TextNote {
Some(parse_event(&ev.content).await)
} else {
None
};
RichEvent { raw, parsed }
});
let rich_events = join_all(futures).await;
Ok(rich_events)
}
Err(err) => Err(err.to_string()),
}
}

View File

@@ -1,5 +1,184 @@
use nostr_sdk::Event;
use std::collections::HashSet;
use std::str::FromStr;
use linkify::LinkFinder;
use nostr_sdk::{Alphabet, Event, SingleLetterTag, Tag, TagKind};
use reqwest::Client;
use serde::Serialize;
use specta::Type;
use url::Url;
/// Metadata extracted from the text content of a note by `parse_event`.
#[derive(Debug, Serialize, Type)]
pub struct Meta {
/// The note text with extracted media URLs removed and surrounding
/// whitespace trimmed.
pub content: String,
/// URLs recognised as images (by file extension or by the `Content-Type`
/// returned for a HEAD request).
pub images: Vec<String>,
/// URLs recognised as videos (by file extension).
pub videos: Vec<String>,
/// Whitespace-separated words that start with one of `NOSTR_EVENTS`.
pub events: Vec<String>,
/// Whitespace-separated words that start with one of `NOSTR_MENTIONS`.
pub mentions: Vec<String>,
/// Whitespace-separated words that start with `#`.
pub hashtags: Vec<String>,
}
// Word prefixes that `parse_event` treats as references to other events
// (bech32 `note1` / `nevent1` identifiers, with optional `@` / `nostr:` prefixes).
// Matching is done with `starts_with`, so it is case-sensitive.
const NOSTR_EVENTS: [&str; 10] = [
"@nevent1",
"@note1",
"@nostr:note1",
"@nostr:nevent1",
"nostr:note1",
"note1",
"nostr:nevent1",
"nevent1",
"Nostr:note1",
"Nostr:nevent1",
];
// Word prefixes that `parse_event` collects into `Meta::mentions`
// (`npub1` / `nprofile1` / `naddr1` identifiers, with optional prefixes).
const NOSTR_MENTIONS: [&str; 10] = [
"@npub1",
"nostr:npub1",
"nostr:nprofile1",
"nostr:naddr1",
"npub1",
"nprofile1",
"naddr1",
"Nostr:npub1",
"Nostr:nprofile1",
"Nostr:naddr1",
];
// File extensions (without the dot) that mark a URL as an image.
const IMAGES: [&str; 7] = ["jpg", "jpeg", "gif", "png", "webp", "avif", "tiff"];
// File extensions (without the dot) that mark a URL as a video.
const VIDEOS: [&str; 5] = ["mp4", "mov", "avi", "webm", "mkv"];
/// Returns the event with the greatest `created_at` timestamp, or `None`
/// when `events` is empty. When several events share the greatest
/// timestamp, the one appearing last in the slice is returned.
pub fn get_latest_event(events: &[Event]) -> Option<&Event> {
    events.iter().reduce(|newest, candidate| {
        // `>=` keeps the later element on ties, matching `max_by_key`.
        if candidate.created_at() >= newest.created_at() {
            candidate
        } else {
            newest
        }
    })
}
/// Filters `events`, keeping for each referenced event id only the first
/// event that references it.
///
/// An event counts as a duplicate when any id carried by its lowercase `e`
/// tags was already carried by an earlier event in the slice (kept or not).
/// Events without `e` tags are never duplicates.
///
/// When `nsfw` is `true`, events carrying a content-warning tag are dropped
/// as well. Surviving events are cloned into the returned vector in their
/// original order.
pub fn dedup_event(events: &[Event], nsfw: bool) -> Vec<Event> {
    let e_kind = TagKind::SingleLetter(SingleLetterTag::lowercase(Alphabet::E));
    let mut seen_ids: HashSet<&str> = HashSet::new();
    let mut kept = Vec::new();

    for event in events {
        // Ids referenced through this event's `e` tags.
        let referenced: Vec<&str> = event
            .tags
            .iter()
            .filter(|tag| tag.kind() == e_kind)
            .filter_map(|tag| tag.content())
            .collect();

        let is_dup = referenced.iter().any(|id| seen_ids.contains(*id));

        // Ids are recorded even when the event itself ends up discarded.
        seen_ids.extend(referenced);

        let has_warning = nsfw
            && event
                .tags
                .iter()
                .any(|tag| tag.kind() == TagKind::ContentWarning);

        if !is_dup && !has_warning {
            kept.push(event.clone());
        }
    }

    kept
}
pub async fn parse_event(content: &str) -> Meta {
let words: Vec<_> = content.split_whitespace().collect();
let mut finder = LinkFinder::new();
finder.url_must_have_scheme(false);
let urls: Vec<_> = finder.links(content).collect();
let hashtags = words
.iter()
.filter(|&&word| word.starts_with('#'))
.map(|&s| s.to_string())
.collect::<Vec<_>>();
let events = words
.iter()
.filter(|&&word| NOSTR_EVENTS.iter().any(|&el| word.starts_with(el)))
.map(|&s| s.to_string())
.collect::<Vec<_>>();
let mentions = words
.iter()
.filter(|&&word| NOSTR_MENTIONS.iter().any(|&el| word.starts_with(el)))
.map(|&s| s.to_string())
.collect::<Vec<_>>();
let mut images = Vec::new();
let mut videos = Vec::new();
let mut text = content.to_string();
if !urls.is_empty() {
let client = Client::new();
for url in urls {
let url_str = url.as_str();
if let Ok(parsed_url) = Url::from_str(url_str) {
if let Some(ext) = parsed_url
.path_segments()
.and_then(|segments| segments.last().and_then(|s| s.split('.').last()))
{
if IMAGES.contains(&ext) {
text = text.replace(url_str, "");
images.push(url_str.to_string());
break;
}
if VIDEOS.contains(&ext) {
text = text.replace(url_str, "");
videos.push(url_str.to_string());
break;
}
}
// Check the content type of URL via HEAD request
if let Ok(res) = client.head(url_str).send().await {
if let Some(content_type) = res.headers().get("Content-Type") {
if content_type.to_str().unwrap_or("").starts_with("image") {
text = text.replace(url_str, "");
images.push(url_str.to_string());
break;
}
}
}
}
}
}
// Clean up the resulting content string to remove extra spaces
let cleaned_text = text.trim().to_string();
Meta {
content: cleaned_text,
events,
mentions,
hashtags,
images,
videos,
}
}
// Unit tests for `parse_event`. Both cases use URLs with a known media
// extension, so classification happens on the extension branch.
#[cfg(test)]
mod tests {
use super::*;
// An image URL is expected to land in `images` and be removed from `content`,
// while the hashtag and mention words are reported and stay in the text.
#[tokio::test]
async fn test_parse_event() {
let content = "Check this image: https://example.com/image.jpg #cool @npub1";
let meta = parse_event(content).await;
assert_eq!(meta.content, "Check this image: #cool @npub1");
assert_eq!(meta.images, vec!["https://example.com/image.jpg"]);
assert_eq!(meta.videos, Vec::<String>::new());
assert_eq!(meta.hashtags, vec!["#cool"]);
assert_eq!(meta.mentions, vec!["@npub1"]);
}
// Same as above, but the URL has a video extension and must land in `videos`.
#[tokio::test]
async fn test_parse_video() {
let content = "Check this video: https://example.com/video.mp4 #cool @npub1";
let meta = parse_event(content).await;
assert_eq!(meta.content, "Check this video: #cool @npub1");
assert_eq!(meta.images, Vec::<String>::new());
assert_eq!(meta.videos, vec!["https://example.com/video.mp4"]);
assert_eq!(meta.hashtags, vec!["#cool"]);
assert_eq!(meta.mentions, vec!["@npub1"]);
}
}