use std::collections::HashSet;
use std::str::FromStr;

use linkify::LinkFinder;
use nostr_sdk::{Alphabet, Event, EventId, FromBech32, PublicKey, SingleLetterTag, Tag, TagKind};
use nostr_sdk::prelude::{Nip19Event, Nip19Profile};
use reqwest::Client;
use serde::Serialize;
use specta::Type;
use url::Url;

#[derive(Debug, Serialize, Type)]
pub struct Meta {
    pub content: String,
    pub images: Vec<String>,
    pub videos: Vec<String>,
    pub events: Vec<String>,
    pub mentions: Vec<String>,
    pub hashtags: Vec<String>,
}

const NOSTR_EVENTS: [&str; 10] = [
    "@nevent1",
    "@note1",
    "@nostr:note1",
    "@nostr:nevent1",
    "nostr:note1",
    "note1",
    "nostr:nevent1",
    "nevent1",
    "Nostr:note1",
    "Nostr:nevent1",
];

const NOSTR_MENTIONS: [&str; 10] = [
    "@npub1",
    "nostr:npub1",
    "nostr:nprofile1",
    "nostr:naddr1",
    "npub1",
    "nprofile1",
    "naddr1",
    "Nostr:npub1",
    "Nostr:nprofile1",
    "Nostr:naddr1",
];

const IMAGES: [&str; 7] = ["jpg", "jpeg", "gif", "png", "webp", "avif", "tiff"];
const VIDEOS: [&str; 5] = ["mp4", "mov", "avi", "webm", "mkv"];

/// Returns the most recently created event, if any.
pub fn get_latest_event(events: &[Event]) -> Option<&Event> {
    events.iter().max_by_key(|event| event.created_at())
}

/// Removes events whose `e` tags reference an event ID already seen
/// in an earlier event of the slice.
pub fn dedup_event(events: &[Event]) -> Vec<Event> {
    let mut seen_ids = HashSet::new();
    events
        .iter()
        .filter(|&event| {
            let e = TagKind::SingleLetter(SingleLetterTag::lowercase(Alphabet::E));
            let e_tags: Vec<&Tag> = event.tags.iter().filter(|el| el.kind() == e).collect();
            let ids: Vec<&str> = e_tags.iter().filter_map(|tag| tag.content()).collect();
            let is_dup = ids.iter().any(|id| seen_ids.contains(*id));
            for id in &ids {
                seen_ids.insert(*id);
            }
            !is_dup
        })
        .cloned()
        .collect()
}

/// Parses note content into plain text plus the extracted media URLs,
/// quoted events, mentions, and hashtags.
pub async fn parse_event(content: &str) -> Meta {
    let mut finder = LinkFinder::new();
    finder.url_must_have_scheme(false);

    // Get URLs
    let urls: Vec<_> = finder.links(content).collect();

    // Get words
    let words: Vec<_> = content.split_whitespace().collect();

    let hashtags = words
        .iter()
        .filter(|&&word| word.starts_with('#'))
        .map(|&s| s.to_string())
        .collect::<Vec<String>>();

    let events = words
        .iter()
        .filter(|&&word| NOSTR_EVENTS.iter().any(|&el| word.starts_with(el)))
        .map(|&s| s.to_string())
        .collect::<Vec<String>>();

    let mentions = words
        .iter()
        .filter(|&&word| NOSTR_MENTIONS.iter().any(|&el| word.starts_with(el)))
        .map(|&s| s.to_string())
        .collect::<Vec<String>>();

    let mut images = Vec::new();
    let mut videos = Vec::new();
    let mut text = content.to_string();

    if !urls.is_empty() {
        let client = Client::new();

        for url in urls {
            let url_str = url.as_str();

            if let Ok(parsed_url) = Url::from_str(url_str) {
                // Classify by file extension first to avoid a network round trip.
                if let Some(ext) = parsed_url
                    .path_segments()
                    .and_then(|segments| segments.last().and_then(|s| s.split('.').last()))
                {
                    if IMAGES.contains(&ext) {
                        text = text.replace(url_str, "");
                        images.push(url_str.to_string());
                        // Process the next item.
                        continue;
                    }

                    if VIDEOS.contains(&ext) {
                        text = text.replace(url_str, "");
                        videos.push(url_str.to_string());
                        // Process the next item.
                        continue;
                    }
                }

                // Check the content type of the URL via a HEAD request
                if let Ok(res) = client.head(url_str).send().await {
                    if let Some(content_type) = res.headers().get("Content-Type") {
                        if content_type.to_str().unwrap_or("").starts_with("image") {
                            text = text.replace(url_str, "");
                            images.push(url_str.to_string());
                            // Process the next item.
                            continue;
                        }
                    }
                }
            }
        }
    }

    // Clean up the resulting content string: removing URLs can leave doubled
    // spaces behind, so collapse whitespace runs and trim the ends.
    let cleaned_text = text.split_whitespace().collect::<Vec<&str>>().join(" ");

    Meta {
        content: cleaned_text,
        events,
        mentions,
        hashtags,
        images,
        videos,
    }
}

/// Builds the `p`, `e`, and `t` tags for a new event from the mentions
/// and hashtags found in its content.
pub fn create_event_tags(content: &str) -> Vec<Tag> {
    let mut tags: Vec<Tag> = vec![];
    let mut tag_set: HashSet<String> = HashSet::new();

    // Get words
    let words: Vec<_> = content.split_whitespace().collect();

    // Get mentions
    let mentions = words
        .iter()
        .filter(|&&word| ["nostr:", "@"].iter().any(|&el| word.starts_with(el)))
        .map(|&s| s.to_string())
        .collect::<Vec<String>>();

    // Get hashtags
    let hashtags = words
        .iter()
        .filter(|&&word| word.starts_with('#'))
        .map(|&s| s.to_string())
        .collect::<Vec<String>>();

    for mention in mentions {
        let entity = mention.replace("nostr:", "").replace('@', "");

        if !tag_set.contains(&entity) {
            if entity.starts_with("npub") {
                if let Ok(public_key) = PublicKey::from_bech32(&entity) {
                    tags.push(Tag::public_key(public_key));
                } else {
                    continue;
                }
            }

            if entity.starts_with("nprofile") {
                // `PublicKey::from_bech32` only accepts the `npub` prefix,
                // so decode `nprofile` entities through NIP-19 instead.
                if let Ok(profile) = Nip19Profile::from_bech32(&entity) {
                    tags.push(Tag::public_key(profile.public_key));
                } else {
                    continue;
                }
            }

            if entity.starts_with("note") {
                if let Ok(event_id) = EventId::from_bech32(&entity) {
                    let hex = event_id.to_hex();
                    let tag = Tag::parse(&["e", &hex, "", "mention"]).unwrap();
                    tags.push(tag);
                } else {
                    continue;
                }
            }

            if entity.starts_with("nevent") {
                if let Ok(event) = Nip19Event::from_bech32(&entity) {
                    let hex = event.event_id.to_hex();
                    let relay = event.clone().relays.into_iter().next().unwrap_or("".into());
                    let tag = Tag::parse(&["e", &hex, &relay, "mention"]).unwrap();

                    // Tag the quoted event's author as well, when known.
                    if let Some(author) = event.author {
                        tags.push(Tag::public_key(author));
                    }

                    tags.push(tag);
                } else {
                    continue;
                }
            }

            tag_set.insert(entity);
        }
    }

    for hashtag in hashtags {
        if !tag_set.contains(&hashtag) {
            tags.push(Tag::hashtag(hashtag.clone()));
            tag_set.insert(hashtag);
        }
    }

    tags
}

#[cfg(test)]
mod tests {
    use super::*;

    #[tokio::test]
    async fn test_parse_event() {
        let content = "Check this image: https://example.com/image.jpg #cool @npub1";
        let meta = parse_event(content).await;

        assert_eq!(meta.content, "Check this image: #cool @npub1");
        assert_eq!(meta.images, vec!["https://example.com/image.jpg"]);
        assert_eq!(meta.videos, Vec::<String>::new());
        assert_eq!(meta.hashtags, vec!["#cool"]);
        assert_eq!(meta.mentions, vec!["@npub1"]);
    }

    #[tokio::test]
    async fn test_parse_video() {
        let content = "Check this video: https://example.com/video.mp4 #cool @npub1";
        let meta = parse_event(content).await;

        assert_eq!(meta.content, "Check this video: #cool @npub1");
        assert_eq!(meta.images, Vec::<String>::new());
        assert_eq!(meta.videos, vec!["https://example.com/video.mp4"]);
        assert_eq!(meta.hashtags, vec!["#cool"]);
        assert_eq!(meta.mentions, vec!["@npub1"]);
    }
}
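
// The module below is an added sketch, not part of the original suite: it
// exercises the word-based hashtag/mention/event extraction in `parse_event`
// without any network traffic, assuming linkify finds no URL in the input
// (there is no `://` scheme and no dot-separated hostname for it to match).
#[cfg(test)]
mod word_parsing_tests {
    use super::*;

    #[tokio::test]
    async fn test_parse_words_only() {
        let content = "Hello #nostr nostr:npub1 and note1";
        let meta = parse_event(content).await;

        // No URLs are detected, so the content should pass through unchanged.
        assert_eq!(meta.content, content);
        assert_eq!(meta.hashtags, vec!["#nostr"]);
        assert_eq!(meta.mentions, vec!["nostr:npub1"]);
        assert_eq!(meta.events, vec!["note1"]);
        assert_eq!(meta.images, Vec::<String>::new());
        assert_eq!(meta.videos, Vec::<String>::new());
    }
}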