add media extractor
This commit is contained in:
@@ -1,8 +1,8 @@
|
||||
use std::hash::Hash;
|
||||
use std::ops::Range;
|
||||
|
||||
use common::{EventExt, NostrParser};
|
||||
use gpui::SharedString;
|
||||
use common::{EventExt, NostrParser, extract_and_remove_media_urls};
|
||||
use gpui::{SharedString, SharedUri};
|
||||
use nostr_sdk::prelude::*;
|
||||
|
||||
/// New message.
|
||||
@@ -132,6 +132,8 @@ pub struct RenderedMessage {
|
||||
pub author: PublicKey,
|
||||
/// The content/text of the message
|
||||
pub content: String,
|
||||
/// List of media URLs in the message
|
||||
pub media: Vec<SharedUri>,
|
||||
/// Message created time as unix timestamp
|
||||
pub created_at: Timestamp,
|
||||
/// List of mentioned public keys in the message
|
||||
@@ -144,11 +146,13 @@ impl From<&Event> for RenderedMessage {
|
||||
fn from(val: &Event) -> Self {
|
||||
let mentions = extract_mentions(&val.content);
|
||||
let replies_to = extract_reply_ids(&val.tags);
|
||||
let (media, string) = extract_and_remove_media_urls(&val.content);
|
||||
|
||||
Self {
|
||||
id: val.id,
|
||||
author: val.pubkey,
|
||||
content: val.content.clone(),
|
||||
content: string,
|
||||
media,
|
||||
created_at: val.created_at,
|
||||
mentions,
|
||||
replies_to,
|
||||
@@ -160,12 +164,14 @@ impl From<&UnsignedEvent> for RenderedMessage {
|
||||
fn from(val: &UnsignedEvent) -> Self {
|
||||
let mentions = extract_mentions(&val.content);
|
||||
let replies_to = extract_reply_ids(&val.tags);
|
||||
let (media, string) = extract_and_remove_media_urls(&val.content);
|
||||
|
||||
Self {
|
||||
// Event ID must be known
|
||||
id: val.id.unwrap(),
|
||||
author: val.pubkey,
|
||||
content: val.content.clone(),
|
||||
content: string,
|
||||
media,
|
||||
created_at: val.created_at,
|
||||
mentions,
|
||||
replies_to,
|
||||
@@ -177,12 +183,14 @@ impl From<&NewMessage> for RenderedMessage {
|
||||
fn from(val: &NewMessage) -> Self {
|
||||
let mentions = extract_mentions(&val.rumor.content);
|
||||
let replies_to = extract_reply_ids(&val.rumor.tags);
|
||||
let (media, string) = extract_and_remove_media_urls(&val.rumor.content);
|
||||
|
||||
Self {
|
||||
// Event ID must be known
|
||||
id: val.rumor.id.unwrap(),
|
||||
author: val.rumor.pubkey,
|
||||
content: val.rumor.content.clone(),
|
||||
content: string,
|
||||
media,
|
||||
created_at: val.rumor.created_at,
|
||||
mentions,
|
||||
replies_to,
|
||||
|
||||
@@ -20,3 +20,4 @@ log.workspace = true
|
||||
dirs = "5.0"
|
||||
qrcode = "0.14.1"
|
||||
bech32 = "0.11.1"
|
||||
regex = "1.10"
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
pub use debounced_delay::*;
|
||||
pub use display::*;
|
||||
pub use event::*;
|
||||
pub use media_extractor::*;
|
||||
pub use parser::*;
|
||||
pub use paths::*;
|
||||
pub use range::*;
|
||||
@@ -8,6 +9,7 @@ pub use range::*;
|
||||
mod debounced_delay;
|
||||
mod display;
|
||||
mod event;
|
||||
mod media_extractor;
|
||||
mod parser;
|
||||
mod paths;
|
||||
mod range;
|
||||
|
||||
117
crates/common/src/media_extractor.rs
Normal file
117
crates/common/src/media_extractor.rs
Normal file
@@ -0,0 +1,117 @@
|
||||
use gpui::SharedUri;
|
||||
use regex::Regex;
|
||||
|
||||
/// Extracts media URLs from a string and returns both the extracted URLs
|
||||
/// and the string with media URLs removed
|
||||
pub struct MediaExtractor {
|
||||
image_regex: Regex,
|
||||
video_regex: Regex,
|
||||
}
|
||||
|
||||
impl MediaExtractor {
|
||||
/// Creates a new MediaExtractor with compiled regex patterns
|
||||
pub fn new() -> Self {
|
||||
MediaExtractor {
|
||||
// Match common image extensions
|
||||
image_regex: Regex::new(
|
||||
r#"(?i)\bhttps?://[^\s<>"']+\.(?:jpg|jpeg|png|gif|bmp|webp|svg|ico)(?:\?[^\s<>"']*)?\b"#,
|
||||
).unwrap(),
|
||||
// Match common video extensions
|
||||
video_regex: Regex::new(
|
||||
r#"(?i)\bhttps?://[^\s<>"']+\.(?:mp4|mov|avi|mkv|webm|flv|wmv|m4v|3gp)(?:\?[^\s<>"']*)?\b"#,
|
||||
).unwrap(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Extracts all media URLs from a string
|
||||
pub fn extract_media_urls(&self, text: &str) -> Vec<SharedUri> {
|
||||
let mut urls = Vec::new();
|
||||
|
||||
// Extract image URLs
|
||||
for capture in self.image_regex.find_iter(text) {
|
||||
urls.push(capture.as_str().to_string().into());
|
||||
}
|
||||
|
||||
// Extract video URLs
|
||||
for capture in self.video_regex.find_iter(text) {
|
||||
urls.push(capture.as_str().to_string().into());
|
||||
}
|
||||
|
||||
urls
|
||||
}
|
||||
|
||||
/// Removes all media URLs from a string and returns the cleaned text
|
||||
pub fn remove_media_urls(&self, text: &str) -> String {
|
||||
let mut result = text.to_string();
|
||||
|
||||
// Remove image URLs
|
||||
result = self.image_regex.replace_all(&result, "").to_string();
|
||||
|
||||
// Remove video URLs
|
||||
result = self.video_regex.replace_all(&result, "").to_string();
|
||||
|
||||
// Clean up extra whitespace that might result from removal
|
||||
self.cleanup_text(&result)
|
||||
}
|
||||
|
||||
/// Extracts media URLs and removes them from the string, returning both
|
||||
pub fn extract_and_remove(&self, text: &str) -> (Vec<SharedUri>, String) {
|
||||
let urls = self.extract_media_urls(text);
|
||||
let cleaned_text = self.remove_media_urls(text);
|
||||
(urls, cleaned_text)
|
||||
}
|
||||
|
||||
/// Helper function to clean up text after URL removal
|
||||
fn cleanup_text(&self, text: &str) -> String {
|
||||
let text = text.trim();
|
||||
|
||||
// Remove multiple consecutive spaces
|
||||
let re = Regex::new(r"\s+").unwrap();
|
||||
re.replace_all(text, " ").to_string()
|
||||
}
|
||||
|
||||
/// Validates if a URL is a valid media URL
|
||||
pub fn is_media_url(&self, url: &str) -> bool {
|
||||
self.image_regex.is_match(url) || self.video_regex.is_match(url)
|
||||
}
|
||||
|
||||
/// Categorizes extracted URLs into images and videos
|
||||
pub fn categorize_urls(&self, urls: &[SharedUri]) -> (Vec<SharedUri>, Vec<SharedUri>) {
|
||||
let mut images = Vec::new();
|
||||
let mut videos = Vec::new();
|
||||
|
||||
for url in urls {
|
||||
if self.image_regex.is_match(url) {
|
||||
images.push(url.clone());
|
||||
} else if self.video_regex.is_match(url) {
|
||||
videos.push(url.clone());
|
||||
}
|
||||
}
|
||||
|
||||
(images, videos)
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for MediaExtractor {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
/// Convenience function for one-time extraction and removal
|
||||
pub fn extract_and_remove_media_urls(text: &str) -> (Vec<SharedUri>, String) {
|
||||
let extractor = MediaExtractor::new();
|
||||
extractor.extract_and_remove(text)
|
||||
}
|
||||
|
||||
/// Convenience function for just extracting media URLs
|
||||
pub fn extract_media_urls(text: &str) -> Vec<SharedUri> {
|
||||
let extractor = MediaExtractor::new();
|
||||
extractor.extract_media_urls(text)
|
||||
}
|
||||
|
||||
/// Convenience function for just removing media URLs
|
||||
pub fn remove_media_urls(text: &str) -> String {
|
||||
let extractor = MediaExtractor::new();
|
||||
extractor.remove_media_urls(text)
|
||||
}
|
||||
Reference in New Issue
Block a user