use gpui::SharedUri; use regex::Regex; /// Extracts media URLs from a string and returns both the extracted URLs /// and the string with media URLs removed pub struct MediaExtractor { image_regex: Regex, video_regex: Regex, } impl MediaExtractor { /// Creates a new MediaExtractor with compiled regex patterns pub fn new() -> Self { MediaExtractor { // Match common image extensions image_regex: Regex::new( r#"(?i)\bhttps?://[^\s<>"']+\.(?:jpg|jpeg|png|gif|bmp|webp|svg|ico)(?:\?[^\s<>"']*)?\b"#, ).unwrap(), // Match common video extensions video_regex: Regex::new( r#"(?i)\bhttps?://[^\s<>"']+\.(?:mp4|mov|avi|mkv|webm|flv|wmv|m4v|3gp)(?:\?[^\s<>"']*)?\b"#, ).unwrap(), } } /// Extracts all media URLs from a string pub fn extract_media_urls(&self, text: &str) -> Vec { let mut urls = Vec::new(); // Extract image URLs for capture in self.image_regex.find_iter(text) { urls.push(capture.as_str().to_string().into()); } // Extract video URLs for capture in self.video_regex.find_iter(text) { urls.push(capture.as_str().to_string().into()); } urls } /// Removes all media URLs from a string and returns the cleaned text pub fn remove_media_urls(&self, text: &str) -> String { let mut result = text.to_string(); // Remove image URLs result = self.image_regex.replace_all(&result, "").to_string(); // Remove video URLs result = self.video_regex.replace_all(&result, "").to_string(); // Clean up extra whitespace that might result from removal self.cleanup_text(&result) } /// Extracts media URLs and removes them from the string, returning both pub fn extract_and_remove(&self, text: &str) -> (Vec, String) { let urls = self.extract_media_urls(text); let cleaned_text = self.remove_media_urls(text); (urls, cleaned_text) } /// Helper function to clean up text after URL removal fn cleanup_text(&self, text: &str) -> String { let text = text.trim(); // Remove multiple consecutive spaces let re = Regex::new(r"\s+").unwrap(); re.replace_all(text, " ").trim().to_string() } /// Validates if a URL is a valid media URL pub fn is_media_url(&self, url: &str) -> bool { self.image_regex.is_match(url) || self.video_regex.is_match(url) } /// Categorizes extracted URLs into images and videos pub fn categorize_urls(&self, urls: &[SharedUri]) -> (Vec, Vec) { let mut images = Vec::new(); let mut videos = Vec::new(); for url in urls { if self.image_regex.is_match(url) { images.push(url.clone()); } else if self.video_regex.is_match(url) { videos.push(url.clone()); } } (images, videos) } } impl Default for MediaExtractor { fn default() -> Self { Self::new() } } /// Convenience function for one-time extraction and removal pub fn extract_and_remove_media_urls(text: &str) -> (Vec, String) { let extractor = MediaExtractor::new(); extractor.extract_and_remove(text) } /// Convenience function for just extracting media URLs pub fn extract_media_urls(text: &str) -> Vec { let extractor = MediaExtractor::new(); extractor.extract_media_urls(text) } /// Convenience function for just removing media URLs pub fn remove_media_urls(text: &str) -> String { let extractor = MediaExtractor::new(); extractor.remove_media_urls(text) }