polish
This commit is contained in:
@@ -19,7 +19,13 @@ interface IPreFetchedResource {
|
||||
imagesPropertyType?: string;
|
||||
proxyUrl?: string;
|
||||
url: string;
|
||||
data: any;
|
||||
data: string;
|
||||
}
|
||||
|
||||
/**
 * SSRF guard: rejects any address that matches `OPENGRAPH.REGEX_LOOPBACK`.
 * @param address value to test against the loopback pattern
 * @throws Error when the address matches the loopback pattern
 */
function throwOnLoopback(address: string) {
  const isLoopback = OPENGRAPH.REGEX_LOOPBACK.test(address);

  if (!isLoopback) {
    return;
  }

  throw new Error('SSRF request detected, trying to query host');
}
|
||||
|
||||
function metaTag(doc: cheerio.CheerioAPI, type: string, attr: string) {
|
||||
@@ -28,42 +34,42 @@ function metaTag(doc: cheerio.CheerioAPI, type: string, attr: string) {
|
||||
}
|
||||
|
||||
/**
 * Reads the `content` attribute of the element selected by
 * `meta[<attr>='<type>']`.
 * @param doc parsed document to query
 * @param type attribute value to match, e.g. `og:title`
 * @param attr attribute name to match on, e.g. `property` or `name`
 * @returns whatever `.attr('content')` yields for the selection
 */
function metaTagContent(doc: cheerio.CheerioAPI, type: string, attr: string) {
  // NOTE(review): `type` and `attr` are interpolated into the selector unescaped;
  // every call visible in this file passes fixed literals.
  return doc(`meta[${attr}='${type}']`).attr(`content`);
}
|
||||
|
||||
/**
 * Resolves the page title.
 *
 * Lookup order: `og:title` matched on the `property` attribute, then `og:title`
 * matched on the `name` attribute, then the text of the `<title>` element.
 * @param doc parsed document to query
 * @returns the first non-empty candidate, or the `<title>` text as a last resort
 */
function getTitle(doc: cheerio.CheerioAPI) {
  return (
    metaTagContent(doc, `og:title`, `property`) ||
    metaTagContent(doc, `og:title`, `name`) ||
    doc(`title`).text()
  );
}
|
||||
|
||||
/**
 * Resolves the site name from the `og:site_name` meta tag, matching first on
 * the `property` attribute and then on the `name` attribute.
 * @param doc parsed document to query
 * @returns the first non-empty `content` value found, otherwise the falsy
 * result of the last lookup
 */
function getSiteName(doc: cheerio.CheerioAPI) {
  return (
    metaTagContent(doc, `og:site_name`, `property`) ||
    metaTagContent(doc, `og:site_name`, `name`)
  );
}
|
||||
|
||||
/**
 * Resolves the page description.
 *
 * Lookup order: `<meta name="description">`, `<meta name="Description">`
 * (capitalised variant), then `<meta property="og:description">`.
 * @param doc parsed document to query
 * @returns the first non-empty `content` value found, otherwise the falsy
 * result of the last lookup
 */
function getDescription(doc: cheerio.CheerioAPI) {
  return (
    metaTagContent(doc, `description`, `name`) ||
    metaTagContent(doc, `Description`, `name`) ||
    metaTagContent(doc, `og:description`, `property`)
  );
}
|
||||
|
||||
/**
 * Resolves the media type of the page.
 *
 * A `<meta name="medium">` tag takes precedence; its `content` value `image`
 * is reported as `photo`, any other value is returned unchanged. Without that
 * tag the `og:type` meta tag is used, matched on `property` and then on `name`.
 * @param doc parsed document to query
 * @returns the resolved media type, or a falsy value when no tag supplies one
 */
function getMediaType(doc: cheerio.CheerioAPI) {
  const node = metaTag(doc, `medium`, `name`);

  if (node) {
    const content = node.attr(`content`);
    return content === `image` ? `photo` : content;
  }

  return (
    metaTagContent(doc, `og:type`, `property`) || metaTagContent(doc, `og:type`, `name`)
  );
}
|
||||
|
||||
@@ -77,14 +83,14 @@ function getImages(
|
||||
let src: string | undefined;
|
||||
let dic: Record<string, boolean> = {};
|
||||
|
||||
const imagePropertyType = imagesPropertyType ?? 'og';
|
||||
const imagePropertyType = imagesPropertyType ?? `og`;
|
||||
nodes =
|
||||
metaTag(doc, `${imagePropertyType}:image`, 'property') ||
|
||||
metaTag(doc, `${imagePropertyType}:image`, 'name');
|
||||
metaTag(doc, `${imagePropertyType}:image`, `property`) ||
|
||||
metaTag(doc, `${imagePropertyType}:image`, `name`);
|
||||
|
||||
if (nodes) {
|
||||
nodes.each((_: number, node: cheerio.Element) => {
|
||||
if (node.type === 'tag') {
|
||||
if (node.type === `tag`) {
|
||||
src = node.attribs.content;
|
||||
if (src) {
|
||||
src = new URL(src, rootUrl).href;
|
||||
@@ -95,18 +101,18 @@ function getImages(
|
||||
}
|
||||
|
||||
if (images.length <= 0 && !imagesPropertyType) {
|
||||
src = doc('link[rel=image_src]').attr('href');
|
||||
src = doc(`link[rel=image_src]`).attr(`href`);
|
||||
if (src) {
|
||||
src = new URL(src, rootUrl).href;
|
||||
images = [src];
|
||||
} else {
|
||||
nodes = doc('img');
|
||||
nodes = doc(`img`);
|
||||
|
||||
if (nodes?.length) {
|
||||
dic = {};
|
||||
images = [];
|
||||
nodes.each((_: number, node: cheerio.Element) => {
|
||||
if (node.type === 'tag') src = node.attribs.src;
|
||||
if (node.type === `tag`) src = node.attribs.src;
|
||||
if (src && !dic[src]) {
|
||||
dic[src] = true;
|
||||
// width = node.attribs.width;
|
||||
@@ -135,32 +141,32 @@ function getVideos(doc: cheerio.CheerioAPI) {
|
||||
let videoObj;
|
||||
let index;
|
||||
|
||||
const nodes = metaTag(doc, 'og:video', 'property') || metaTag(doc, 'og:video', 'name');
|
||||
const nodes = metaTag(doc, `og:video`, `property`) || metaTag(doc, `og:video`, `name`);
|
||||
|
||||
if (nodes?.length) {
|
||||
nodeTypes =
|
||||
metaTag(doc, 'og:video:type', 'property') || metaTag(doc, 'og:video:type', 'name');
|
||||
metaTag(doc, `og:video:type`, `property`) || metaTag(doc, `og:video:type`, `name`);
|
||||
nodeSecureUrls =
|
||||
metaTag(doc, 'og:video:secure_url', 'property') ||
|
||||
metaTag(doc, 'og:video:secure_url', 'name');
|
||||
metaTag(doc, `og:video:secure_url`, `property`) ||
|
||||
metaTag(doc, `og:video:secure_url`, `name`);
|
||||
width =
|
||||
metaTagContent(doc, 'og:video:width', 'property') ||
|
||||
metaTagContent(doc, 'og:video:width', 'name');
|
||||
metaTagContent(doc, `og:video:width`, `property`) ||
|
||||
metaTagContent(doc, `og:video:width`, `name`);
|
||||
height =
|
||||
metaTagContent(doc, 'og:video:height', 'property') ||
|
||||
metaTagContent(doc, 'og:video:height', 'name');
|
||||
metaTagContent(doc, `og:video:height`, `property`) ||
|
||||
metaTagContent(doc, `og:video:height`, `name`);
|
||||
|
||||
for (index = 0; index < nodes.length; index += 1) {
|
||||
const node = nodes[index];
|
||||
if (node.type === 'tag') video = node.attribs.content;
|
||||
if (node.type === `tag`) video = node.attribs.content;
|
||||
|
||||
nodeType = nodeTypes?.[index];
|
||||
if (nodeType?.type === 'tag') {
|
||||
if (nodeType?.type === `tag`) {
|
||||
videoType = nodeType ? nodeType.attribs.content : null;
|
||||
}
|
||||
|
||||
nodeSecureUrl = nodeSecureUrls?.[index];
|
||||
if (nodeSecureUrl?.type === 'tag') {
|
||||
if (nodeSecureUrl?.type === `tag`) {
|
||||
videoSecureUrl = nodeSecureUrl ? nodeSecureUrl.attribs.content : null;
|
||||
}
|
||||
|
||||
@@ -171,7 +177,7 @@ function getVideos(doc: cheerio.CheerioAPI) {
|
||||
width,
|
||||
height,
|
||||
};
|
||||
if (videoType && videoType.indexOf('video/') === 0) {
|
||||
if (videoType && videoType.indexOf(`video/`) === 0) {
|
||||
videos.splice(0, 0, videoObj);
|
||||
} else {
|
||||
videos.push(videoObj);
|
||||
@@ -193,7 +199,7 @@ function getFavicons(doc: cheerio.CheerioAPI, rootUrl: string) {
|
||||
let nodes: cheerio.Cheerio<cheerio.Element> | never[] = [];
|
||||
let src: string | undefined;
|
||||
|
||||
const relSelectors = ['rel=icon', `rel="shortcut icon"`, 'rel=apple-touch-icon'];
|
||||
const relSelectors = [`rel=icon`, `rel="shortcut icon"`, `rel=apple-touch-icon`];
|
||||
|
||||
relSelectors.forEach((relSelector) => {
|
||||
// look for all icon tags
|
||||
@@ -202,9 +208,9 @@ function getFavicons(doc: cheerio.CheerioAPI, rootUrl: string) {
|
||||
// collect all images from icon tags
|
||||
if (nodes.length) {
|
||||
nodes.each((_: number, node: cheerio.Element) => {
|
||||
if (node.type === 'tag') src = node.attribs.href;
|
||||
if (node.type === `tag`) src = node.attribs.href;
|
||||
if (src) {
|
||||
src = new URL(rootUrl).href;
|
||||
src = new URL(src, rootUrl).href;
|
||||
images.push(src);
|
||||
}
|
||||
});
|
||||
@@ -222,7 +228,7 @@ function getFavicons(doc: cheerio.CheerioAPI, rootUrl: string) {
|
||||
function parseImageResponse(url: string, contentType: string) {
|
||||
return {
|
||||
url,
|
||||
mediaType: 'image',
|
||||
mediaType: `image`,
|
||||
contentType,
|
||||
favicons: [getDefaultFavicon(url)],
|
||||
};
|
||||
@@ -231,7 +237,7 @@ function parseImageResponse(url: string, contentType: string) {
|
||||
function parseAudioResponse(url: string, contentType: string) {
|
||||
return {
|
||||
url,
|
||||
mediaType: 'audio',
|
||||
mediaType: `audio`,
|
||||
contentType,
|
||||
favicons: [getDefaultFavicon(url)],
|
||||
};
|
||||
@@ -240,7 +246,7 @@ function parseAudioResponse(url: string, contentType: string) {
|
||||
function parseVideoResponse(url: string, contentType: string) {
|
||||
return {
|
||||
url,
|
||||
mediaType: 'video',
|
||||
mediaType: `video`,
|
||||
contentType,
|
||||
favicons: [getDefaultFavicon(url)],
|
||||
};
|
||||
@@ -249,7 +255,7 @@ function parseVideoResponse(url: string, contentType: string) {
|
||||
function parseApplicationResponse(url: string, contentType: string) {
|
||||
return {
|
||||
url,
|
||||
mediaType: 'application',
|
||||
mediaType: `application`,
|
||||
contentType,
|
||||
favicons: [getDefaultFavicon(url)],
|
||||
};
|
||||
@@ -268,7 +274,7 @@ function parseTextResponse(
|
||||
title: getTitle(doc),
|
||||
siteName: getSiteName(doc),
|
||||
description: getDescription(doc),
|
||||
mediaType: getMediaType(doc) || 'website',
|
||||
mediaType: getMediaType(doc) || `website`,
|
||||
contentType,
|
||||
images: getImages(doc, url, options.imagesPropertyType),
|
||||
videos: getVideos(doc),
|
||||
@@ -287,11 +293,11 @@ function parseUnknownResponse(
|
||||
|
||||
function parseResponse(response: IPreFetchedResource, options?: ILinkPreviewOptions) {
|
||||
try {
|
||||
let contentType = response.headers['content-type'];
|
||||
let contentType = response.headers[`content-type`];
|
||||
// console.warn(`original content type`, contentType);
|
||||
if (contentType?.indexOf(';')) {
|
||||
if (contentType?.indexOf(`;`)) {
|
||||
// eslint-disable-next-line prefer-destructuring
|
||||
contentType = contentType.split(';')[0];
|
||||
contentType = contentType.split(`;`)[0];
|
||||
// console.warn(`splitting content type`, contentType);
|
||||
}
|
||||
|
||||
@@ -330,19 +336,117 @@ function parseResponse(response: IPreFetchedResource, options?: ILinkPreviewOpti
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Parses the text, extracts the first link it finds and does an HTTP request
 * to fetch the website content, afterwards it tries to parse the internal HTML
 * and extract the information via meta tags.
 *
 * Options read here:
 * - `resolveDNSHost`: called before each request; its result is checked with
 *   `throwOnLoopback`.
 * - `timeout`: milliseconds before the request is aborted (default 3000).
 * - `headers`: request headers (default `{}`).
 * - `followRedirects`: passed to fetch as `redirect` (default `error`).
 * - `handleRedirects`: required when `followRedirects` is `manual`; decides
 *   whether the single follow-up request is made.
 * - `proxyUrl`: prepended to the detected URL and stripped from the final URL.
 *
 * @param text string, text to be parsed
 * @param options ILinkPreviewOptions
 * @returns the result of `parseResponse` for the fetched resource
 * @throws Error when `text` is empty / not a string or contains no valid URL,
 * when `followRedirects` is `manual` without `handleRedirects`, when
 * `handleRedirects` rejects the redirect, when `throwOnLoopback` rejects a
 * resolved host, or with `Request timeout` when the timeout aborts a request.
 * Any other failure is rethrown unchanged.
 */
export async function getLinkPreview(text: string, options?: ILinkPreviewOptions) {
  if (!text || typeof text !== `string`) {
    throw new Error(`link-preview-js did not receive a valid url or text`);
  }

  // The first whitespace-separated token that looks like a URL wins.
  const detectedUrl = text
    .replace(/\n/g, ` `)
    .split(` `)
    .find((token) => OPENGRAPH.REGEX_VALID_URL.test(token));

  if (!detectedUrl) {
    throw new Error(`link-preview-js did not receive a valid a url or text`);
  }

  if (options?.followRedirects === `manual` && !options?.handleRedirects) {
    throw new Error(
      `link-preview-js followRedirects is set to manual, but no handleRedirects function was provided`
    );
  }

  if (options?.resolveDNSHost) {
    const resolvedUrl = await options.resolveDNSHost(detectedUrl);

    throwOnLoopback(resolvedUrl);
  }

  const timeout = options?.timeout ?? 3000; // 3 second timeout default
  const controller = new AbortController();
  const timeoutCounter = setTimeout(() => controller.abort(), timeout);

  const fetchOptions = {
    headers: options?.headers ?? {},
    redirect: options?.followRedirects ?? `error`,
    signal: controller.signal,
  };

  const fetchUrl = options?.proxyUrl ? options.proxyUrl.concat(detectedUrl) : detectedUrl;

  try {
    // Seems like fetchOptions type definition is out of date
    // https://github.com/node-fetch/node-fetch/issues/741
    let response = await fetch(fetchUrl, fetchOptions as any);

    if (
      response.status > 300 &&
      response.status < 309 &&
      fetchOptions.redirect === `manual` &&
      options?.handleRedirects
    ) {
      const locationHeader = response.headers.get(`location`) || ``;

      // A Location header may be a relative reference; resolve it against the
      // request URL so the follow-up fetch receives an absolute URL. Absolute
      // (and empty) values are passed through untouched so `handleRedirects`
      // sees exactly what the server sent.
      const forwardedUrl =
        !locationHeader || /^https?:\/\//i.test(locationHeader)
          ? locationHeader
          : new URL(locationHeader, fetchUrl).href;

      if (!options.handleRedirects(fetchUrl, forwardedUrl)) {
        throw new Error(`link-preview-js could not handle redirect`);
      }

      if (options?.resolveDNSHost) {
        const resolvedUrl = await options.resolveDNSHost(forwardedUrl);

        throwOnLoopback(resolvedUrl);
      }

      // Only one redirect hop is followed manually.
      response = await fetch(forwardedUrl, fetchOptions as any);
    }

    // The timeout only guards the request phase; stop it before reading the body.
    clearTimeout(timeoutCounter);

    const headers: Record<string, string> = {};
    response.headers.forEach((header, key) => {
      headers[key] = header;
    });

    const normalizedResponse: IPreFetchedResource = {
      url: options?.proxyUrl ? response.url.replace(options.proxyUrl, ``) : response.url,
      headers,
      data: await response.text(),
    };

    return parseResponse(normalizedResponse, options);
  } catch (e) {
    // The controller is only ever aborted by the timeout timer above.
    if ((e as { name?: string } | null | undefined)?.name === `AbortError`) {
      throw new Error(`Request timeout`);
    }

    throw e;
  } finally {
    // Runs on every exit path so a failed redirect check or request never
    // leaves the timer pending; clearing an already-cleared timer is a no-op.
    clearTimeout(timeoutCounter);
  }
}
|
||||
|
||||
/**
 * Builds a preview from a response the caller has already fetched: the library
 * makes no request of its own and only applies its internal HTML parsing to
 * the supplied object.
 * @param response pre-fetched resource; must be an object with a truthy `url`
 * @param options ILinkPreviewOptions, forwarded unchanged to `parseResponse`
 * @returns the result of `parseResponse(response, options)`
 * @throws Error when `response` is missing, is not an object, or has no `url`
 */
export async function getPreviewFromContent(
  response: IPreFetchedResource,
  options?: ILinkPreviewOptions
) {
  const isUsable =
    Boolean(response) && typeof response === `object` && Boolean(response.url);

  if (!isUsable) {
    throw new Error(`link-preview-js did not receive a valid response object`);
  }

  return parseResponse(response, options);
}
|
||||
|
||||
Reference in New Issue
Block a user