/**
* @module utils
*/
import * as path from "node:path";
import {fileURLToPath} from "node:url";
import fse from "fs-extra";
import YAML from "yaml";
import * as parse5 from "parse5";
import readdirp from "readdirp";
import picomatch from "picomatch";
import nunjucks from "nunjucks";
import {Site, File, Category, Tag, TOC} from "./types.js";
/**
* @description Hikaru's package dir.
*/
const hikaruDir = path.resolve(
path.dirname(fileURLToPath(import.meta.url)),
"../"
);
/**
* @param {String} filepath
* @return {Promise<Object>}
*/
const loadJSON = async (filepath) => {
return JSON.parse(await fse.readFile(filepath, "utf8"));
};
/**
* @param {String} filepath
* @return {Object}
*/
const loadJSONSync = (filepath) => {
return JSON.parse(fse.readFileSync(filepath, "utf8"));
};
/**
* @private
* @description This is Hikaru's package.json, it is used internally.
*/
const pkgJSON = loadJSONSync(path.join(hikaruDir, "package.json"));
/**
* @param {String} filepath
* @return {Promise<Object>}
*/
const loadYAML = async (filepath) => {
return YAML.parse(await fse.readFile(filepath, "utf8"));
};
/**
* @param {String} filepath
* @return {Object}
*/
const loadYAMLSync = (filepath) => {
return YAML.parse(fse.readFileSync(filepath, "utf8"));
};
/**
* @param {*} o
* @return {Boolean}
*/
const isNumber = (o) => {
return typeof o === "number" || o instanceof Number;
};
/**
* @param {*} o
* @return {Boolean}
*/
const isString = (o) => {
return typeof o === "string" || o instanceof String;
};
/**
* @param {*} o
* @return {Boolean}
*/
const isArray = (o) => {
return Array.isArray(o);
};
/**
* @param {*} o
* @return {Boolean}
*/
const isFunction = (o) => {
return o instanceof Function;
};
/**
* @param {*} o
* @return {Boolean} Return `false` when `o == null`.
*/
const isObject = (o) => {
return typeof o === "object" && o != null;
};
/**
* @param {*} o
* @return {Boolean}
*/
const isBuffer = (o) => {
return Buffer.isBuffer(o);
};
/**
* @private
*/
const typeCheckers = {
"Number": isNumber,
"String": isString,
"Array": isArray,
"Function": isFunction,
"Buffer": isBuffer,
"Object": isObject,
"null": (o) => {
return o == null;
}
};
/**
* @description Check whether a variable is one of the given types.
* @param {*} variable
* @param {String} name Variable name.
* @param {...String} types
*/
const checkType = (variable, name, ...types) => {
// Previously we accepted a String or an Array of Strings, keep it compatible.
types = types.flat();
for (const type of types) {
if (typeCheckers[type] == null) {
throw new TypeError(`\`types\` should only contain following types: ${
Object.keys(typeCheckers).join(", ")
}.`);
}
}
const ok = types.some((type) => {
return typeCheckers[type](variable);
});
if (!ok) {
throw new TypeError(`\`${name}\` should only be one of following types: ${
types.join(", ")
}.`);
}
};
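// A minimal usage sketch (values are illustrative): `checkType()` throws a
// TypeError unless the variable matches at least one of the given type names.
//
//   checkType("hikaru", "name", "String");        // Passes silently.
//   checkType(42, "perPage", "Number", "null");   // Passes (Number).
//   checkType([], "perPage", "Number");           // Throws TypeError.
//   checkType([], "keys", ["Array", "null"]);     // Array form still works.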
/**
* @description Check whether a buffer is binary. You should prefer
* `isBinaryPath(p)` because this is costly, and a false positive is mostly
* harmless so we don't need an exact check. For a non-costly exact check, add
* the `isbinaryfile` dependency to your project.
* @param {Buffer} b
* @return {Boolean}
*/
const isBinary = (b) => {
return isBuffer(b) && !b.equals(Buffer.from(b.toString("utf8"), "utf8"));
};
/**
* @private
*/
const binExt = loadJSONSync(path.join(hikaruDir, "hikaru", "bin-ext.json"));
/**
* @description Check whether a path is binary via its extension name. For a
* non-costly exact check, add the `isbinaryfile` dependency to your project.
* @param {String} p
* @return {Boolean}
*/
const isBinaryPath = (p) => {
return binExt.includes(path.extname(p).toLowerCase());
};
/**
* @deprecated
* @description You should mostly use `isBinaryPath(p)`. This is only kept for
* compatibility because the `isbinaryfile` dependency has been dropped.
* @param {String|Buffer} o
* @return {Boolean}
*/
const isBinaryFile = (o) => {
return isBuffer(o) ? isBinary(o) : isBinaryPath(o);
};
/**
* @deprecated
* @description You should mostly use `isBinaryPath(p)`. This is only kept for
* compatibility because the `isbinaryfile` dependency has been dropped.
* @param {String|Buffer} o
* @return {Boolean}
*/
const isBinaryFileSync = isBinaryFile;
/**
* @description Node.js marks `fs.exists()` as deprecated and suggests using
* `fs.access()`, but that throws an error instead of returning a boolean, so
* this is a wrapper for it.
* @see https://nodejs.org/api/fs.html#fsaccesssyncpath-mode
* @param {String} p
* @return {Boolean}
*/
const isReadableSync = (p) => {
try {
fse.accessSync(p, fse.constants.R_OK);
return true;
} catch (error) {
return false;
}
};
/**
* @description Escape HTML chars.
* @param {String} str
* @return {String} Escaped HTML string.
*/
const escapeHTML = (str) => {
return str.replace(/&/g, "&amp;")
.replace(/</g, "&lt;")
.replace(/>/g, "&gt;")
.replace(/"/g, "&quot;")
.replace(/'/g, "&#039;");
};
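// A small sketch of the expected output (input is illustrative):
//
//   escapeHTML("Tom & Jerry's <b>\"hi\"</b>");
//   // => "Tom &amp; Jerry&#039;s &lt;b&gt;&quot;hi&quot;&lt;/b&gt;"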
/**
* @description Remove all HTML tags.
* @param {String} str
* @return {String} String without HTML tags.
*/
const removeHTMLTags = (str) => {
return str.replace(/<\/?[^>]+>/gi, "");
};
/**
* @callback compareCallback
* @description Comparing function accepted by `Array.sort()`.
* @param {*} a
* @param {*} b
* @return {Number}
*/
/**
* @description Sort an array with a list of fallback comparators, calling each
* comparing function from first to last until one reports a difference.
* @param {Array} arr
* @param {...compareCallback} fns
*/
const fallbackSort = (arr, ...fns) => {
fns = fns.filter((fn) => {
return isFunction(fn);
});
if (!isArray(arr) || fns.length === 0) {
return;
}
arr.sort((a, b) => {
for (const fn of fns) {
const result = fn(a, b);
if (result !== 0) {
return result;
}
}
return 0;
});
};
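// Usage sketch (data is illustrative): sort by year first, then by title when
// years are equal.
//
//   const posts = [{"year": 2021, "title": "b"}, {"year": 2021, "title": "a"}];
//   fallbackSort(
//     posts,
//     (a, b) => b["year"] - a["year"],
//     (a, b) => a["title"].localeCompare(b["title"])
//   );
//   // `posts` is now [{"year": 2021, "title": "a"}, {"year": 2021, "title": "b"}].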
/**
* @description A Promised glob.
* @param {String} pattern
* @param {Object} [opts] Optional match opts.
* @param {String} [opts.workDir=.] Working dir for this match.
* @param {Boolean} [opts.ignoreDir=true] Ignore directories or not.
* @param {Boolean} [opts.ignoreHidden=true] Ignore hidden files or not.
* @param {Boolean} [opts.recursive=true] Set to false if you don't care about
* subdirs to get better performance.
* @return {Promise<String[]>}
*/
const matchFiles = (pattern, opts = {}) => {
if (opts["ignoreDir"] == null) {
opts["ignoreDir"] = true;
}
if (opts["ignoreHidden"] == null) {
opts["ignoreHidden"] = true;
}
if (opts["recursive"] == null) {
opts["recursive"] = true;
}
opts["workDir"] = opts["workDir"] || ".";
const isMatch = picomatch(pattern, {"dot": !opts["ignoreHidden"]});
const matchWithHidden = (entry) => {
return isMatch(entry["path"]);
};
// We don't use picomatch to detect hidden files; checking the basename is
// more reliable.
const matchWithoutHidden = (entry) => {
const basename = path.basename(entry["path"]);
return !basename.startsWith(".") && matchWithHidden(entry);
};
const readdirpOpts = {
// readdirp supports glob patterns as filters and calls picomatch internally,
// but we don't use that because it does not support `**`, and patterns must
// be either all inclusive or all exclusive.
"fileFilter": opts["ignoreHidden"] ? matchWithoutHidden : matchWithHidden,
"type": opts["ignoreDir"] ? "files" : "files_directories"
};
if (!opts["recursive"]) {
readdirpOpts["depth"] = 1;
}
return readdirp.promise(opts["workDir"], readdirpOpts).then((entries) => {
return entries.map((entry) => {return entry["path"];});
});
};
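// Usage sketch (pattern and dir are hypothetical): collect Markdown files
// under `srcs/`, with hidden files and directories ignored by default.
//
//   const paths = await matchFiles("**/*.md", {"workDir": "srcs"});
//   // => e.g. ["about/index.md", "posts/hello.md"], relative to `srcs`.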
/**
* @description Remove XML control chars.
* @param {String} str
* @return {String} String without control chars.
*/
const removeControlChars = (str) => {
/* eslint-disable-next-line no-control-regex */
return str.replace(/[\x00-\x1F\x7F]/g, "");
};
/**
* @typedef {Object} FrontMatter
* @property {Object} attributes Parsed front-matter properties.
* @property {String} body String after front-matter.
* @property {String} frontMatter Front-matter string.
*/
/**
* @description Get front-matter from a string. Front-matter here must be at
* the beginning of the string (so a UTF-8 BOM is not supported), must begin
* with `---` on the first line, followed by valid YAML lines, and must end
* with another `---` line.
* @param {String} str
* @return {FrontMatter}
*/
const getFrontMatter = (str) => {
// Return if no front matter. Only check once so `g` is not required.
if (!/^---+\r?\n/.test(str)) {
return {"attributes": {}, "body": str};
}
// We split the string manually instead of using `str.split(regexp, 3)`,
// because that splits the whole string first and then returns the first 3
// results, while we only want to split twice.
//
// Flag `m` enables per-line matching, and `exec()` and `matchAll()` require
// flag `g`.
//
// We need to use /\r?\n/ here instead of /$/, so that `\r` and `\n` become
// part of the match results and we can exclude them from front-matter and
// body.
const regexp = /^---+\r?\n/gm;
// RegExp is stateful so `exec()` will start after last matched result.
const fmBegin = regexp.exec(str);
const fmEnd = regexp.exec(str);
// `null` is returned if not match, which means no front-matter at all.
if (fmBegin == null || fmEnd == null) {
return {"attributes": {}, "body": str};
}
// Matched result looks like
// `["---\n", "index": 0, "input": <the whole string>, "groups": undefined]`,
// a mix of Array and Object. We use it to split the string manually.
const result = {
"body": str.substring(fmEnd.index + fmEnd[0].length),
"frontMatter": str.substring(fmBegin.index + fmBegin[0].length, fmEnd.index)
};
try {
result["attributes"] = YAML.parse(result["frontMatter"]);
} catch (error) {
result["attributes"] = {};
}
return result;
};
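// A worked sketch (input is illustrative):
//
//   getFrontMatter("---\ntitle: Hello\n---\nBody text.");
//   // => {
//   //   "body": "Body text.",
//   //   "frontMatter": "title: Hello\n",
//   //   "attributes": {"title": "Hello"}
//   // }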
/**
* @description Parse front-matter and set properties to file.
* @param {File} file
* @return {File}
*/
const parseFrontMatter = (file) => {
if (file["text"] == null) {
return file;
}
const parsed = getFrontMatter(file["text"]);
file["text"] = parsed["body"];
file["frontMatter"] = parsed["attributes"];
file = Object.assign(file, parsed["attributes"]);
// See <https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Date/parse>.
//
// ISO 8601 date time format is expected, and by default a string with date
// and time will be parsed as local time.
file["updated"] = file["updated"] || file["updatedDate"];
if (file["updated"] != null) {
file["updated"] = new Date(file["updated"]);
}
file["updatedDate"] = file["updated"];
file["created"] = file["created"] || file["createdDate"];
if (file["created"] != null) {
file["created"] = new Date(file["created"]);
}
file["createdDate"] = file["created"];
return file;
};
/**
* @private
* @description This is only used for serving, so we lazy load it to save time
* on building.
*/
let extMIME = null;
/**
* @description Detect Content-Type via filename.
* @param {String} docPath
* @return {String} Content-Type value.
*/
const getContentType = (docPath) => {
if (extMIME == null) {
extMIME = loadJSONSync(path.join(hikaruDir, "hikaru", "ext-mime.json"));
}
const ext = path.extname(docPath).toLowerCase();
return extMIME[ext] || "application/octet-stream";
};
/**
* @description Paginate page's posts.
* @param {File} p Original page.
* @param {File[]} posts Page related posts.
* @param {Number} [perPage=10] How many posts per page.
* @return {File[]} Paginated pages, original page's index is 0.
*/
const paginate = (p, posts = [], perPage = 10) => {
const results = [];
let perPagePosts = [];
for (const post of posts) {
if (perPagePosts.length === perPage) {
results.push(new File(p, {"posts": perPagePosts}));
perPagePosts = [];
}
perPagePosts.push(post);
}
results.push(new File(p, {"posts": perPagePosts}));
results[0]["pages"] = results;
results[0]["index"] = 0;
// Keep compatible and don't break themes.
results[0]["pageIndex"] = results[0]["index"];
results[0]["pageArray"] = results[0]["pages"];
results[0]["docPath"] = p["docPath"];
for (let i = 1; i < results.length; ++i) {
results[i]["pages"] = results;
results[i]["index"] = i;
// Keep compatible and don't break themes.
results[i]["pageArray"] = results[i]["pages"];
results[i]["pageIndex"] = results[i]["index"];
results[i]["docPath"] = path.join(
path.dirname(p["docPath"]),
`${path.basename(
p["docPath"], path.extname(p["docPath"])
)}-${i + 1}.html`
);
}
return results;
};
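// Usage sketch (page and numbers are illustrative): 25 posts with the default
// `perPage` of 10 produce 3 pages. If the original page's `docPath` is
// "index.html", the generated pages get "index.html", "index-2.html" and
// "index-3.html", and every page's `pages` array references all of them.
//
//   const pages = paginate(indexPage, posts, 10);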
/**
* @description Generate pages and paginate posts of categories.
* @param {Category[]} categories
* @param {String} parentPath Parent category's dir.
* @param {String} docDir
* @param {Number} [perPage=10] How many posts per page.
* @return {File[]} Pages of all categories and their subcategories.
*/
const paginateCategoriesPosts = (
categories, parentPath, docDir, perPage = 10
) => {
const results = [];
for (const category of categories) {
category["docPath"] = path.join(parentPath, category["name"], "index.html");
const sp = new File({
"layout": "category",
"docDir": docDir,
"docPath": category["docPath"],
"title": "category",
"category": category,
"name": category["name"],
"comment": false,
"reward": false
});
results.push(...paginate(sp, category["posts"], perPage));
results.push(...paginateCategoriesPosts(category["subs"], path.join(
parentPath, category["name"]
), docDir, perPage));
}
return results;
};
/**
* @callback getPath
* @description Get the full website path, starting from `/` after the domain.
* This function can only handle site-internal paths.
* @param {String} [docPath]
* @param {Boolean} [skipEncode=false] If true, skip `encodeURI()`.
* @return {String} Full path that starts with site rootDir.
*/
/**
* @description Get a function to handle full website path.
* @param {String} [rootDir] Site rootDir.
* @return {getPath}
*/
const getPathFn = (rootDir = path.posix.sep) => {
// We need to escape the backslash literally when building a RegExp from it.
const winSepRegExp = new RegExp(`\\${path.win32.sep}`, "g");
rootDir = rootDir.replace(winSepRegExp, path.posix.sep);
if (!rootDir.endsWith(path.posix.sep)) {
rootDir = path.posix.join(rootDir, path.posix.sep);
}
if (!path.posix.isAbsolute(rootDir)) {
rootDir = path.posix.join(path.posix.sep, rootDir);
}
return (docPath = "", skipEncode = false) => {
// Handle links with a query string or hash.
// Use a lookahead assertion so `?` and `#` are not removed by the split.
const array = docPath.split(/(?=[?#])/);
array[0] = array[0].replace(winSepRegExp, path.posix.sep);
const baseName = path.posix.basename(array[0]);
const dirName = path.posix.dirname(array[0]);
if (baseName === "index.html" || baseName === "index.htm") {
array[0] = path.posix.join(dirName, path.posix.sep);
}
// marked.js and CommonMark tend to do URL encoding by themselves. Maybe I
// should not do `encodeURI()` here.
//
// See <https://github.com/markedjs/marked/issues/1285>.
return skipEncode
? path.posix.join(rootDir, ...array)
: encodeURI(path.posix.join(rootDir, ...array));
};
};
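// Usage sketch (rootDir and paths are illustrative):
//
//   const getPath = getPathFn("/blog/");
//   getPath();                    // => "/blog/"
//   getPath("about/index.html");  // => "/blog/about/"
//   getPath("a b.html");          // => "/blog/a%20b.html"
//   getPath("a b.html", true);    // => "/blog/a b.html" (no encodeURI)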
/**
* @callback getURL
* @description Get the full website URL including the domain. This function
* can only handle site-internal paths.
* @param {String} [docPath]
* @return {URL} Full website URL.
*/
/**
* @description Get a function to handle full website URL.
* @param {String} [baseURL] Site baseURL.
* @param {String} [rootDir] Site rootDir.
* @return {getURL}
*/
const getURLFn = (baseURL, rootDir = path.posix.sep) => {
const getPath = getPathFn(rootDir);
return (docPath = "") => {
return new URL(getPath(docPath), baseURL);
};
};
/**
* @callback isCurrentHost
* @description Test if the given URL is on the current host. This function
* does not care about query strings and hashes.
* @param {String} [testURL] URL needed to test.
* @return {Boolean}
*/
/**
* @description Get a function to check whether a URL is on the current host.
* @param {String} baseURL Site baseURL.
* @param {String} [rootDir] Site rootDir.
* @return {isCurrentHost}
*/
const isCurrentHostFn = (baseURL, rootDir = path.posix.sep) => {
const getURL = getURLFn(baseURL, rootDir);
const currentHost = getURL().host;
return (testURL) => {
// If `testURL` is a valid URL, `baseURL` will be ignored, so we can compare
// host for all links here.
const url = new URL(testURL, baseURL);
// It returns `""` for data URL!
return url.host === currentHost || url.host === "";
};
};
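// Usage sketch (URLs are hypothetical):
//
//   const isCurrentHost = isCurrentHostFn("https://example.com", "/blog/");
//   isCurrentHost("/blog/about/");                // => true (relative link)
//   isCurrentHost("https://example.com/a.html");  // => true
//   isCurrentHost("https://other.org/");          // => false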
/**
* @callback isCurrentPath
* @description Test if the given path is the current path. This function does
* not care about query strings and hashes, and can only handle site-internal
* paths.
* @param {String} [testPath] Path needed to test.
* @param {Boolean} [strict=false] If not strict, `true` is also returned when
* the given path is a parent of the current path.
* @return {Boolean}
*/
/**
* @description Get a function to check whether a path is the current path.
* @param {String} [rootDir] Site rootDir.
* @param {String} [currentPath] current page's path.
* @return {isCurrentPath}
*/
const isCurrentPathFn = (rootDir = path.posix.sep, currentPath = "") => {
const getPath = getPathFn(rootDir);
currentPath = getPath(currentPath).split(/[?#]/)[0];
const currentToken = currentPath.split(path.posix.sep);
// `"/a/b/"` will be `["", "a", "b", ""]`, and `"/a/b/c"` will be
// `["", "a", "b", "c"]`, so we always ignore the last empty string.
if (currentToken[currentToken.length - 1].length === 0) {
currentToken.pop();
}
return (testPath = "", strict = false) => {
if (!isString(testPath)) {
strict = testPath;
testPath = "";
}
testPath = getPath(testPath).split(/[?#]/)[0];
if (currentPath === testPath) {
return true;
}
const testToken = testPath.split(path.posix.sep);
if (testToken[testToken.length - 1].length === 0) {
testToken.pop();
}
if (strict && testToken.length !== currentToken.length) {
return false;
}
// `testPath` may be shorter here and is usually a menu link.
for (let i = 0; i < testToken.length; ++i) {
if (testToken[i] !== currentToken[i]) {
return false;
}
}
return true;
};
};
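// Usage sketch (paths are illustrative): non-strict mode also matches parent
// paths, which is handy for highlighting menu links.
//
//   const isCurrentPath = isCurrentPathFn("/blog/", "about/index.html");
//   isCurrentPath("about/index.html");  // => true
//   isCurrentPath("");                  // => true (parent path, non-strict)
//   isCurrentPath("", true);            // => false (strict)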
/**
* @description Compare strings.
* @param {String} a
* @param {String} b
* @return {Number}
*/
const localeCompareSimple = (a, b) => {
if (!(isString(a) && isString(b))) {
return 0;
}
return a.localeCompare(b);
};
/**
* @callback localeCompare
* @description Compare strings, with locale support if `Intl` is available.
* @param {String} a
* @param {String} b
* @return {Number}
*/
/**
* @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/Collator
* @description Get a function to compare strings, with locale support if `Intl`
* is available.
* @param {String} locales
* @return {localeCompare}
*/
const localeCompareFn = (locales) => {
if (!isObject(Intl)) {
return localeCompareSimple;
}
const collator = new Intl.Collator(locales, {
"usage": "sort",
"localeMatcher": "best fit",
"numeric": true,
// `R` < `r` but `re` < `RI`, why?
"caseFirst": "upper",
"sensitivity": "variant",
"ignorePunctuation": false
});
return collator.compare.bind(collator);
};
/**
* @description Format date and time to `YYYY-MM-DD HH:mm:ss`.
* @param {*} [dt=new Date()]
* @return {String}
*/
const formatDateTimeSimple = (dt = new Date()) => {
if (!(dt instanceof Date)) {
dt = new Date(dt);
}
const year = dt.getFullYear().toString();
const month = (dt.getMonth() + 1).toString().padStart(2, "0");
const date = dt.getDate().toString().padStart(2, "0");
const hour = dt.getHours().toString().padStart(2, "0");
const minute = dt.getMinutes().toString().padStart(2, "0");
const second = dt.getSeconds().toString().padStart(2, "0");
return `${year}-${month}-${date} ${hour}:${minute}:${second}`;
};
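// Usage sketch (the date is illustrative and is parsed as local time):
//
//   formatDateTimeSimple("2024-01-02T03:04:05");  // => "2024-01-02 03:04:05"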
/**
* @callback formatDateTime
* @description Format date and time, with locale support if `Intl` is
* available.
* @param {*} [dt=new Date()]
* @return {String}
*/
/**
* @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/DateTimeFormat
* @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/DateTimeFormat/formatToParts
* @description Get a function to format date and time, with locale support if
* `Intl` is available.
* @param {String} locales
* @return {formatDateTime}
*/
const formatDateTimeFn = (locales) => {
if (!isObject(Intl)) {
return formatDateTimeSimple;
}
const formatter = new Intl.DateTimeFormat(locales, {
"year": "numeric",
"month": "2-digit",
"day": "2-digit",
"weekday": "short",
"hour": "2-digit",
"minute": "2-digit",
"second": "2-digit",
"timeZoneName": "short",
"hour12": false
});
return (dt = new Date()) => {
if (!(dt instanceof Date)) {
dt = new Date(dt);
}
const parts = formatter.formatToParts(dt);
const obj = {};
for (const {type, value} of parts) {
obj[type] = value;
}
return `${obj["year"]}-${obj["month"]}-${obj["day"]} ${obj["weekday"]} ${obj["hour"]}:${obj["minute"]}:${obj["second"]} ${obj["timeZoneName"]}`;
};
};
/**
* @typedef {Object} CategoriesData
* @property {Category[]} categories
* @property {Number} categoriesLength
*/
/**
* @description Generate categories from posts.
* @param {File[]} posts
* @return {CategoriesData}
*/
const genCategories = (posts) => {
const categories = [];
let categoriesLength = 0;
for (const post of posts) {
if (post["frontMatter"]["categories"] == null) {
continue;
}
const postCategories = [];
let subCategories = categories;
for (const c of post["frontMatter"]["categories"]) {
// Maybe Numbers? I don't know.
const cateName = c.toString();
let category = subCategories.find((category) => {
return category["name"] === cateName;
});
if (category == null) {
category = new Category(cateName);
++categoriesLength;
subCategories.push(category);
}
postCategories.push(category);
category["posts"].push(post);
subCategories = category["subs"];
}
post["categories"] = postCategories;
}
return {categories, categoriesLength};
};
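// Sketch (front-matter is illustrative; `posts` stands for any array of parsed
// posts): a post with `categories: [Dev, Web]` produces a top-level `Dev`
// Category whose `subs` contain a `Web` Category; both list the post in their
// `posts`, and `categoriesLength` is 2 because every nested Category counts.
//
//   const {categories, categoriesLength} = genCategories(posts);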
/**
* @typedef {Object} TagsData
* @property {Tag[]} tags
* @property {Number} tagsLength
*/
/**
* @description Generate tags from posts.
* @param {File[]} posts
* @return {TagsData}
*/
const genTags = (posts) => {
const tags = [];
let tagsLength = 0;
for (const post of posts) {
if (post["frontMatter"]["tags"] == null) {
continue;
}
const postTags = [];
for (const t of post["frontMatter"]["tags"]) {
// Maybe Numbers? I don't know.
const tagName = t.toString();
let tag = tags.find((tag) => {
return tag["name"] === tagName;
});
if (tag == null) {
tag = new Tag(tagName);
++tagsLength;
tags.push(tag);
}
postTags.push(tag);
tag["posts"].push(post);
}
post["tags"] = postTags;
}
return {tags, tagsLength};
};
/**
* @description Put a file into an array in site; this will replace an existing
* file with the same destination.
* @param {Site} site
* @param {String} key
* @param {File} file
*/
const putSite = (site, key, file) => {
if (file == null || !Site.arrayKeys.includes(key)) {
return;
}
const i = site[key].findIndex((element) => {
return (
element["docPath"] === file["docPath"] &&
element["docDir"] === file["docDir"]
);
});
if (i !== -1) {
site[key][i] = file;
} else {
site[key].push(file);
}
};
/**
* @description Delete files which have the same source from an array in site.
* @param {Site} site
* @param {String} key
* @param {File} file
*/
const delSite = (site, key, file) => {
if (file == null || !Site.arrayKeys.includes(key)) {
return;
}
for (let i = 0; i < site[key].length; ++i) {
if (
site[key][i]["srcPath"] === file["srcPath"] &&
site[key][i]["srcDir"] === file["srcDir"]
) {
// Don't use break here because we may have many files created with the
// same source.
site[key].splice(i, 1);
// Don't forget this because we removed one element from array!
--i;
}
}
};
/**
* @description Get File's full src path.
* @param {File} file
* @return {String}
*/
const getFullSrcPath = (file) => {
if (file == null || file["srcDir"] == null || file["srcPath"] == null) {
return null;
}
return path.join(file["srcDir"], file["srcPath"]);
};
/**
* @description Get File's full document path.
* @param {File} file
* @return {String}
*/
const getFullDocPath = (file) => {
if (file == null || file["docDir"] == null || file["docPath"] == null) {
return null;
}
return path.join(file["docDir"], file["docPath"]);
};
/**
* @see https://github.com/inikulin/parse5/blob/master/packages/parse5/docs/index.md#parsefragment
* @see https://github.com/inikulin/parse5/blob/master/packages/parse5/docs/tree-adapter/default/document-fragment.md
* @description Parse HTML string into parse5 Node.
* @param {Object} [node] If specified, the given fragment will be parsed as if
* it was set to the context element's `innerHTML` property.
* @param {String} html HTML string to parse.
* @param {Object} [options] parse5 options.
* @return {Object}
*/
const parseNode = (node, html, options) => {
return parse5.parseFragment(node, html, options);
};
/**
* @see https://github.com/inikulin/parse5/blob/master/packages/parse5/docs/index.md#serialize
* @description Serialize parse5 Node into HTML string.
* @param {Object} node parse5 Node to serialize.
* @param {Object} [options] parse5 options.
* @return {String}
*/
const serializeNode = (node, options) => {
return parse5.serialize(node, options);
};
/**
* @description Quick and not so dirty way to replace a Node with given HTML
* string.
* @param {Object} node parse5 Node to replace.
* @param {String} html
*/
const replaceNode = (node, html) => {
const parentNode = node["parentNode"];
if (parentNode != null && html != null) {
const newNode = parseNode(html);
if (newNode["childNodes"] != null && newNode["childNodes"].length > 0) {
const index = parentNode["childNodes"].indexOf(node);
parentNode["childNodes"].splice(
index,
1,
...newNode["childNodes"].map((childNode) => {
childNode["parentNode"] = parentNode;
return childNode;
})
);
}
}
};
/**
* @callback traversalCallback
* @param {Object} node parse5 Node.
*/
/**
* @see https://github.com/inikulin/parse5/blob/master/packages/parse5/docs/tree-adapter/default/element.md
* @description Recursive pre-order traversal of a parse5 Node.
* @param {Object} node Root parse5 Node of a tree.
* @param {traversalCallback} callback
*/
const nodesEach = (node, callback) => {
if (isFunction(callback)) {
callback(node);
if (node["childNodes"] != null) {
for (const childNode of node["childNodes"]) {
nodesEach(childNode, callback);
}
}
}
};
/**
* @callback filterCallback
* @param {Object} node parse5 Node.
* @return {Boolean} True to collect a node into an Array.
*/
/**
* @see https://github.com/inikulin/parse5/blob/master/packages/parse5/docs/tree-adapter/default/element.md
* @description Recursive pre-order traversal of a parse5 Node, collecting
* nodes accepted by the filter callback.
* @param {Object} node Root parse5 Node of a tree.
* @param {filterCallback} callback
* @return {Object[]} An Array of filtered parse5 Nodes.
*/
const nodesFilter = (node, callback) => {
const results = [];
if (isFunction(callback)) {
nodesEach(node, (node) => {
if (callback(node)) {
results.push(node);
}
});
}
return results;
};
/**
* @see https://github.com/inikulin/parse5/blob/master/packages/parse5/docs/tree-adapter/default/element.md
* @see https://github.com/inikulin/parse5/blob/master/packages/parse5/docs/tree-adapter/default/text-node.md
* @description Get text content of a parse5 Node.
* @param {Object} node parse5 Node.
* @return {String}
*/
const getNodeText = (node) => {
if (node["childNodes"] != null) {
for (const childNode of node["childNodes"]) {
if (childNode["nodeName"] === "#text") {
return childNode["value"];
}
}
}
return null;
};
/**
* @description Set text content (or innerHTML) of a parse5 Node.
* @param {Object} node parse5 Node.
* @param {String} html
*/
const setNodeText = (node, html) => {
// Add HTML to childNodes via parsing and replacing to keep tree references,
// and skip the parse5-generated `#document-fragment` node.
// Text nodes have no childNodes.
// Only set text on nodes that already have childNodes.
if (node["childNodes"] != null) {
// Don't forget to replace childNode's parentNode.
node["childNodes"] = parseNode(html)["childNodes"].map((childNode) => {
childNode["parentNode"] = node;
return childNode;
});
}
};
/**
* @see https://github.com/inikulin/parse5/blob/master/packages/parse5/docs/tree-adapter/default/element.md
* @see https://github.com/inikulin/parse5/blob/master/packages/parse5/docs/tree-adapter/default/attribute.md
* @description Get an attribute value from parse5 Node.
* @param {Object} node parse5 Node.
* @param {String} attrName
* @return {String} Value of the attribute, `null` if not available.
*/
const getNodeAttr = (node, attrName) => {
if (node["attrs"] != null) {
for (const attr of node["attrs"]) {
if (attr["name"] === attrName) {
return attr["value"];
}
}
}
return null;
};
/**
* @description Set an attribute value to parse5 Node.
* @param {Object} node parse5 Node.
* @param {String} attrName
* @param {String} attrValue
*/
const setNodeAttr = (node, attrName, attrValue) => {
// Do not add attrs to nodes without an attrs array,
// for example text nodes.
if (node["attrs"] != null) {
for (const attr of node["attrs"]) {
// Already have this attr, then replace.
if (attr["name"] === attrName) {
attr["value"] = attrValue;
return;
}
}
// Have other attrs but not this, so append.
node["attrs"].push({"name": attrName, "value": attrValue});
}
};
/**
* @description Update headings' IDs for bootstrap scrollspy.
* @param {Object} node parse5 Node.
* @param {Object} [opts]
* @param {String} [opts.safeChar="-"] Char used to replace unsupported chars.
*/
const resolveHeadingIDs = (node, opts = {}) => {
const headingNames = ["h1", "h2", "h3", "h4", "h5", "h6"];
const headingIDs = {};
const headingNodes = nodesFilter(node, (node) => {
return headingNames.includes(node["tagName"]);
});
for (const node of headingNodes) {
const text = getNodeText(node);
if (text != null) {
// Replace some chars in the escaped ID because scrollspy cannot support them.
const encoded = encodeURI(text.trim().replace(
/[\s()[\]{}<>.,!@#$%^&*=|`'/?~]/g,
opts["safeChar"] || "-"
));
const id = headingIDs[encoded] == null
? encoded
: `${encoded}-${headingIDs[encoded]++}`;
// If we have `abc`, `abc` and `abc-1`, we must record the `abc-1` generated
// by the second `abc`, to prevent generating a duplicate `abc-1` for the
// last literal `abc-1`.
headingIDs[id] = 1;
setNodeAttr(node, "id", id);
setNodeText(
node, `<a class="heading-link header-link" href="#${id}"></a>${text}`
);
}
}
};
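// Sketch (HTML is illustrative): for a tree parsed from
// `<h2>Hello World</h2><h2>Hello World</h2>`, the headings become
// `<h2 id="Hello-World">` and `<h2 id="Hello-World-1">`, each getting a
// prepended `<a class="heading-link header-link" href="#...">` anchor.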
/**
* @deprecated
* @description Use `resolveHeadingIDs(node)` instead.
*/
const resolveHeaderIDs = resolveHeadingIDs;
/**
* @description Generate TOC from HTML headings.
* @param {Object} node parse5 Node.
* @param {Object} [opts]
* @return {TOC[]}
*/
const genTOC = (node, opts = {}) => {
const headingNames = ["h1", "h2", "h3", "h4", "h5", "h6"];
const toc = [];
const headingNodes = nodesFilter(node, (node) => {
return headingNames.includes(node["tagName"]);
});
for (const node of headingNodes) {
let level = toc;
while (
level.length > 0 &&
headingNames.indexOf(
level[level.length - 1]["name"]
) < headingNames.indexOf(node["tagName"])
) {
level = level[level.length - 1]["subs"];
}
const id = getNodeAttr(node, "id");
const text = getNodeText(node);
if (id != null && text != null) {
// Don't set anchor to absolute path,
// because it's hard to write selector for scrollspy.
level.push(new TOC(node["tagName"], `#${id}`, text.trim()));
}
}
return toc;
};
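// Sketch (headings are illustrative): after `resolveHeadingIDs()`, a tree with
// `<h2 id="a">A</h2><h3 id="b">B</h3><h2 id="c">C</h2>` yields roughly a
// top-level `TOC("h2", "#a", "A")` whose `subs` contain `TOC("h3", "#b", "B")`,
// followed by `TOC("h2", "#c", "C")`: lower-level headings nest under the
// closest higher-level heading before them.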
/**
* @description Get protocol of a URL.
* @param {String} url
* @return {String} Protocol of the URL, or `null` if it has no protocol.
*/
const getURLProtocol = (url) => {
try {
// If no protocol in url, it will throw an error.
return new URL(url).protocol;
} catch (error) {
return null;
}
};
/**
* @description Update site-internal links to absolute paths, and add
* attributes to external links.
* @param {Object} node parse5 Node.
* @param {String} baseURL Site baseURL.
* @param {String} rootDir Site rootDir.
* @param {String} docPath
* @param {Object} [opts]
*/
const resolveAnchors = (node, baseURL, rootDir, docPath, opts = {}) => {
const isCurrentHost = isCurrentHostFn(baseURL, rootDir);
const getPath = getPathFn(rootDir);
// Replace relative paths with absolute paths.
const anchorNodes = nodesFilter(node, (node) => {
return node["tagName"] === "a";
});
for (const node of anchorNodes) {
const href = getNodeAttr(node, "href");
if (href != null) {
if (!isCurrentHost(href)) {
setNodeAttr(node, "target", "_blank");
setNodeAttr(node, "rel", "external nofollow noreferrer noopener");
}
// `path.posix.isAbsolute()` detects `/` or `//`.
if (!(path.posix.isAbsolute(href) || getURLProtocol(href) != null)) {
/**
* marked.js and CommonMark tend to do URL encoding by themselves.
* I should skip `encodeURI()` here and do it for heading IDs only.
* See <https://github.com/markedjs/marked/issues/1285>.
*/
setNodeAttr(
node, "href", getPath(path.join(path.dirname(docPath), href), true)
);
}
}
}
};
/**
* @description Update site-internal image `src` attributes to absolute paths.
* @param {Object} node parse5 Node.
* @param {String} rootDir Site rootDir.
* @param {String} docPath
* @param {Object} [opts]
*/
const resolveImages = (node, rootDir, docPath, opts = {}) => {
const getPath = getPathFn(rootDir);
// Replace relative paths with absolute paths.
const imageNodes = nodesFilter(node, (node) => {
return node["tagName"] === "img";
});
for (const node of imageNodes) {
const src = getNodeAttr(node, "src");
if (src != null) {
// `path.posix.isAbsolute()` detects `/` or `//`.
if (!(path.posix.isAbsolute(src) || getURLProtocol(src) != null)) {
setNodeAttr(
node, "src", getPath(path.join(path.dirname(docPath), src))
);
}
}
}
};
/**
* @description Update code blocks.
* @param {Object} node parse5 Node.
* @param {Object} [opts]
* @param {Boolean} [opts.lineNumbers=false] Generate line numbers.
*/
const resolveCodeBlocks = (node, opts = {}) => {
const codeBlockNodes = nodesFilter(node, (node) => {
return node["tagName"] === "pre" &&
// Prevent re-resolving code blocks when re-processing.
node["parentNode"]["tagName"] !== "figure" &&
node["childNodes"].length === 1 &&
node["childNodes"][0]["tagName"] === "code";
});
for (const node of codeBlockNodes) {
const code = getNodeText(node["childNodes"][0]);
if (code != null) {
const info = getNodeAttr(node["childNodes"][0], "class");
// Many Markdown renderers add a `language-` prefix to a code block's info
// string. Better to remove it while processing to keep things consistent.
let lang = info;
const langPrefix = "language-";
if (info != null && info.startsWith(langPrefix)) {
lang = info.substring(langPrefix.length);
}
const escapedCode = escapeHTML(code);
const results = [`<figure data-raw="${escapedCode}"`];
if (info != null) {
results.push(` data-info="${info}" data-lang="${lang}"`);
}
results.push(" class=\"code-block\">");
if (opts["lineNumbers"]) {
results.push("<pre class=\"line-numbers gutter\">");
// Highlighting should not change the number of lines, but may replace `\n`
// with `<br>`, so use the original code here.
const codeLines = escapedCode.split(/\r?\n/g);
// It seems marked.js now keeps the last `\n`, which leaves an empty line
// after splitting, and we should not add a line number for that last empty
// line. Don't trim here; we only ignore it!
if (codeLines[codeLines.length - 1].length === 0) {
codeLines.pop();
}
for (let i = 0; i < codeLines.length; ++i) {
results.push(`<span class="line-number">${i + 1}</span>`);
if (i !== codeLines.length - 1) {
results.push("\n");
}
}
results.push("</pre>");
}
results.push("<pre class=\"code\">");
if (info != null) {
results.push(`<code class="${info}">`);
} else {
results.push("<code>");
}
results.push(escapedCode);
results.push("</code></pre>");
results.push("</figure>");
replaceNode(node, results.join(""));
}
}
};
/**
* @description Get Hikaru version.
* @return {String}
*/
const getVersion = () => {
return pkgJSON["version"];
};
/**
* @description Hikaru's default 404 page content for server.
* @type {String}
*/
const default404 = [
"<!DOCTYPE html>",
"<html>",
" <head>",
" <meta charset=\"utf-8\">",
" <meta http-equiv=\"X-UA-Compatible\" content=\"IE=edge\">",
" <meta name=\"viewport\" content=\"width=device-width, initial-scale=1, maximum-scale=1\">",
" <title>404 Not Found</title>",
" </head>",
" <body>",
" <h1>404 Not Found</h1>",
` <p>Hikaru v${getVersion()}</p>`,
" </body>",
"</html>",
""
].join("\n");
// Nunjucks uses runtime including (we treat extending as another kind of
// including), which means it will call the included template every time it
// renders, instead of calling it once during compiling. So if an included
// template is updated, all templates that include it will be updated. That
// means we don't need to handle file dependencies of nunjucks templates.
//
// We already load and compile toplevel templates by ourselves, but included
// templates are loaded and compiled by the nunjucks loader. By default, the
// FileSystemLoader of nunjucks can watch and reload files, so we would only
// need to watch the toplevel layout files (because we load and compile them,
// not nunjucks). However, FileSystemLoader refuses to load files outside of
// its search paths, which is needed for plugins. And users may use other
// templating engines that do not support watching and reloading, so it is
// useful to control watching and reloading by ourselves; then we can also
// bypass nunjucks and load file contents from Hikaru via a custom loader.
/**
* @private
*/
class NjkLoader extends nunjucks.Loader {
constructor(hikaru) {
super();
this.watcher = hikaru.watcher;
this.layouts = hikaru.site["layouts"];
this.layoutDir = hikaru.site["siteConfig"]["themeLayoutDir"];
if (this.watcher != null) {
// This is mainly for non-toplevel templates that will be included by
// other templates: because they are loaded by nunjucks, we need to tell
// nunjucks to update them.
this.watcher.register(
this.layoutDir, (srcDir, srcPaths) => {
const {added, changed, removed} = srcPaths;
const all = added.concat(changed).concat(removed);
for (const srcPath of all) {
// Mark that template as dirty in the internal cache of nunjucks,
// so it will re-fetch content from this loader when including.
this.emit("update", srcPath);
}
}
);
}
}
getSource(srcPath) {
// If a template is not in the theme's layout dir, for example a plugin's
// templates, fall back to reading it from disk.
let result = null;
if (!this.layouts.has(srcPath)) {
// Ignore non-existing files.
if (!isReadableSync(srcPath)) {
return null;
}
// Async including inside nunjucks for loops is hard to handle, so always
// read the file synchronously.
result = {
"src": fse.readFileSync(srcPath, "utf8"),
"path": srcPath,
// We have no way in plugins to tell nunjucks to update included
// templates which are loaded and compiled by the loader, so if we are
// serving, we never cache those templates. This is not good for
// performance, but plugins should not use too complex templates.
"noCache": this.watcher !== null
};
} else {
result = {
"src": this.layouts.get(srcPath),
"path": srcPath,
"noCache": false
};
}
this.emit("load", srcPath, result);
return result;
}
}
export {
hikaruDir,
loadJSON,
loadJSONSync,
loadYAML,
loadYAMLSync,
pkgJSON,
isNumber,
isString,
isArray,
isFunction,
isObject,
isBuffer,
checkType,
isBinary,
isBinaryPath,
isBinaryFile,
isBinaryFileSync,
isReadableSync,
escapeHTML,
removeHTMLTags,
fallbackSort,
matchFiles,
removeControlChars,
getFrontMatter,
parseFrontMatter,
getContentType,
paginate,
paginateCategoriesPosts,
getPathFn,
getURLFn,
isCurrentHostFn,
isCurrentPathFn,
localeCompareSimple,
localeCompareFn,
formatDateTimeSimple,
formatDateTimeFn,
genCategories,
genTags,
putSite,
delSite,
getFullSrcPath,
getFullDocPath,
parseNode,
serializeNode,
replaceNode,
nodesEach,
nodesFilter,
getNodeText,
setNodeText,
getNodeAttr,
setNodeAttr,
resolveHeadingIDs,
resolveHeaderIDs,
genTOC,
getURLProtocol,
resolveAnchors,
resolveImages,
resolveCodeBlocks,
getVersion,
default404,
NjkLoader
};