// Gitea source: html2markdown.ts (3.9KB)
  1. import {html, htmlRaw} from '../utils/html.ts';
  /**
   * Converts a single element to Markdown. A processor may return the replacement text,
   * return an element, or return nothing after rewriting `el` in place.
   */
  type Processor = (el: HTMLElement) => string | HTMLElement | void;

  /** Lookup table from a tag name (as reported by `el.tagName`) to its processor. */
  type Processors = Record<string, Processor>;

  /** Mutable state shared between the tree traversal and the processors. */
  interface ProcessorContext {
    /** Whether the element being visited is the first element child of its parent. */
    elementIsFirst: boolean;
    /** Whether the element being visited is the last element child of its parent. */
    elementIsLast: boolean;
    /** Number of OL/UL containers currently open around the element being visited. */
    listNestingLevel: number;
  }
  /**
   * Builds the table of per-tag Markdown processors.
   *
   * Each processor either returns the Markdown replacement for the element or rewrites
   * `el.textContent` in place. Processors that rewrite in place return nothing or `el` itself.
   *
   * @param ctx traversal state read by the list processors (`listNestingLevel`, `elementIsLast`)
   * @returns processors for H1-H6, STRONG, EM, DEL, A, IMG, P, BLOCKQUOTE, OL, UL, LI, INPUT and CODE
   */
  function prepareProcessors(ctx: ProcessorContext): Processors {
    // Shared by H1..H6: the digit in the tag name is the number of leading '#' characters.
    const heading = (el: HTMLElement): void => {
      const depth = parseInt(el.tagName.slice(1));
      el.textContent = `${'#'.repeat(depth)} ${el.textContent.trim()}`;
    };

    // Shared by OL and UL: a list that is itself inside another list starts on a new line.
    const list = (el: HTMLElement): void => {
      const lead = ctx.listNestingLevel ? '\n' : '';
      el.textContent = `${lead}${el.textContent}\n`;
    };

    const processors: Processors = {
      H1: heading,
      STRONG: (el: HTMLElement) => `**${el.textContent}**`,
      EM: (el: HTMLElement) => `_${el.textContent}_`,
      DEL: (el: HTMLElement) => `~~${el.textContent}~~`,
      A: (el: HTMLElement) => {
        const label = el.textContent || 'link';
        const target = el.getAttribute('href');
        if (!target) return label;
        // A link whose text is already its own http(s) URL is emitted bare.
        const isBareUrl = /^https?:/.test(label) && label === target;
        return isBareUrl ? label : `[${label}](${target})`;
      },
      IMG: (el: HTMLElement) => {
        const altText = el.getAttribute('alt') || 'image';
        const source = el.getAttribute('src');
        const width = el.hasAttribute('width') ? htmlRaw` width="${el.getAttribute('width') || ''}"` : '';
        const height = el.hasAttribute('height') ? htmlRaw` height="${el.getAttribute('height') || ''}"` : '';
        if (!width && !height) {
          return `![${altText}](${source})`;
        }
        // Markdown image syntax cannot carry dimensions, so sized images stay as an HTML tag.
        return html`<img alt="${altText}"${width}${height} src="${source}">`;
      },
      P: (el: HTMLElement) => {
        el.textContent = `${el.textContent}\n`;
      },
      BLOCKQUOTE: (el: HTMLElement) => {
        // The multiline /^/ match prefixes every line of the quote.
        const quoted = el.textContent.replace(/^/mg, '> ');
        el.textContent = `${quoted}\n`;
      },
      OL: list,
      LI: (el: HTMLElement) => {
        const container = el.parentNode as HTMLElement;
        const marker = container.tagName === 'OL' ? `1. ` : '* ';
        // Top-level items are not indented; each deeper level adds four spaces.
        const indent = ' '.repeat(Math.max(0, ctx.listNestingLevel - 1) * 4);
        const tail = ctx.elementIsLast ? '' : '\n';
        el.textContent = `${indent}${marker}${el.textContent}${tail}`;
        return el;
      },
      INPUT: (el: HTMLElement) => ((el as HTMLInputElement).checked ? '[x] ' : '[ ] '),
      CODE: (el: HTMLElement) => {
        const code = el.textContent;
        const insidePre = Boolean(el.parentNode) && (el.parentNode as HTMLElement).tagName === 'PRE';
        if (insidePre) {
          // Code inside PRE becomes a fenced block and the element is kept.
          el.textContent = `\`\`\`\n${code}\n\`\`\`\n`;
          return el;
        }
        // Inline code containing a backtick needs the double-backtick delimiter.
        return code.includes('`') ? `\`\` ${code} \`\`` : `\`${code}\``;
      },
    };

    processors['UL'] = list;
    for (const level of [2, 3, 4, 5, 6]) {
      processors[`H${level}`] = heading;
    }
    return processors;
  }
  /**
   * Converts `el` and all of its descendants to Markdown in place, children first.
   *
   * Once the descendants are converted, the processor registered for `el.tagName` (if any) runs.
   * A processor may rewrite `el` directly or return a replacement; a truthy return value other
   * than `el` itself is swapped into the DOM in place of `el`.
   *
   * @param ctx shared traversal state; while a processor runs, `elementIsFirst`/`elementIsLast`
   *   describe the position of that processor's element among its parent's element children
   * @param processors table of processors keyed by tag name
   * @param el element to convert; it is mutated and may be replaced in its parent
   * @returns the element's text when it carries `data-markdown-generated-content`, otherwise nothing
   */
  function processElement(ctx: ProcessorContext, processors: Processors, el: HTMLElement): string | void {
    // Content already marked as generated Markdown is left untouched.
    if (el.hasAttribute('data-markdown-generated-content')) return el.textContent ?? '';

    // A link that wraps nothing but an image is reduced to the image; the A processor is skipped.
    if (el.tagName === 'A' && el.children.length === 1 && el.children[0].tagName === 'IMG') {
      return processElement(ctx, processors, el.children[0] as HTMLElement);
    }

    // Remember this element's own position: the child loop below overwrites the shared flags.
    const ownIsFirst = ctx.elementIsFirst;
    const ownIsLast = ctx.elementIsLast;

    const isListContainer = el.tagName === 'OL' || el.tagName === 'UL';
    if (isListContainer) ctx.listNestingLevel++;

    // `el.children` is a live collection and child processing may replace a child with a text
    // node, which removes it from that collection. Iterating a snapshot keeps every original
    // child visited exactly once, with a stable first/last position.
    const children = Array.from(el.children) as HTMLElement[];
    for (const [index, child] of children.entries()) {
      ctx.elementIsFirst = index === 0;
      ctx.elementIsLast = index === children.length - 1;
      processElement(ctx, processors, child);
    }

    if (isListContainer) ctx.listNestingLevel--;

    // Restore the flags so the processor sees this element's position, not its last child's.
    ctx.elementIsFirst = ownIsFirst;
    ctx.elementIsLast = ownIsLast;

    const processor = processors[el.tagName];
    if (!processor) return;
    const replacement = processor(el);
    if (replacement && replacement !== el) {
      el.replaceWith(typeof replacement === 'string' ? document.createTextNode(replacement) : replacement);
    }
  }
  /**
   * Converts an element tree to Markdown text.
   *
   * `el` is moved into a temporary wrapper `div` (which detaches it from any previous parent)
   * and is then converted in place, so the caller's element is consumed by this call. The wrapper
   * is needed because the root element itself may be replaced by a text node during conversion;
   * the result is read back from the wrapper.
   *
   * @param el root element to convert; it is mutated
   * @returns the Markdown text
   */
  export function convertHtmlToMarkdown(el: HTMLElement): string {
    const wrapper = document.createElement('div');
    wrapper.append(el);
    // Every field is initialised so the object really is a ProcessorContext. `false` keeps the
    // falsy value the position flags previously had for the root element.
    const ctx: ProcessorContext = {elementIsFirst: false, elementIsLast: false, listNestingLevel: 0};
    processElement(ctx, prepareProcessors(ctx), el);
    return wrapper.textContent ?? '';
  }