// Source blob 6cbae57e1183e411033cc2b005b5e0b2cb1975b7bc2a09bae698664919fca56740475d17d47b6a6494a4eb0949a659502c4c7d54a3e91ee5cbc23ef31a753f (3.0 KB)

  // Patterns applied to an element's inline `style` attribute. All three are
  // case-insensitive and only look inside a single `mso-list:` declaration
  // (`[^;]*` stops at the next `;`).

  // Matches when the `mso-list` declaration contains "ignore".
  const ignoreRegexp = /\bmso-list:[^;]*ignore/i;
  // Captures the digits that follow a standalone "l" token, e.g. "0" in `mso-list: l0 level1`.
  const idRegexp = /\bmso-list:[^;]*\bl(\d+)/i;
  // Captures the digits that follow "level", e.g. "1" in `mso-list: l0 level1`.
  const indentRegexp = /\bmso-list:[^;]*\blevel(\d+)/i;
  /**
   * Reads Word list metadata for a single element.
   *
   * The list id and indent level come from the element's inline `style`
   * (`mso-list: lN levelM`). The list type is looked up in `html`, where the
   * matching `@list lN:levelM { ... }` rule may declare
   * `mso-level-number-format`.
   *
   * @param {Element} element - Node whose `style` attribute is inspected.
   * @param {string} html - Serialized markup that contains the `@list` rules.
   * @returns {{id: number, indent: number, type: string, element: Element}|null}
   *   Parsed item (`type` is `'bullet'` or `'ordered'`), or `null` when the
   *   style carries no `lN` list id.
   */
  const parseListItem = (element, html) => {
    const style = element.getAttribute('style');
    const idMatch = style?.match(idRegexp);
    if (!idMatch) {
      return null;
    }
    const id = Number(idMatch[1]);
    // `style` is a string here: a non-null `idMatch` can only come from one.
    const indentMatch = style.match(indentRegexp);
    // A missing `levelM` token falls back to the first indent level.
    const indent = indentMatch ? Number(indentMatch[1]) : 1;
    const typeRegexp = new RegExp(`@list l${id}:level${indent}\\s*\\{[^\\}]*mso-level-number-format:\\s*([\\w-]+)`, 'i');
    const numberFormat = html.match(typeRegexp)?.[1];
    // The rule is matched case-insensitively, so compare the captured keyword
    // case-insensitively too; anything other than "bullet" is treated as ordered.
    const type = numberFormat?.toLowerCase() === 'bullet' ? 'bullet' : 'ordered';
    return {
      id,
      indent,
      type,
      element,
    };
  };
  // list items are represented as `p` tags with styles like `mso-list: l0 level1` where:
  // 1. "0" in "l0" means the list item id;
  // 2. "1" in "level1" means the indent level, starting from 1.
  /**
   * Rewrites Word-style list paragraphs inside `doc` into `<ul>`/`<li>` markup,
   * mutating `doc` in place.
   *
   * Steps:
   * 1. Collect every node whose inline style mentions `mso-list`.
   * 2. Remove the ones flagged "ignore" (the list markers).
   * 3. Parse the rest with `parseListItem`; nodes it rejects are left untouched.
   * 4. Group runs of items that are consecutive element siblings and share the
   *    same list id, and replace each run with a single `<ul>` whose `<li>`
   *    children carry `data-list` and, for indent > 1, `ql-indent-N`.
   *
   * @param {Document} doc - Document to normalize.
   * @returns {void}
   */
  const normalizeListItem = doc => {
    const msoNodes = Array.from(doc.querySelectorAll('[style*=mso-list]'));
    const ignored = [];
    const candidates = [];
    for (const node of msoNodes) {
      const style = node.getAttribute('style') || '';
      if (ignoreRegexp.test(style)) {
        ignored.push(node);
      } else {
        candidates.push(node);
      }
    }
    // Each list item contains a marker wrapped with "mso-list: Ignore".
    for (const node of ignored) {
      node.parentNode?.removeChild(node);
    }
    // The list style is not defined inline with the tag, instead, it's in the
    // style tag so we need to pass the html as a string.
    const html = doc.documentElement.innerHTML;
    const listItems = candidates.map(element => parseListItem(element, html)).filter(Boolean);

    // Walk the items with a cursor rather than repeatedly shifting the array.
    let cursor = 0;
    while (cursor < listItems.length) {
      // Group continuous items into the same group (aka "ul").
      const group = [listItems[cursor]];
      cursor += 1;
      while (cursor < listItems.length) {
        const previous = group[group.length - 1];
        const next = listItems[cursor];
        const isAdjacent = next.element === previous.element.nextElementSibling;
        // Different id means the next item doesn't belong to this group.
        if (!isAdjacent || next.id !== previous.id) {
          break;
        }
        group.push(next);
        cursor += 1;
      }

      // Create nodes from `doc` itself so this works without a global
      // `document` and the new nodes belong to the document being edited.
      const ul = doc.createElement('ul');
      for (const { type, indent, element } of group) {
        const li = doc.createElement('li');
        li.setAttribute('data-list', type);
        if (indent > 1) {
          li.setAttribute('class', `ql-indent-${indent - 1}`);
        }
        li.innerHTML = element.innerHTML;
        ul.appendChild(li);
      }

      // The first paragraph of the run is swapped for the list; the remaining
      // paragraphs are removed from that same parent.
      const [first, ...rest] = group;
      const { parentNode } = first.element;
      parentNode?.replaceChild(ul, first.element);
      for (const { element } of rest) {
        parentNode?.removeChild(element);
      }
    }
  };
  /**
   * Normalizes markup pasted from Microsoft Word.
   *
   * The document is only touched when its root element declares the Word XML
   * namespace (`xmlns:w`); in that case list paragraphs are rewritten by
   * `normalizeListItem`. Any other document is left unchanged.
   *
   * @param {Document} doc - Document to normalize in place.
   * @returns {void}
   */
  export default function normalize(doc) {
    const wordNamespace = doc.documentElement.getAttribute('xmlns:w');
    if (wordNamespace === 'urn:schemas-microsoft-com:office:word') {
      normalizeListItem(doc);
    }
  }
  83. }
  84. //# sourceMappingURL=msWord.js.map