feat: Complete fleet — 94 skills across 10+ domains
Pulled ALL skills from 15 source repositories: - anthropics/skills: 16 (docs, design, MCP, testing) - obra/superpowers: 14 (TDD, debugging, agents, planning) - coreyhaines31/marketingskills: 25 (marketing, CRO, SEO, growth) - better-auth/skills: 5 (auth patterns) - vercel-labs/agent-skills: 5 (React, design, Vercel) - antfu/skills: 16 (Vue, Vite, Vitest, pnpm, Turborepo) - Plus 13 individual skills from various repos Mosaic Stack is not limited to coding — the Orchestrator and subagents serve coding, business, design, marketing, writing, logistics, analysis, and more. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
0
skills/docx/scripts/office/helpers/__init__.py
Normal file
0
skills/docx/scripts/office/helpers/__init__.py
Normal file
199
skills/docx/scripts/office/helpers/merge_runs.py
Normal file
199
skills/docx/scripts/office/helpers/merge_runs.py
Normal file
@@ -0,0 +1,199 @@
|
||||
"""Merge adjacent runs with identical formatting in DOCX.
|
||||
|
||||
Merges adjacent <w:r> elements that have identical <w:rPr> properties.
|
||||
Works on runs in paragraphs and inside tracked changes (<w:ins>, <w:del>).
|
||||
|
||||
Also:
|
||||
- Removes rsid attributes from runs (revision metadata that doesn't affect rendering)
|
||||
- Removes proofErr elements (spell/grammar markers that block merging)
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import defusedxml.minidom
|
||||
|
||||
|
||||
def merge_runs(input_dir: str) -> tuple[int, str]:
    """Merge adjacent identically-formatted runs in word/document.xml.

    Args:
        input_dir: Root of an unpacked DOCX (must contain word/document.xml).

    Returns:
        (number of runs merged, human-readable status message).
        On any failure, returns (0, "Error: ...") instead of raising.
    """
    document_path = Path(input_dir) / "word" / "document.xml"
    if not document_path.exists():
        return 0, f"Error: {document_path} not found"

    try:
        dom = defusedxml.minidom.parseString(document_path.read_text(encoding="utf-8"))
        root = dom.documentElement

        # Proofing marks and rsid revision attributes block run merging
        # without affecting rendering, so strip them first.
        _remove_elements(root, "proofErr")
        _strip_run_rsid_attrs(root)

        # Every parent holding at least one run is a merge candidate.
        parents = {node.parentNode for node in _find_elements(root, "r")}
        merge_count = sum(_merge_runs_in(parent) for parent in parents)

        document_path.write_bytes(dom.toxml(encoding="UTF-8"))
        return merge_count, f"Merged {merge_count} runs"

    except Exception as e:  # deliberate catch-all: report, never raise
        return 0, f"Error: {e}"
||||
def _find_elements(root, tag: str) -> list:
|
||||
results = []
|
||||
|
||||
def traverse(node):
|
||||
if node.nodeType == node.ELEMENT_NODE:
|
||||
name = node.localName or node.tagName
|
||||
if name == tag or name.endswith(f":{tag}"):
|
||||
results.append(node)
|
||||
for child in node.childNodes:
|
||||
traverse(child)
|
||||
|
||||
traverse(root)
|
||||
return results
|
||||
|
||||
|
||||
def _get_child(parent, tag: str):
|
||||
for child in parent.childNodes:
|
||||
if child.nodeType == child.ELEMENT_NODE:
|
||||
name = child.localName or child.tagName
|
||||
if name == tag or name.endswith(f":{tag}"):
|
||||
return child
|
||||
return None
|
||||
|
||||
|
||||
def _get_children(parent, tag: str) -> list:
|
||||
results = []
|
||||
for child in parent.childNodes:
|
||||
if child.nodeType == child.ELEMENT_NODE:
|
||||
name = child.localName or child.tagName
|
||||
if name == tag or name.endswith(f":{tag}"):
|
||||
results.append(child)
|
||||
return results
|
||||
|
||||
|
||||
def _is_adjacent(elem1, elem2) -> bool:
|
||||
node = elem1.nextSibling
|
||||
while node:
|
||||
if node == elem2:
|
||||
return True
|
||||
if node.nodeType == node.ELEMENT_NODE:
|
||||
return False
|
||||
if node.nodeType == node.TEXT_NODE and node.data.strip():
|
||||
return False
|
||||
node = node.nextSibling
|
||||
return False
|
||||
|
||||
|
||||
|
||||
|
||||
def _remove_elements(root, tag: str):
    """Detach every descendant element matching *tag* from its parent."""
    for doomed in _find_elements(root, tag):
        parent = doomed.parentNode
        if parent:
            parent.removeChild(doomed)
def _strip_run_rsid_attrs(root):
    """Drop every attribute whose name contains 'rsid' (case-insensitive)
    from every run; rsid revision bookkeeping never affects rendering."""
    for run in _find_elements(root, "r"):
        # Snapshot names first: removing while iterating mutates the map.
        doomed = [a.name for a in run.attributes.values() if "rsid" in a.name.lower()]
        for attr_name in doomed:
            run.removeAttribute(attr_name)
def _merge_runs_in(container) -> int:
    # Merge chains of adjacent, identically-formatted runs that are direct
    # children of *container* (a paragraph, w:ins, w:del, ...).
    # Returns the number of runs folded into a predecessor.
    merge_count = 0
    run = _first_child_run(container)

    while run:
        # Keep absorbing the next sibling while it is an immediately
        # following run with identical formatting (rPr).
        while True:
            next_elem = _next_element_sibling(run)
            if next_elem and _is_run(next_elem) and _can_merge(run, next_elem):
                _merge_run_content(run, next_elem)
                container.removeChild(next_elem)
                merge_count += 1
            else:
                break

        # After absorbing, collapse the run's multiple <w:t> children.
        _consolidate_text(run)
        run = _next_sibling_run(run)

    return merge_count
def _first_child_run(container):
    """Return the first direct child of *container* that is a run element,
    or None when there is none."""
    runs = (
        node
        for node in container.childNodes
        if node.nodeType == node.ELEMENT_NODE and _is_run(node)
    )
    return next(runs, None)
def _next_element_sibling(node):
|
||||
sibling = node.nextSibling
|
||||
while sibling:
|
||||
if sibling.nodeType == sibling.ELEMENT_NODE:
|
||||
return sibling
|
||||
sibling = sibling.nextSibling
|
||||
return None
|
||||
|
||||
|
||||
def _next_sibling_run(node):
    """Return the nearest following sibling element that is a run, skipping
    text nodes and non-run elements; None when there is none."""
    cursor = node.nextSibling
    while cursor is not None:
        if cursor.nodeType == cursor.ELEMENT_NODE and _is_run(cursor):
            return cursor
        cursor = cursor.nextSibling
    return None
def _is_run(node) -> bool:
|
||||
name = node.localName or node.tagName
|
||||
return name == "r" or name.endswith(":r")
|
||||
|
||||
|
||||
def _can_merge(run1, run2) -> bool:
    """Runs may merge only when their formatting is literally identical:
    both lack an rPr, or both rPr serialize to byte-equal XML."""
    props1 = _get_child(run1, "rPr")
    props2 = _get_child(run2, "rPr")

    if props1 is None and props2 is None:
        return True
    if props1 is None or props2 is None:
        return False
    return props1.toxml() == props2.toxml()
def _merge_run_content(target, source):
|
||||
for child in list(source.childNodes):
|
||||
if child.nodeType == child.ELEMENT_NODE:
|
||||
name = child.localName or child.tagName
|
||||
if name != "rPr" and not name.endswith(":rPr"):
|
||||
target.appendChild(child)
|
||||
|
||||
|
||||
def _consolidate_text(run):
    # Collapse consecutive <w:t> children of one run into a single <w:t>.
    # Walks right-to-left so earlier indices stay valid after removals.
    t_elements = _get_children(run, "t")

    for i in range(len(t_elements) - 1, 0, -1):
        curr, prev = t_elements[i], t_elements[i - 1]

        # Only fold when nothing but whitespace separates the two <w:t>.
        if _is_adjacent(prev, curr):
            prev_text = prev.firstChild.data if prev.firstChild else ""
            curr_text = curr.firstChild.data if curr.firstChild else ""
            merged = prev_text + curr_text

            if prev.firstChild:
                prev.firstChild.data = merged
            else:
                # prev was an empty <w:t>; give it a text node to hold merged.
                prev.appendChild(run.ownerDocument.createTextNode(merged))

            # Word drops leading/trailing spaces unless xml:space="preserve".
            if merged.startswith(" ") or merged.endswith(" "):
                prev.setAttribute("xml:space", "preserve")
            elif prev.hasAttribute("xml:space"):
                prev.removeAttribute("xml:space")

            run.removeChild(curr)
197
skills/docx/scripts/office/helpers/simplify_redlines.py
Normal file
197
skills/docx/scripts/office/helpers/simplify_redlines.py
Normal file
@@ -0,0 +1,197 @@
|
||||
"""Simplify tracked changes by merging adjacent w:ins or w:del elements.
|
||||
|
||||
Merges adjacent <w:ins> elements from the same author into a single element.
|
||||
Same for <w:del> elements. This makes heavily-redlined documents easier to
|
||||
work with by reducing the number of tracked change wrappers.
|
||||
|
||||
Rules:
|
||||
- Only merges w:ins with w:ins, w:del with w:del (same element type)
|
||||
- Only merges if same author (ignores timestamp differences)
|
||||
- Only merges if truly adjacent (only whitespace between them)
|
||||
"""
|
||||
|
||||
import xml.etree.ElementTree as ET
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
|
||||
import defusedxml.minidom
|
||||
|
||||
WORD_NS = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"
|
||||
|
||||
|
||||
def simplify_redlines(input_dir: str) -> tuple[int, str]:
    """Merge adjacent same-author <w:ins>/<w:del> wrappers in document.xml.

    Args:
        input_dir: Root of an unpacked DOCX (must contain word/document.xml).

    Returns:
        (number of wrappers merged away, status message).
        On any failure, returns (0, "Error: ...") instead of raising.
    """
    document_path = Path(input_dir) / "word" / "document.xml"
    if not document_path.exists():
        return 0, f"Error: {document_path} not found"

    try:
        dom = defusedxml.minidom.parseString(document_path.read_text(encoding="utf-8"))
        root = dom.documentElement

        # Tracked changes live directly under paragraphs and table cells.
        hosts = _find_elements(root, "p") + _find_elements(root, "tc")

        merge_count = 0
        for host in hosts:
            for change_tag in ("ins", "del"):
                merge_count += _merge_tracked_changes_in(host, change_tag)

        document_path.write_bytes(dom.toxml(encoding="UTF-8"))
        return merge_count, f"Simplified {merge_count} tracked changes"

    except Exception as e:  # deliberate catch-all: report, never raise
        return 0, f"Error: {e}"
def _merge_tracked_changes_in(container, tag: str) -> int:
    # Merge adjacent same-author tracked-change wrappers (<w:ins> or <w:del>,
    # selected by *tag*) that are direct children of *container*.
    # Returns the number of wrappers merged away.
    merge_count = 0

    tracked = [
        child
        for child in container.childNodes
        if child.nodeType == child.ELEMENT_NODE and _is_element(child, tag)
    ]

    # Nothing to merge with fewer than two wrappers.
    if len(tracked) < 2:
        return 0

    i = 0
    while i < len(tracked) - 1:
        curr = tracked[i]
        next_elem = tracked[i + 1]

        if _can_merge_tracked(curr, next_elem):
            _merge_tracked_content(curr, next_elem)
            container.removeChild(next_elem)
            # Stay on index i: the wrapper after the removed one may now be
            # adjacent to curr as well.
            tracked.pop(i + 1)
            merge_count += 1
        else:
            i += 1

    return merge_count
def _is_element(node, tag: str) -> bool:
|
||||
name = node.localName or node.tagName
|
||||
return name == tag or name.endswith(f":{tag}")
|
||||
|
||||
|
||||
def _get_author(elem) -> str:
|
||||
author = elem.getAttribute("w:author")
|
||||
if not author:
|
||||
for attr in elem.attributes.values():
|
||||
if attr.localName == "author" or attr.name.endswith(":author"):
|
||||
return attr.value
|
||||
return author
|
||||
|
||||
|
||||
def _can_merge_tracked(elem1, elem2) -> bool:
    """Wrappers merge only when authored by the same person and separated by
    nothing but insignificant whitespace.

    NOTE(review): callers pass wrappers in document order, so *elem2* is
    always reachable from *elem1* via nextSibling.
    """
    if _get_author(elem1) != _get_author(elem2):
        return False

    cursor = elem1.nextSibling
    while cursor is not None and cursor is not elem2:
        if cursor.nodeType == cursor.ELEMENT_NODE:
            return False
        if cursor.nodeType == cursor.TEXT_NODE and cursor.data.strip():
            return False
        cursor = cursor.nextSibling

    return True
def _merge_tracked_content(target, source):
|
||||
while source.firstChild:
|
||||
child = source.firstChild
|
||||
source.removeChild(child)
|
||||
target.appendChild(child)
|
||||
|
||||
|
||||
def _find_elements(root, tag: str) -> list:
|
||||
results = []
|
||||
|
||||
def traverse(node):
|
||||
if node.nodeType == node.ELEMENT_NODE:
|
||||
name = node.localName or node.tagName
|
||||
if name == tag or name.endswith(f":{tag}"):
|
||||
results.append(node)
|
||||
for child in node.childNodes:
|
||||
traverse(child)
|
||||
|
||||
traverse(root)
|
||||
return results
|
||||
|
||||
|
||||
def get_tracked_change_authors(doc_xml_path: Path) -> dict[str, int]:
    """Count tracked changes (w:ins + w:del) per author in a document.xml.

    Args:
        doc_xml_path: Path to an unpacked word/document.xml.

    Returns:
        Mapping of author name -> change count; {} when the file is missing
        or is not well-formed XML.
    """
    if not doc_xml_path.exists():
        return {}

    try:
        root = ET.parse(doc_xml_path).getroot()
    except ET.ParseError:
        return {}

    ns = {"w": WORD_NS}
    author_attr = f"{{{WORD_NS}}}author"

    counts: dict[str, int] = {}
    for change_tag in ("ins", "del"):
        for elem in root.findall(f".//w:{change_tag}", ns):
            author = elem.get(author_attr)
            if author:
                counts[author] = counts.get(author, 0) + 1

    return counts
def _get_authors_from_docx(docx_path: Path) -> dict[str, int]:
|
||||
try:
|
||||
with zipfile.ZipFile(docx_path, "r") as zf:
|
||||
if "word/document.xml" not in zf.namelist():
|
||||
return {}
|
||||
with zf.open("word/document.xml") as f:
|
||||
tree = ET.parse(f)
|
||||
root = tree.getroot()
|
||||
|
||||
namespaces = {"w": WORD_NS}
|
||||
author_attr = f"{{{WORD_NS}}}author"
|
||||
|
||||
authors: dict[str, int] = {}
|
||||
for tag in ["ins", "del"]:
|
||||
for elem in root.findall(f".//w:{tag}", namespaces):
|
||||
author = elem.get(author_attr)
|
||||
if author:
|
||||
authors[author] = authors.get(author, 0) + 1
|
||||
return authors
|
||||
except (zipfile.BadZipFile, ET.ParseError):
|
||||
return {}
|
||||
|
||||
|
||||
def infer_author(modified_dir: Path, original_docx: Path, default: str = "Claude") -> str:
    """Work out which author produced the NEW tracked changes.

    Compares per-author change counts in the modified (unpacked) document
    against the original .docx; the single author whose count grew is the
    answer.

    Args:
        modified_dir: Root of the unpacked, modified DOCX.
        original_docx: Path to the original packed .docx.
        default: Returned when no new changes can be attributed.

    Returns:
        The inferred author name, or *default*.

    Raises:
        ValueError: more than one author added new changes.
    """
    modified_xml = modified_dir / "word" / "document.xml"
    modified_authors = get_tracked_change_authors(modified_xml)
    if not modified_authors:
        return default

    original_authors = _get_authors_from_docx(original_docx)

    # Keep only authors whose change count grew relative to the original.
    new_changes = {
        author: count - original_authors.get(author, 0)
        for author, count in modified_authors.items()
        if count - original_authors.get(author, 0) > 0
    }

    if not new_changes:
        return default
    if len(new_changes) == 1:
        return next(iter(new_changes))

    raise ValueError(
        f"Multiple authors added new changes: {new_changes}. "
        "Cannot infer which author to validate."
    )
Reference in New Issue
Block a user