Pulled ALL skills from 15 source repositories: - anthropics/skills: 16 (docs, design, MCP, testing) - obra/superpowers: 14 (TDD, debugging, agents, planning) - coreyhaines31/marketingskills: 25 (marketing, CRO, SEO, growth) - better-auth/skills: 5 (auth patterns) - vercel-labs/agent-skills: 5 (React, design, Vercel) - antfu/skills: 16 (Vue, Vite, Vitest, pnpm, Turborepo) - Plus 13 individual skills from various repos Mosaic Stack is not limited to coding — the Orchestrator and subagents serve coding, business, design, marketing, writing, logistics, analysis, and more. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
198 lines
5.6 KiB
Python
198 lines
5.6 KiB
Python
"""Simplify tracked changes by merging adjacent w:ins or w:del elements.
|
|
|
|
Merges adjacent <w:ins> elements from the same author into a single element.
|
|
Same for <w:del> elements. This makes heavily-redlined documents easier to
|
|
work with by reducing the number of tracked change wrappers.
|
|
|
|
Rules:
|
|
- Only merges w:ins with w:ins, w:del with w:del (same element type)
|
|
- Only merges if same author (ignores timestamp differences)
|
|
- Only merges if truly adjacent (only whitespace between them)
|
|
"""
|
|
|
|
import xml.etree.ElementTree as ET
|
|
import zipfile
|
|
from pathlib import Path
|
|
|
|
import defusedxml.minidom
|
|
|
|
WORD_NS = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"
|
|
|
|
|
|
def simplify_redlines(input_dir: str) -> tuple[int, str]:
    """Merge adjacent tracked changes in an unpacked document, in place.

    Reads ``word/document.xml`` below *input_dir*, merges adjacent ``ins``
    and ``del`` wrappers inside every ``p`` and ``tc`` element, and writes
    the result back to the same file encoded as UTF-8.

    Args:
        input_dir: Directory that contains ``word/document.xml``.

    Returns:
        A ``(merge_count, message)`` pair. When the file is missing or any
        exception is raised while processing, the count is ``0`` and the
        message starts with ``"Error:"``.
    """
    doc_xml = Path(input_dir) / "word" / "document.xml"
    if not doc_xml.exists():
        return 0, f"Error: {doc_xml} not found"

    try:
        xml_text = doc_xml.read_text(encoding="utf-8")
        dom = defusedxml.minidom.parseString(xml_text)
        root = dom.documentElement

        # Both container lists are collected before any merging starts;
        # each container then gets its insertions merged before deletions.
        merge_count = sum(
            _merge_tracked_changes_in(container, kind)
            for container in _find_elements(root, "p") + _find_elements(root, "tc")
            for kind in ("ins", "del")
        )

        doc_xml.write_bytes(dom.toxml(encoding="UTF-8"))
    except Exception as exc:
        # Failures are reported through the message instead of propagating.
        return 0, f"Error: {exc}"

    return merge_count, f"Simplified {merge_count} tracked changes"
|
|
|
|
|
|
def _merge_tracked_changes_in(container, tag: str) -> int:
|
|
merge_count = 0
|
|
|
|
tracked = [
|
|
child
|
|
for child in container.childNodes
|
|
if child.nodeType == child.ELEMENT_NODE and _is_element(child, tag)
|
|
]
|
|
|
|
if len(tracked) < 2:
|
|
return 0
|
|
|
|
i = 0
|
|
while i < len(tracked) - 1:
|
|
curr = tracked[i]
|
|
next_elem = tracked[i + 1]
|
|
|
|
if _can_merge_tracked(curr, next_elem):
|
|
_merge_tracked_content(curr, next_elem)
|
|
container.removeChild(next_elem)
|
|
tracked.pop(i + 1)
|
|
merge_count += 1
|
|
else:
|
|
i += 1
|
|
|
|
return merge_count
|
|
|
|
|
|
def _is_element(node, tag: str) -> bool:
|
|
name = node.localName or node.tagName
|
|
return name == tag or name.endswith(f":{tag}")
|
|
|
|
|
|
def _get_author(elem) -> str:
|
|
author = elem.getAttribute("w:author")
|
|
if not author:
|
|
for attr in elem.attributes.values():
|
|
if attr.localName == "author" or attr.name.endswith(":author"):
|
|
return attr.value
|
|
return author
|
|
|
|
|
|
def _can_merge_tracked(elem1, elem2) -> bool:
    """Return whether *elem2* may be merged into *elem1*.

    The two elements must have the same author, and every sibling between
    them must be neither an element nor a text node with non-whitespace
    content.
    """
    if _get_author(elem1) != _get_author(elem2):
        return False

    # Walk the siblings between the two elements looking for a blocker.
    between = elem1.nextSibling
    while between and between != elem2:
        kind = between.nodeType
        if kind == between.ELEMENT_NODE:
            return False
        if kind == between.TEXT_NODE and between.data.strip():
            return False
        between = between.nextSibling
    return True
|
|
|
|
|
|
def _merge_tracked_content(target, source):
|
|
while source.firstChild:
|
|
child = source.firstChild
|
|
source.removeChild(child)
|
|
target.appendChild(child)
|
|
|
|
|
|
def _find_elements(root, tag: str) -> list:
|
|
results = []
|
|
|
|
def traverse(node):
|
|
if node.nodeType == node.ELEMENT_NODE:
|
|
name = node.localName or node.tagName
|
|
if name == tag or name.endswith(f":{tag}"):
|
|
results.append(node)
|
|
for child in node.childNodes:
|
|
traverse(child)
|
|
|
|
traverse(root)
|
|
return results
|
|
|
|
|
|
def get_tracked_change_authors(doc_xml_path: Path) -> dict[str, int]:
    """Count tracked changes per author in a document.xml file.

    Args:
        doc_xml_path: Path to the XML file to inspect.

    Returns:
        A mapping of author name to the number of ``w:ins`` and ``w:del``
        elements carrying that author. Elements without an author are
        skipped. An empty dict is returned when the file does not exist or
        cannot be parsed.
    """
    if not doc_xml_path.exists():
        return {}

    try:
        root = ET.parse(doc_xml_path).getroot()
    except ET.ParseError:
        return {}

    # ElementTree addresses namespaced names as "{uri}local".
    qualified = f"{{{WORD_NS}}}"
    author_key = qualified + "author"

    tally: dict[str, int] = {}
    for kind in ("ins", "del"):
        for change in root.findall(f".//{qualified}{kind}"):
            who = change.get(author_key)
            if not who:
                continue
            tally[who] = tally.get(who, 0) + 1
    return tally
|
|
|
|
|
|
def _get_authors_from_docx(docx_path: Path) -> dict[str, int]:
|
|
try:
|
|
with zipfile.ZipFile(docx_path, "r") as zf:
|
|
if "word/document.xml" not in zf.namelist():
|
|
return {}
|
|
with zf.open("word/document.xml") as f:
|
|
tree = ET.parse(f)
|
|
root = tree.getroot()
|
|
|
|
namespaces = {"w": WORD_NS}
|
|
author_attr = f"{{{WORD_NS}}}author"
|
|
|
|
authors: dict[str, int] = {}
|
|
for tag in ["ins", "del"]:
|
|
for elem in root.findall(f".//w:{tag}", namespaces):
|
|
author = elem.get(author_attr)
|
|
if author:
|
|
authors[author] = authors.get(author, 0) + 1
|
|
return authors
|
|
except (zipfile.BadZipFile, ET.ParseError):
|
|
return {}
|
|
|
|
|
|
def infer_author(modified_dir: Path, original_docx: Path, default: str = "Claude") -> str:
    """Work out which author added tracked changes to a modified document.

    Per-author tracked-change counts from ``word/document.xml`` under
    *modified_dir* are compared with the counts found in *original_docx*.
    An author counts as having added changes when their count increased.

    Args:
        modified_dir: Unpacked document directory holding the modified XML.
        original_docx: Packed original document used as the baseline.
        default: Name returned when no author can be singled out because
            nothing was added.

    Returns:
        The single author whose change count grew, or *default* when the
        modified document has no tracked-change authors or no count grew.

    Raises:
        ValueError: If more than one author's count grew.
    """
    after = get_tracked_change_authors(modified_dir / "word" / "document.xml")
    if not after:
        return default

    before = _get_authors_from_docx(original_docx)

    # Keep only authors with more changes now than in the original.
    new_changes: dict[str, int] = {
        author: added
        for author, count in after.items()
        if (added := count - before.get(author, 0)) > 0
    }

    if not new_changes:
        return default

    if len(new_changes) > 1:
        raise ValueError(
            f"Multiple authors added new changes: {new_changes}. "
            "Cannot infer which author to validate."
        )

    (sole_author,) = new_changes
    return sole_author
|