feat: Complete fleet — 94 skills across 10+ domains
Pulled ALL skills from 15 source repositories: - anthropics/skills: 16 (docs, design, MCP, testing) - obra/superpowers: 14 (TDD, debugging, agents, planning) - coreyhaines31/marketingskills: 25 (marketing, CRO, SEO, growth) - better-auth/skills: 5 (auth patterns) - vercel-labs/agent-skills: 5 (React, design, Vercel) - antfu/skills: 16 (Vue, Vite, Vitest, pnpm, Turborepo) - Plus 13 individual skills from various repos Mosaic Stack is not limited to coding — the Orchestrator and subagents serve coding, business, design, marketing, writing, logistics, analysis, and more. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
197
skills/pptx/scripts/office/helpers/simplify_redlines.py
Normal file
197
skills/pptx/scripts/office/helpers/simplify_redlines.py
Normal file
@@ -0,0 +1,197 @@
|
||||
"""Simplify tracked changes by merging adjacent w:ins or w:del elements.
|
||||
|
||||
Merges adjacent <w:ins> elements from the same author into a single element.
|
||||
Same for <w:del> elements. This makes heavily-redlined documents easier to
|
||||
work with by reducing the number of tracked change wrappers.
|
||||
|
||||
Rules:
|
||||
- Only merges w:ins with w:ins, w:del with w:del (same element type)
|
||||
- Only merges if same author (ignores timestamp differences)
|
||||
- Only merges if truly adjacent (only whitespace between them)
|
||||
"""
|
||||
|
||||
import xml.etree.ElementTree as ET
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
|
||||
import defusedxml.minidom
|
||||
|
||||
WORD_NS = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"
|
||||
|
||||
|
||||
def simplify_redlines(input_dir: str) -> tuple[int, str]:
    """Merge adjacent same-author tracked changes in word/document.xml.

    Args:
        input_dir: Root of an unpacked .docx (must contain word/document.xml).

    Returns:
        A ``(merge_count, message)`` tuple. On any failure ``merge_count`` is
        0 and the message carries the error text.
    """
    document_path = Path(input_dir) / "word" / "document.xml"

    if not document_path.exists():
        return 0, f"Error: {document_path} not found"

    try:
        dom = defusedxml.minidom.parseString(
            document_path.read_text(encoding="utf-8")
        )
        body = dom.documentElement

        # Tracked-change wrappers sit directly under paragraphs (w:p) and
        # table cells (w:tc), so those are the only containers scanned.
        containers = _find_elements(body, "p") + _find_elements(body, "tc")

        total = 0
        for container in containers:
            for change_tag in ("ins", "del"):
                total += _merge_tracked_changes_in(container, change_tag)

        document_path.write_bytes(dom.toxml(encoding="UTF-8"))
        return total, f"Simplified {total} tracked changes"
    except Exception as exc:  # best-effort tool: report instead of raising
        return 0, f"Error: {exc}"
|
||||
|
||||
|
||||
def _merge_tracked_changes_in(container, tag: str) -> int:
    """Collapse mergeable adjacent <w:ins>/<w:del> children of *container*.

    Args:
        container: DOM element whose direct children are scanned.
        tag: Local tag name to merge ("ins" or "del").

    Returns:
        Number of merges performed.
    """
    runs = []
    for node in container.childNodes:
        if node.nodeType == node.ELEMENT_NODE and _is_element(node, tag):
            runs.append(node)

    if len(runs) < 2:
        return 0

    merged = 0
    idx = 0
    # After a merge, runs[idx] has absorbed its right neighbour, so idx only
    # advances when the current pair cannot be combined.
    while idx + 1 < len(runs):
        left = runs[idx]
        right = runs[idx + 1]
        if not _can_merge_tracked(left, right):
            idx += 1
            continue
        _merge_tracked_content(left, right)
        container.removeChild(right)
        del runs[idx + 1]
        merged += 1

    return merged
|
||||
|
||||
|
||||
def _is_element(node, tag: str) -> bool:
|
||||
name = node.localName or node.tagName
|
||||
return name == tag or name.endswith(f":{tag}")
|
||||
|
||||
|
||||
def _get_author(elem) -> str:
|
||||
author = elem.getAttribute("w:author")
|
||||
if not author:
|
||||
for attr in elem.attributes.values():
|
||||
if attr.localName == "author" or attr.name.endswith(":author"):
|
||||
return attr.value
|
||||
return author
|
||||
|
||||
|
||||
def _can_merge_tracked(elem1, elem2) -> bool:
    """True when *elem2* can be folded into *elem1*.

    Requires both elements to carry the same author and nothing but
    insignificant whitespace text in the sibling chain between them.
    """
    if _get_author(elem1) != _get_author(elem2):
        return False

    cursor = elem1.nextSibling
    while cursor is not None and cursor is not elem2:
        # Any element, or any non-whitespace text, breaks adjacency.
        blocking = cursor.nodeType == cursor.ELEMENT_NODE or (
            cursor.nodeType == cursor.TEXT_NODE and cursor.data.strip()
        )
        if blocking:
            return False
        cursor = cursor.nextSibling

    return True
|
||||
|
||||
|
||||
def _merge_tracked_content(target, source):
|
||||
while source.firstChild:
|
||||
child = source.firstChild
|
||||
source.removeChild(child)
|
||||
target.appendChild(child)
|
||||
|
||||
|
||||
def _find_elements(root, tag: str) -> list:
|
||||
results = []
|
||||
|
||||
def traverse(node):
|
||||
if node.nodeType == node.ELEMENT_NODE:
|
||||
name = node.localName or node.tagName
|
||||
if name == tag or name.endswith(f":{tag}"):
|
||||
results.append(node)
|
||||
for child in node.childNodes:
|
||||
traverse(child)
|
||||
|
||||
traverse(root)
|
||||
return results
|
||||
|
||||
|
||||
def get_tracked_change_authors(doc_xml_path: Path) -> dict[str, int]:
    """Count tracked-change elements (<w:ins>/<w:del>) per author.

    Args:
        doc_xml_path: Path to an unpacked word/document.xml.

    Returns:
        Mapping of author name to number of tracked-change elements; empty
        when the file is missing or is not well-formed XML.
    """
    if not doc_xml_path.exists():
        return {}

    try:
        root = ET.parse(doc_xml_path).getroot()
    except ET.ParseError:
        return {}

    ns_map = {"w": WORD_NS}
    qualified_author = f"{{{WORD_NS}}}author"

    counts: dict[str, int] = {}
    for change_tag in ("ins", "del"):
        for node in root.findall(f".//w:{change_tag}", ns_map):
            who = node.get(qualified_author)
            if who:
                counts[who] = counts.get(who, 0) + 1
    return counts
|
||||
|
||||
|
||||
def _get_authors_from_docx(docx_path: Path) -> dict[str, int]:
|
||||
try:
|
||||
with zipfile.ZipFile(docx_path, "r") as zf:
|
||||
if "word/document.xml" not in zf.namelist():
|
||||
return {}
|
||||
with zf.open("word/document.xml") as f:
|
||||
tree = ET.parse(f)
|
||||
root = tree.getroot()
|
||||
|
||||
namespaces = {"w": WORD_NS}
|
||||
author_attr = f"{{{WORD_NS}}}author"
|
||||
|
||||
authors: dict[str, int] = {}
|
||||
for tag in ["ins", "del"]:
|
||||
for elem in root.findall(f".//w:{tag}", namespaces):
|
||||
author = elem.get(author_attr)
|
||||
if author:
|
||||
authors[author] = authors.get(author, 0) + 1
|
||||
return authors
|
||||
except (zipfile.BadZipFile, ET.ParseError):
|
||||
return {}
|
||||
|
||||
|
||||
def infer_author(modified_dir: Path, original_docx: Path, default: str = "Claude") -> str:
    """Work out which single author produced the new tracked changes.

    Compares per-author tracked-change counts between the modified (unpacked)
    document and the original .docx.

    Args:
        modified_dir: Root of the unpacked, modified document.
        original_docx: Path to the original packed .docx.
        default: Author returned when no new changes are detected.

    Returns:
        The single author who added changes, or *default* when none did.

    Raises:
        ValueError: If more than one author added new changes.
    """
    modified_counts = get_tracked_change_authors(
        modified_dir / "word" / "document.xml"
    )
    if not modified_counts:
        return default

    baseline = _get_authors_from_docx(original_docx)

    # Keep only authors whose change count grew relative to the original.
    added = {
        name: count - baseline.get(name, 0)
        for name, count in modified_counts.items()
        if count - baseline.get(name, 0) > 0
    }

    if not added:
        return default
    if len(added) == 1:
        return next(iter(added))

    raise ValueError(
        f"Multiple authors added new changes: {added}. "
        "Cannot infer which author to validate."
    )
|
||||
Reference in New Issue
Block a user