Source code for aiecs.tools.docs.document_creator_tool
#!/usr/bin/env python3
# /*---------------------------------------------------------------------------------------------
# * Copyright (c) IRETBL Corporation. All rights reserved.
# * Licensed under the Apache-2.0. See License.txt in the project root for license information.
# *--------------------------------------------------------------------------------------------*/
"""
Document Creator Tool
This tool is responsible for creating new documents from templates,
initializing document structure, and managing document metadata.
Key Features:
1. Template-based document creation
2. Document structure initialization
3. Metadata management (title, author, date, etc.)
4. Style configuration and presets
5. Multi-format support (MD, HTML, DOCX, PDF, etc.)
"""
import os
import json
import uuid
import tempfile
import logging
from datetime import datetime
from typing import Dict, Any, List, Optional, cast
from enum import Enum
from pydantic import BaseModel, Field
from pydantic_settings import BaseSettings, SettingsConfigDict
from aiecs.tools.base_tool import BaseTool
from aiecs.tools import register_tool
[docs]
class DocumentType(str, Enum):
    """Kinds of document the creator tool can be asked to produce.

    Every member is also a ``str``, so it compares equal to its lowercase
    value (for example ``DocumentType.REPORT == "report"``).
    """

    # General business / editorial documents
    REPORT = "report"
    ARTICLE = "article"
    PRESENTATION = "presentation"
    MANUAL = "manual"
    LETTER = "letter"
    PROPOSAL = "proposal"

    # Specialised writing styles
    ACADEMIC = "academic"
    TECHNICAL = "technical"
    CREATIVE = "creative"

    # Anything that does not fit the categories above
    CUSTOM = "custom"
[docs]
class DocumentFormat(str, Enum):
    """Output formats a document can be requested in.

    Members double as ``str`` values; note that ``PLAIN_TEXT`` carries the
    value ``"txt"`` rather than its own name.
    """

    # Text-based markup formats
    MARKDOWN = "markdown"
    HTML = "html"

    # Office / print formats
    DOCX = "docx"
    PDF = "pdf"
    LATEX = "latex"

    # Plain and structured data formats
    PLAIN_TEXT = "txt"
    JSON = "json"
    XML = "xml"

    # Slide decks
    PPTX = "pptx"
    PPT = "ppt"
[docs]
class TemplateType(str, Enum):
    """Identifiers for the document templates known to the tool.

    Each member is a ``str`` whose value is the snake_case template name.
    """

    # Empty starting point
    BLANK = "blank"

    # Structured, multi-section templates
    BUSINESS_REPORT = "business_report"
    TECHNICAL_DOC = "technical_doc"
    ACADEMIC_PAPER = "academic_paper"
    PROJECT_PROPOSAL = "project_proposal"
    USER_MANUAL = "user_manual"

    # Slides, periodicals and billing
    PRESENTATION = "presentation"
    NEWSLETTER = "newsletter"
    INVOICE = "invoice"

    # Caller-supplied template
    CUSTOM = "custom"
[docs]
class StylePreset(str, Enum):
    """Named visual style presets that can be applied to a document.

    Members are ``str`` values, so a preset can be looked up from its
    lowercase name with ``StylePreset("modern")``.
    """

    # Fallback preset
    DEFAULT = "default"

    # Formal looks
    CORPORATE = "corporate"
    ACADEMIC = "academic"

    # Contemporary / traditional looks
    MODERN = "modern"
    CLASSIC = "classic"
    MINIMAL = "minimal"

    # Expressive and business-oriented looks
    COLORFUL = "colorful"
    PROFESSIONAL = "professional"
[docs]
@register_tool("document_creator")
class DocumentCreatorTool(BaseTool):
"""
Document Creator Tool for creating new documents from templates
This tool provides:
1. Template management and selection
2. Document structure initialization
3. Metadata configuration
4. Style and format setup
5. Multi-format document creation
Integrates with:
- DocumentWriterTool for content writing
- DocumentLayoutTool for layout configuration
- ContentInsertionTool for complex content
"""
# Configuration schema
[docs]
class Config(BaseSettings):
    """Settings for the document creator tool.

    Values are picked up from environment variables that carry the
    ``DOC_CREATOR_`` prefix, e.g. ``DOC_CREATOR_TEMPLATES_DIR`` populates
    ``templates_dir``.
    """

    model_config = SettingsConfigDict(env_prefix="DOC_CREATOR_")

    # --- Storage locations (default to sub-folders of the system temp dir) ---
    templates_dir: str = Field(
        description="Directory for document templates",
        default=os.path.join(tempfile.gettempdir(), "document_templates"),
    )
    output_dir: str = Field(
        description="Directory for created documents",
        default=os.path.join(tempfile.gettempdir(), "created_documents"),
    )

    # --- Defaults applied when a caller does not choose explicitly ---
    default_format: str = Field(description="Default output format", default="markdown")
    default_style: str = Field(description="Default style preset", default="default")

    # --- Feature toggles ---
    auto_backup: bool = Field(
        description="Whether to automatically backup created documents",
        default=True,
    )
    include_metadata: bool = Field(
        description="Whether to include metadata in created documents",
        default=True,
    )
    generate_toc: bool = Field(
        description="Whether to generate table of contents automatically",
        default=True,
    )
[docs]
def __init__(self, config: Optional[Dict] = None, **kwargs):
    """Set up the tool: settings, logger, directories, templates, office backend.

    The original notes state that BaseTool resolves configuration from, in
    decreasing priority: an explicit config dict, YAML config files
    (config/tools/document_creator.yaml), environment variables (via dotenv)
    and finally the tool defaults. That resolution happens in the base
    class and is not visible here.

    Args:
        config: Optional configuration overrides
        **kwargs: Additional arguments passed to BaseTool (e.g., tool_name)
    """
    super().__init__(config, **kwargs)

    # BaseTool exposes the loaded settings as self._config_obj; fall back to
    # a freshly constructed Config when nothing was loaded.
    if self._config_obj:
        self.config: DocumentCreatorTool.Config = cast(DocumentCreatorTool.Config, self._config_obj)
    else:
        self.config = self.Config()

    self.logger = logging.getLogger(__name__)

    # Start-up sequence: folders first, then the built-in templates, then
    # the optional office backend used for PPTX/DOCX output.
    self._init_directories()
    self._init_templates()
    self._init_office_tool()

    # Records of every document created through this instance.
    self._documents_created: List[Any] = []
def _init_directories(self):
"""Initialize required directories"""
os.makedirs(self.config.templates_dir, exist_ok=True)
os.makedirs(self.config.output_dir, exist_ok=True)
def _init_templates(self):
    """Populate ``self.templates`` with one definition per built-in template type."""
    # (template type, factory) pairs, evaluated in this order.
    builders = (
        (TemplateType.BLANK, self._get_blank_template),
        (TemplateType.BUSINESS_REPORT, self._get_business_report_template),
        (TemplateType.TECHNICAL_DOC, self._get_technical_doc_template),
        (TemplateType.ACADEMIC_PAPER, self._get_academic_paper_template),
        (TemplateType.PROJECT_PROPOSAL, self._get_project_proposal_template),
        (TemplateType.USER_MANUAL, self._get_user_manual_template),
        (TemplateType.PRESENTATION, self._get_presentation_template),
        (TemplateType.NEWSLETTER, self._get_newsletter_template),
        (TemplateType.INVOICE, self._get_invoice_template),
    )
    # TemplateType.CUSTOM has no built-in definition and is therefore absent.
    self.templates = {kind: build() for kind, build in builders}
def _init_office_tool(self):
    """Try to attach an OfficeTool instance for PPTX/DOCX output.

    ``self.office_tool`` ends up as an OfficeTool instance when the import
    succeeds, and stays ``None`` (with a warning logged) when it does not.
    """
    self.office_tool: Any = None
    try:
        # Imported lazily so a missing office backend does not break the tool.
        from aiecs.tools.task_tools.office_tool import OfficeTool

        backend = OfficeTool()
    except ImportError:
        self.logger.warning("OfficeTool not available, PPTX/DOCX creation will be limited")
        self.office_tool = None
        return
    self.office_tool = backend
    self.logger.info("OfficeTool initialized successfully for PPTX/DOCX support")
# Schema definitions
[docs]
class Create_documentSchema(BaseModel):
    """Input model for the ``create_document`` operation."""

    # Required inputs
    document_type: DocumentType = Field(description="Type of document to create")
    template_type: TemplateType = Field(description="Template to use")
    output_format: DocumentFormat = Field(description="Output format")
    metadata: Dict[str, Any] = Field(description="Document metadata")

    # Optional inputs (default to None)
    style_preset: Optional[StylePreset] = Field(description="Style preset", default=None)
    output_path: Optional[str] = Field(description="Custom output path", default=None)
[docs]
class Create_from_templateSchema(BaseModel):
    """Input model for the ``create_from_template`` operation."""

    # Required inputs
    template_name: str = Field(description="Name of template to use")
    template_variables: Dict[str, Any] = Field(description="Variables to fill in template")
    output_format: DocumentFormat = Field(description="Output format")

    # Optional input (defaults to None)
    output_path: Optional[str] = Field(description="Custom output path", default=None)
[docs]
class Setup_document_structureSchema(BaseModel):
    """Input model for the ``setup_document_structure`` operation."""

    # Required inputs
    document_path: str = Field(description="Path to document")
    sections: List[Dict[str, Any]] = Field(description="Document sections configuration")

    # Optional inputs
    generate_toc: bool = Field(description="Generate table of contents", default=True)
    numbering_style: Optional[str] = Field(description="Section numbering style", default=None)
[docs]
class Configure_metadataSchema(BaseModel):
    """Input model for the ``configure_metadata`` operation."""

    # Required inputs
    document_path: str = Field(description="Path to document")
    metadata: Dict[str, Any] = Field(description="Metadata to configure")

    # Optional input (defaults to True)
    format_specific: bool = Field(description="Use format-specific metadata", default=True)
[docs]
class Get_template_infoSchema(BaseModel):
    """Input model for the ``get_template_info`` operation."""

    # The only input: which built-in template to describe.
    template_type: TemplateType = Field(description="Type of template")
[docs]
def create_document(
    self,
    document_type: DocumentType,
    template_type: TemplateType,
    output_format: DocumentFormat,
    metadata: Dict[str, Any],
    style_preset: Optional[StylePreset] = None,
    output_path: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Create a new document from a built-in template.

    Args:
        document_type: Type of document to create
        template_type: Template to use
        output_format: Output format for the document
        metadata: Document metadata (title, author, etc.)
        style_preset: Style preset to apply; when omitted the configured
            ``default_style`` is used
        output_path: Custom output path; generated inside the configured
            output directory when omitted

    Returns:
        Dict describing the created document (id, types, path, processed
        metadata, the ``style_preset`` argument as passed, and creation
        timing / file size).

    Raises:
        DocumentCreationError: On any failure, including an output format
            the template does not list as supported. The underlying
            exception is attached as ``__cause__``.
    """
    try:
        start_time = datetime.now()
        document_id = str(uuid.uuid4())
        self.logger.info(f"Creating document {document_id}: {document_type} using {template_type}")

        # Step 1: Validate and prepare template
        template = self._get_template(template_type)

        # Step 1b: Enforce supported_formats contract. A template without a
        # (non-empty) supported_formats list accepts every format.
        template_supported = template.get("supported_formats", [])
        if template_supported and output_format.value not in template_supported:
            raise DocumentCreationError(
                f"Format '{output_format.value}' is not supported by template "
                f"'{template_type.value}'. "
                f"Supported formats: {template_supported}. "
                f"Please choose one of the supported formats."
            )

        # Step 2: Generate output path
        if not output_path:
            output_path = self._generate_output_path(document_type, output_format, document_id)

        # Step 3: Process metadata
        processed_metadata = self._process_metadata(metadata, output_format)

        # Step 4: Apply style preset (config stores the default as a plain string)
        preset_value = style_preset or self.config.default_style
        style_preset_enum = StylePreset(preset_value) if isinstance(preset_value, str) else preset_value
        style_config = self._get_style_config(style_preset_enum)

        # Step 5: Create document from template
        document_content = self._create_document_from_template(template, processed_metadata, style_config, output_format)

        # Step 6: Write document to file
        self._write_document_file(output_path, document_content, output_format)

        # Step 7: Track created document
        document_info = {
            "document_id": document_id,
            "document_type": document_type,
            "template_type": template_type,
            "output_format": output_format,
            "output_path": output_path,
            "metadata": processed_metadata,
            # The caller-supplied preset (possibly None), not the resolved default.
            "style_preset": style_preset,
            "creation_metadata": {
                "created_at": start_time.isoformat(),
                "file_size": (os.path.getsize(output_path) if os.path.exists(output_path) else 0),
                "duration": (datetime.now() - start_time).total_seconds(),
            },
        }
        self._documents_created.append(document_info)
        self.logger.info(f"Document {document_id} created successfully at {output_path}")
        return document_info
    except Exception as e:
        # Chain explicitly so the traceback shows the root cause as the
        # direct cause rather than as an error raised inside the handler.
        raise DocumentCreationError(f"Failed to create document: {str(e)}") from e
[docs]
def create_from_template(
    self,
    template_name: str,
    template_variables: Dict[str, Any],
    output_format: DocumentFormat,
    output_path: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Create a document from a custom template file, filling in variables.

    Args:
        template_name: Name of a template file inside the configured
            templates directory
        template_variables: Variables to substitute in template
        output_format: Output format
        output_path: Custom output path; generated when omitted

    Returns:
        Dict containing the template name, output path, output format,
        the variables used and the creation time (ISO format).

    Raises:
        DocumentCreationError: On any failure. This includes a template
            that is missing, is not a regular file, or resolves to a
            location outside the templates directory. The underlying
            exception is attached as ``__cause__``.
    """
    try:
        # Resolve the template and refuse names that escape the templates
        # directory (e.g. "../secret" or an absolute path).
        templates_root = os.path.abspath(self.config.templates_dir)
        template_path = os.path.abspath(os.path.join(templates_root, template_name))
        try:
            inside_root = os.path.commonpath([templates_root, template_path]) == templates_root
        except ValueError:
            # commonpath raises when the paths cannot be compared
            # (for instance different drives on Windows).
            inside_root = False
        if not inside_root or not os.path.isfile(template_path):
            raise TemplateError(f"Template not found: {template_name}")

        with open(template_path, "r", encoding="utf-8") as f:
            template_content = f.read()

        # Process template variables
        processed_content = self._process_template_variables(template_content, template_variables)

        # Generate output path if not provided
        if not output_path:
            output_path = self._generate_output_path("custom", output_format, str(uuid.uuid4()))

        # Write processed content
        self._write_document_file(output_path, processed_content, output_format)

        return {
            "template_name": template_name,
            "output_path": output_path,
            "output_format": output_format,
            "variables_used": template_variables,
            "creation_time": datetime.now().isoformat(),
        }
    except Exception as e:
        raise DocumentCreationError(f"Failed to create from template: {str(e)}") from e
[docs]
def setup_document_structure(
    self,
    document_path: str,
    sections: List[Dict[str, Any]],
    generate_toc: bool = True,
    numbering_style: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Write a section skeleton (and optional table of contents) into a document.

    The file at ``document_path`` is read if it exists, combined with the
    generated structure, and then (re)written; a missing file is treated
    as empty content and is created.

    Args:
        document_path: Path to document
        sections: List of section configurations
        generate_toc: Whether to generate table of contents
        numbering_style: Section numbering style

    Returns:
        Dict containing the document path, number of sections, the TOC
        flag, the numbering style and the setup time (ISO format).

    Raises:
        DocumentCreationError: On any failure; the underlying exception
            is attached as ``__cause__``.
    """
    try:
        self.logger.info(f"Setting up structure for document: {document_path}")

        # Read existing document
        if os.path.exists(document_path):
            with open(document_path, "r", encoding="utf-8") as f:
                content = f.read()
        else:
            content = ""

        # Generate structure
        structure_content = self._generate_document_structure(sections, generate_toc, numbering_style)

        # Combine with existing content
        final_content = self._combine_structure_with_content(structure_content, content)

        # Write back to file
        with open(document_path, "w", encoding="utf-8") as f:
            f.write(final_content)

        return {
            "document_path": document_path,
            "sections_created": len(sections),
            "toc_generated": generate_toc,
            "numbering_style": numbering_style,
            "structure_setup_time": datetime.now().isoformat(),
        }
    except Exception as e:
        raise DocumentCreationError(f"Failed to setup document structure: {str(e)}") from e
[docs]
def configure_metadata(
    self,
    document_path: str,
    metadata: Dict[str, Any],
    format_specific: bool = True,
) -> Dict[str, Any]:
    """
    Generate metadata content and insert it into an existing document.

    Args:
        document_path: Path to document
        metadata: Metadata to configure
        format_specific: Use format-specific metadata syntax; otherwise a
            generic representation is generated

    Returns:
        Dict containing the document path, the metadata that was
        configured, the detected format, the ``format_specific`` flag and
        the configuration time (ISO format).

    Raises:
        DocumentCreationError: On any failure; the underlying exception
            is attached as ``__cause__``.
    """
    try:
        # Detect document format
        file_format = self._detect_document_format(document_path)

        # Generate metadata content
        if format_specific:
            metadata_content = self._generate_format_specific_metadata(metadata, file_format)
        else:
            metadata_content = self._generate_generic_metadata(metadata)

        # Insert metadata into document
        self._insert_metadata_into_document(document_path, metadata_content, file_format)

        return {
            "document_path": document_path,
            "metadata_configured": metadata,
            "format": file_format,
            "format_specific": format_specific,
            "configuration_time": datetime.now().isoformat(),
        }
    except Exception as e:
        raise DocumentCreationError(f"Failed to configure metadata: {str(e)}") from e
[docs]
def list_templates(self) -> Dict[str, Any]:
    """
    Report the built-in templates and the custom template files on disk.

    Custom templates are the entries of the templates directory whose
    names end in ``.md``, ``.html``, ``.txt`` or ``.json``.

    Returns:
        Dict with the built-in template names, the custom template file
        names, the templates directory and the combined count.
    """
    builtin_kinds = list(self.templates.keys())

    # Scan the templates directory (if present) for recognised extensions.
    custom_files: List[str] = []
    if os.path.exists(self.config.templates_dir):
        recognised = (".md", ".html", ".txt", ".json")
        custom_files = [entry for entry in os.listdir(self.config.templates_dir) if entry.endswith(recognised)]

    return {
        "built_in_templates": [kind.value for kind in builtin_kinds],
        "custom_templates": custom_files,
        "templates_directory": self.config.templates_dir,
        "total_templates": len(builtin_kinds) + len(custom_files),
    }
[docs]
def get_template_info(self, template_type: TemplateType) -> Dict[str, Any]:
    """
    Describe one built-in template.

    Args:
        template_type: Type of template

    Returns:
        Dict with the template's type value, name, description, sections,
        variables, supported formats and style presets. Text fields that
        the template does not define come back as ``""`` and list fields
        as ``[]``.

    Raises:
        TemplateError: If the template type is not registered.
    """
    if template_type not in self.templates:
        raise TemplateError(f"Template not found: {template_type}")

    spec = self.templates[template_type]
    info: Dict[str, Any] = {"template_type": template_type.value}
    # Text fields default to an empty string ...
    for text_field in ("name", "description"):
        info[text_field] = spec.get(text_field, "")
    # ... list fields default to a fresh empty list.
    for list_field in ("sections", "variables", "supported_formats", "style_presets"):
        info[list_field] = spec.get(list_field, [])
    return info
[docs]
def get_created_documents(self) -> List[Dict[str, Any]]:
    """
    Return the records of documents created through this instance.

    Returns:
        A shallow copy of the internal list, so adding to or removing from
        the result does not affect the tool's own bookkeeping.
    """
    return list(self._documents_created)
# Template definitions
def _get_blank_template(self) -> Dict[str, Any]:
"""Get blank document template"""
return {
"name": "Blank Document",
"description": "Empty document with basic structure",
"content": "",
"sections": [],
"variables": [],
"supported_formats": ["markdown", "html", "txt", "docx"],
"metadata_template": {
"title": "New Document",
"author": "Author",
"date": datetime.now().strftime("%Y-%m-%d"),
},
}
def _get_business_report_template(self) -> Dict[str, Any]:
"""Get business report template"""
return {
"name": "Business Report",
"description": "Professional business report template",
"content": """# {title}
**Date:** {date}
**Author:** {author}
**Department:** {department}
## Executive Summary
{executive_summary}
## Introduction
{introduction}
## Analysis
### Key Findings
{key_findings}
### Data Analysis
{data_analysis}
## Recommendations
{recommendations}
## Conclusion
{conclusion}
## Appendices
{appendices}
""",
"sections": [
{"name": "Executive Summary", "level": 2, "required": True},
{"name": "Introduction", "level": 2, "required": True},
{"name": "Analysis", "level": 2, "required": True},
{"name": "Recommendations", "level": 2, "required": True},
{"name": "Conclusion", "level": 2, "required": True},
],
"variables": [
"title",
"date",
"author",
"department",
"executive_summary",
"introduction",
"key_findings",
"data_analysis",
"recommendations",
"conclusion",
"appendices",
],
"supported_formats": ["markdown", "html", "docx"], # pdf: not yet implemented
"style_presets": ["corporate", "professional", "modern"],
}
def _get_technical_doc_template(self) -> Dict[str, Any]:
"""Get technical documentation template"""
return {
"name": "Technical Documentation",
"description": "Technical documentation with code examples",
"content": """# {title}
**Version:** {version}
**Last Updated:** {date}
**Author:** {author}
## Overview
{overview}
## Prerequisites
{prerequisites}
## Installation
{installation}
## Configuration
{configuration}
## Usage
{usage}
## API Reference
{api_reference}
## Examples
{examples}
## Troubleshooting
{troubleshooting}
## Changelog
{changelog}
""",
"sections": [
{"name": "Overview", "level": 2, "required": True},
{"name": "Prerequisites", "level": 2, "required": False},
{"name": "Installation", "level": 2, "required": True},
{"name": "Configuration", "level": 2, "required": False},
{"name": "Usage", "level": 2, "required": True},
{"name": "API Reference", "level": 2, "required": False},
{"name": "Examples", "level": 2, "required": True},
{"name": "Troubleshooting", "level": 2, "required": False},
],
"variables": [
"title",
"version",
"date",
"author",
"overview",
"prerequisites",
"installation",
"configuration",
"usage",
"api_reference",
"examples",
"troubleshooting",
"changelog",
],
"supported_formats": ["markdown", "html"], # pdf: not yet implemented
"style_presets": ["technical", "modern", "minimal"],
}
def _get_academic_paper_template(self) -> Dict[str, Any]:
"""Get academic paper template"""
return {
"name": "Academic Paper",
"description": "Academic research paper template",
"content": """# {title}
**Author:** {author}
**Institution:** {institution}
**Email:** {email}
**Date:** {date}
## Abstract
{abstract}
**Keywords:** {keywords}
## 1. Introduction
{introduction}
## 2. Literature Review
{literature_review}
## 3. Methodology
{methodology}
## 4. Results
{results}
## 5. Discussion
{discussion}
## 6. Conclusion
{conclusion}
## References
{references}
## Appendices
{appendices}
""",
"sections": [
{"name": "Abstract", "level": 2, "required": True},
{"name": "Introduction", "level": 2, "required": True},
{"name": "Literature Review", "level": 2, "required": True},
{"name": "Methodology", "level": 2, "required": True},
{"name": "Results", "level": 2, "required": True},
{"name": "Discussion", "level": 2, "required": True},
{"name": "Conclusion", "level": 2, "required": True},
{"name": "References", "level": 2, "required": True},
],
"variables": [
"title",
"author",
"institution",
"email",
"date",
"abstract",
"keywords",
"introduction",
"literature_review",
"methodology",
"results",
"discussion",
"conclusion",
"references",
"appendices",
],
"supported_formats": ["markdown", "latex"], # pdf: not yet implemented
"style_presets": ["academic", "classic", "formal"],
}
def _get_project_proposal_template(self) -> Dict[str, Any]:
"""Get project proposal template"""
return {
"name": "Project Proposal",
"description": "Project proposal and planning template",
"content": """# {project_name}
**Proposal Date:** {date}
**Project Manager:** {project_manager}
**Department:** {department}
**Budget:** {budget}
## Project Overview
{project_overview}
## Objectives
{objectives}
## Scope
### In Scope
{in_scope}
### Out of Scope
{out_scope}
## Timeline
{timeline}
## Resources Required
{resources}
## Budget Breakdown
{budget_breakdown}
## Risk Assessment
{risk_assessment}
## Success Criteria
{success_criteria}
## Next Steps
{next_steps}
""",
"variables": [
"project_name",
"date",
"project_manager",
"department",
"budget",
"project_overview",
"objectives",
"in_scope",
"out_scope",
"timeline",
"resources",
"budget_breakdown",
"risk_assessment",
"success_criteria",
"next_steps",
],
"supported_formats": ["markdown", "html", "docx"], # pdf: not yet implemented
"style_presets": ["professional", "corporate", "modern"],
}
def _get_user_manual_template(self) -> Dict[str, Any]:
"""Get user manual template"""
return {
"name": "User Manual",
"description": "User manual and guide template",
"content": """# {product_name} User Manual
**Version:** {version}
**Date:** {date}
**Support:** {support_contact}
## Table of Contents
1. [Getting Started](#getting-started)
2. [Basic Features](#basic-features)
3. [Advanced Features](#advanced-features)
4. [Troubleshooting](#troubleshooting)
5. [FAQ](#faq)
## Getting Started
{getting_started}
## Basic Features
{basic_features}
## Advanced Features
{advanced_features}
## Troubleshooting
{troubleshooting}
## FAQ
{faq}
## Contact Support
{support_info}
""",
"variables": [
"product_name",
"version",
"date",
"support_contact",
"getting_started",
"basic_features",
"advanced_features",
"troubleshooting",
"faq",
"support_info",
],
"supported_formats": ["markdown", "html"], # pdf: not yet implemented
"style_presets": ["user-friendly", "modern", "minimal"],
}
def _get_presentation_template(self) -> Dict[str, Any]:
"""Get presentation template"""
return {
"name": "Presentation",
"description": "Slide presentation template",
"content": """# {title}
---
## Slide 1: Title Slide
### {title}
**Presenter:** {presenter}
**Date:** {date}
**Organization:** {organization}
---
## Slide 2: Agenda
{agenda}
---
## Slide 3: Introduction
{introduction}
---
## Slide 4: Main Content
{main_content}
---
## Slide 5: Conclusion
{conclusion}
---
## Slide 6: Questions
{questions}
---
## Slide 7: Thank You
**Contact Information:**
{contact_info}
""",
"variables": [
"title",
"presenter",
"date",
"organization",
"agenda",
"introduction",
"main_content",
"conclusion",
"questions",
"contact_info",
],
"supported_formats": ["markdown", "html", "pptx"],
"style_presets": ["presentation", "modern", "colorful"],
}
def _get_newsletter_template(self) -> Dict[str, Any]:
"""Get newsletter template"""
return {
"name": "Newsletter",
"description": "Newsletter and bulletin template",
"content": """# {newsletter_name}
**Issue #{issue_number}** | {date}
## Headlines
{headlines}
## Feature Article
{feature_article}
## News Briefs
{news_briefs}
## Upcoming Events
{upcoming_events}
## Community Spotlight
{community_spotlight}
## Contact Us
{contact_info}
""",
"variables": [
"newsletter_name",
"issue_number",
"date",
"headlines",
"feature_article",
"news_briefs",
"upcoming_events",
"community_spotlight",
"contact_info",
],
"supported_formats": ["markdown", "html"],
"style_presets": ["newsletter", "colorful", "modern"],
}
def _get_invoice_template(self) -> Dict[str, Any]:
"""Get invoice template"""
return {
"name": "Invoice",
"description": "Business invoice template",
"content": """# INVOICE
**Invoice #:** {invoice_number}
**Date:** {date}
**Due Date:** {due_date}
## Bill To:
{client_info}
## Bill From:
{company_info}
## Items
{items_table}
## Summary
**Subtotal:** {subtotal}
**Tax:** {tax}
**Total:** {total}
## Payment Terms
{payment_terms}
## Notes
{notes}
""",
"variables": [
"invoice_number",
"date",
"due_date",
"client_info",
"company_info",
"items_table",
"subtotal",
"tax",
"total",
"payment_terms",
"notes",
],
"supported_formats": ["markdown", "html"], # pdf: not yet implemented
"style_presets": ["professional", "corporate", "minimal"],
}
# Helper methods
def _get_template(self, template_type: TemplateType) -> Dict[str, Any]:
"""Get template by type"""
if template_type not in self.templates:
raise TemplateError(f"Template not found: {template_type}")
return self.templates[template_type]
def _generate_output_path(
    self,
    document_type: str,
    output_format: DocumentFormat,
    document_id: str,
) -> str:
    """Build a unique output file path inside the configured output directory.

    The file name has the shape ``<type>_<YYYYmmdd_HHMMSS>_<id[:8]>.<ext>``.

    Args:
        document_type: Document type label. Enum members (as passed by
            ``create_document``) are reduced to their ``value`` so the
            name does not depend on how the interpreter formats enums.
        output_format: Output format; its value is used as the extension,
            except that PPT is written with a ``pptx`` extension.
        document_id: Identifier whose first 8 characters are embedded.

    Returns:
        The joined path of the output directory and the file name.
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    # Since Python 3.11 an f-string renders a (str, Enum) member as
    # "DocumentType.REPORT" instead of "report"; use the value explicitly.
    type_label = document_type.value if isinstance(document_type, Enum) else document_type

    # Handle PPT format - use pptx extension
    file_extension = "pptx" if output_format == DocumentFormat.PPT else output_format.value

    filename = f"{type_label}_{timestamp}_{document_id[:8]}.{file_extension}"
    return os.path.join(self.config.output_dir, filename)
def _process_metadata(self, metadata: Dict[str, Any], output_format: DocumentFormat) -> Dict[str, Any]:
"""Process and validate metadata"""
processed = metadata.copy()
# Add default metadata if missing
if "date" not in processed:
processed["date"] = datetime.now().strftime("%Y-%m-%d")
if "created_by" not in processed:
processed["created_by"] = "AIECS Document Creator"
if "format" not in processed:
processed["format"] = output_format.value
return processed
def _get_style_config(self, style_preset: StylePreset) -> Dict[str, Any]:
    """Return the font and colour settings for a style preset.

    Each configuration holds ``font_family``, ``font_size`` and a
    ``colors`` dict with a ``primary`` entry. A preset without an entry
    falls back to the DEFAULT configuration.
    """
    # (preset, font family, font size, primary colour)
    preset_rows = (
        (StylePreset.DEFAULT, "Arial", 12, "#000000"),
        (StylePreset.CORPORATE, "Calibri", 11, "#2E5D92"),
        (StylePreset.ACADEMIC, "Times New Roman", 12, "#000000"),
        (StylePreset.MODERN, "Helvetica", 11, "#333333"),
        (StylePreset.CLASSIC, "Georgia", 12, "#1a1a1a"),
        (StylePreset.MINIMAL, "Arial", 10, "#444444"),
        (StylePreset.COLORFUL, "Verdana", 11, "#2E8B57"),
        (StylePreset.PROFESSIONAL, "Segoe UI", 11, "#2F4F4F"),
    )
    catalogue = {
        preset: {
            "font_family": family,
            "font_size": size,
            "colors": {"primary": primary},
        }
        for preset, family, size, primary in preset_rows
    }
    return catalogue.get(style_preset, catalogue[StylePreset.DEFAULT])
def _create_document_from_template(
self,
template: Dict[str, Any],
metadata: Dict[str, Any],
style_config: Dict[str, Any],
output_format: DocumentFormat,
) -> str:
"""Create document content from template"""
content = template.get("content", "")
# Apply metadata to template
if content and template.get("variables"):
# Replace template variables with metadata values
for var in template["variables"]:
placeholder = f"{{{var}}}"
value = metadata.get(var, f"[{var}]")
content = content.replace(placeholder, str(value))
# Add metadata header if required
if self.config.include_metadata:
metadata_header = self._generate_metadata_header(metadata, output_format)
content = metadata_header + "\n\n" + content
return cast(str, content)
def _generate_metadata_header(self, metadata: Dict[str, Any], output_format: DocumentFormat) -> str:
    """Render metadata as a header block suited to the output format.

    * Markdown: ``key: value`` lines fenced by ``---`` lines.
    * HTML: one ``<meta name=... content=...>`` tag per entry, wrapped in
      marker comments. Names and values are HTML-escaped so quotes or
      angle brackets in metadata cannot break out of the attribute.
    * Any other format: a ``# Document Metadata`` line followed by
      ``key: value`` lines.
    """
    import html  # local import: only needed for the HTML branch

    if output_format == DocumentFormat.MARKDOWN:
        front_matter = "\n".join(f"{k}: {v}" for k, v in metadata.items())
        return "---\n" + front_matter + "\n---"

    if output_format == DocumentFormat.HTML:
        meta_tags = "\n".join(
            f'<meta name="{html.escape(str(k), quote=True)}" content="{html.escape(str(v), quote=True)}">'
            for k, v in metadata.items()
        )
        return f"<!-- Document Metadata -->\n{meta_tags}\n<!-- End Metadata -->"

    plain_lines = "\n".join(f"{k}: {v}" for k, v in metadata.items())
    return "# Document Metadata\n" + plain_lines
def _write_document_file(self, output_path: str, content: str, output_format: DocumentFormat):
    """Write document content to ``output_path`` in the requested format.

    Text formats (Markdown, HTML, plain text, LaTeX) and any format
    without a dedicated branch are written as UTF-8 text. JSON wraps the
    content in a ``{"content": ...}`` object. PPTX/PPT and DOCX are
    delegated to the office-tool writers.

    Raises:
        DocumentCreationError: For PDF output, which is not implemented;
            the office-tool writers raise it for their own failures.
    """
    # A bare file name has an empty dirname, and os.makedirs("") raises;
    # only create the parent folder when there is one.
    parent_dir = os.path.dirname(output_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    if output_format in [
        DocumentFormat.MARKDOWN,
        DocumentFormat.HTML,
        DocumentFormat.PLAIN_TEXT,
        DocumentFormat.LATEX,
    ]:
        with open(output_path, "w", encoding="utf-8") as f:
            f.write(content)
    elif output_format == DocumentFormat.JSON:
        with open(output_path, "w", encoding="utf-8") as f:
            json.dump({"content": content}, f, indent=2, ensure_ascii=False)
    elif output_format in [DocumentFormat.PPTX, DocumentFormat.PPT]:
        # Use office_tool to create PPTX file
        self._write_pptx_file(output_path, content)
    elif output_format == DocumentFormat.DOCX:
        # Use office_tool to create DOCX file
        self._write_docx_file(output_path, content)
    elif output_format == DocumentFormat.PDF:
        # PDF generation is not yet implemented.
        # A real implementation would require a rendering library
        # (e.g. WeasyPrint, ReportLab, or a headless-browser pipeline).
        # Refusing here prevents a text file with a .pdf extension from
        # being silently produced and passed to downstream consumers.
        raise DocumentCreationError(
            "PDF output is not yet implemented. " "Please choose a supported format such as 'markdown', 'html', or 'docx'. " "PDF support will be added in a future release."
        )
    else:
        # For other formats, write as text for now
        with open(output_path, "w", encoding="utf-8") as f:
            f.write(content)
def _write_pptx_file(self, output_path: str, content: str):
    """Split content into slides and write them as a PPTX file via the office tool.

    Raises:
        DocumentCreationError: If no office tool is available, if the
            office tool reports a non-successful result, or if writing
            fails for any other reason (the underlying exception is
            attached as ``__cause__``).
    """
    if not self.office_tool:
        raise DocumentCreationError("OfficeTool not available. Cannot create PPTX files.")

    try:
        # Parse content to extract slides
        # Slides are separated by "---" or slide markers like "## Slide X:"
        slides = self._parse_content_to_slides(content)

        # Use office_tool to create PPTX
        result = self.office_tool.write_pptx(
            slides=slides,
            output_path=output_path,
            image_path=None,  # Can be enhanced to extract image paths from metadata
        )
        if not result.get("success"):
            raise DocumentCreationError(f"Failed to create PPTX file: {result}")
        self.logger.info(f"PPTX file created successfully: {output_path}")
    except Exception as e:
        raise DocumentCreationError(f"Failed to write PPTX file: {str(e)}") from e
def _write_docx_file(self, output_path: str, content: str):
    """Write content as a DOCX file via the office tool.

    Raises:
        DocumentCreationError: If no office tool is available, if the
            office tool reports a non-successful result, or if writing
            fails for any other reason (the underlying exception is
            attached as ``__cause__``).
    """
    if not self.office_tool:
        raise DocumentCreationError("OfficeTool not available. Cannot create DOCX files.")

    try:
        # Use office_tool to create DOCX
        result = self.office_tool.write_docx(
            text=content,
            output_path=output_path,
            table_data=None,  # Can be enhanced to extract tables from content
        )
        if not result.get("success"):
            raise DocumentCreationError(f"Failed to create DOCX file: {result}")
        self.logger.info(f"DOCX file created successfully: {output_path}")
    except Exception as e:
        raise DocumentCreationError(f"Failed to write DOCX file: {str(e)}") from e
def _parse_content_to_slides(self, content: str) -> List[str]:
"""Parse content string into list of slide contents
Supports multiple slide separation formats:
- "---" separator (markdown style)
- "## Slide X:" headers
- Empty lines between slides
"""
slides = []
# Split by "---" separator (common in markdown presentations)
if "---" in content:
parts = content.split("---")
for part in parts:
part = part.strip()
if part:
# Remove slide headers like "## Slide X: Title"
lines = part.split("\n")
cleaned_lines = []
for line in lines:
# Skip slide headers
if line.strip().startswith("## Slide") and ":" in line:
continue
cleaned_lines.append(line)
slide_content = "\n".join(cleaned_lines).strip()
if slide_content:
slides.append(slide_content)
else:
# Try to split by "## Slide" headers
if "## Slide" in content:
parts = content.split("## Slide")
for i, part in enumerate(parts):
if i == 0:
# First part might be title slide
part = part.strip()
if part:
slides.append(part)
else:
# Extract content after "Slide X: Title"
lines = part.split("\n", 1)
if len(lines) > 1:
slide_content = lines[1].strip()
if slide_content:
slides.append(slide_content)
else:
# Fallback: split by double newlines (paragraph breaks)
parts = content.split("\n\n")
current_slide: List[str] = []
for part in parts:
part = part.strip()
if part:
# If it's a header, start a new slide
if part.startswith("#"):
if current_slide:
slides.append("\n".join(current_slide))
current_slide = []
current_slide.append(part)
if current_slide:
slides.append("\n".join(current_slide))
# If no slides found, create a single slide with all content
if not slides:
slides = [content.strip()] if content.strip() else [""]
return slides
def _process_template_variables(self, template_content: str, variables: Dict[str, Any]) -> str:
"""Process template variables in content"""
result = template_content
for key, value in variables.items():
placeholder = f"{{{key}}}"
result = result.replace(placeholder, str(value))
return result
def _generate_document_structure(
self,
sections: List[Dict[str, Any]],
generate_toc: bool,
numbering_style: Optional[str],
) -> str:
"""Generate document structure from sections"""
structure_parts = []
# Generate table of contents
if generate_toc:
toc = self._generate_table_of_contents(sections, numbering_style)
structure_parts.append(toc)
# Generate section headers
for i, section in enumerate(sections, 1):
level = section.get("level", 2)
title = section.get("title", f"Section {i}")
if numbering_style == "numeric":
header = f"{'#' * level} {i}. {title}"
elif numbering_style == "alpha":
alpha = chr(ord("A") + i - 1) if i <= 26 else f"Section{i}"
header = f"{'#' * level} {alpha}. {title}"
else:
header = f"{'#' * level} {title}"
structure_parts.append(header)
structure_parts.append("") # Empty line
# Add placeholder content
placeholder = section.get("placeholder", f"Content for {title} goes here...")
structure_parts.append(placeholder)
structure_parts.append("") # Empty line
return "\n".join(structure_parts)
def _generate_table_of_contents(self, sections: List[Dict[str, Any]], numbering_style: Optional[str]) -> str:
"""Generate table of contents"""
toc_parts = ["# Table of Contents", ""]
for i, section in enumerate(sections, 1):
title = section.get("title", f"Section {i}")
level = section.get("level", 2)
indent = " " * (level - 1)
if numbering_style == "numeric":
toc_line = f"{indent}- {i}. {title}"
elif numbering_style == "alpha":
alpha = chr(ord("A") + i - 1) if i <= 26 else f"Section{i}"
toc_line = f"{indent}- {alpha}. {title}"
else:
toc_line = f"{indent}- {title}"
toc_parts.append(toc_line)
toc_parts.extend(["", "---", ""])
return "\n".join(toc_parts)
def _combine_structure_with_content(self, structure: str, existing_content: str) -> str:
"""Combine generated structure with existing content"""
if not existing_content.strip():
return structure
# If existing content has structure markers, replace them
if "# Table of Contents" in existing_content:
# Replace existing structure
lines = existing_content.split("\n")
content_start = -1
for i, line in enumerate(lines):
if line.startswith("---") and i > 0:
content_start = i + 1
break
if content_start > 0:
existing_body = "\n".join(lines[content_start:])
return structure + "\n" + existing_body
return structure + "\n\n" + existing_content
def _detect_document_format(self, document_path: str) -> DocumentFormat:
    """Map the file extension of a path to a ``DocumentFormat``.

    The extension is compared case-insensitively. Paths with an unknown
    or missing extension are reported as ``DocumentFormat.PLAIN_TEXT``.

    Args:
        document_path: Path or file name of the document.

    Returns:
        The format associated with the path's extension.
    """
    _, suffix = os.path.splitext(document_path)
    suffix = suffix.lower()

    known_suffixes = (
        ((".md", ".markdown"), DocumentFormat.MARKDOWN),
        ((".html", ".htm"), DocumentFormat.HTML),
        ((".txt",), DocumentFormat.PLAIN_TEXT),
        ((".json",), DocumentFormat.JSON),
        ((".xml",), DocumentFormat.XML),
        ((".tex",), DocumentFormat.LATEX),
        ((".docx",), DocumentFormat.DOCX),
        ((".pdf",), DocumentFormat.PDF),
        ((".pptx",), DocumentFormat.PPTX),
        ((".ppt",), DocumentFormat.PPT),
    )
    for suffixes, document_format in known_suffixes:
        if suffix in suffixes:
            return document_format

    # Anything unrecognised is treated as plain text.
    return DocumentFormat.PLAIN_TEXT
def _generate_format_specific_metadata(self, metadata: Dict[str, Any], file_format: DocumentFormat) -> str:
    """Render metadata in the syntax of the target document format.

    * Markdown: ``key: value`` lines between two ``---`` fences.
    * HTML: one ``<meta name=... content=...>`` tag per entry, wrapped in
      ``<head>`` ... ``</head>``. Keys and values are HTML-escaped.
    * LaTeX: one ``\\key{value}`` command per entry.
    * Any other format: the output of ``_generate_generic_metadata``.

    Args:
        metadata: Metadata entries to render, in output order.
        file_format: Format of the document the metadata is meant for.

    Returns:
        The rendered metadata block.
    """
    if file_format == DocumentFormat.MARKDOWN:
        return "---\n" + "\n".join([f"{k}: {v}" for k, v in metadata.items()]) + "\n---"
    elif file_format == DocumentFormat.HTML:
        # Imported locally because the module does not import ``html``.
        from html import escape

        # Escape both attribute values: an unescaped quote, "<" or "&" in a
        # title or author would terminate the attribute early and could
        # inject arbitrary markup into the document head.
        meta_tags = "\n".join(
            [
                f'<meta name="{escape(f"{k}", quote=True)}" content="{escape(f"{v}", quote=True)}">'
                for k, v in metadata.items()
            ]
        )
        return f"<head>\n{meta_tags}\n</head>"
    elif file_format == DocumentFormat.LATEX:
        return "\n".join([f"\\{k}{{{v}}}" for k, v in metadata.items()])
    else:
        return self._generate_generic_metadata(metadata)
def _generate_generic_metadata(self, metadata: Dict[str, Any]) -> str:
"""Generate generic metadata"""
return "% " + "\n% ".join([f"{k}: {v}" for k, v in metadata.items()])
def _insert_metadata_into_document(
    self,
    document_path: str,
    metadata_content: str,
    file_format: DocumentFormat,
) -> None:
    """Insert a rendered metadata block into a document on disk.

    The file is read as UTF-8, modified in memory and written back.

    * HTML documents that already contain a ``<head>`` tag get the
      metadata inserted directly after the first ``<head>``. If the
      metadata is itself wrapped in ``<head>`` ... ``</head>``, the wrapper
      is removed first so that no nested head element is produced.
    * In every other case the metadata is placed at the top of the file,
      followed by an empty line.

    Args:
        document_path: Path of the document to modify.
        metadata_content: Already rendered metadata block.
        file_format: Format of the document at ``document_path``.

    Raises:
        OSError: If the file cannot be read or written.
    """
    with open(document_path, "r", encoding="utf-8") as f:
        content = f.read()

    if file_format == DocumentFormat.HTML and "<head>" in content:
        head_payload = metadata_content
        stripped = metadata_content.strip()
        # The HTML metadata generator of this class returns its tags
        # wrapped in <head>...</head>; unwrap them so the existing head is
        # extended instead of receiving a second, nested head element.
        if stripped.startswith("<head>") and stripped.endswith("</head>"):
            head_payload = stripped[len("<head>"):-len("</head>")].strip("\n")
        # count=1: only the real document head must receive the metadata,
        # not later "<head>" occurrences (e.g. inside code samples).
        content = content.replace("<head>", f"<head>\n{head_payload}", 1)
    else:
        # Insert at the beginning
        content = metadata_content + "\n\n" + content

    with open(document_path, "w", encoding="utf-8") as f:
        f.write(content)
# Module-level alias so that DocumentCreatorTool.Config can be referenced as
# DocumentCreatorSettings (presumably the tool's settings class; its
# definition lies outside this part of the file - confirm).
DocumentCreatorSettings = DocumentCreatorTool.Config