Coverage for me2ai_mcp\utils.py: 0%
103 statements
1"""
2Utility functions for ME2AI MCP servers.
4This module provides common utility functions for processing inputs,
5formatting responses, and handling data in MCP servers.
6"""
7from typing import Dict, List, Any, Optional, Union
8import re
9import json
10import logging
11from datetime import datetime
12import textwrap
14# Optional dependencies
15try:
16 import bleach
17 BLEACH_AVAILABLE = True
18except ImportError:
19 BLEACH_AVAILABLE = False
21try:
22 from bs4 import BeautifulSoup
23 BS4_AVAILABLE = True
24except ImportError:
25 BS4_AVAILABLE = False
27# Configure logging
28logger = logging.getLogger("me2ai-mcp-utils")


def sanitize_input(text: str, max_length: int = 10000) -> str:
    """Sanitize text input for safe processing.

    Args:
        text: Input text
        max_length: Maximum allowed length

    Returns:
        Sanitized text
    """
    # Check input type
    if not isinstance(text, str):
        logger.warning(f"Expected string input, got {type(text)}")
        text = str(text)

    # Truncate to maximum length
    if len(text) > max_length:
        logger.warning(f"Input truncated from {len(text)} to {max_length} characters")
        text = text[:max_length]

    # Remove null bytes and other control characters
    text = re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]', '', text)

    # Use bleach for HTML sanitization if available
    if BLEACH_AVAILABLE:
        text = bleach.clean(text, strip=True)

    return text
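

# Illustrative usage sketch (added for documentation; the sample string is an
# assumption, not part of the original module). It shows how sanitize_input
# coerces input, drops control characters, and only cleans HTML when bleach
# is installed.
def _example_sanitize_input() -> None:
    """Demonstrate sanitize_input on a small hostile input."""
    raw = "<script>alert('x')</script>Hello\x00 world"
    cleaned = sanitize_input(raw, max_length=100)

    # The null byte is always removed.
    assert "\x00" not in cleaned

    if BLEACH_AVAILABLE:
        # bleach removes disallowed tags such as <script>.
        assert "<script>" not in cleaned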


def format_response(
    data: Any,
    format_type: str = "auto",
    success: bool = True,
    error: Optional[str] = None
) -> Dict[str, Any]:
    """Format a standard MCP response.

    Args:
        data: Response data
        format_type: Response format type (auto, text, json, html)
        success: Whether the operation was successful
        error: Error message if unsuccessful

    Returns:
        Formatted response dictionary
    """
    # Basic response structure
    response = {
        "success": success,
        "timestamp": datetime.now().isoformat()
    }

    # Add error message if provided
    if error:
        response["error"] = error
        response["success"] = False

    # Auto-detect format if needed
    if format_type == "auto":
        if isinstance(data, (dict, list)):
            format_type = "json"
        elif isinstance(data, str) and data.strip().startswith(("<html", "<!DOCTYPE")):
            format_type = "html"
        else:
            format_type = "text"

    # Format data based on type
    if format_type == "json":
        response["format"] = "json"
        response["content"] = data
    elif format_type == "html":
        response["format"] = "html"
        response["content"] = data
    else:  # Default to text
        response["format"] = "text"
        response["content"] = str(data)

    # Add format-specific metadata
    if format_type == "text":
        response["content_length"] = len(response["content"])
        lines = response["content"].split("\n")
        response["line_count"] = len(lines)

    return response
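

# Illustrative usage sketch (the payloads are assumptions for demonstration).
# format_response wraps any result in a standard envelope with a success
# flag, ISO timestamp, detected format, and content; passing an error
# message forces success to False.
def _example_format_response() -> None:
    """Demonstrate the standard MCP response envelope."""
    ok = format_response({"items": [1, 2, 3]})
    # Dicts and lists are auto-detected as JSON content.
    assert ok["success"] is True and ok["format"] == "json"

    failed = format_response("lookup failed", error="host not found")
    # An error message overrides the success flag; plain strings are treated
    # as text and receive length/line-count metadata.
    assert failed["success"] is False and failed["format"] == "text"
    assert failed["content_length"] == len("lookup failed")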


def extract_text(
    html_content: str,
    max_length: int = 10000,
    include_headings: bool = True,
    include_links: bool = True
) -> str:
    """Extract readable text from HTML content.

    Args:
        html_content: HTML content
        max_length: Maximum length of extracted text
        include_headings: Whether to include headings
        include_links: Whether to include link information

    Returns:
        Extracted text
    """
    # Check if BeautifulSoup is available
    if not BS4_AVAILABLE:
        logger.warning("BeautifulSoup not available, falling back to basic HTML removal")
        # Basic HTML tag removal with regex
        text = re.sub(r'<[^>]*>', ' ', html_content)
        text = re.sub(r'\s+', ' ', text).strip()
        return text[:max_length]

    # Parse HTML with BeautifulSoup
    soup = BeautifulSoup(html_content, 'html.parser')

    # Remove script and style elements
    for element in soup(["script", "style", "noscript", "iframe", "footer"]):
        element.extract()

    # Get all text
    text = soup.get_text(separator='\n', strip=True)

    # Add headings with emphasis if requested
    if include_headings:
        headings = []
        for tag in ['h1', 'h2', 'h3', 'h4']:
            for heading in soup.find_all(tag):
                heading_text = heading.get_text(strip=True)
                if heading_text:
                    level = int(tag[1])
                    prefix = "#" * level
                    headings.append(f"{prefix} {heading_text}")

        if headings:
            text = '\n\n'.join(headings) + '\n\n' + text

    # Add link information if requested
    if include_links:
        links = []
        for link in soup.find_all('a', href=True):
            link_text = link.get_text(strip=True)
            href = link['href']
            if link_text and href and not href.startswith('#') and not href.startswith('javascript:'):
                links.append(f"- [{link_text}]({href})")

        if links:
            text += '\n\n### Links:\n' + '\n'.join(links)

    # Normalize whitespace
    text = re.sub(r'\n{3,}', '\n\n', text)

    # Truncate if necessary
    if len(text) > max_length:
        text = text[:max_length] + "..."

    return text
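

# Illustrative usage sketch (the HTML snippet is an assumption). With
# BeautifulSoup installed, script/style content is dropped, headings get
# Markdown-style prefixes, and links are appended under a "Links" section;
# otherwise a regex fallback simply strips the tags.
def _example_extract_text() -> None:
    """Demonstrate text extraction from a small HTML document."""
    html = (
        "<html><body><h1>Title</h1>"
        "<p>Body text.</p>"
        "<a href='https://example.com'>Example</a>"
        "<script>tracking()</script></body></html>"
    )
    text = extract_text(html, max_length=500)
    assert "Title" in text and "Body text." in text

    if BS4_AVAILABLE:
        # Script bodies are only removed on the BeautifulSoup path; the
        # regex fallback strips tags but keeps the enclosed text.
        assert "tracking" not in text
        assert "[Example](https://example.com)" in text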


def summarize_text(text: str, max_length: int = 1000, preserve_sentences: bool = True) -> str:
    """Summarize text by truncation with sentence preservation.

    Args:
        text: Text to summarize
        max_length: Maximum length of summary
        preserve_sentences: Whether to preserve complete sentences

    Returns:
        Summarized text
    """
    if len(text) <= max_length:
        return text

    if preserve_sentences:
        # Split text into sentences
        sentences = re.split(r'(?<=[.!?])\s+', text)

        # Add sentences until we reach the max length
        summary = ""
        for sentence in sentences:
            if len(summary) + len(sentence) + 1 <= max_length:
                summary += sentence + " "
            else:
                break

        return summary.strip()
    else:
        # Simple truncation
        return text[:max_length] + "..."
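

# Illustrative usage sketch (the sample text is an assumption). With
# preserve_sentences=True (the default) the summary is cut at a sentence
# boundary rather than mid-word.
def _example_summarize_text() -> None:
    """Demonstrate sentence-preserving truncation."""
    text = "First sentence. Second sentence is a bit longer. Third one."
    summary = summarize_text(text, max_length=40)
    # Only whole sentences that fit within 40 characters are kept.
    assert summary == "First sentence."

    # Without sentence preservation the text is cut mid-stream and suffixed
    # with an ellipsis.
    assert summarize_text(text, max_length=40, preserve_sentences=False).endswith("...")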


def wrap_text_block(text: str, width: int = 80, prefix: str = "") -> str:
    """Wrap text to a specified width with optional prefix.

    Args:
        text: Text to wrap
        width: Line width
        prefix: Prefix to add to each line

    Returns:
        Wrapped text
    """
    wrapper = textwrap.TextWrapper(
        width=width,
        initial_indent=prefix,
        subsequent_indent=prefix
    )

    lines = text.split('\n')
    wrapped_lines = [wrapper.fill(line) if line.strip() else '' for line in lines]

    return '\n'.join(wrapped_lines)
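

# Illustrative usage sketch. Wrapping with a prefix produces, for example, a
# quoted or indented block while preserving blank lines between paragraphs.
def _example_wrap_text_block() -> None:
    """Demonstrate prefixed wrapping at a narrow width."""
    text = "A reasonably long line that will need to be wrapped.\n\nSecond paragraph."
    quoted = wrap_text_block(text, width=30, prefix="> ")
    for line in quoted.split("\n"):
        # Every non-empty output line carries the prefix and respects the
        # requested width.
        assert line == "" or (line.startswith("> ") and len(line) <= 30)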