Coverage for src/mcp_atlassian/confluence.py: 77%
121 statements
« prev ^ index » next coverage.py v7.6.12, created at 2025-03-08 18:10 +0900
« prev ^ index » next coverage.py v7.6.12, created at 2025-03-08 18:10 +0900
1import logging
2import os
4from atlassian import Confluence
6from .config import ConfluenceConfig
7from .document_types import Document
8from .preprocessing import TextPreprocessor
10# Configure logging
11logger = logging.getLogger("mcp-atlassian")
14class ConfluenceFetcher:
15 """Handles fetching and parsing content from Confluence."""
def __init__(self):
    """Build the Confluence client from environment variables.

    Reads ``CONFLUENCE_URL``, ``CONFLUENCE_USERNAME`` and
    ``CONFLUENCE_API_TOKEN``, then creates the config object, the
    ``Confluence`` client and the text preprocessor from them.

    Raises:
        ValueError: If any of the three variables is unset or empty.
    """
    env = os.environ
    url = env.get("CONFLUENCE_URL")
    username = env.get("CONFLUENCE_USERNAME")
    token = env.get("CONFLUENCE_API_TOKEN")

    # Fail fast before any client object is created.
    if not (url and username and token):
        raise ValueError("Missing required Confluence environment variables")

    self.config = ConfluenceConfig(url=url, username=username, api_token=token)

    client_options = {
        "url": self.config.url,
        "username": self.config.username,
        "password": self.config.api_token,  # the API token doubles as the password
        "cloud": True,
    }
    self.confluence = Confluence(**client_options)
    self.preprocessor = TextPreprocessor(self.config.url, self.confluence)
def _process_html_content(self, html_content: str, space_key: str) -> tuple[str, str]:
    """Delegate HTML processing to the configured preprocessor.

    Args:
        html_content: Raw HTML to process.
        space_key: Key of the space the content belongs to.

    Returns:
        Whatever ``preprocessor.process_html_content`` returns; callers in
        this class unpack it as ``(processed_html, processed_markdown)``.
    """
    processed = self.preprocessor.process_html_content(html_content, space_key)
    return processed
def get_spaces(self, start: int = 0, limit: int = 10):
    """Return a slice of the available spaces.

    Args:
        start: Offset passed through to the client.
        limit: Maximum number of spaces passed through to the client.

    Returns:
        The unmodified result of the client's ``get_all_spaces`` call.
    """
    client = self.confluence
    return client.get_all_spaces(start=start, limit=limit)
def get_page_content(self, page_id: str, clean_html: bool = True) -> Document:
    """Fetch a single page by ID and wrap it in a ``Document``.

    Args:
        page_id: ID of the page to fetch.
        clean_html: When true, the processed Markdown becomes the document
            content; otherwise the processed HTML is used.

    Returns:
        Document with the processed body and page metadata (title, version,
        URL, space, author, last-modified timestamp).
    """
    page = self.confluence.get_page_by_id(page_id=page_id, expand="body.storage,version,space")
    space = page.get("space", {})
    space_key = space.get("key", "")

    raw_body = page["body"]["storage"]["value"]
    html, markdown = self._process_html_content(raw_body, space_key)

    # Author details live under the version record.
    version = page.get("version", {})
    author = version.get("by", {})

    return Document(
        page_content=markdown if clean_html else html,
        metadata={
            "page_id": page_id,
            "title": page["title"],
            "version": version.get("number"),
            "url": f"{self.config.url}/spaces/{space_key}/pages/{page_id}",
            "space_key": space_key,
            "author_name": author.get("displayName"),
            "space_name": space.get("name", ""),
            "last_modified": version.get("when"),
        },
    )
def get_page_by_title(self, space_key: str, title: str, clean_html: bool = True) -> Document | None:
    """Look up a page by space key and title.

    Args:
        space_key: Key of the space to search in.
        title: Title of the page to fetch.
        clean_html: When true, the processed Markdown becomes the document
            content; otherwise the processed HTML is used.

    Returns:
        Document for the page, or ``None`` when the client returns nothing
        or any exception is raised (the error is logged).
    """
    try:
        page = self.confluence.get_page_by_title(space=space_key, title=title, expand="body.storage,version")

        if page:
            raw_body = page["body"]["storage"]["value"]
            html, markdown = self._process_html_content(raw_body, space_key)
            version_number = page.get("version", {}).get("number")

            return Document(
                page_content=markdown if clean_html else html,
                metadata={
                    "page_id": page["id"],
                    "title": page["title"],
                    "version": version_number,
                    "space_key": space_key,
                    "url": f"{self.config.url}/spaces/{space_key}/pages/{page['id']}",
                },
            )

        return None

    except Exception as e:
        logger.error(f"Error fetching page: {str(e)}")
        return None
def get_space_pages(
    self, space_key: str, start: int = 0, limit: int = 10, clean_html: bool = True
) -> list[Document]:
    """Fetch a slice of pages from one space as Documents.

    Args:
        space_key: Key of the space to list.
        start: Offset passed through to the client.
        limit: Maximum number of pages passed through to the client.
        clean_html: When true, each document holds the processed Markdown;
            otherwise the processed HTML.

    Returns:
        One Document per page returned by the client, in the same order.
    """

    def to_document(page):
        # Convert one raw page record into a Document.
        html, markdown = self._process_html_content(page["body"]["storage"]["value"], space_key)
        return Document(
            page_content=markdown if clean_html else html,
            metadata={
                "page_id": page["id"],
                "title": page["title"],
                "space_key": space_key,
                "version": page.get("version", {}).get("number"),
                "url": f"{self.config.url}/spaces/{space_key}/pages/{page['id']}",
            },
        )

    raw_pages = self.confluence.get_all_pages_from_space(
        space=space_key, start=start, limit=limit, expand="body.storage"
    )
    return [to_document(page) for page in raw_pages]
def get_page_comments(self, page_id: str, clean_html: bool = True) -> list[Document]:
    """Fetch every comment on a page as Documents.

    The page itself is fetched first so each comment can carry the key and
    name of the space it lives in.

    Args:
        page_id: ID of the page whose comments are wanted.
        clean_html: When true, each document holds the processed Markdown;
            otherwise the processed HTML.

    Returns:
        One Document per comment in the client's ``"results"`` list.
    """
    parent = self.confluence.get_page_by_id(page_id=page_id, expand="space")
    space = parent.get("space", {})
    space_key = space.get("key", "")
    space_name = space.get("name", "")

    response = self.confluence.get_page_comments(
        content_id=page_id, expand="body.view.value,version", depth="all"
    )

    documents = []
    for comment in response["results"]:
        html, markdown = self._process_html_content(comment["body"]["view"]["value"], space_key)

        # The author is taken from version.by rather than a top-level author field.
        version = comment.get("version", {})
        author = version.get("by", {})

        documents.append(
            Document(
                page_content=markdown if clean_html else html,
                metadata={
                    "page_id": page_id,
                    "comment_id": comment["id"],
                    "last_modified": version.get("when"),
                    "type": "comment",
                    "author_name": author.get("displayName"),
                    "space_key": space_key,
                    "space_name": space_name,
                },
            )
        )

    return documents
def search(self, cql: str, limit: int = 10) -> list[Document]:
    """Run a CQL query and return the page hits as Documents.

    Only results whose content type is ``"page"`` are kept. Each Document
    uses the search excerpt as its content rather than the full page body.

    Args:
        cql: Confluence Query Language expression.
        limit: Maximum number of results to request.

    Returns:
        Documents for the matching pages; an empty list if anything raises
        (the error is logged).
    """
    try:
        response = self.confluence.cql(cql=cql, limit=limit)
        hits = []

        for item in response.get("results", []):
            content = item.get("content", {})
            if content.get("type") != "page":
                continue

            details = {
                "page_id": content["id"],
                "title": item["title"],
                "space": item.get("resultGlobalContainer", {}).get("title"),
                "url": f"{self.config.url}{item['url']}",
                "last_modified": item.get("lastModified"),
                "type": content["type"],
            }
            # The excerpt already serves as a summary of the page.
            excerpt = item.get("excerpt", "")
            hits.append(Document(page_content=excerpt, metadata=details))

        return hits
    except Exception as e:
        logger.error(f"Search failed with error: {str(e)}")
        return []
def create_page(self, space_key: str, title: str, body: str, parent_id: str | None = None) -> Document:
    """Create a new page in a Confluence space.

    Args:
        space_key: The key of the space.
        title: The title of the page.
        body: The content of the page in storage format (HTML).
        parent_id: Optional parent page ID; ``None`` is passed through to
            the client unchanged.

    Returns:
        Document for the newly created page, re-fetched through
        ``get_page_content`` using the ID the client reports.

    Raises:
        Exception: Any error from the client or the re-fetch is logged and
            re-raised unchanged.
    """
    try:
        page = self.confluence.create_page(
            space=space_key, title=title, body=body, parent_id=parent_id, representation="storage"
        )

        # Re-fetch so the caller gets the same Document shape as a normal read.
        return self.get_page_content(page["id"])
    except Exception as e:
        logger.error("Error creating page in space %s: %s", space_key, e)
        raise
def update_page(
    self, page_id: str, title: str, body: str, minor_edit: bool = False, version_comment: str = ""
) -> Document:
    """Update an existing Confluence page.

    Args:
        page_id: The ID of the page to update.
        title: The new title of the page.
        body: The new content of the page in storage format (HTML).
        minor_edit: Whether this is a minor edit.
        version_comment: Optional comment for this version.

    Returns:
        Document for the updated page, re-fetched through
        ``get_page_content``.

    Raises:
        Exception: Any error from the client or the re-fetch is logged and
            re-raised unchanged.
    """
    try:
        # No pre-fetch of the page here: its result was never used, and the
        # update call only receives the ID, title, body and edit flags.
        self.confluence.update_page(
            page_id=page_id, title=title, body=body, minor_edit=minor_edit, version_comment=version_comment
        )

        # Re-fetch so the caller sees the page as stored after the update.
        return self.get_page_content(page_id)
    except Exception as e:
        logger.error("Error updating page %s: %s", page_id, e)
        raise
def get_user_contributed_spaces(self, limit: int = 250) -> dict:
    """Collect the spaces holding content the current user contributed to.

    Runs a CQL query for the current user's contributions and derives a
    space key for each hit from, in order: the result container's display
    URL, the content's expandable space path, and the hit's own URL. The
    first hit seen for a given key wins.

    Args:
        limit: Maximum number of search results to request.

    Returns:
        Mapping of space key to a dict with ``"key"``, ``"name"`` and
        ``"description"`` entries; an empty dict if anything raises (the
        error is logged).
    """
    try:
        query = "contributor = currentUser() order by lastmodified DESC"
        response = self.confluence.cql(cql=query, limit=limit)

        found = {}
        for hit in response.get("results", []):
            key = None
            name = None

            # First choice: the global container carries both a name and a URL.
            if "resultGlobalContainer" in hit:
                container = hit.get("resultGlobalContainer", {})
                name = container.get("title")
                display_url = container.get("displayUrl", "")
                if display_url and "/spaces/" in display_url:
                    key = display_url.partition("/spaces/")[2].partition("/")[0]

            # Second choice: the REST path of the space under content._expandable.
            if not key and "content" in hit and "_expandable" in hit["content"]:
                space_path = hit["content"].get("_expandable", {}).get("space", "")
                if space_path and space_path.startswith("/rest/api/space/"):
                    key = space_path.split("/rest/api/space/")[1]

            # Last resort: the hit's own URL.
            if not key and "url" in hit:
                hit_url = hit.get("url", "")
                if hit_url and hit_url.startswith("/spaces/"):
                    key = hit_url.partition("/spaces/")[2].partition("/")[0]

            # Skip hits with no key and keys that were already recorded.
            if not key or key in found:
                continue
            found[key] = {"key": key, "name": name or key, "description": ""}

        return found
    except Exception as e:
        logger.error(f"Error getting user contributed spaces: {str(e)}")
        return {}