Coverage for src/mcp_atlassian/confluence.py: 100%
77 statements
« prev ^ index » next coverage.py v7.6.12, created at 2025-02-22 16:34 +0900
1import logging
2import os
4from atlassian import Confluence
6from .config import ConfluenceConfig
7from .document_types import Document
8from .preprocessing import TextPreprocessor
# Configure logging: single shared logger for the mcp-atlassian package
# (name is fixed so handlers configured elsewhere attach to it).
logger = logging.getLogger("mcp-atlassian")
class ConfluenceFetcher:
    """Handles fetching and parsing content from Confluence."""

    def __init__(self):
        """Build a Confluence API client from environment variables.

        Reads CONFLUENCE_URL, CONFLUENCE_USERNAME and CONFLUENCE_API_TOKEN.

        Raises:
            ValueError: If any of the three environment variables is unset.
        """
        url = os.getenv("CONFLUENCE_URL")
        username = os.getenv("CONFLUENCE_USERNAME")
        token = os.getenv("CONFLUENCE_API_TOKEN")

        if not all([url, username, token]):
            raise ValueError("Missing required Confluence environment variables")

        self.config = ConfluenceConfig(url=url, username=username, api_token=token)
        self.confluence = Confluence(
            url=self.config.url,
            username=self.config.username,
            password=self.config.api_token,  # API token is used as password
            cloud=True,
        )
        self.preprocessor = TextPreprocessor(self.config.url, self.confluence)

    def _process_html_content(self, html_content: str, space_key: str) -> tuple[str, str]:
        """Return (processed_html, processed_markdown) for Confluence storage HTML."""
        return self.preprocessor.process_html_content(html_content, space_key)

    def get_spaces(self, start: int = 0, limit: int = 10):
        """Get all available spaces.

        Args:
            start: Pagination offset.
            limit: Maximum number of spaces to return.

        Returns:
            The raw response from the Confluence ``get_all_spaces`` API.
        """
        return self.confluence.get_all_spaces(start=start, limit=limit)

    def get_page_content(self, page_id: str, clean_html: bool = True) -> Document:
        """Get content of a specific page.

        Args:
            page_id: Confluence page id.
            clean_html: When True the Document contains markdown, otherwise
                the processed HTML.

        Returns:
            A Document with the page body and page/author metadata.
        """
        page = self.confluence.get_page_by_id(page_id=page_id, expand="body.storage,version,space")
        space_key = page.get("space", {}).get("key", "")

        content = page["body"]["storage"]["value"]
        processed_html, processed_markdown = self._process_html_content(content, space_key)

        # Author information lives under version.by, not a top-level author field.
        version = page.get("version", {})
        author = version.get("by", {})

        metadata = {
            "page_id": page_id,
            "title": page["title"],
            "version": version.get("number"),
            "url": f"{self.config.url}/spaces/{space_key}/pages/{page_id}",
            "space_key": space_key,
            "author_name": author.get("displayName"),
            "space_name": page.get("space", {}).get("name", ""),
            "last_modified": version.get("when"),
        }

        return Document(
            page_content=processed_markdown if clean_html else processed_html,
            metadata=metadata,
        )

    def get_page_by_title(self, space_key: str, title: str, clean_html: bool = True) -> Document | None:
        """Get page content by space key and title.

        Args:
            space_key: Key of the space to search in.
            title: Exact page title.
            clean_html: When True the Document contains markdown, otherwise
                the processed HTML.

        Returns:
            A Document, or None when the page does not exist or the lookup fails.
        """
        try:
            page = self.confluence.get_page_by_title(space=space_key, title=title, expand="body.storage,version")

            if not page:
                return None

            content = page["body"]["storage"]["value"]
            processed_html, processed_markdown = self._process_html_content(content, space_key)

            metadata = {
                "page_id": page["id"],
                "title": page["title"],
                "version": page.get("version", {}).get("number"),
                "space_key": space_key,
                "url": f"{self.config.url}/spaces/{space_key}/pages/{page['id']}",
            }

            return Document(
                page_content=processed_markdown if clean_html else processed_html,
                metadata=metadata,
            )

        except Exception as e:
            # logger.exception keeps the traceback; %s args defer formatting.
            logger.exception("Error fetching page: %s", e)
            return None

    def get_space_pages(
        self, space_key: str, start: int = 0, limit: int = 10, clean_html: bool = True
    ) -> list[Document]:
        """Get all pages from a specific space.

        Args:
            space_key: Key of the space to list pages from.
            start: Pagination offset.
            limit: Maximum number of pages to return.
            clean_html: When True each Document contains markdown, otherwise
                the processed HTML.

        Returns:
            One Document per page returned by the API.
        """
        pages = self.confluence.get_all_pages_from_space(
            space=space_key, start=start, limit=limit, expand="body.storage"
        )

        documents = []
        for page in pages:
            content = page["body"]["storage"]["value"]
            processed_html, processed_markdown = self._process_html_content(content, space_key)

            metadata = {
                "page_id": page["id"],
                "title": page["title"],
                "space_key": space_key,
                "version": page.get("version", {}).get("number"),
                "url": f"{self.config.url}/spaces/{space_key}/pages/{page['id']}",
            }

            documents.append(
                Document(
                    page_content=processed_markdown if clean_html else processed_html,
                    metadata=metadata,
                )
            )

        return documents

    def get_page_comments(self, page_id: str, clean_html: bool = True) -> list[Document]:
        """Get all comments for a specific page.

        Args:
            page_id: Confluence page id.
            clean_html: When True each Document contains markdown, otherwise
                the processed HTML.

        Returns:
            One Document per comment, including author and space metadata.
        """
        page = self.confluence.get_page_by_id(page_id=page_id, expand="space")
        space_key = page.get("space", {}).get("key", "")
        space_name = page.get("space", {}).get("name", "")

        comments = self.confluence.get_page_comments(content_id=page_id, expand="body.view.value,version", depth="all")[
            "results"
        ]

        comment_documents = []
        for comment in comments:
            body = comment["body"]["view"]["value"]
            processed_html, processed_markdown = self._process_html_content(body, space_key)

            # Author information comes from version.by instead of author;
            # hoist the version dict instead of fetching it twice.
            version = comment.get("version", {})
            author = version.get("by", {})

            metadata = {
                "page_id": page_id,
                "comment_id": comment["id"],
                "last_modified": version.get("when"),
                "type": "comment",
                "author_name": author.get("displayName"),
                "space_key": space_key,
                "space_name": space_name,
            }

            comment_documents.append(
                Document(
                    page_content=processed_markdown if clean_html else processed_html,
                    metadata=metadata,
                )
            )

        return comment_documents

    def search(self, cql: str, limit: int = 10) -> list[Document]:
        """Search content using Confluence Query Language (CQL).

        Args:
            cql: CQL query string.
            limit: Maximum number of results to return.

        Returns:
            Documents built from page-type results (the excerpt is used as the
            page content); an empty list when the search fails.
        """
        try:
            results = self.confluence.cql(cql=cql, limit=limit)
            documents = []

            for result in results.get("results", []):
                content = result.get("content", {})
                # Only page results carry the metadata we expose.
                if content.get("type") == "page":
                    metadata = {
                        "page_id": content["id"],
                        "title": result["title"],
                        "space": result.get("resultGlobalContainer", {}).get("title"),
                        "url": f"{self.config.url}{result['url']}",
                        "last_modified": result.get("lastModified"),
                        "type": content["type"],
                    }

                    # Use the excerpt as page_content since it's already a good summary
                    documents.append(Document(page_content=result.get("excerpt", ""), metadata=metadata))

            return documents
        except Exception as e:
            # logger.exception keeps the traceback; %s args defer formatting.
            logger.exception("Search failed with error: %s", e)
            return []