Coverage for src/mcp_atlassian/confluence.py: 77%

121 statements  

« prev     ^ index     » next       coverage.py v7.6.12, created at 2025-03-08 18:10 +0900

1import logging 

2import os 

3 

4from atlassian import Confluence 

5 

6from .config import ConfluenceConfig 

7from .document_types import Document 

8from .preprocessing import TextPreprocessor 

9 

# Module-wide logger, registered under the package's logger name
logger = logging.getLogger(name="mcp-atlassian")

12 

13 

class ConfluenceFetcher:
    """Handles fetching and parsing content from Confluence.

    Wraps an ``atlassian.Confluence`` client configured from the
    ``CONFLUENCE_URL``, ``CONFLUENCE_USERNAME`` and ``CONFLUENCE_API_TOKEN``
    environment variables. Page and comment bodies are passed through a
    ``TextPreprocessor`` and returned as ``Document`` objects.
    """

    def __init__(self) -> None:
        """Create the API client and the text preprocessor from the environment.

        Raises:
            ValueError: If any of the three required environment variables
                is unset or empty.
        """
        url = os.getenv("CONFLUENCE_URL")
        username = os.getenv("CONFLUENCE_USERNAME")
        token = os.getenv("CONFLUENCE_API_TOKEN")

        if not all([url, username, token]):
            raise ValueError("Missing required Confluence environment variables")

        self.config = ConfluenceConfig(url=url, username=username, api_token=token)
        self.confluence = Confluence(
            url=self.config.url,
            username=self.config.username,
            password=self.config.api_token,  # API token is used as password
            cloud=True,
        )
        self.preprocessor = TextPreprocessor(self.config.url, self.confluence)

    def _process_html_content(self, html_content: str, space_key: str) -> tuple[str, str]:
        """Delegate to the preprocessor.

        Returns:
            A ``(processed_html, processed_markdown)`` pair, as unpacked by
            every caller in this class.
        """
        return self.preprocessor.process_html_content(html_content, space_key)

    def get_spaces(self, start: int = 0, limit: int = 10):
        """Get one page of available spaces.

        Args:
            start: Offset of the first space to return.
            limit: Maximum number of spaces to return.

        Returns:
            Whatever the client's ``get_all_spaces`` returns, unmodified.
        """
        return self.confluence.get_all_spaces(start=start, limit=limit)

    def get_page_content(self, page_id: str, clean_html: bool = True) -> Document:
        """Get content of a specific page.

        Args:
            page_id: ID of the page to fetch.
            clean_html: When true the Document carries the processed
                markdown; otherwise the processed HTML.

        Returns:
            Document with the page body and metadata (title, version,
            URL, space and last-author information).
        """
        page = self.confluence.get_page_by_id(page_id=page_id, expand="body.storage,version,space")
        space = page.get("space", {})
        space_key = space.get("key", "")

        content = page["body"]["storage"]["value"]
        processed_html, processed_markdown = self._process_html_content(content, space_key)

        # Author information lives on the version record
        version = page.get("version", {})
        author = version.get("by", {})

        metadata = {
            "page_id": page_id,
            "title": page["title"],
            "version": version.get("number"),
            "url": f"{self.config.url}/spaces/{space_key}/pages/{page_id}",
            "space_key": space_key,
            "author_name": author.get("displayName"),
            "space_name": space.get("name", ""),
            "last_modified": version.get("when"),
        }

        return Document(
            page_content=processed_markdown if clean_html else processed_html,
            metadata=metadata,
        )

    def get_page_by_title(self, space_key: str, title: str, clean_html: bool = True) -> Document | None:
        """Get page content by space key and title.

        Args:
            space_key: Key of the space to look in.
            title: Title of the page.
            clean_html: When true the Document carries the processed
                markdown; otherwise the processed HTML.

        Returns:
            The page as a Document, or ``None`` when the client returns no
            page or any error occurs (the error is logged, not raised).
        """
        try:
            page = self.confluence.get_page_by_title(space=space_key, title=title, expand="body.storage,version")

            if not page:
                return None

            content = page["body"]["storage"]["value"]
            processed_html, processed_markdown = self._process_html_content(content, space_key)

            metadata = {
                "page_id": page["id"],
                "title": page["title"],
                "version": page.get("version", {}).get("number"),
                "space_key": space_key,
                "url": f"{self.config.url}/spaces/{space_key}/pages/{page['id']}",
            }

            return Document(
                page_content=processed_markdown if clean_html else processed_html,
                metadata=metadata,
            )

        except Exception as e:
            # Best-effort lookup: callers get None instead of an exception
            logger.error("Error fetching page: %s", e)
            return None

    def get_space_pages(
        self, space_key: str, start: int = 0, limit: int = 10, clean_html: bool = True
    ) -> list[Document]:
        """Get one batch of pages from a specific space.

        Args:
            space_key: Key of the space to list.
            start: Offset of the first page to return.
            limit: Maximum number of pages to return.
            clean_html: When true each Document carries the processed
                markdown; otherwise the processed HTML.

        Returns:
            One Document per page returned by the client.
        """
        # NOTE(review): only "body.storage" is expanded here while "version"
        # is read below; confirm the API still returns it, otherwise the
        # "version" metadata entry is always None.
        pages = self.confluence.get_all_pages_from_space(
            space=space_key, start=start, limit=limit, expand="body.storage"
        )

        documents = []
        for page in pages:
            content = page["body"]["storage"]["value"]
            processed_html, processed_markdown = self._process_html_content(content, space_key)

            metadata = {
                "page_id": page["id"],
                "title": page["title"],
                "space_key": space_key,
                "version": page.get("version", {}).get("number"),
                "url": f"{self.config.url}/spaces/{space_key}/pages/{page['id']}",
            }

            documents.append(
                Document(
                    page_content=processed_markdown if clean_html else processed_html,
                    metadata=metadata,
                )
            )

        return documents

    def get_page_comments(self, page_id: str, clean_html: bool = True) -> list[Document]:
        """Get all comments for a specific page.

        Args:
            page_id: ID of the page whose comments are fetched.
            clean_html: When true each Document carries the processed
                markdown; otherwise the processed HTML.

        Returns:
            One Document per comment; an empty list when the response
            carries no ``"results"`` entry.
        """
        page = self.confluence.get_page_by_id(page_id=page_id, expand="space")
        space = page.get("space", {})
        space_key = space.get("key", "")
        space_name = space.get("name", "")

        response = self.confluence.get_page_comments(
            content_id=page_id, expand="body.view.value,version", depth="all"
        )
        # Tolerate a missing "results" key, consistent with search()
        comments = (response or {}).get("results", [])

        comment_documents = []
        for comment in comments:
            body = comment["body"]["view"]["value"]
            processed_html, processed_markdown = self._process_html_content(body, space_key)

            # Author information is taken from version.by instead of author
            comment_version = comment.get("version", {})
            author = comment_version.get("by", {})

            metadata = {
                "page_id": page_id,
                "comment_id": comment["id"],
                "last_modified": comment_version.get("when"),
                "type": "comment",
                "author_name": author.get("displayName"),
                "space_key": space_key,
                "space_name": space_name,
            }

            comment_documents.append(
                Document(
                    page_content=processed_markdown if clean_html else processed_html,
                    metadata=metadata,
                )
            )

        return comment_documents

    def search(self, cql: str, limit: int = 10) -> list[Document]:
        """Search content using Confluence Query Language (CQL).

        Args:
            cql: The CQL query string.
            limit: Maximum number of results requested from the API.

        Returns:
            One Document per result whose content type is ``"page"``, with
            the result excerpt as content. An empty list on any error
            (the error is logged, not raised).
        """
        try:
            results = self.confluence.cql(cql=cql, limit=limit)
            documents = []

            for result in results.get("results", []):
                content = result.get("content", {})
                if content.get("type") != "page":
                    continue

                metadata = {
                    "page_id": content["id"],
                    "title": result["title"],
                    "space": result.get("resultGlobalContainer", {}).get("title"),
                    "url": f"{self.config.url}{result['url']}",
                    "last_modified": result.get("lastModified"),
                    "type": content["type"],
                }

                # Use the excerpt as page_content since it's already a good summary
                documents.append(Document(page_content=result.get("excerpt", ""), metadata=metadata))

            return documents
        except Exception as e:
            logger.error("Search failed with error: %s", e)
            return []

    def create_page(self, space_key: str, title: str, body: str, parent_id: str | None = None) -> Document:
        """
        Create a new page in a Confluence space.

        Args:
            space_key: The key of the space
            title: The title of the page
            body: The content of the page in storage format (HTML)
            parent_id: Optional parent page ID

        Returns:
            Document representing the newly created page

        Raises:
            Exception: Any error from the create or the follow-up fetch is
                logged and re-raised unchanged.
        """
        try:
            page = self.confluence.create_page(
                space=space_key, title=title, body=body, parent_id=parent_id, representation="storage"
            )

            # Re-fetch so the caller gets the same Document shape as get_page_content
            return self.get_page_content(page["id"])
        except Exception as e:
            logger.error("Error creating page in space %s: %s", space_key, e)
            raise

    def update_page(
        self, page_id: str, title: str, body: str, minor_edit: bool = False, version_comment: str = ""
    ) -> Document:
        """
        Update an existing Confluence page.

        Args:
            page_id: The ID of the page to update
            title: The new title of the page
            body: The new content of the page in storage format (HTML)
            minor_edit: Whether this is a minor edit
            version_comment: Optional comment for this version

        Returns:
            Document representing the updated page

        Raises:
            Exception: Any error from the update or the follow-up fetch is
                logged and re-raised unchanged.
        """
        try:
            # No pre-fetch of the current page: its result was never used,
            # so it only cost an extra API round trip.
            self.confluence.update_page(
                page_id=page_id, title=title, body=body, minor_edit=minor_edit, version_comment=version_comment
            )

            # Return the updated page as a Document
            return self.get_page_content(page_id)
        except Exception as e:
            logger.error("Error updating page %s: %s", page_id, e)
            raise

    def get_user_contributed_spaces(self, limit: int = 250) -> dict:
        """
        Get spaces the current user has contributed to.

        The space key of each search result is taken from the first of
        these that yields one: the global container's display URL, the
        content's expandable space path, then the result URL.

        Args:
            limit: Maximum number of results to return

        Returns:
            Dictionary of space keys to space information (``key``,
            ``name``, ``description``); empty on any error, which is
            logged rather than raised.
        """
        try:
            # Use CQL to find content the user has contributed to
            cql = "contributor = currentUser() order by lastmodified DESC"
            results = self.confluence.cql(cql=cql, limit=limit)

            # Extract and deduplicate spaces; the first hit for a key wins
            spaces = {}
            for result in results.get("results", []):
                space_key = None
                space_name = None

                # Try to extract space from container
                if "resultGlobalContainer" in result:
                    container = result.get("resultGlobalContainer", {})
                    space_name = container.get("title")
                    display_url = container.get("displayUrl", "")
                    if display_url and "/spaces/" in display_url:
                        space_key = display_url.split("/spaces/")[1].split("/")[0]

                # Try to extract from content expandable
                if not space_key and "content" in result and "_expandable" in result["content"]:
                    expandable = result["content"].get("_expandable", {})
                    space_path = expandable.get("space", "")
                    if space_path and space_path.startswith("/rest/api/space/"):
                        space_key = space_path.split("/rest/api/space/")[1]

                # Try to extract from URL
                if not space_key and "url" in result:
                    url = result.get("url", "")
                    if url and url.startswith("/spaces/"):
                        space_key = url.split("/spaces/")[1].split("/")[0]

                # If we found a space key, add it to our dictionary
                if space_key and space_key not in spaces:
                    spaces[space_key] = {"key": space_key, "name": space_name or space_key, "description": ""}

            return spaces
        except Exception as e:
            logger.error("Error getting user contributed spaces: %s", e)
            return {}