Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1#!/usr/bin/env python 

2 

3""" 

4camcops_server/cc_modules/cc_tsv.py 

5 

6=============================================================================== 

7 

8 Copyright (C) 2012-2020 Rudolf Cardinal (rudolf@pobox.com). 

9 

10 This file is part of CamCOPS. 

11 

12 CamCOPS is free software: you can redistribute it and/or modify 

13 it under the terms of the GNU General Public License as published by 

14 the Free Software Foundation, either version 3 of the License, or 

15 (at your option) any later version. 

16 

17 CamCOPS is distributed in the hope that it will be useful, 

18 but WITHOUT ANY WARRANTY; without even the implied warranty of 

19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

20 GNU General Public License for more details. 

21 

22 You should have received a copy of the GNU General Public License 

23 along with CamCOPS. If not, see <https://www.gnu.org/licenses/>. 

24 

25=============================================================================== 

26 

27**Helper functions/classes for spreadsheet-style tab-separated value (TSV) 

28exports.** 

29 

30""" 

31 

32from collections import OrderedDict 

33import csv 

34import io 

35import logging 

36import os 

37import random 

38import re 

39from typing import (Any, BinaryIO, Callable, Dict, Iterable, List, Optional, 

40 Sequence, Union) 

41import zipfile 

42 

43from cardinal_pythonlib.datetimefunc import ( 

44 format_datetime, 

45 get_now_localtz_pendulum, 

46) 

47from cardinal_pythonlib.excel import ( 

48 convert_for_openpyxl, 

49 convert_for_pyexcel_ods3, 

50) 

51from cardinal_pythonlib.logs import BraceStyleAdapter 

52from sqlalchemy.engine.result import ResultProxy 

53 

54from camcops_server.cc_modules.cc_constants import DateFormat 

55 

# Backend selection flags: each one chooses which library is used to write
# the corresponding spreadsheet format.
ODS_VIA_PYEXCEL = True  # significantly faster
XLSX_VIA_PYEXCEL = True

# Import only the ODS backend selected above. The names belonging to the
# backend that is NOT in use are bound to None so that they still exist at
# module level.
if ODS_VIA_PYEXCEL:
    import pyexcel_ods3  # e.g. pip install pyexcel-ods3==0.5.3
    ODSWriter = ODSSheet = None
else:
    from odswriter import ODSWriter, Sheet as ODSSheet  # noqa
    pyexcel_ods3 = None

# Same arrangement for the XLSX backend.
if XLSX_VIA_PYEXCEL:
    import pyexcel_xlsx  # e.g. pip install pyexcel-xlsx==0.5.7
    openpyxl = XLWorkbook = XLWorksheet = None
else:
    from openpyxl.workbook.workbook import Workbook as XLWorkbook
    from openpyxl.worksheet.worksheet import Worksheet as XLWorksheet
    pyexcel_xlsx = None

# Module-level logger, wrapped in cardinal_pythonlib's BraceStyleAdapter.
log = BraceStyleAdapter(logging.getLogger(__name__))

75 

76 

77# ============================================================================= 

78# TSV output holding structures 

79# ============================================================================= 

80 

class TsvPage(object):
    """
    Represents a single TSV "spreadsheet".

    The page stores its data as a list of dictionaries (``rows``), each
    mapping a column heading to a value, plus an ordered list of the
    headings seen so far (``headings``). A row need not contain every
    heading; missing cells are emitted as ``None``.
    """
    def __init__(self, name: str,
                 rows: List[Union[Dict[str, Any], OrderedDict]]) -> None:
        """
        Args:
            name: name for the whole sheet
            rows: list of rows, where each row is a dictionary mapping
                column name to value

        Raises:
            :exc:`AssertionError` if ``name`` is empty
        """
        assert name, "Missing name"
        self.name = name
        self.rows = rows
        self.headings = []  # type: List[str]
        for row in rows:
            self._add_headings_if_absent(row.keys())

    def __str__(self) -> str:
        return f"TsvPage: name={self.name}\n{self.get_tsv()}"

    @classmethod
    def from_headings_rows(cls, name: str, headings: List[str],
                           rows: List[Sequence[Any]]) -> "TsvPage":
        """
        Creates a TsvPage object using a list of headings and the row data
        as a list of lists.

        Args:
            name: name for the sheet
            headings: column headings, in order
            rows: row data; every row must have exactly one value per heading

        Raises:
            :exc:`AssertionError` if a row's length differs from the number
            of headings
        """
        page = cls(name=name, rows=[])
        # Take a private list copy: the caller may pass a tuple or another
        # non-list sequence, and we must not mutate the caller's object when
        # headings are later appended or sorted.
        page.headings = list(headings)
        n_cols = len(page.headings)
        for row in rows:
            assert len(row) == n_cols, "Row length != number of headings"
            page.rows.append(dict(zip(page.headings, row)))
        return page

    @classmethod
    def from_resultproxy(cls, name: str, rp: "ResultProxy") -> "TsvPage":
        """
        Creates a TsvPage object from an SQLAlchemy ResultProxy.

        Args:
            rp:
                A :class:`sqlalchemy.engine.result.ResultProxy`.
            name:
                Name for this sheet.
        """
        column_names = rp.keys()
        rows = rp.fetchall()
        return cls.from_headings_rows(
            name=name, headings=column_names, rows=rows)

    @property
    def empty(self) -> bool:
        """
        Do we have zero rows?
        """
        return len(self.rows) == 0

    def _add_headings_if_absent(self, headings: Iterable[str]) -> None:
        """
        Add any headings we've not yet seen to our list of headings,
        preserving the order in which they are first encountered.
        """
        # Set-based membership test; the list itself preserves ordering.
        seen = set(self.headings)
        for h in headings:
            if h not in seen:
                seen.add(h)
                self.headings.append(h)

    def add_or_set_value(self, heading: str, value: Any) -> None:
        """
        If we contain only a single row, this function will set the value
        for a given column (``heading``) to ``value``.

        Raises:
            :exc:`AssertionError` if we don't have exactly 1 row
        """
        assert len(self.rows) == 1, (
            "add_or_set_value can only be used if #rows == 1"
        )
        self._add_headings_if_absent([heading])
        self.rows[0][heading] = value

    def add_or_set_column(self, heading: str, values: List[Any]) -> None:
        """
        Set the column labelled ``heading`` so it contains the values specified
        in ``values``. The length of ``values`` must equal the number of rows
        that we already contain.

        Raises:
            :exc:`AssertionError` if the number of values doesn't match
            the number of existing rows
        """
        assert len(values) == len(self.rows), "#values != #existing rows"
        self._add_headings_if_absent([heading])
        for row, value in zip(self.rows, values):
            row[heading] = value

    def add_or_set_columns_from_page(self, other: "TsvPage") -> None:
        """
        This function presupposes that ``self`` and ``other`` are two pages
        ("spreadsheets") with *matching* rows.

        It updates values or creates columns in ``self`` such that the values
        from all columns in ``other`` are written to the corresponding rows of
        ``self``.

        Raises:
            :exc:`AssertionError` if the two pages (sheets) don't have
            the same number of rows.
        """
        assert len(self.rows) == len(other.rows), "Mismatched #rows"
        self._add_headings_if_absent(other.headings)
        for row, other_row in zip(self.rows, other.rows):
            row.update(other_row)

    def add_rows_from_page(self, other: "TsvPage") -> None:
        """
        Add all rows from ``other`` to ``self``.

        The row dictionaries are shared with ``other``, not copied.
        """
        self._add_headings_if_absent(other.headings)
        self.rows.extend(other.rows)

    def sort_headings(self) -> None:
        """
        Sort our headings internally.
        """
        self.headings.sort()

    @property
    def plainrows(self) -> List[List[Any]]:
        """
        Returns a list of rows, where each row is a list of values.
        Does not include a "header" row.

        Compare :attr:`rows`, which is a list of dictionaries.
        """
        return [[row.get(h) for h in self.headings] for row in self.rows]

    def spreadsheetrows(self, converter: Callable[[Any], Any]) \
            -> List[List[Any]]:
        """
        Like :meth:`plainrows`, but (a) ensures every cell is converted to a
        value that can be sent to a spreadsheet converter (e.g. ODS, XLSX), and
        (b) includes a header row.

        Args:
            converter: function applied to every data cell (not to headings)
        """
        rows = [list(self.headings)]
        for row in self.rows:
            rows.append([converter(row.get(h)) for h in self.headings])
        return rows

    def get_tsv(self, dialect: str = "excel-tab") -> str:
        r"""
        Returns the entire page (sheet) as TSV: one header row and then
        lots of data rows.

        For the dialect, see
        https://docs.python.org/3/library/csv.html#csv.excel_tab.

        For CSV files, see RFC 4180: https://tools.ietf.org/html/rfc4180.

        For TSV files, see
        https://www.iana.org/assignments/media-types/text/tab-separated-values.

        Test code:

        .. code-block:: python

            import io
            import csv
            from typing import List

            def test(row: List[str], dialect: str = "excel-tab") -> str:
                f = io.StringIO()
                writer = csv.writer(f, dialect=dialect)
                writer.writerow(row)
                return f.getvalue()

            test(["hello", "world"])
            test(["hello\ttab", "world"])  # actual tab within double quotes
            test(["hello\nnewline", "world"])  # actual newline within double quotes
            test(['hello"doublequote', "world"])  # doubled double quote within double quotes

        """  # noqa
        f = io.StringIO()
        writer = csv.writer(f, dialect=dialect)
        writer.writerow(self.headings)
        writer.writerows(self.plainrows)
        return f.getvalue()

    def write_to_openpyxl_xlsx_worksheet(self, ws: "XLWorksheet") -> None:
        """
        Writes data from this page to an existing ``openpyxl`` XLSX worksheet.
        """
        ws.append(self.headings)
        for row in self.rows:
            ws.append([convert_for_openpyxl(row.get(h))
                       for h in self.headings])

    def write_to_odswriter_ods_worksheet(self, ws: "ODSSheet") -> None:
        """
        Writes data from this page to an existing ``odswriter`` ODS sheet.
        """
        # noinspection PyUnresolvedReferences
        ws.writerow(self.headings)
        for row in self.rows:
            # noinspection PyUnresolvedReferences
            ws.writerow([row.get(h) for h in self.headings])

    def r_object_name(self) -> str:
        """
        Name of the object when imported into R.
        The main thing: no leading underscores.
        """
        n = self.name
        n = n[1:] if n.startswith("_") else n
        return f"camcops_{n}"  # less chance of conflict within R

    def r_data_table_definition(self) -> str:
        """
        Returns a string to define this object as a ``data.table`` in R.

        The page is rendered as CSV and embedded in a double-quoted R string
        literal that is passed to ``data.table::fread``.

        See also:

        - https://stackoverflow.com/questions/32103639/read-csv-file-in-r-with-double-quotes
        """  # noqa
        object_name = self.r_object_name()
        csv_text = self.get_tsv(dialect="excel")
        # Escape for an R double-quoted string literal. Backslashes must be
        # escaped FIRST, otherwise (a) backslashes in the data would be
        # interpreted as R escape sequences and (b) the backslashes we add
        # for the quotes would themselves be doubled.
        csv_text = csv_text.replace("\\", "\\\\").replace('"', r'\"')
        definition = (
            f'data.table::fread(sep=",", header=TRUE, text="{csv_text}"\n)'
        )
        return f"{object_name} <- {definition}"

317 

318 

class TsvCollection(object):
    """
    A collection of :class:`camcops_server.cc_modules.cc_tsv.TsvPage` pages
    (spreadsheets), like an Excel workbook.
    """
    def __init__(self) -> None:
        self.pages = []  # type: List[TsvPage]

    def __str__(self) -> str:
        return (
            "TsvCollection:\n" +
            "\n\n".join(page.get_tsv() for page in self.pages)
        )

    # -------------------------------------------------------------------------
    # Pages
    # -------------------------------------------------------------------------

    def page_with_name(self, page_name: str) -> Optional["TsvPage"]:
        """
        Returns the page with the specific name, or ``None`` if no such
        page exists.
        """
        return next((page for page in self.pages if page.name == page_name),
                    None)

    def add_page(self, page: "TsvPage") -> None:
        """
        Adds a new page to our collection. If the new page has the same name
        as an existing page, rows from the new page are added to the existing
        page. Does nothing if the new page is empty.
        """
        if page.empty:
            return
        existing_page = self.page_with_name(page.name)
        if existing_page is not None:
            # Blend with existing page
            existing_page.add_rows_from_page(page)
        else:
            # New page
            self.pages.append(page)

    def add_pages(self, pages: List["TsvPage"]) -> None:
        """
        Adds all ``pages`` to our collection, via :func:`add_page`.
        """
        for page in pages:
            self.add_page(page)

    def sort_headings_within_all_pages(self) -> None:
        """
        Sort headings within each of our pages.
        """
        for page in self.pages:
            page.sort_headings()

    def sort_pages(self) -> None:
        """
        Sort our pages by their page name.
        """
        self.pages.sort(key=lambda p: p.name)

    def get_page_names(self) -> List[str]:
        """
        Return a list of the names of all our pages.
        """
        return [p.name for p in self.pages]

    # -------------------------------------------------------------------------
    # TSV
    # -------------------------------------------------------------------------

    def get_tsv_file(self, page_name: str) -> str:
        """
        Returns a TSV file for a named page.

        Raises:
            :exc:`AssertionError` if the named page does not exist

        """
        page = self.page_with_name(page_name)
        assert page is not None, f"No such page with name {page_name}"
        return page.get_tsv()

    # -------------------------------------------------------------------------
    # ZIP of TSVs
    # -------------------------------------------------------------------------

    def write_zip(self,
                  file: Union[str, BinaryIO],
                  encoding: str = "utf-8",
                  compression: int = zipfile.ZIP_DEFLATED) -> None:
        """
        Writes data to a file, as a ZIP file of TSV files.

        Args:
            file: filename or file-like object
            encoding: encoding to use when writing the TSV files
            compression: compression method to use

        Choice of compression method: see

        - https://docs.python.org/3/library/zipfile.html
        - https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
        - https://en.wikipedia.org/wiki/Zip_(file_format)#Compression_methods

        Note also that ``openpyxl`` uses ``ZIP_DEFLATED``, which seems to be
        the most portable if not the best compression.
        """
        if isinstance(file, str):  # it's a filename
            with open(file, "wb") as binaryfile:
                # Recurse once, passing ALL options through (including the
                # compression method).
                return self.write_zip(binaryfile, encoding, compression)
        with zipfile.ZipFile(file, mode="w", compression=compression) as z:
            # Write to ZIP.
            # If there are no valid task instances, there'll be no TSV;
            # that's OK.
            for page in self.pages:
                tsv_filename = page.name + ".tsv"
                z.writestr(tsv_filename, page.get_tsv().encode(encoding))

    def as_zip(self, encoding: str = "utf-8") -> bytes:
        """
        Returns the TSV collection as a ZIP file containing TSV files.

        Args:
            encoding: encoding to use when writing the TSV files
        """
        with io.BytesIO() as memfile:
            self.write_zip(memfile, encoding)
            zip_contents = memfile.getvalue()
        return zip_contents

    # -------------------------------------------------------------------------
    # XLSX, ODS
    # -------------------------------------------------------------------------

    def write_xlsx(self, file: Union[str, BinaryIO]) -> None:
        """
        Write the contents in XLSX (Excel) format to a file.

        Args:
            file: filename or file-like object
        """
        if XLSX_VIA_PYEXCEL:  # use pyexcel_xlsx
            data = self._get_pyexcel_data(convert_for_openpyxl)
            pyexcel_xlsx.save_data(file, data)
        else:  # use openpyxl
            # Marginal performance gain with write_only. Does not automatically
            # add a blank sheet
            wb = XLWorkbook(write_only=True)
            valid_name_dict = self.get_pages_with_valid_sheet_names()
            for page, title in valid_name_dict.items():
                ws = wb.create_sheet(title=title)
                page.write_to_openpyxl_xlsx_worksheet(ws)
            wb.save(file)

    def as_xlsx(self) -> bytes:
        """
        Returns the TSV collection as an XLSX (Excel) file.
        """
        with io.BytesIO() as memfile:
            self.write_xlsx(memfile)
            contents = memfile.getvalue()
        return contents

    @staticmethod
    def get_sheet_title(page: "TsvPage") -> str:
        r"""
        Returns a worksheet name for a :class:`TsvPage`.

        See ``openpyxl/workbook/child.py``.

        - Excel prohibits ``\``, ``*``, ``?``, ``:``, ``/``, ``[``, ``]``
        - LibreOffice also prohibits ``'`` as first or last character but let's
          just replace that globally.
        - Titles longer than 31 characters are cut to 28 characters plus
          ``...``.
        """
        title = re.sub(r"[\\*?:/\[\]']", "_", page.name)

        if len(title) > 31:
            title = f"{title[:28]}..."

        return title

    def _get_pyexcel_data(self, converter: Callable[[Any], Any]) \
            -> Dict[str, List[List[Any]]]:
        """
        Returns data in the format expected by ``pyexcel``, which is an ordered
        dictionary mapping sheet names to a list of rows, where each row is a
        list of cell values.

        Args:
            converter: function applied to every data cell
        """
        data = OrderedDict()
        # Use the de-duplicated sheet names: two pages whose sanitized or
        # truncated titles coincide would otherwise share a dictionary key,
        # and the earlier page's data would be silently overwritten.
        valid_name_dict = self.get_pages_with_valid_sheet_names()
        for page, title in valid_name_dict.items():
            data[title] = page.spreadsheetrows(converter)
        return data

    def write_ods(self, file: Union[str, BinaryIO]) -> None:
        """
        Writes an ODS (OpenOffice spreadsheet document) to a file.

        Args:
            file: filename or file-like object
        """
        if ODS_VIA_PYEXCEL:  # use pyexcel_ods3
            data = self._get_pyexcel_data(convert_for_pyexcel_ods3)
            pyexcel_ods3.save_data(file, data)
        else:  # use odswriter
            if isinstance(file, str):  # it's a filename
                with open(file, "wb") as binaryfile:
                    return self.write_ods(binaryfile)  # recurse once
            # noinspection PyCallingNonCallable
            with ODSWriter(file) as odsfile:
                valid_name_dict = self.get_pages_with_valid_sheet_names()
                for page, title in valid_name_dict.items():
                    sheet = odsfile.new_sheet(name=title)
                    page.write_to_odswriter_ods_worksheet(sheet)

    def as_ods(self) -> bytes:
        """
        Returns the TSV collection as an ODS (OpenOffice spreadsheet document)
        file.
        """
        with io.BytesIO() as memfile:
            self.write_ods(memfile)
            contents = memfile.getvalue()
        return contents

    def get_pages_with_valid_sheet_names(self) -> Dict["TsvPage", str]:
        """
        Returns an ordered mapping from :class:`TsvPage` objects to their
        sheet names. Names are sanitized via :meth:`get_sheet_title` and then
        made unique via :meth:`make_sheet_names_unique`.
        """
        name_dict = OrderedDict()

        for page in self.pages:
            name_dict[page] = self.get_sheet_title(page)

        self.make_sheet_names_unique(name_dict)

        return name_dict

    @staticmethod
    def make_sheet_names_unique(name_dict: Dict["TsvPage", str]) -> None:
        """
        Modifies (in place) a mapping from :class:`TsvPage` to worksheet names,
        such that all page names are unique.

        - See also :func:`avoid_duplicate_name` in
          ``openpyxl/workbook/child.py``
        - We keep the 31 character restriction
        - Names are compared case-insensitively.
        """
        unique_names = set()  # lower-case versions of names already taken

        # Only values of existing keys are reassigned during iteration, so
        # the dictionary's size never changes while we loop over it.
        for page, name in name_dict.items():
            attempt = 0

            while name.lower() in unique_names:
                attempt += 1

                if attempt > 1000:
                    # algorithm failure, better to let Excel deal with the
                    # consequences than get stuck in a loop
                    log.debug(
                        f"Failed to generate a unique sheet name from {name}"
                    )
                    break

                # Increment any trailing number (or start at 1), and
                # overwrite the tail of the name with it so that the overall
                # length does not grow.
                match = re.search(r'\d+$', name)
                count = 0
                if match is not None:
                    count = int(match.group())

                new_suffix = str(count + 1)
                name = name[:-len(new_suffix)] + new_suffix
            name_dict[page] = name
            unique_names.add(name.lower())

    # -------------------------------------------------------------------------
    # R
    # -------------------------------------------------------------------------

    def as_r(self) -> str:
        """
        Returns data as an R script.

        This could be more sophisticated, e.g. creating factors with
        appropriate levels (etc.).
        """
        now = format_datetime(get_now_localtz_pendulum(),
                              DateFormat.ISO8601_HUMANIZED_TO_SECONDS_TZ)
        table_definition_str = "\n\n".join(
            page.r_data_table_definition()
            for page in self.pages
        )
        script = f"""#!/usr/bin/env Rscript

# R script generated by CamCOPS at {now}

# =============================================================================
# Libraries
# =============================================================================

library(data.table)

# =============================================================================
# Data
# =============================================================================

{table_definition_str}

"""
        return script

    def write_r(self, filename: str, encoding: str = "utf-8") -> None:
        """
        Write the contents in R format to a file.

        Args:
            filename: name of the file to write (passed to :func:`open`)
            encoding: encoding to use
        """
        with open(filename, "wt", encoding=encoding) as f:
            f.write(self.as_r())

642 

643 

def _make_benchmarking_collection(nsheets: int = 100,
                                  nrows: int = 200,
                                  ncols: int = 30,
                                  mindata: int = 0,
                                  maxdata: int = 1000000) -> TsvCollection:
    """
    Builds a :class:`TsvCollection` of random data for benchmarking.

    Args:
        nsheets: number of pages (named ``sheet1``, ``sheet2``, ...)
        nrows: number of rows per page
        ncols: number of columns per page (named ``c1``, ``c2``, ...)
        mindata: lowest random integer to generate (inclusive)
        maxdata: highest random integer to generate (inclusive)

    Every cell holds the string form of a random integer.
    """
    log.info(f"Creating TsvCollection with nsheets={nsheets}, nrows={nrows}, "
             f"ncols={ncols}...")
    collection = TsvCollection()
    column_names = [f"c{col_index}" for col_index in range(1, ncols + 1)]
    for sheet_index in range(1, nsheets + 1):
        sheet_rows = []
        for _ in range(nrows):
            # One random value per column, generated in column order.
            sheet_rows.append({
                column: str(random.randint(mindata, maxdata))
                for column in column_names
            })
        collection.add_page(
            TsvPage(name=f"sheet{sheet_index}", rows=sheet_rows))
    log.info("... done.")
    return collection

663 

664 

def file_size(filename: str) -> int:
    """
    Reports how many bytes the named file occupies.

    Args:
        filename: path of the file to inspect
    """
    size_in_bytes = os.path.getsize(filename)
    return size_in_bytes

670 

671 

def benchmark_save(xlsx_filename: str = "test.xlsx",
                   ods_filename: str = "test.ods",
                   tsv_zip_filename: str = "test.zip",
                   r_filename: str = "test.R") -> None:
    """
    Writes a randomly generated collection in every supported output format,
    logging the resulting file sizes.

    Use with:

    .. code-block:: python

        from cardinal_pythonlib.logs import main_only_quicksetup_rootlogger
        from camcops_server.cc_modules.cc_tsv import benchmark_save
        main_only_quicksetup_rootlogger()
        benchmark_save()

    Args:
        xlsx_filename: XLSX file to create
        ods_filename: ODS file to create
        tsv_zip_filename: TSV ZIP file to create
        r_filename: R script to create

    Problem in Nov 2019 is that ODS is extremely slow. Rough timings:

    - TSV ZIP: about 4.1 Mb, about 0.2 s. Good.
    - XLSX (via openpyxl): about 4.6 Mb, 16 seconds.
    - XLSX (via pyexcel_xlsx): about 4.6 Mb, 16 seconds.
    - ODS (via odswriter): about 53 Mb, 56 seconds.
    - ODS (via pyexcel_ods3): about 2.8 Mb, 29 seconds.
    """
    coll = _make_benchmarking_collection()

    # (label for the log, writer method, destination), in execution order.
    jobs = (
        ("TSV ZIP", coll.write_zip, tsv_zip_filename),
        ("XLSX", coll.write_xlsx, xlsx_filename),
        ("ODS", coll.write_ods, ods_filename),
        ("R", coll.write_r, r_filename),
    )
    for label, writer, destination in jobs:
        log.info(f"Writing {label}...")
        writer(destination)
        log.info(f"... done. File size {file_size(destination)}")