Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" 

2 pyexcel_io.io 

3 ~~~~~~~~~~~~~~~~~~~ 

4 

5 The io interface to file extensions 

6 

7 :copyright: (c) 2014-2020 by Onni Software Ltd. 

8 :license: New BSD License, see LICENSE for more details 

9""" 

10import os 

11import warnings 

12from types import GeneratorType 

13 

14from pyexcel_io import constants 

15from pyexcel_io.reader import Reader 

16from pyexcel_io.writer import Writer 

17from pyexcel_io.plugins import OLD_READERS, OLD_WRITERS 

18from pyexcel_io._compact import isstream 

19from pyexcel_io.exceptions import ( 

20 NoSupportingPluginFound, 

21 SupportingPluginAvailableButNotInstalled, 

22) 

23 

24 

def iget_data(afile, file_type=None, **keywords):
    """Get data from an excel file source without loading it eagerly

    The work is delegated to ``_get_data`` with ``streaming=True``, so the
    data has not gone into memory yet. If you use dedicated partial read
    plugins, such as pyexcel-xlsxr, pyexcel-odsr, you will notice
    the memory consumption drop when you work with big files.

    :param afile: a file name, a file stream or actual content
    :param sheet_name: the name of the sheet to be loaded
    :param sheet_index: the index of the sheet to be loaded
    :param sheets: a list of sheet to be loaded
    :param file_type: used only when filename is not a physical file name
    :param force_file_type: used only when filename refers to a physical file
                            and it is intended to open it as forced file type.
    :param library: explicitly name a library for use.
                    e.g. library='pyexcel-ods'
    :param auto_detect_float: defaults to True
    :param auto_detect_int: defaults to True
    :param auto_detect_datetime: defaults to True
    :param ignore_infinity: defaults to True
    :param ignore_nan_text: various forms of 'NaN', 'nan' are ignored
    :param default_float_nan: choose one form of 'NaN', 'nan'
    :param pep_0515_off: turn off pep 0515. default to True.
    :param keep_trailing_empty_cells: keep trailing columns. default to False
    :param keywords: any other library specific parameters
    :returns: a ``(data, reader)`` pair as produced by ``_get_data``; the
              reader is handed back alongside the data because streaming
              mode does not close it here
    """
    lazy_data, open_reader = _get_data(
        afile, file_type=file_type, streaming=True, **keywords
    )
    return lazy_data, open_reader

56 

57 

def get_data(afile, file_type=None, streaming=None, **keywords):
    """Get fully loaded data from an excel file source

    :param afile: a file name, a file stream or actual content
    :param sheet_name: the name of the sheet to be loaded
    :param sheet_index: the index of the sheet to be loaded
    :param sheets: a list of sheet to be loaded
    :param file_type: used only when filename is not a physical file name
    :param force_file_type: used only when filename refers to a physical file
                            and it is intended to open it as forced file type.
    :param streaming: no longer changes the returned data. Passing True only
                      emits a warning that points at ``iget_data``; the load
                      is always performed with ``streaming=False``.
    :param library: explicitly name a library for use.
                    e.g. library='pyexcel-ods'
    :param auto_detect_float: defaults to True
    :param auto_detect_int: defaults to True
    :param auto_detect_datetime: defaults to True
    :param ignore_infinity: defaults to True
    :param ignore_nan_text: various forms of 'NaN', 'nan' are ignored
    :param default_float_nan: choose one form of 'NaN', 'nan'
    :param pep_0515_off: turn off pep 0515. default to True.
    :param keep_trailing_empty_cells: keep trailing columns. default to False
    :param keywords: any other library specific parameters
    :returns: the data part of what ``_get_data`` produces (documented
              upstream as an ordered dictionary); the reader part is dropped
    """
    if streaming is True:
        warnings.warn("Please use iget_data instead")

    loaded, _reader_not_needed = _get_data(
        afile, file_type=file_type, streaming=False, **keywords
    )
    return loaded

90 

91 

def _get_data(afile, file_type=None, **keywords):
    """Translate ``afile`` into the matching ``load_data`` argument

    * a stream is passed as ``file_stream``; its file type falls back to
      ``constants.FILE_FORMAT_CSV`` when none is given
    * ``None``, or anything given without a file type, is passed as
      ``file_name``
    * everything else is passed as ``file_content``

    :param afile: a file name, a file stream or actual content
    :param file_type: the file type that goes with ``afile``, if any
    :param keywords: forwarded to ``load_data`` untouched
    :returns: whatever ``load_data`` returns
    """
    if isstream(afile):
        source_argument = "file_stream"
        file_type = file_type or constants.FILE_FORMAT_CSV
    elif afile is None or file_type is None:
        source_argument = "file_name"
    else:
        source_argument = "file_content"

    keywords[source_argument] = afile
    keywords["file_type"] = file_type
    return load_data(**keywords)

106 

107 

def save_data(afile, data, file_type=None, **keywords):
    """Save data to an excel file source

    A list or a generator is stored as one sheet under
    ``constants.DEFAULT_SHEET_NAME``; any other data must be a dictionary.

    :param afile: actual file name or a file stream
    :param data: a dictionary but an ordered dictionary is preferred
    :param file_type: used only when filename is not a physical file name.
                      For a stream it defaults to
                      ``constants.FILE_FORMAT_CSV``.
    :param force_file_type: used only when filename refers to a physical file
                            and it is intended to open it as forced file type.
    :param library: explicitly name a library for use.
                    e.g. library='pyexcel-ods'
    :param keywords: any other parameters that python csv module's
                     `fmtparams <https://docs.python.org/release/3.1.5/library/csv.html#dialects-and-formatting-parameters>`_
    """  # noqa
    if isinstance(data, (list, GeneratorType)):
        # a bare sheet: wrap it so the writer always receives a book
        to_store = {constants.DEFAULT_SHEET_NAME: data}
        single_sheet_in_book = True
    else:
        to_store = data
        single_sheet_in_book = len(list(data.keys())) == 1

    if isstream(afile):
        if file_type is None:
            file_type = constants.FILE_FORMAT_CSV
        destination = dict(file_stream=afile, file_type=file_type)
    else:
        destination = dict(file_name=afile, file_type=file_type)

    keywords.update(destination)
    keywords["single_sheet_in_book"] = single_sheet_in_book
    with get_writer(**keywords) as writer:
        writer.write(to_store)

144 

145 

def load_data(
    file_name=None,
    file_content=None,
    file_stream=None,
    file_type=None,
    force_file_type=None,
    sheet_name=None,
    sheet_index=None,
    sheets=None,
    library=None,
    streaming=False,
    **keywords
):
    """Load data from any supported excel formats

    Exactly one of ``file_name``, ``file_content`` and ``file_stream`` has to
    be given, i.e. be something other than ``None``.

    :param file_name: actual file name
    :param file_content: actual content
    :param file_stream: a file stream
    :param file_type: used only when filename is not a physical file name.
                      When omitted it is taken from ``force_file_type`` or,
                      failing that, from the text after the last "." in
                      ``file_name``.
    :param force_file_type: used only when filename refers to a physical file
                            and it is intended to open it as forced file type.
    :param sheet_name: the name of the sheet to be loaded
    :param sheet_index: the index of the sheet to be loaded
    :param sheets: a list of sheet to be loaded
    :param library: explicitly name a library for use.
    :param streaming: when it is ``False`` every value of the result is
                      turned into a list and the reader is closed
    :param keywords: any other parameters, passed to the reader when opening
    :returns: ``(result, reader)``. ``reader`` is ``None`` when ``streaming``
              is ``False``; otherwise it is the reader that was opened here
              and it has not been closed.
    :raises IOError: when not exactly one input is given, or when no plugin
                     supports the file and ``file_name`` does not exist or
                     is not a file
    :raises Exception: when the file type has to come from ``file_name`` but
                       ``file_name`` has no ``split`` method
    :raises NoSupportingPluginFound: re-raised when no plugin supports the
                                     input and the file name is an existing
                                     file (or no file name was given)
    """
    given_inputs = [
        source
        for source in (file_name, file_content, file_stream)
        if source is not None
    ]
    if len(given_inputs) != 1:
        raise IOError(constants.MESSAGE_ERROR_02)

    if file_type is None:
        if force_file_type:
            file_type = force_file_type
        else:
            try:
                file_type = file_name.split(".")[-1]
            except AttributeError:
                raise Exception(constants.MESSAGE_FILE_NAME_SHOULD_BE_STRING)

    try:
        reader = OLD_READERS.get_a_plugin(file_type, library)
    except (NoSupportingPluginFound, SupportingPluginAvailableButNotInstalled):
        reader = Reader(file_type, library)

    try:
        # Dispatch with the same "is not None" rule the validation above
        # uses. Truthiness would reject a legitimate but falsy input, such
        # as empty content, even though it passed the validation.
        if file_name is not None:
            reader.open(file_name, **keywords)
        elif file_content is not None:
            reader.open_content(file_content, **keywords)
        else:
            # exactly one input is set, so it can only be the stream
            reader.open_stream(file_stream, **keywords)

        if sheet_name:
            result = reader.read_sheet_by_name(sheet_name)
        elif sheet_index is not None:
            result = reader.read_sheet_by_index(sheet_index)
        elif sheets is not None:
            result = reader.read_many(sheets)
        else:
            result = reader.read_all()

        if streaming is False:
            # materialise every value before the reader goes away
            for key in result.keys():
                result[key] = list(result[key])
            reader.close()
            reader = None

        return result, reader
    except NoSupportingPluginFound:
        # Turn "no plugin" into a clearer error when the real problem is
        # the path itself; otherwise let the original exception through.
        if file_name is None or os.path.isfile(file_name):
            raise
        if os.path.exists(file_name):
            raise IOError(constants.MESSAGE_NOT_FILE_FORMATTER % file_name)
        raise IOError(constants.MESSAGE_FILE_DOES_NOT_EXIST % file_name)

228 

229 

def get_writer(
    file_name=None,
    file_stream=None,
    file_type=None,
    library=None,
    force_file_type=None,
    **keywords
):
    """find a suitable writer and open it on the given target

    Exactly one of ``file_name`` and ``file_stream`` has to be given.

    :param file_name: the target passed to the writer's ``open`` or
                      ``open_content``
    :param file_stream: the stream passed to the writer's ``open_stream``
    :param file_type: when omitted together with a file name, it is taken
                      from ``force_file_type`` or from the text after the
                      last "." in ``file_name``
    :param library: explicitly name a library for use.
    :param force_file_type: file type to use instead of the one in the file
                            name
    :param keywords: passed to the writer when opening
    :returns: the writer
    :raises IOError: when not exactly one target is given
    :raises Exception: when the file type has to come from ``file_name`` but
                       ``file_name`` has no ``split`` method
    """
    targets = [
        target for target in (file_name, file_stream) if target is not None
    ]
    if len(targets) != 1:
        raise IOError(constants.MESSAGE_ERROR_02)

    # True only when a file name came without an explicit file type, so the
    # type had to be worked out here.
    type_was_derived = file_type is None and bool(file_name)
    if type_was_derived:
        if force_file_type:
            file_type = force_file_type
        else:
            try:
                file_type = file_name.split(".")[-1]
            except AttributeError:
                raise Exception(constants.MESSAGE_FILE_NAME_SHOULD_BE_STRING)

    try:
        writer = OLD_WRITERS.get_a_plugin(file_type, library)
    except (NoSupportingPluginFound, SupportingPluginAvailableButNotInstalled):
        writer = Writer(file_type, library)

    if file_name:
        if type_was_derived:
            writer.open(file_name, **keywords)
        else:
            # an explicit file type alongside file_name goes through
            # open_content instead of open
            writer.open_content(file_name, **keywords)
    elif file_stream:
        writer.open_stream(file_stream, **keywords)
    # no else branch: the missing-target case was rejected above
    return writer

272 

273 

274# backward compatibility

275store_data = save_data