pandasで次のようにxlsファイルを読み込もうとしたところ、以下のエラーが返ってきた。
pd.read_excel('ファイル.xls')
---------------------------------------------------------------------------
XLRDError                                 Traceback (most recent call last)
<ipython-input-...> in <module>()
----> 1 pd.read_excel('ファイル.xls')

F:\environment\Anaconda3\envs\netops\lib\site-packages\pandas\util\_decorators.py in wrapper(*args, **kwargs)
    294                 )
    295                 warnings.warn(msg, FutureWarning, stacklevel=stacklevel)
--> 296             return func(*args, **kwargs)
    297
    298         return wrapper

F:\environment\Anaconda3\envs\netops\lib\site-packages\pandas\io\excel\_base.py in read_excel(io, sheet_name, header, names, index_col, usecols, squeeze, dtype, engine, converters, true_values, false_values, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, parse_dates, date_parser, thousands, comment, skipfooter, convert_float, mangle_dupe_cols)
    302
    303     if not isinstance(io, ExcelFile):
--> 304         io = ExcelFile(io, engine=engine)
    305     elif engine and engine != io.engine:
    306         raise ValueError(

F:\environment\Anaconda3\envs\netops\lib\site-packages\pandas\io\excel\_base.py in __init__(self, path_or_buffer, engine)
    865         self._io = stringify_path(path_or_buffer)
    866
--> 867         self._reader = self._engines[engine](self._io)
    868
    869     def __fspath__(self):

F:\environment\Anaconda3\envs\netops\lib\site-packages\pandas\io\excel\_xlrd.py in __init__(self, filepath_or_buffer)
     20         err_msg = "Install xlrd >= 1.0.0 for Excel support"
     21         import_optional_dependency("xlrd", extra=err_msg)
---> 22         super().__init__(filepath_or_buffer)
     23
     24     @property

F:\environment\Anaconda3\envs\netops\lib\site-packages\pandas\io\excel\_base.py in __init__(self, filepath_or_buffer)
    351             self.book = self.load_workbook(filepath_or_buffer)
    352         elif isinstance(filepath_or_buffer, str):
--> 353             self.book = self.load_workbook(filepath_or_buffer)
    354         elif isinstance(filepath_or_buffer, bytes):
    355             self.book = self.load_workbook(BytesIO(filepath_or_buffer))

F:\environment\Anaconda3\envs\netops\lib\site-packages\pandas\io\excel\_xlrd.py in load_workbook(self, filepath_or_buffer)
     35             return open_workbook(file_contents=data)
     36         else:
---> 37             return open_workbook(filepath_or_buffer)
     38
     39     @property

F:\environment\Anaconda3\envs\netops\lib\site-packages\xlrd\__init__.py in open_workbook(filename, logfile, verbosity, use_mmap, file_contents, encoding_override, formatting_info, on_demand, ragged_rows)
    155         formatting_info=formatting_info,
    156         on_demand=on_demand,
--> 157         ragged_rows=ragged_rows,
    158     )
    159     return bk

F:\environment\Anaconda3\envs\netops\lib\site-packages\xlrd\book.py in open_workbook_xls(filename, logfile, verbosity, use_mmap, file_contents, encoding_override, formatting_info, on_demand, ragged_rows)
     90         t1 = perf_counter()
     91         bk.load_time_stage_1 = t1 - t0
---> 92         biff_version = bk.getbof(XL_WORKBOOK_GLOBALS)
     93         if not biff_version:
     94             raise XLRDError("Can't determine file's BIFF version")

F:\environment\Anaconda3\envs\netops\lib\site-packages\xlrd\book.py in getbof(self, rqd_stream)
   1276             bof_error('Expected BOF record; met end of file')
   1277         if opcode not in bofcodes:
-> 1278             bof_error('Expected BOF record; found %r' % self.mem[savpos:savpos+8])
   1279         length = self.get2bytes()
   1280         if length == MY_EOF:

F:\environment\Anaconda3\envs\netops\lib\site-packages\xlrd\book.py in bof_error(msg)
   1270
   1271         def bof_error(msg):
-> 1272             raise XLRDError('Unsupported format, or corrupt file: ' + msg)
   1273         savpos = self._position
   1274         opcode = self.get2bytes()

XLRDError: Unsupported format, or corrupt file: Expected BOF record; found b'<meta ht'
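詳しくはよく分からないけど、エラーメッセージの末尾に found b'<meta ht' とあるので、ファイルの先頭がHTMLの<meta>タグになっていて、それをExcel形式として読み込もうとして失敗しているようだ。つまり、拡張子は.xlsでも中身はHTMLらしい。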
そこで少し調べてみたところ、read_htmlで代用できるかもしれないという情報を見つけたので、
pd.read_html('ファイル.xls')
で読み込んだら、データフレームのリストが返却されて、その内の2つ目の要素(インデックス1)に目的のデータがデータフレームとしてそのまま入っていた。
解決。
コメント