-
-
Notifications
You must be signed in to change notification settings - Fork 18.7k
Description
Code Sample, a copy-pastable example if possible
In [1]: import pandas as pd
...:
...: def check_roundtrip(obj):
...:     with pd.HDFStore('test.h5', 'w') as store:
...:         store['obj'] = obj
...:         retrieved = store['obj']
...:     return obj.equals(retrieved)
...:
...: s = pd.Series([], dtype='datetime64[ns, UTC]')
...: t = pd.Series([0], dtype='datetime64[ns, UTC]')
...: df = pd.DataFrame({'A': s})
In [2]: print(check_roundtrip(s))
False
In [3]: print(check_roundtrip(t))
False
In [4]: print(check_roundtrip(df))
------------------------------------------------------------------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-4-57f1e9853bf6> in <module>()
----> 1 print(check_roundtrip(df))
<ipython-input-1-1d7fb84453e9> in check_roundtrip(obj)
4 with pd.HDFStore('test.h5', 'w') as store:
5 store['obj'] = obj
----> 6 retrieved = store['obj']
7 return obj.equals(retrieved)
8
/home/ashieh/.local/lib/python2.7/site-packages/pandas/io/pytables.pyc in __getitem__(self, key)
481
482 def __getitem__(self, key):
--> 483 return self.get(key)
484
485 def __setitem__(self, key, value):
/home/ashieh/.local/lib/python2.7/site-packages/pandas/io/pytables.pyc in get(self, key)
669 if group is None:
670 raise KeyError('No object named %s in the file' % key)
--> 671 return self._read_group(group)
672
673 def select(self, key, where=None, start=None, stop=None, columns=None,
/home/ashieh/.local/lib/python2.7/site-packages/pandas/io/pytables.pyc in _read_group(self, group, **kwargs)
1347 s = self._create_storer(group)
1348 s.infer_axes()
-> 1349 return s.read(**kwargs)
1350
1351
/home/ashieh/.local/lib/python2.7/site-packages/pandas/io/pytables.pyc in read(self, start, stop, **kwargs)
2902 blk_items = self.read_index('block%d_items' % i)
2903 values = self.read_array('block%d_values' % i,
-> 2904 start=_start, stop=_stop)
2905 blk = make_block(values,
2906 placement=items.get_indexer(blk_items))
/home/ashieh/.local/lib/python2.7/site-packages/pandas/io/pytables.pyc in read_array(self, key, start, stop)
2464 if shape is not None:
2465 # length 0 axis
-> 2466 ret = np.empty(shape, dtype=dtype)
2467 else:
2468 ret = node[start:stop]
TypeError: Invalid datetime unit in metadata string "[ns, UTC]"
Problem description
HDFStore does not correctly round-trip timezone-aware data for Series (whether empty or non-empty) or for empty DataFrames. The underlying problem is that the timezone information is not saved correctly: for Series there is no timezone handling at all, and for DataFrames the timezone handling is skipped when the frame is empty.
Expected Output
All three `check_roundtrip` calls above should return `True`, and the DataFrame case should not raise a `TypeError`.
Output of pd.show_versions()
INSTALLED VERSIONS
commit: None
python: 2.7.12.final.0
python-bits: 64
OS: Linux
OS-release: 4.4.0-1049-aws
machine: x86_64
processor: x86_64
byteorder: little
LC_ALL: None
LANG: en_US.UTF-8
LOCALE: None.None
pandas: 0.22.0
pytest: 3.5.0
pip: 9.0.3
setuptools: 39.0.1
Cython: 0.28.1
numpy: 1.14.2
scipy: 1.0.1
pyarrow: 0.9.0
xarray: None
IPython: 5.6.0
sphinx: None
patsy: None
dateutil: 2.7.2
pytz: 2018.3
blosc: None
bottleneck: None
tables: 3.4.2
numexpr: 2.6.4
feather: None
matplotlib: 2.2.2
openpyxl: None
xlrd: None
xlwt: None
xlsxwriter: None
lxml: None
bs4: None
html5lib: 1.0.1
sqlalchemy: None
pymysql: 0.8.0
psycopg2: None
jinja2: 2.10
s3fs: None
fastparquet: None
pandas_gbq: None
pandas_datareader: None