Commit c1f864f1 authored by Aurélien Campéas's avatar Aurélien Campéas
Browse files

tsio: drop multi-index support

We have no users for this and it always felt brittle
(even though it had quite a bit of test coverage).

Closes #41.
parent 8fb3a5eec0cf
......@@ -140,84 +140,6 @@ Freq: H
assert str(back.index.dtype) == 'datetime64[ns, UTC]'
def test_multi_index_aware(engine, tsh):
ts_multi_aware = genserie(
start=pd.Timestamp(
2017, 10, 28, 23
).tz_localize('UTC').tz_convert('Europe/Paris'),
freq=['15T', '30T', '60T'],
repeat=10,
tz='Europe/Paris',
name='ts_multi_aware',
)
ts_multi_aware.index.rename(['a', 'b', 'c'], inplace=True)
assert_df("""
a b c
2017-10-29 01:00:00+02:00 2017-10-29 01:00:00+02:00 2017-10-29 01:00:00+02:00 0
2017-10-29 01:15:00+02:00 2017-10-29 01:30:00+02:00 2017-10-29 02:00:00+02:00 1
2017-10-29 01:30:00+02:00 2017-10-29 02:00:00+02:00 2017-10-29 02:00:00+01:00 2
2017-10-29 01:45:00+02:00 2017-10-29 02:30:00+02:00 2017-10-29 03:00:00+01:00 3
2017-10-29 02:00:00+02:00 2017-10-29 02:00:00+01:00 2017-10-29 04:00:00+01:00 4
2017-10-29 02:15:00+02:00 2017-10-29 02:30:00+01:00 2017-10-29 05:00:00+01:00 5
2017-10-29 02:30:00+02:00 2017-10-29 03:00:00+01:00 2017-10-29 06:00:00+01:00 6
2017-10-29 02:45:00+02:00 2017-10-29 03:30:00+01:00 2017-10-29 07:00:00+01:00 7
2017-10-29 02:00:00+01:00 2017-10-29 04:00:00+01:00 2017-10-29 08:00:00+01:00 8
2017-10-29 02:15:00+01:00 2017-10-29 04:30:00+01:00 2017-10-29 09:00:00+01:00 9
""", ts_multi_aware)
tsh.insert(engine, ts_multi_aware, 'ts_multi_aware', 'test')
ts_aware = tsh.get(engine, 'ts_multi_aware')
assert_df("""
ts_multi_aware
a b c
2017-10-28 23:00:00+00:00 2017-10-28 23:00:00+00:00 2017-10-28 23:00:00+00:00 0.0
2017-10-28 23:15:00+00:00 2017-10-28 23:30:00+00:00 2017-10-29 00:00:00+00:00 1.0
2017-10-28 23:30:00+00:00 2017-10-29 00:00:00+00:00 2017-10-29 01:00:00+00:00 2.0
2017-10-28 23:45:00+00:00 2017-10-29 00:30:00+00:00 2017-10-29 02:00:00+00:00 3.0
2017-10-29 00:00:00+00:00 2017-10-29 01:00:00+00:00 2017-10-29 03:00:00+00:00 4.0
2017-10-29 00:15:00+00:00 2017-10-29 01:30:00+00:00 2017-10-29 04:00:00+00:00 5.0
2017-10-29 00:30:00+00:00 2017-10-29 02:00:00+00:00 2017-10-29 05:00:00+00:00 6.0
2017-10-29 00:45:00+00:00 2017-10-29 02:30:00+00:00 2017-10-29 06:00:00+00:00 7.0
2017-10-29 01:00:00+00:00 2017-10-29 03:00:00+00:00 2017-10-29 07:00:00+00:00 8.0
2017-10-29 01:15:00+00:00 2017-10-29 03:30:00+00:00 2017-10-29 08:00:00+00:00 9.0
""", pd.DataFrame(ts_aware.sort_index()))
# Note: the columns are returned in alphabetical order
ts = tsh.get(engine, 'ts_multi_aware',
from_value_date=pd.Timestamp(2017, 10, 29, 0).tz_localize('UTC'),
to_value_date=pd.Timestamp(2017, 10, 29, 1).tz_localize('UTC'))
assert_df("""
a b c
2017-10-29 00:00:00+00:00 2017-10-29 01:00:00+00:00 2017-10-29 03:00:00+00:00 4.0
2017-10-29 00:15:00+00:00 2017-10-29 01:30:00+00:00 2017-10-29 04:00:00+00:00 5.0
2017-10-29 00:30:00+00:00 2017-10-29 02:00:00+00:00 2017-10-29 05:00:00+00:00 6.0
2017-10-29 00:45:00+00:00 2017-10-29 02:30:00+00:00 2017-10-29 06:00:00+00:00 7.0
2017-10-29 01:00:00+00:00 2017-10-29 03:00:00+00:00 2017-10-29 07:00:00+00:00 8.0
""", ts)
ts = genserie(datetime(2010, 1, 1), 'D', 10)
with pytest.raises(Exception) as err:
tsh.insert(engine, ts, 'ts_multi_aware', 'test')
assert err.value.args[0] == 'Incompatible index types'
ts = genserie(
start=pd.Timestamp(
2017, 10, 28, 23
).tz_localize('UTC').tz_convert('Europe/Paris'),
freq=['15T', '30T'],
repeat=10,
tz='Europe/Paris',
name='ts_multi_aware',
)
ts.index.rename(['a', 'b'], inplace=True)
with pytest.raises(Exception) as err:
tsh.insert(engine, ts, 'ts_multi_aware', 'test')
assert err.value.args[0] == "Incompatible multi indexes: ['a', 'b', 'c'] vs ['a', 'b']"
def test_chunks(engine, tsh):
with tempattr(Snapshot, '_bucket_size', 2):
ts = genserie(datetime(2010, 1, 1), 'D', 5)
......@@ -901,216 +823,6 @@ Freq: D
tsh.insert(engine, ts_end, 'ts_full_del_str', 'test')
def test_multi_index(engine, tsh):
appdate_0 = pd.DatetimeIndex(start=datetime(2015, 1, 1),
end=datetime(2015, 1, 2),
freq='D').values
pubdate_0 = [pd.datetime(2015, 1, 11, 12, 0, 0)] * 2
insertion_date_0 = [pd.datetime(2015, 1, 11, 12, 30, 0)] * 2
multi = [
appdate_0,
np.array(pubdate_0),
np.array(insertion_date_0)
]
ts_multi = pd.Series(range(2), index=multi)
ts_multi.index.rename(['b', 'c', 'a'], inplace=True)
tsh.insert(engine, ts_multi, 'ts_multi_simple', 'test')
ts = tsh.get(engine, 'ts_multi_simple')
assert_df("""
ts_multi_simple
a b c
2015-01-11 12:30:00 2015-01-01 2015-01-11 12:00:00 0.0
2015-01-02 2015-01-11 12:00:00 1.0
""", pd.DataFrame(ts))
diff = tsh.insert(engine, ts_multi, 'ts_multi_simple', 'test')
assert diff is None
ts_multi_2 = pd.Series([0, 2], index=multi)
ts_multi_2.index.rename(['b', 'c', 'a'], inplace=True)
tsh.insert(engine, ts_multi_2, 'ts_multi_simple', 'test')
ts = tsh.get(engine, 'ts_multi_simple')
assert_df("""
ts_multi_simple
a b c
2015-01-11 12:30:00 2015-01-01 2015-01-11 12:00:00 0.0
2015-01-02 2015-01-11 12:00:00 2.0
""", pd.DataFrame(ts))
# bigger ts
appdate_0 = pd.DatetimeIndex(start=datetime(2015, 1, 1),
end=datetime(2015, 1, 4),
freq='D').values
pubdate_0 = [pd.datetime(2015, 1, 11, 12, 0, 0)] * 4
insertion_date_0 = [pd.datetime(2015, 1, 11, 12, 30, 0)] * 4
appdate_1 = pd.DatetimeIndex(start=datetime(2015, 1, 1),
end=datetime(2015, 1, 4),
freq='D').values
pubdate_1 = [pd.datetime(2015, 1, 21, 12, 0, 0)] * 4
insertion_date_1 = [pd.datetime(2015, 1, 21, 12, 30, 0)] * 4
multi = [
np.concatenate([appdate_0, appdate_1]),
np.array(pubdate_0 + pubdate_1),
np.array(insertion_date_0 + insertion_date_1)
]
ts_multi = pd.Series(range(8), index=multi)
ts_multi.index.rename(['a', 'c', 'b'], inplace=True)
tsh.insert(engine, ts_multi, 'ts_multi', 'test')
ts = tsh.get(engine, 'ts_multi')
assert_df("""
ts_multi
a b c
2015-01-01 2015-01-11 12:30:00 2015-01-11 12:00:00 0.0
2015-01-21 12:30:00 2015-01-21 12:00:00 4.0
2015-01-02 2015-01-11 12:30:00 2015-01-11 12:00:00 1.0
2015-01-21 12:30:00 2015-01-21 12:00:00 5.0
2015-01-03 2015-01-11 12:30:00 2015-01-11 12:00:00 2.0
2015-01-21 12:30:00 2015-01-21 12:00:00 6.0
2015-01-04 2015-01-11 12:30:00 2015-01-11 12:00:00 3.0
2015-01-21 12:30:00 2015-01-21 12:00:00 7.0
""", pd.DataFrame(ts.sort_index()))
# Note: the columns are returned in alphabetical order
appdate_2 = pd.DatetimeIndex(start=datetime(2015, 1, 1),
end=datetime(2015, 1, 4),
freq='D').values
pubdate_2 = [pd.datetime(2015, 1, 31, 12, 0, 0)] * 4
insertion_date_2 = [pd.datetime(2015, 1, 31, 12, 30, 0)] * 4
multi_2 = [
np.concatenate([appdate_1, appdate_2]),
np.array(pubdate_1 + pubdate_2),
np.array(insertion_date_1 + insertion_date_2)
]
ts_multi_2 = pd.Series([4] * 8, index=multi_2)
ts_multi_2.index.rename(['a', 'c', 'b'], inplace=True)
# A second ts is inserted with some index entries in common with the
# first one: appdate_1, pubdate_1, and insertion_date_1. The value is
# set to 4, which matches the previous value of the "2015-01-01" point.
diff = tsh.insert(engine, ts_multi_2, 'ts_multi', 'test')
assert_df("""
ts_multi
a b c
2015-01-01 2015-01-31 12:30:00 2015-01-31 12:00:00 4.0
2015-01-02 2015-01-21 12:30:00 2015-01-21 12:00:00 4.0
2015-01-31 12:30:00 2015-01-31 12:00:00 4.0
2015-01-03 2015-01-21 12:30:00 2015-01-21 12:00:00 4.0
2015-01-31 12:30:00 2015-01-31 12:00:00 4.0
2015-01-04 2015-01-21 12:30:00 2015-01-21 12:00:00 4.0
2015-01-31 12:30:00 2015-01-31 12:00:00 4.0
""", pd.DataFrame(diff.sort_index()))
# the differential skips a value for "2015-01-01"
# which does not change from the previous ts
ts = tsh.get(engine, 'ts_multi')
assert_df("""
ts_multi
a b c
2015-01-01 2015-01-11 12:30:00 2015-01-11 12:00:00 0.0
2015-01-21 12:30:00 2015-01-21 12:00:00 4.0
2015-01-31 12:30:00 2015-01-31 12:00:00 4.0
2015-01-02 2015-01-11 12:30:00 2015-01-11 12:00:00 1.0
2015-01-21 12:30:00 2015-01-21 12:00:00 4.0
2015-01-31 12:30:00 2015-01-31 12:00:00 4.0
2015-01-03 2015-01-11 12:30:00 2015-01-11 12:00:00 2.0
2015-01-21 12:30:00 2015-01-21 12:00:00 4.0
2015-01-31 12:30:00 2015-01-31 12:00:00 4.0
2015-01-04 2015-01-11 12:30:00 2015-01-11 12:00:00 3.0
2015-01-21 12:30:00 2015-01-21 12:00:00 4.0
2015-01-31 12:30:00 2015-01-31 12:00:00 4.0
""", pd.DataFrame(ts.sort_index()))
# the resulting ts now has 3 values for each point in 'a'
def test_multi_index_get_history(engine, tsh):
appdate = pd.DatetimeIndex(
start=datetime(2015, 1, 1),
end=datetime(2015, 1, 2),
freq='D'
).values
forecast_date = [pd.Timestamp(2015, 1, 11, 12, 0, 0)] * 2
multi = [
appdate,
np.array(forecast_date),
]
ts_multi = pd.Series(range(2), index=multi)
ts_multi.index.rename(['app_date', 'fc_date'], inplace=True)
tsh.insert(engine, ts_multi, 'ts_mi', 'Babar',
_insertion_date=utcdt(2015, 1, 11, 12, 30, 0))
ts = tsh.get_history(engine, 'ts_mi')
assert_df("""
insertion_date app_date fc_date
2015-01-11 12:30:00+00:00 2015-01-01 2015-01-11 12:00:00 0.0
2015-01-02 2015-01-11 12:00:00 1.0
""", ts)
ts = tsh.get_history(engine, 'ts_mi', diffmode=True)
assert_df("""
insertion_date app_date fc_date
2015-01-11 12:30:00+00:00 2015-01-01 2015-01-11 12:00:00 0.0
2015-01-02 2015-01-11 12:00:00 1.0
""", ts)
# new forecast
appdate = pd.DatetimeIndex(
start=datetime(2015, 1, 1),
end=datetime(2015, 1, 2),
freq='D'
).values
forecast_date = [pd.Timestamp(2015, 1, 11, 13, 0, 0)] * 2
multi = [
appdate,
np.array(forecast_date),
]
ts_multi = pd.Series((x+.1 for x in range(2)), index=multi)
ts_multi.index.rename(['app_date', 'fc_date'], inplace=True)
tsh.insert(engine, ts_multi, 'ts_mi', 'Babar',
_insertion_date=utcdt(2015, 1, 11, 13, 30, 0))
ts = tsh.get_history(engine, 'ts_mi')
assert_df("""
insertion_date app_date fc_date
2015-01-11 12:30:00+00:00 2015-01-01 2015-01-11 12:00:00 0.0
2015-01-02 2015-01-11 12:00:00 1.0
2015-01-11 13:30:00+00:00 2015-01-01 2015-01-11 12:00:00 0.0
2015-01-11 13:00:00 0.1
2015-01-02 2015-01-11 12:00:00 1.0
2015-01-11 13:00:00 1.1
""", ts)
ts = tsh.get_history(engine, 'ts_mi', diffmode=True)
assert_df("""
insertion_date app_date fc_date
2015-01-11 12:30:00+00:00 2015-01-01 2015-01-11 12:00:00 0.0
2015-01-02 2015-01-11 12:00:00 1.0
2015-01-11 13:30:00+00:00 2015-01-01 2015-01-11 13:00:00 0.1
2015-01-02 2015-01-11 13:00:00 1.1
""", ts)
def test_get_history(engine, tsh):
for numserie in (1, 2, 3):
with engine.connect() as cn:
......
......@@ -50,16 +50,12 @@ class TimeSerie(SeriesServices):
return
assert ('<M8[ns]' == newts.index.dtype or
'datetime' in str(newts.index.dtype) or
'datetime' in str(newts.index.dtype) and not
isinstance(newts.index, pd.MultiIndex))
newts.name = name
table = self._get_ts_table(cn, name)
if isinstance(newts.index, pd.MultiIndex):
# we impose an order to survive roundtrips
newts = newts.reorder_levels(sorted(newts.index.names))
if table is None:
return self._create(cn, newts, name, author, _insertion_date)
......@@ -346,12 +342,6 @@ class TimeSerie(SeriesServices):
assert ts.name is not None
metadata = self.metadata(cn, ts.name)
if metadata and metadata.get('tzaware', False):
if isinstance(ts.index, pd.MultiIndex):
for i in range(len(ts.index.levels)):
ts.index = ts.index.set_levels(
ts.index.levels[i].tz_localize('UTC'),
level=i)
return ts
return ts.tz_localize('UTC')
return ts
......@@ -442,11 +432,6 @@ class TimeSerie(SeriesServices):
raise Exception(m)
if ts.index.dtype.name != meta['index_type']:
raise Exception('Incompatible index types')
inames = [name for name in ts.index.names if name]
if inames != meta['index_names']:
raise Exception('Incompatible multi indexes: {} vs {}'.format(
meta['index_names'], inames)
)
def _finalize_insertion(self, cn, csid, name):
table = self.schema.changeset_series
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment