Commit d5d72d86 authored by Aurélien Campéas's avatar Aurélien Campéas
Browse files

Add an API point to compute, for each revision, the start/end of the value dates, used by `get_delta`

As a result, `get_delta` now performs well.

DELTA all value dates 2.005328416824341
DELTA 1 day  0.18934059143066406

We remove a meaningless series of .get_history tests in the perf tests
rather than change the assertion.
parent 9d8740a38854
......@@ -211,18 +211,3 @@ def test_lots_of_diffs(engine, tracker, ptsh):
tracker.append({'test': 'manydiffs_history_chunks',
'class': tshclass,
'time': t1})
t0 = time()
for month in range(1, 3):
for day in range(1, 5):
date = utcdt(2018, month, day)
ts = tsh.get_history(engine, 'manydiffs',
from_insertion_date=date,
to_insertion_date=date + timedelta(days=31),
from_value_date=date + timedelta(days=10),
to_value_date=date + timedelta(days=20))
assert ts is not None
t1 = time() - t0
tracker.append({'test': 'manydiffs_history_chunks_valuedate',
'class': tshclass,
'time': t1})
......@@ -53,6 +53,10 @@ Freq: H
assert (ts.index == back.index).all()
assert str(back.index.dtype) == 'datetime64[ns, UTC]'
ival = tsh.interval(engine, 'tztest')
assert ival.left == pd.Timestamp('2017-10-28 23:00:00+0000', tz='UTC')
assert ival.right == pd.Timestamp('2017-10-29 02:00:00+0000', tz='UTC')
ts = genserie(datetime(2017, 10, 29, 1),
'H', 4, tz='UTC')
ts.index = ts.index.tz_convert('Europe/Paris')
......@@ -96,6 +100,10 @@ insertion_date value_date
2017-10-29 03:00:00+00:00 2.0
""", hist)
ival = tsh.interval(engine, 'tztest')
assert ival.left == pd.Timestamp('2017-10-28 23:00:00+0000', tz='UTC')
assert ival.right == pd.Timestamp('2017-10-29 04:00:00+0000', tz='UTC')
def test_differential(engine, tsh):
ts_begin = genserie(datetime(2010, 1, 1), 'D', 10)
......@@ -107,6 +115,14 @@ def test_differential(engine, tsh):
assert tsh.exists(engine, 'ts_test')
assert not tsh.exists(engine, 'this_does_not_exist')
assert tsh.interval(engine, 'ts_test') == pd.Interval(
datetime(2010, 1, 1, 0, 0), datetime(2010, 1, 10, 0, 0),
closed='both'
)
with pytest.raises(ValueError):
assert tsh.interval(engine, 'nosuchts')
assert_df("""
2010-01-01 0.0
2010-01-02 1.0
......@@ -161,7 +177,8 @@ def test_differential(engine, tsh):
ts_longer.iloc[3] = 3.14
ts_longer.iloc[5] = ts_begin.iloc[7]
tsh.insert(engine, ts_longer, 'ts_test', 'test')
with engine.connect() as cn:
tsh.insert(cn, ts_longer, 'ts_test', 'test')
id3 = tsh.last_id(engine, 'ts_test')
assert id1 < id2 < id3
......@@ -186,6 +203,11 @@ def test_differential(engine, tsh):
2010-01-17 14.00
""", tsh.get(engine, 'ts_test'))
assert tsh.interval(engine, 'ts_test') == pd.Interval(
datetime(2010, 1, 1, 0, 0), datetime(2010, 1, 17, 0, 0),
closed='both'
)
# start testing manual overrides
ts_begin = genserie(datetime(2010, 1, 1), 'D', 5, initval=[2])
ts_begin.loc['2010-01-04'] = -1
......
......@@ -9,14 +9,16 @@ from sqlalchemy import Table, Column, Integer, ForeignKey, Index
from sqlalchemy.sql.elements import NONE_NAME
from sqlalchemy.engine.base import Engine
from sqlalchemy.sql.expression import select, func, desc
from sqlalchemy.dialects.postgresql import BYTEA
from sqlalchemy.dialects.postgresql import BYTEA, TIMESTAMP
from tshistory.schema import tsschema
from tshistory.util import (
closed_overlaps,
inject_in_index,
num2float,
subset,
SeriesServices,
start_end,
tzaware_serie
)
from tshistory.snapshot import Snapshot, TABLES as SNAPTABLES
......@@ -147,10 +149,6 @@ class TimeSerie(SeriesServices):
if table is None:
return
if deltabefore is not None or deltaafter is not None:
assert from_value_date is None
assert to_value_date is None
cset = self.schema.changeset
revsql = select(
[cset.c.id, cset.c.insertion_date]
......@@ -165,7 +163,15 @@ class TimeSerie(SeriesServices):
if to_insertion_date:
revsql = revsql.where(cset.c.insertion_date <= to_insertion_date)
revs = cn.execute(revsql).fetchall()
if from_value_date or to_value_date:
revsql = revsql.where(
closed_overlaps(from_value_date, to_value_date)
)
revs = cn.execute(
revsql,
{'fromdate': from_value_date, 'todate': to_value_date}
).fetchall()
if not revs:
return
......@@ -218,7 +224,9 @@ class TimeSerie(SeriesServices):
to_value_date=None):
histo = self.get_history(
cn, seriename, deltabefore=-delta
cn, seriename, deltabefore=-delta,
from_value_date=from_value_date,
to_value_date=to_value_date
)
for revdate, serie in histo.items():
inject_in_index(serie, revdate)
......@@ -401,6 +409,19 @@ class TimeSerie(SeriesServices):
log.sort(key=lambda rev: rev['rev'])
return log
def interval(self, cn, seriename):
    """Return the closed value-date interval recorded for a series.

    The bounds are read from the `start` and `end` columns of the row
    with the highest `cset` in the series table.

    :param cn: connection (or engine) used to run the queries
    :param seriename: name of the series to look up
    :returns: a `pd.Interval` closed on both sides
    :raises ValueError: when no table is known for `seriename`
    """
    tablename = self._serie_to_tablename(cn, seriename)
    if tablename is None:
        raise ValueError(f'no such serie: {seriename}')

    # only the latest changeset row is needed, hence the
    # descending order on `cset` with a limit of one
    query = (
        f'select start, "end" '
        f'from "{self.namespace}.timeserie"."{tablename}" '
        f'order by cset desc limit 1'
    )
    row = cn.execute(query).fetchone()
    left, right = row.start, row.end

    # when the series metadata flags it as tz-aware, the bounds
    # are handed back as UTC timestamps
    if self.metadata(cn, seriename).get('tzaware'):
        left = pd.Timestamp(left, tz='UTC')
        right = pd.Timestamp(right, tz='UTC')

    return pd.Interval(left=left, right=right, closed='both')
# /API
# Helpers
......@@ -415,9 +436,12 @@ class TimeSerie(SeriesServices):
snapshot = Snapshot(cn, self, seriename)
csid = self._newchangeset(cn, author, insertion_date, metadata)
head = snapshot.create(newts)
start, end = start_end(newts)
value = {
'cset': csid,
'snapshot': head
'snapshot': head,
'start': start,
'end': end
}
table = self._make_ts_table(cn, seriename, newts)
cn.execute(table.insert().values(value))
......@@ -439,10 +463,16 @@ class TimeSerie(SeriesServices):
return
csid = self._newchangeset(cn, author, insertion_date, metadata)
tsstart, tsend = start_end(newts)
ival = self.interval(cn, seriename)
start = min(tsstart, ival.left.replace(tzinfo=None))
end = max(tsend, ival.right.replace(tzinfo=None))
head = snapshot.update(diff)
value = {
'cset': csid,
'snapshot': head
'snapshot': head,
'start': start,
'end': end
}
cn.execute(table.insert().values(value))
self._finalize_insertion(cn, csid, seriename)
......@@ -481,12 +511,16 @@ class TimeSerie(SeriesServices):
Column('cset', Integer,
ForeignKey('{}.changeset.id'.format(self.namespace)),
nullable=False),
Column('start', TIMESTAMP, nullable=False),
Column('end', TIMESTAMP, nullable=False),
Column('snapshot', Integer,
ForeignKey('{}.snapshot.{}.id'.format(
self.namespace,
tablename))),
Index(NONE_NAME, 'cset'),
Index(NONE_NAME, 'snapshot'),
Index(NONE_NAME, 'start'),
Index(NONE_NAME, 'end'),
schema='{}.timeserie'.format(self.namespace),
keep_existing=True
)
......
......@@ -5,12 +5,33 @@ import hashlib
import numpy as np
import pandas as pd
from pandas.api.types import is_datetimetz
from sqlalchemy.sql.expression import text
def tzaware_serie(ts):
    """Tell whether the index of `ts` carries a timezone.

    :param ts: an object exposing an `.index` attribute (e.g. a
        pandas series)
    :returns: True if the index has a non-None `tz`, False otherwise
        (including for indexes that have no `tz` attribute at all)
    """
    # `pandas.api.types.is_datetimetz` was deprecated in pandas 0.24
    # and removed in 1.0; reading the `tz` attribute of the index
    # gives the same answer and works on old and recent releases.
    return getattr(ts.index, 'tz', None) is not None
def start_end(ts):
    """Return the smallest and largest index values of `ts`.

    When the bounds are timezone-aware they are converted to UTC and
    then stripped of their timezone, so the result is always a pair
    of naive stamps.

    :param ts: a series with a datetime-like index
    :returns: a `(start, end)` tuple
    """
    first = ts.index.min()
    last = ts.index.max()

    # naive index: the bounds can be returned untouched
    if first.tzinfo is None:
        return first, last

    assert last.tzinfo is not None
    # express both bounds in UTC, then drop the timezone marker
    first, last = (
        stamp.tz_convert('UTC').replace(tzinfo=None)
        for stamp in (first, last)
    )
    return first, last
def closed_overlaps(fromdate, todate):
    """Build the SQL `overlaps` clause matching a value-date window.

    A bound given as None is rendered as the matching infinity
    literal; otherwise it is rendered as the `:fromdate` / `:todate`
    bind parameter, whose value must be supplied when the statement
    is executed.  The right-hand period is the `start` column and the
    `"end"` column shifted by one microsecond.

    :param fromdate: lower bound of the window, or None
    :param todate: upper bound of the window, or None
    :returns: the clause wrapped with `text`
    """
    # only the presence of each bound matters here: the actual
    # values travel as bind parameters, not inside the SQL string
    lower = "'-infinity'" if fromdate is None else ':fromdate'
    upper = "'infinity'" if todate is None else ':todate'
    template = '({}, {}) overlaps (start, "end" + interval \'1 microsecond\')'
    clause = template.format(lower, upper)
    return text(clause)
def subset(ts, fromdate, todate):
if fromdate is None and todate is None:
return ts
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment