Commit 163fd531 authored by Aurélien Campéas
Browse files

tsio, test: make `get_history` more robust and add perf tests

NOTE:
* fix a scope bug in conftest
* provide a dedicated `ptsh` TimeSerie fixture for perf tests
  (which don't need the namespace + replay stuff)
parent 6b1fa87a4ab7
......@@ -3,9 +3,9 @@ from pathlib import Path
import logging
from sqlalchemy import create_engine, MetaData
import pandas as pd
import pytest
from pytest_sa_pg.fixture import db
from tshistory import schema, tsio
......@@ -58,10 +58,16 @@ def tsh(request, engine):
schema.reset(engine, namespace)
# Session-scoped fixture dedicated to the perf tests: it resets the schema on
# `engine` (no namespace argument), initialises it against a fresh MetaData,
# and returns a plain tsio.TimeSerie instance.
@pytest.fixture(scope='session')
def ptsh(engine):
schema.reset(engine)
schema.init(engine, MetaData())
return tsio.TimeSerie()
OUT = []
@pytest.fixture
def tracker(scope='session'):
import pandas as pd
@pytest.fixture(scope='session')
def tracker():
yield OUT
print(pd.DataFrame(OUT))
# coding: utf-8
from pathlib import Path
from datetime import datetime
from datetime import datetime, timedelta
from time import time
from dateutil import parser
import calendar
......@@ -827,6 +827,26 @@ insertion_date value_date
2017-01-02 1.0
""", tsc)
tsc = tsh.get_history(engine, 'smallserie',
from_insertion_date=datetime(2017, 2, 4),
to_insertion_date=datetime(2017, 2, 4))
assert tsc is None
tsc = tsh.get_history(engine, 'smallserie',
from_insertion_date=datetime(2016, 2, 1),
to_insertion_date=datetime(2017, 2, 2))
assert_df("""
insertion_date value_date
2017-02-01 2017-01-01 0.0
2017-02-02 2017-01-01 0.0
2017-01-02 1.0
""", tsc)
tsc = tsh.get_history(engine, 'smallserie',
from_insertion_date=datetime(2016, 2, 1),
to_insertion_date=datetime(2016, 12, 31))
assert tsc is None
def test_add_na(engine, tsh):
# a serie of NaNs won't be inserted into the database
......@@ -883,11 +903,15 @@ def test_dtype_mismatch(engine, tsh):
@pytest.mark.perf
def test_bigdata(engine, tracker, tsh):
def test_bigdata(engine, tracker, ptsh):
tsh = ptsh
def create_data():
for year in range(2015, 2020):
serie = genserie(datetime(year, 1, 1), '10Min', 6 * 24 * 365)
tsh.insert(engine, serie, 'big', 'aurelien.campeas@pythonian.fr')
date = datetime(year, 1, 1)
serie = genserie(date, '10Min', 6 * 24 * 365)
with tsh.newchangeset(engine, 'aurelien.campeas@pythonian.fr',
_insertion_date=date):
tsh.insert(engine, serie, 'big')
t0 = time()
create_data()
......@@ -911,7 +935,21 @@ def test_bigdata(engine, tracker, tsh):
t0 = time()
tsh.get_history(engine, 'big')
t1 = time() - t0
tracker.append({'test': 'bigdata_gethistory',
tracker.append({'test': 'bigdata_gethistory_all',
'class': tshclass,
'time': t1,
'diffsize': None,
'snapsize': None})
t0 = time()
for year in (2015, 2017, 2019):
for month in (1, 5, 9, 12):
date = datetime(year, month, 1)
tsh.get_history(engine, 'big',
from_insertion_date=date,
to_insertion_date=date+timedelta(days=31))
t1 = time() - t0
tracker.append({'test': 'bigdata_get_history_chunks',
'class': tshclass,
'time': t1,
'diffsize': None,
......@@ -919,14 +957,19 @@ def test_bigdata(engine, tracker, tsh):
@pytest.mark.perf
def test_lots_of_diffs(engine, tracker, tsh):
def test_lots_of_diffs(engine, tracker, ptsh):
tsh = ptsh
def create_data():
# one insert per day for 3 months (range(1, 4) covers January to March)
for month in range(1, 4):
days = calendar.monthrange(2017, month)[1]
for day in range(1, days+1):
serie = genserie(datetime(2017, month, day), '10Min', 6*24)
date = datetime(2017, month, day)
serie = genserie(date, '10Min', 6*24)
with engine.connect() as cn:
tsh.insert(cn, serie, 'manydiffs', 'aurelien.campeas@pythonian.fr')
with tsh.newchangeset(cn, 'aurelien.campeas@pythonian.fr',
_insertion_date=date.replace(year=2018)):
tsh.insert(cn, serie, 'manydiffs')
t0 = time()
create_data()
......@@ -950,7 +993,22 @@ def test_lots_of_diffs(engine, tracker, tsh):
t0 = time()
tsh.get_history(engine, 'manydiffs')
t1 = time() - t0
tracker.append({'test': 'lots_of_diffs_gethistory',
tracker.append({'test': 'lots_of_diffs_gethistory_all',
'class': tshclass,
'time': t1,
'diffsize': None,
'snapsize': None})
t0 = time()
for month in range(1, 3):
for day in range(1, 5):
date = datetime(2018, month, day)
ts = tsh.get_history(engine, 'manydiffs',
from_insertion_date=date,
to_insertion_date=date+timedelta(days=31))
assert ts is not None
t1 = time() - t0
tracker.append({'test': 'lots_of_diffs_get_history_chunks',
'class': tshclass,
'time': t1,
'diffsize': None,
......
......@@ -197,6 +197,7 @@ class TimeSerie(object):
if table is None:
return
# compute diffs above the snapshot
cset = self.schema.changeset
diffsql = select([cset.c.id, cset.c.insertion_date, table.c.diff]
).order_by(cset.c.id
......@@ -208,11 +209,18 @@ class TimeSerie(object):
diffsql = diffsql.where(cset.c.insertion_date <= to_insertion_date)
diffs = cn.execute(diffsql).fetchall()
series = [(diffs[0]['insertion_date'],
self._build_snapshot_upto(cn, table,
[lambda cset, _: cset.c.id <= diffs[0]['id']]))
]
for csid_, revdate, diff in cn.execute(diffsql).fetchall()[1:]:
if not diffs:
# it's fine to ask for an insertion date range
# where nothing happened, but you get nothing back
return
csid, revdate, diff_ = diffs[0]
snapshot = self._build_snapshot_upto(cn, table, [
lambda cset, _: cset.c.id == csid
])
series = [(revdate, snapshot)]
for csid_, revdate, diff in diffs[1:]:
diff = self._deserialize(diff, table.name)
serie = self._apply_diff(series[-1][1], diff)
series.append((revdate, serie))
......@@ -439,7 +447,7 @@ class TimeSerie(object):
).where(table.c[column] != None)
if qfilter:
sql = sql.where(table.c.csid == cset.c.id)
sql = sql.where(table.c.csid <= cset.c.id)
for filtercb in qfilter:
sql = sql.where(filtercb(cset, table))
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment