Commit 878ec4df authored by Aurélien Campéas
Browse files

tsio: add a `strip` method

Strip a series' history starting from a given changeset.
We make sure to rebuild a top-level snapshot (this helps `.get` performance).

resolves #28
parent 68bdbb66defa
......@@ -63,9 +63,11 @@ def test_changeset(engine, tsh):
{'author': 'babar',
'rev': 1,
'date': datetime(2020, 1, 1, 0, 0),
'meta': {},
'names': ['ts_values', 'ts_othervalues']},
{'author': 'celeste',
'rev': 2,
'meta': {},
'date': datetime(2020, 1, 1, 0, 0),
'names': ['ts_values']}
] == log
......@@ -89,6 +91,98 @@ def test_changeset(engine, tsh):
} == info
def test_strip(engine, tsh):
    """Stripping `xserie` from a changeset removes that revision and the
    later ones, while the unrelated `yserie` changesets stay in the log.
    """
    # four daily revisions of xserie, each one point longer than the last
    for day in range(1, 5):
        stamp = datetime(2017, 1, day)
        xvalues = genserie(datetime(2017, 1, 10), 'H', 1 + day)
        with tsh.newchangeset(engine, 'babar', _insertion_date=stamp):
            tsh.insert(engine, xvalues, 'xserie')
        # an unrelated serie, by another author, in between
        tsh.insert(engine, genserie(datetime(2018, 1, 1), 'D', 1 + day), 'yserie', 'celeste')

    # the three lookup modes of changeset_at
    exact = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3))
    assert exact is not None
    earlier = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3, 1), mode='before')
    later = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3, 1), mode='after')
    assert earlier < exact < later

    # before stripping: eight changesets, authors alternating
    fulllog = tsh.log(engine, names=['xserie', 'yserie'])
    numbered_authors = list(
        enumerate((entry['author'] for entry in fulllog), start=1)
    )
    assert numbered_authors == [
        (1, 'babar'),
        (2, 'celeste'),
        (3, 'babar'),
        (4, 'celeste'),
        (5, 'babar'),
        (6, 'celeste'),
        (7, 'babar'),
        (8, 'celeste')
    ]

    history = tsh.get_history(engine, 'xserie')
    assert_df("""
insertion_date value_date
2017-01-01 2017-01-10 00:00:00 0.0
2017-01-10 01:00:00 1.0
2017-01-02 2017-01-10 00:00:00 0.0
2017-01-10 01:00:00 1.0
2017-01-10 02:00:00 2.0
2017-01-03 2017-01-10 00:00:00 0.0
2017-01-10 01:00:00 1.0
2017-01-10 02:00:00 2.0
2017-01-10 03:00:00 3.0
2017-01-04 2017-01-10 00:00:00 0.0
2017-01-10 01:00:00 1.0
2017-01-10 02:00:00 2.0
2017-01-10 03:00:00 3.0
2017-01-10 04:00:00 4.0
""", history)

    # strip everything from the 2017-1-3 revision onwards
    stripfrom = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3))
    with engine.connect() as cn:
        tsh.strip(cn, 'xserie', stripfrom)

    # only the first two revisions are left in the history
    assert_df("""
insertion_date value_date
2017-01-01 2017-01-10 00:00:00 0.0
2017-01-10 01:00:00 1.0
2017-01-02 2017-01-10 00:00:00 0.0
2017-01-10 01:00:00 1.0
2017-01-10 02:00:00 2.0
""", tsh.get_history(engine, 'xserie'))

    # and the current value is the one from the second revision
    assert_df("""
2017-01-10 00:00:00 0.0
2017-01-10 01:00:00 1.0
2017-01-10 02:00:00 2.0
""", tsh.get(engine, 'xserie'))

    # internal structure is ok: the remaining tip carries a snapshot
    with engine.connect() as cn:
        cn.execute('set search_path to "{}.timeserie"'.format(tsh.namespace))
        rows = pd.read_sql("select id, diff, snapshot from xserie order by id", cn)
        # we only care about presence of a payload, not its content
        for column in ('diff', 'snapshot'):
            rows[column] = rows[column].apply(lambda cell: cell is not None)

    assert_df("""
id diff snapshot
0 1 False True
1 2 True True
""", rows)

    # changesets 5 and 7 (the stripped xserie ones) are gone from the log
    afterlog = tsh.log(engine, names=['xserie', 'yserie'])
    remaining_authors = [entry['author'] for entry in afterlog]
    assert remaining_authors == [
        'babar', 'celeste', 'babar', 'celeste', 'celeste', 'celeste'
    ]

    # with stripped=True they show up again, tagged in their metadata;
    # the last character (changeset id) is masked to keep the test stable
    striplog = tsh.log(engine, stripped=True, names=['xserie', 'yserie'])
    masked = [
        next(iter(entry['meta'].values()))[:-1] + 'X'
        for entry in striplog
        if entry['meta']
    ]
    assert masked == [
        'got stripped from X',
        'got stripped from X'
    ]
def test_tstamp_roundtrip(engine, tsh):
ts = genserie(datetime(2017, 10, 28, 23),
'H', 4, tz='UTC')
......@@ -771,14 +865,17 @@ def test_get_history(engine, tsh):
logs = tsh.log(engine, names=['smallserie'])
assert [
{'author': 'aurelien.campeas@pythonian.fr',
'meta': {},
'date': datetime(2017, 2, 1, 0, 0),
'names': ['smallserie']
},
{'author': 'aurelien.campeas@pythonian.fr',
'meta': {},
'date': datetime(2017, 2, 2, 0, 0),
'names': ['smallserie']
},
{'author': 'aurelien.campeas@pythonian.fr',
'meta': {},
'date': datetime(2017, 2, 3, 0, 0),
'names': ['smallserie']
}
......
......@@ -279,6 +279,62 @@ class TimeSerie(object):
).where(tstable.c.csid == cset.c.id)
return cn.execute(sql).scalar()
def changeset_at(self, cn, seriename, revdate, mode='strict'):
    """Return a changeset id of `seriename` selected by insertion date.

    `mode` picks how the changeset insertion date is compared to
    `revdate`:

    * 'strict': insertion date equal to `revdate`
    * 'before': insertion date lower than or equal to `revdate`
    * 'after': insertion date greater than or equal to `revdate`

    Returns None when no changeset matches.

    NOTE(review): the query carries no ordering clause, so when
    several changesets match ('before'/'after') the one returned is
    whichever row the database yields first.
    """
    assert mode in ('strict', 'before', 'after')
    changesets = self.schema.changeset
    serietable = self._table_definition_for(seriename)

    # pick the date predicate first, then build the query in one go
    stamp = changesets.c.insertion_date
    if mode == 'before':
        datefilter = stamp <= revdate
    elif mode == 'after':
        datefilter = stamp >= revdate
    else:
        datefilter = stamp == revdate

    query = select([serietable.c.csid]).where(
        serietable.c.csid == changesets.c.id
    ).where(datefilter)
    return cn.execute(query).scalar()
def strip(self, cn, seriename, csid):
    """Strip the history of `seriename` starting from changeset `csid`.

    For every changeset reported by `log(fromrev=csid)` for this serie:

    * a 'tshistory.info' entry is written into the changeset metadata
    * the (changeset, serie) link is removed from `changeset_series`

    Then the revisions stored for the serie from `csid` onwards are
    deleted and, if the new tip has no snapshot, one is rebuilt from the
    revisions older than `csid` and stored on the tip.

    cn: connection on which all statements are executed
    seriename: name of the serie to strip
    csid: id of the first changeset to remove

    Raises AssertionError if no changeset is found from `csid` for
    this serie.
    """
    logs = self.log(cn, fromrev=csid, names=(seriename,))
    assert logs

    # put stripping info in the metadata
    cset = self.schema.changeset
    cset_serie = self.schema.changeset_series
    stripinfo = 'got stripped from {}'.format(csid)
    for log in logs:
        rev = log['rev']
        # update changeset.metadata (kept, the changeset itself may
        # still be referenced by other series)
        metadata = cn.execute(
            select([cset.c.metadata]).where(cset.c.id == rev)
        ).scalar() or {}
        metadata['tshistory.info'] = stripinfo
        cn.execute(
            cset.update().where(cset.c.id == rev).values(metadata=metadata)
        )
        # delete the changeset_series item
        cn.execute(
            cset_serie.delete().where(
                cset_serie.c.csid == rev
            ).where(
                cset_serie.c.serie == seriename
            )
        )

    # wipe the diffs: use the same "from csid onwards" range as the log
    # query above and the snapshot filter below, so the serie table and
    # changeset_series stay consistent even if `csid` itself is not a
    # revision of this serie
    # NOTE(review): the previous `== csid` form presumably relied on a
    # cascading `parent` foreign key to drop later revisions -- confirm
    table = self._table_definition_for(seriename)
    cn.execute(table.delete().where(table.c.csid >= csid))

    # rebuild the top-level snapshot
    cstip = self._latest_csid_for(cn, seriename)
    if cstip is None:
        # everything got stripped: there is no tip to carry a snapshot
        return

    tipsnapshot = cn.execute(
        select([table.c.snapshot]).where(table.c.csid == cstip)
    ).scalar()
    if tipsnapshot is None:
        snap = self._build_snapshot_upto(
            cn, table,
            qfilter=(lambda cset, _t: cset.c.id < csid,)
        )
        cn.execute(
            table.update().where(
                table.c.csid == cstip
            ).values(
                snapshot=self._serialize(snap)
            )
        )
def info(self, cn):
"""Gather global statistics on the current tshistory repository
"""
......@@ -291,6 +347,7 @@ class TimeSerie(object):
return stats
def log(self, cn, limit=0, diff=False, names=None, authors=None,
stripped=False,
fromrev=None, torev=None,
fromdate=None, todate=None):
"""Build a structure showing the history of all the series in the db,
......@@ -303,7 +360,7 @@ class TimeSerie(object):
self.schema.registry
)
sql = select([cset.c.id, cset.c.author, cset.c.insertion_date]
sql = select([cset.c.id, cset.c.author, cset.c.insertion_date, cset.c.metadata]
).distinct().order_by(desc(cset.c.id))
if limit:
......@@ -327,12 +384,17 @@ class TimeSerie(object):
if todate:
sql = sql.where(cset.c.insertion_date <= todate)
sql = sql.where(cset.c.id == cset_series.c.csid
).where(cset_series.c.serie == reg.c.name)
if stripped:
# outerjoin to show dead things
sql = sql.select_from(cset.outerjoin(cset_series))
else:
sql = sql.where(cset.c.id == cset_series.c.csid
).where(cset_series.c.serie == reg.c.name)
rset = cn.execute(sql)
for csetid, author, revdate in rset.fetchall():
for csetid, author, revdate, meta in rset.fetchall():
log.append({'rev': csetid, 'author': author, 'date': revdate,
'meta': meta or {},
'names': self._changeset_series(cn, csetid)})
if diff:
......@@ -426,6 +488,7 @@ class TimeSerie(object):
# insertion handling
def _get_tip_id(self, cn, table):
    """Return the highest *local* id of `table` (the `id` column of the
    table itself), or None when the table has no row.
    """
    return cn.execute(select([func.max(table.c.id)])).scalar()
......@@ -501,6 +564,8 @@ class TimeSerie(object):
return None
cset = self.schema.changeset
# beware the potential cartesian product
# between table & cset if there is no qfilter
sql = select([table.c.id,
table.c.diff,
table.c.parent,
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment