Commit 8c92905e authored by Aurélien Campéas

tsio: retire storage of the internal diffs

The snapshot representation is now good enough.

Resolves #42.
parent 82fd06a49dec
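
In practice the visible API change is the removal of the `diff` keyword on `log`: revisions no longer carry a per-name diff payload, and callers read series content through `get` / `get_history` instead. A minimal before/after sketch (hypothetical caller code, not part of this commit):

    # before: each revision carried the deserialized diffs
    #   for rev in tsh.log(engine, diff=True):
    #       for name, diff_ts in rev['diff'].items():
    #           ...

    # after: log only returns metadata (rev, author, date, meta, names);
    # series values are fetched explicitly
    for rev in tsh.log(engine, names=['xserie']):
        for name in rev['names']:
            ts = tsh.get(engine, name)  # current state of the series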
@@ -39,26 +39,6 @@ def tsh(request, engine):
tsh = tsio.TimeSerie(namespace)
yield tsh
# build a ts using the logs from another
log = tsh.log(engine, diff=True)
allnames = set()
for rev in log:
for name, ts in rev['diff'].items():
if 'big' in name:
continue
allnames.add(name)
tsh.insert(engine, ts, 'new_' + name,
rev['author'], _insertion_date=rev['date'])
# NOTE: the name set varies depending on the amount of tests
# so we don't capture that exact set for regression purposes
# We only want to prove the manipulated series can be reconstructed
# using the logger.
for name in allnames:
assert (tsh.get(engine, name) == tsh.get(engine, 'new_' + name)).all()
schema.reset(engine, namespace)
@pytest.fixture(scope='session')
def ptsh(engine):
......
@@ -125,25 +125,16 @@ def test_bigdata(engine, tracker, ptsh):
t1 = time() - t0
tshclass = tsh.__class__.__name__
with engine.connect() as cn:
cn.execute('set search_path to "{}.timeserie"'.format(tsh.namespace))
df = pd.read_sql('select id, diff from big order by id', cn)
df['diff'] = df['diff'].apply(lambda x: 0 if x is None else len(x))
size = df['diff'].sum()
tracker.append({'test': 'bigdata_insert',
'class': tshclass,
'time': t1,
'diffsize': size})
'time': t1})
t0 = time()
tsh.get_history(engine, 'big')
t1 = time() - t0
tracker.append({'test': 'bigdata_history_all',
'class': tshclass,
'time': t1,
'diffsize': None})
'time': t1})
t0 = time()
for year in (2015, 2017, 2019):
@@ -155,8 +146,7 @@ def test_bigdata(engine, tracker, ptsh):
t1 = time() - t0
tracker.append({'test': 'bigdata_history_chunks',
'class': tshclass,
'time': t1,
'diffsize': None})
'time': t1})
@pytest.mark.perf
@@ -181,25 +171,16 @@ def test_lots_of_diffs(engine, tracker, ptsh):
t1 = time() - t0
tshclass = tsh.__class__.__name__
with engine.connect() as cn:
cn.execute('set search_path to "{}.timeserie"'.format(tsh.namespace))
df = pd.read_sql("select id, diff from manydiffs order by id ",
cn)
df['diff'] = df['diff'].apply(lambda x: 0 if x is None else len(x))
size = df['diff'].sum()
tracker.append({'test': 'manydiffs_insert',
'class': tshclass,
'time': t1,
'diffsize': size})
'time': t1})
t0 = time()
tsh.get_history(engine, 'manydiffs')
t1 = time() - t0
tracker.append({'test': 'manydiffs_history_all',
'class': tshclass,
'time': t1,
'diffsize': None})
'time': t1})
t0 = time()
for month in range(1, 3):
@@ -212,8 +193,7 @@ def test_lots_of_diffs(engine, tracker, ptsh):
t1 = time() - t0
tracker.append({'test': 'manydiffs_history_chunks',
'class': tshclass,
'time': t1,
'diffsize': None})
'time': t1})
t0 = time()
for month in range(1, 3):
@@ -228,5 +208,4 @@ def test_lots_of_diffs(engine, tracker, ptsh):
t1 = time() - t0
tracker.append({'test': 'manydiffs_history_chunks_valuedate',
'class': tshclass,
'time': t1,
'diffsize': None})
'time': t1})
@@ -885,18 +885,6 @@ insertion_date value_date
2017-01-10 02:00:00 2.0
""", tsh.get(engine, 'xserie'))
# internal structure is ok
with engine.connect() as cn:
cn.execute('set search_path to "{}.timeserie"'.format(tsh.namespace))
df = pd.read_sql("select id, diff from xserie order by id", cn)
df['diff'] = df['diff'].apply(lambda x: False if x is None else True)
assert_df("""
id diff
0 1 False
1 2 True
""", df)
log = tsh.log(engine, names=['xserie', 'yserie'])
# 5 and 7 have disappeared
assert [l['author'] for l in log
......
@@ -245,7 +245,7 @@ class TimeSerie(SeriesServices):
stats['serie names'] = [row for row, in cn.execute(sql).fetchall()]
return stats
def log(self, cn, limit=0, diff=False, names=None, authors=None,
def log(self, cn, limit=0, names=None, authors=None,
stripped=False,
fromrev=None, torev=None,
fromdate=None, todate=None):
@@ -290,11 +290,6 @@ class TimeSerie(SeriesServices):
'meta': meta or {},
'names': self._changeset_series(cn, csetid)})
if diff:
for rev in log:
rev['diff'] = {name: self.diff_at(cn, rev['rev'], name)
for name in rev['names']}
log.sort(key=lambda rev: rev['rev'])
return log
@@ -338,7 +333,6 @@ class TimeSerie(SeriesServices):
head = snapshot.update(diff)
value = {
'cset': csid,
'diff': self._serialize(diff),
'snapshot': head
}
cn.execute(table.insert().values(value))
@@ -376,7 +370,6 @@ class TimeSerie(SeriesServices):
Column('cset', Integer,
ForeignKey('{}.changeset.id'.format(self.namespace)),
index=True, nullable=False),
Column('diff', BYTEA),
Column('snapshot', Integer,
ForeignKey('{}.snapshot.{}.id'.format(
self.namespace,
@@ -456,21 +449,3 @@ class TimeSerie(SeriesServices):
serie=name
)
cn.execute(sql)
def diff_at(self, cn, csetid, name):
table = self._get_ts_table(cn, name)
cset = self.schema.changeset
def filtercset(sql):
return sql.where(table.c.cset == cset.c.id
).where(cset.c.id == csetid)
sql = filtercset(select([table.c.id]))
tsid = cn.execute(sql).scalar()
if tsid == 1:
return Snapshot(cn, self, name).first
sql = filtercset(select([table.c.diff]))
ts = self._deserialize(cn.execute(sql).scalar(), name)
return self._ensure_tz_consistency(cn, ts)
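
With `diff_at` and the stored `diff` column gone, a per-changeset diff can still be recomputed on demand from two consecutive states of a series (for instance two entries of `get_history`). A minimal pandas sketch, assuming both states are plain pandas Series indexed by value date; `recompute_diff` is a hypothetical helper, not part of the library:

    import pandas as pd

    def recompute_diff(old_state, new_state):
        # keep the points that are new or whose value changed
        # between the two consecutive states (deletions are ignored)
        old_aligned = old_state.reindex(new_state.index)
        changed = old_aligned.isnull() | (old_aligned != new_state)
        return new_state[changed]

    old = pd.Series([1.0, 2.0], index=pd.date_range('2017-1-1', periods=2))
    new = pd.Series([1.0, 3.0, 4.0], index=pd.date_range('2017-1-1', periods=3))
    recompute_diff(old, new)  # -> the 2017-01-02 and 2017-01-03 points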