Commit 536a301a authored by Aurélien Campéas
Browse files

snapshot: have a method to compute the set of unreachable chunks

Such orphan chunks are created when strip is invoked.
A follow-up change will make strip use this method.
parent c16e41e47b7a
......@@ -1161,10 +1161,15 @@ insertion_date value_date
2017-01-10 04:00:00 4.0
""", h)
snap = Snapshot(engine, tsh, 'xserie')
assert snap.garbage() == set()
csid = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3))
with engine.begin() as cn:
tsh.strip(cn, 'xserie', csid)
assert len(snap.garbage()) == 2
assert_hist("""
insertion_date value_date
2017-01-01 00:00:00+00:00 2017-01-10 00:00:00 0.0
......
......@@ -257,6 +257,21 @@ def check(db_uri, series=None, namespace='tsh'):
)
@tsh.command(name='garbage')
@click.argument('db-uri')
@click.option('--namespace', default='tsh')
def shell(db_uri, namespace='tsh'):
    """Report, for each series of the namespace, how many garbage chunks it has.

    Only series whose snapshot reports a non-empty garbage set are printed.
    """
    # NOTE(review): the Python name `shell` looks copy-pasted from the
    # `shell` command declared right after this one; the command is still
    # exposed as `garbage` thanks to the explicit name= above. Consider
    # renaming the function once callers have been checked.
    from tshistory.snapshot import Snapshot

    engine = create_engine(find_dburi(db_uri))
    tsh = timeseries(namespace)
    for name in tsh.list_series(engine):
        orphans = Snapshot(engine, tsh, name).garbage()
        if not orphans:
            # nothing unreachable for this series: stay silent
            continue
        print('************************', name, 'garbage =', len(orphans))
@tsh.command(name='shell')
@click.argument('db-uri')
@click.option('--namespace', default='tsh')
......
......@@ -296,3 +296,36 @@ class Snapshot(SeriesServices):
)
)
return series
def garbage(self):
    """Return the set of chunk ids that no series revision can reach.

    Simple rather than efficient: two queries are issued and the
    difference is computed in Python.

    * the first query starts from the `snapshot` column of the series
      table (the heads) and follows the `parent` links of the chunks
      recursively, yielding every reachable chunk id;
    * the second query lists every chunk id of the snapshot table.

    Whatever is in the second set but not in the first is garbage.
    Such chunks are created by strip operations.
    """
    tablename = self.tsh._serie_to_tablename(self.cn, self.seriename)
    # fully qualified snapshot table, shared by both queries
    snapshot_table = f'"{self.tsh.namespace}.snapshot"."{self.name}"'

    reachable_query = f"""
with recursive heads as (
select snapshot from "{self.tsh.namespace}.timeserie"."{tablename}"
),
allchunks as (
select chunks.id as cid,
chunks.parent as parent
from {snapshot_table} as chunks
where chunks.id in (select * from heads)
union
select chunks.id as cid,
chunks.parent as parent
from {snapshot_table} as chunks
join allchunks on chunks.id = allchunks.parent
)
select cid from allchunks
"""
    # rows are 1-tuples: unpack them to get the bare ids
    reachable = set()
    for (cid,) in self.cn.execute(reachable_query):
        reachable.add(cid)

    everything_query = f'select id from {snapshot_table} '
    everything = set()
    for (cid,) in self.cn.execute(everything_query).fetchall():
        everything.add(cid)

    return everything.difference(reachable)
......@@ -93,6 +93,12 @@ class timeseries(SeriesServices):
return self._update(cn, tablename, newts, seriename, author,
metadata, _insertion_date)
def list_series(self, cn):
    """Return the list of all primary series.

    The names are read from the `seriename` column of the `registry`
    table of this object's namespace, in the order the query yields them.
    """
    query = f'select seriename from "{self.namespace}".registry '
    rows = cn.execute(query)
    return [record.seriename for record in rows]
def get(self, cn, seriename, revision_date=None,
from_value_date=None, to_value_date=None,
_keep_nans=False):
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment