Commit fcb6f358 authored by Aurélien Campéas
Browse files

historycache: speed up the get operation

Doing a bisection should help with big insertion date lists.
parent a5dd1ce4221a
......@@ -8,7 +8,10 @@ import numpy as np
import pandas as pd
from tshistory.snapshot import Snapshot
from tshistory.util import threadpool
from tshistory.util import (
bisect_search,
threadpool
)
from tshistory.tsio import timeseries
from tshistory.testutil import (
assert_df,
......@@ -26,6 +29,18 @@ def utcdt(*dt):
return pd.Timestamp(datetime(*dt), tz='UTC')
def test_bisect():
    """Check bisect_search on a small sorted list.

    Covers probes below the range (-1), on an element, strictly between
    two elements, and above the range (len(values)).
    """
    values = [-4, -2, 1, 7]
    # (probe, expected index) pairs, in increasing probe order
    expectations = (
        (-5, -1),
        (-4, 0),
        (-3, 0),
        (0, 1),
        (1, 2),
        (3, 2),
        (7, 3),
        (8, 4),
    )
    for probe, expected in expectations:
        assert bisect_search(values, probe) == expected
def test_in_tx(tsh, engine):
assert tsh.type(engine, 'foo') == 'primary'
......
......@@ -11,6 +11,7 @@ from deprecated import deprecated
from sqlhelp import sqlfile, select, insert
from tshistory.util import (
bisect_search,
closed_overlaps,
num2float,
pruned_history,
......@@ -689,7 +690,6 @@ class timeseries(SeriesServices):
]
class historycache:
def __init__(self, tsh, cn, name,
......@@ -719,12 +719,12 @@ class historycache:
idates = self.idates
else:
idates = self.naive_idates
idx = len(idates)
for idate in reversed(idates):
idx -= 1
compidate = idate
if revision_date >= compidate:
return self.idates[idx]
idx = bisect_search(idates, revision_date)
if idx == -1:
return None
if idx >= len(idates):
idx = len(idates) - 1
return self.idates[idx]
def get(self, revision_date=None,
from_value_date=None,
......
......@@ -249,3 +249,37 @@ class unilist(list):
def append(self, element):
    """Append ``element`` after asserting it is not already present."""
    # membership is checked against the current contents of the list
    assert element not in self
    # delegate the actual insertion to the parent class
    super().append(element)
def bisect_search(values, value):
    """Return an index ``j`` such that ``values[j] <= value < values[j+1]``.

    ``values`` must be sorted in increasing order.

    Out-of-range results:

    * ``-1`` when ``value`` is below ``values[0]``, or when ``values``
      is empty
    * ``len(values)`` when ``value`` is above ``values[-1]``

    A ``value`` equal to the last element yields ``len(values) - 1``.

    thanks to https://stackoverflow.com/questions/2566412/find-nearest-value-in-numpy-array
    """
    from bisect import bisect_right

    # use len() rather than truthiness: array-like inputs may not
    # support a boolean test
    n = len(values)
    if n == 0:
        # nothing to compare against: report "below range" instead of
        # failing on values[0]
        return -1

    first, last = values[0], values[-1]
    if value < first:
        return -1
    if value > last:
        return n
    if value == first:
        return 0

    # here first < value <= last: pick the rightmost index whose element
    # is <= value (this is n - 1 when value == last)
    return bisect_right(values, value) - 1
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment