# coding: utf-8
import calendar
from datetime import datetime, timedelta
from pathlib import Path
from time import time
from functools import partial
import pytz

from dateutil import parser
import pytest

import numpy as np
import pandas as pd

from tshistory.snapshot import Snapshot
from tshistory.testutil import assert_group_equals, genserie, assert_df

DATADIR = Path(__file__).parent / 'data'


def utcdt(*dt):
    return pd.Timestamp(datetime(*dt), tz='UTC')


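# newchangeset groups several inserts into one changeset: they share a single
# author and insertion date and show up as one log entry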
def test_changeset(engine, tsh):
    index = pd.date_range(start=datetime(2017, 1, 1), freq='D', periods=3)
    data = [1., 2., 3.]

    with engine.connect() as cn:
        with tsh.newchangeset(cn, 'babar', _insertion_date=utcdt(2020, 1, 1)):
            tsh.insert(cn, pd.Series(data, index=index), 'ts_values', author='WONTBEUSED')
            tsh.insert(cn, pd.Series(['a', 'b', 'c'], index=index), 'ts_othervalues')

        # bogus author won't show up
        assert tsh.log(engine)[0]['author'] == 'babar'

        g = tsh.get_group(engine, 'ts_values')
        g2 = tsh.get_group(engine, 'ts_othervalues')
        assert_group_equals(g, g2)

        with pytest.raises(AssertionError):
            tsh.insert(engine, pd.Series([2, 3, 4], index=index), 'ts_values')

        with engine.connect() as cn:
            data.append(data.pop(0))
            with tsh.newchangeset(cn, 'celeste', _insertion_date=utcdt(2020, 1, 1)):
                tsh.insert(cn, pd.Series(data, index=index), 'ts_values')
                # below should be a noop
                tsh.insert(cn, pd.Series(['a', 'b', 'c'], index=index), 'ts_othervalues')

    g = tsh.get_group(engine, 'ts_values')
    assert ['ts_values'] == list(g.keys())

    assert_df("""
2017-01-01    2.0
2017-01-02    3.0
2017-01-03    1.0
""", tsh.get(engine, 'ts_values'))

    assert_df("""
2017-01-01    a
2017-01-02    b
2017-01-03    c
""", tsh.get(engine, 'ts_othervalues'))

    log = tsh.log(engine, names=['ts_values', 'ts_othervalues'])
    assert [
        {'author': 'babar',
         'rev': 1,
         'date': pd.Timestamp('2020-01-01 00:00:00+0000', tz='UTC'),
         'meta': {},
         'names': ['ts_values', 'ts_othervalues']},
        {'author': 'celeste',
         'rev': 2,
         'meta': {},
         'date': pd.Timestamp('2020-01-01 00:00:00+0000', tz='UTC'),
         'names': ['ts_values']}
    ] == log

    log = tsh.log(engine, names=['ts_othervalues'])
    assert len(log) == 1
    assert log[0]['rev'] == 1
    assert log[0]['names'] == ['ts_values', 'ts_othervalues']

    log = tsh.log(engine, fromrev=2)
    assert len(log) == 1

    log = tsh.log(engine, torev=1)
    assert len(log) == 1

    info = tsh.info(engine)
    assert {
        'changeset count': 2,
        'serie names': ['ts_othervalues', 'ts_values'],
        'series count': 2
    } == info


def test_strip(engine, tsh):
    for i in range(1, 5):
        pubdate = utcdt(2017, 1, i)
        ts = genserie(datetime(2017, 1, 10), 'H', 1 + i)
        with tsh.newchangeset(engine, 'babar', _insertion_date=pubdate):
            tsh.insert(engine, ts, 'xserie')
        # also insert something completely unrelated
        tsh.insert(engine, genserie(datetime(2018, 1, 1), 'D', 1 + i), 'yserie', 'celeste')

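    # changeset_at resolves the changeset id recorded at (or around) a given
    # insertion date; mode='before'/'after' select the neighbouring revisions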
    csida = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3))
    assert csida is not None
    csidb = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3, 1), mode='before')
    csidc = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3, 1), mode='after')
    assert csidb < csida < csidc

    log = tsh.log(engine, names=['xserie', 'yserie'])
    assert [(idx, l['author']) for idx, l in enumerate(log, start=1)
    ] == [
        (1, 'babar'),
        (2, 'celeste'),
        (3, 'babar'),
        (4, 'celeste'),
        (5, 'babar'),
        (6, 'celeste'),
        (7, 'babar'),
        (8, 'celeste')
    ]

    h = tsh.get_history(engine, 'xserie')
    assert_df("""
insertion_date             value_date         
2017-01-01 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
2017-01-02 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
                           2017-01-10 02:00:00    2.0
2017-01-03 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
                           2017-01-10 02:00:00    2.0
                           2017-01-10 03:00:00    3.0
2017-01-04 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
                           2017-01-10 02:00:00    2.0
                           2017-01-10 03:00:00    3.0
                           2017-01-10 04:00:00    4.0
""", h)

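    # strip drops every changeset of 'xserie' from csid (2017-01-03) onward;
    # the history, log and storage checks below show the effect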
    csid = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3))
    with engine.connect() as cn:
        tsh.strip(cn, 'xserie', csid)

    assert_df("""
insertion_date             value_date         
2017-01-01 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
2017-01-02 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
                           2017-01-10 02:00:00    2.0
""", tsh.get_history(engine, 'xserie'))

    assert_df("""
2017-01-10 00:00:00    0.0
2017-01-10 01:00:00    1.0
2017-01-10 02:00:00    2.0
""", tsh.get(engine, 'xserie'))

    # internal structure is ok
    with engine.connect() as cn:
        cn.execute('set search_path to "{}.timeserie"'.format(tsh.namespace))
        df = pd.read_sql("select id, diff from xserie order by id", cn)
        df['diff'] = df['diff'].apply(lambda x: False if x is None else True)

    assert_df("""
id   diff
0   1  False
1   2   True
""", df)

    log = tsh.log(engine, names=['xserie', 'yserie'])
    # 5 and 7 have disappeared
    assert [l['author'] for l in log
    ] == ['babar', 'celeste', 'babar', 'celeste', 'celeste', 'celeste']

    log = tsh.log(engine, stripped=True, names=['xserie', 'yserie'])
    assert [list(l['meta'].values())[0][:-1] + 'X' for l in log if l['meta']
    ] == [
        'got stripped from X',
        'got stripped from X'
    ]


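# tz-aware indexes are normalized to UTC on insert and come back tz-aware in UTC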
def test_tstamp_roundtrip(engine, tsh):
    ts = genserie(datetime(2017, 10, 28, 23),
                  'H', 4, tz='UTC')
    ts.index = ts.index.tz_convert('Europe/Paris')

    assert_df("""
2017-10-29 01:00:00+02:00    0
2017-10-29 02:00:00+02:00    1
2017-10-29 02:00:00+01:00    2
2017-10-29 03:00:00+01:00    3
Freq: H
    """, ts)

    tsh.insert(engine, ts, 'tztest', 'Babar')
    back = tsh.get(engine, 'tztest')

    # though unlocalized, we understand it's been normalized to utc
    assert_df("""
2017-10-28 23:00:00+00:00    0.0
2017-10-29 00:00:00+00:00    1.0
2017-10-29 01:00:00+00:00    2.0
2017-10-29 02:00:00+00:00    3.0
""", back)

    assert (ts.index == back.index).all()
    assert str(back.index.dtype) == 'datetime64[ns, UTC]'


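# multi-index series: every index level is tz-aware and normalized to UTC;
# inserts with an incompatible index shape are rejected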
def test_multi_index_aware(engine, tsh):
    ts_multi_aware = genserie(
        start=pd.Timestamp(
            2017, 10, 28, 23
        ).tz_localize('UTC').tz_convert('Europe/Paris'),
        freq=['15T', '30T', '60T'],
        repeat=10,
        tz='Europe/Paris',
        name='ts_multi_aware',
    )
    ts_multi_aware.index.rename(['a', 'b', 'c'], inplace=True)

    assert_df("""
a                          b                          c                        
2017-10-29 01:00:00+02:00  2017-10-29 01:00:00+02:00  2017-10-29 01:00:00+02:00    0
2017-10-29 01:15:00+02:00  2017-10-29 01:30:00+02:00  2017-10-29 02:00:00+02:00    1
2017-10-29 01:30:00+02:00  2017-10-29 02:00:00+02:00  2017-10-29 02:00:00+01:00    2
2017-10-29 01:45:00+02:00  2017-10-29 02:30:00+02:00  2017-10-29 03:00:00+01:00    3
2017-10-29 02:00:00+02:00  2017-10-29 02:00:00+01:00  2017-10-29 04:00:00+01:00    4
2017-10-29 02:15:00+02:00  2017-10-29 02:30:00+01:00  2017-10-29 05:00:00+01:00    5
2017-10-29 02:30:00+02:00  2017-10-29 03:00:00+01:00  2017-10-29 06:00:00+01:00    6
2017-10-29 02:45:00+02:00  2017-10-29 03:30:00+01:00  2017-10-29 07:00:00+01:00    7
2017-10-29 02:00:00+01:00  2017-10-29 04:00:00+01:00  2017-10-29 08:00:00+01:00    8
2017-10-29 02:15:00+01:00  2017-10-29 04:30:00+01:00  2017-10-29 09:00:00+01:00    9
""", ts_multi_aware)

    tsh.insert(engine, ts_multi_aware, 'ts_multi_aware', 'test')
    ts_aware = tsh.get(engine, 'ts_multi_aware')

    assert_df("""
ts_multi_aware
a                         b                         c                                        
2017-10-28 23:00:00+00:00 2017-10-28 23:00:00+00:00 2017-10-28 23:00:00+00:00             0.0
2017-10-28 23:15:00+00:00 2017-10-28 23:30:00+00:00 2017-10-29 00:00:00+00:00             1.0
2017-10-28 23:30:00+00:00 2017-10-29 00:00:00+00:00 2017-10-29 01:00:00+00:00             2.0
2017-10-28 23:45:00+00:00 2017-10-29 00:30:00+00:00 2017-10-29 02:00:00+00:00             3.0
2017-10-29 00:00:00+00:00 2017-10-29 01:00:00+00:00 2017-10-29 03:00:00+00:00             4.0
2017-10-29 00:15:00+00:00 2017-10-29 01:30:00+00:00 2017-10-29 04:00:00+00:00             5.0
2017-10-29 00:30:00+00:00 2017-10-29 02:00:00+00:00 2017-10-29 05:00:00+00:00             6.0
2017-10-29 00:45:00+00:00 2017-10-29 02:30:00+00:00 2017-10-29 06:00:00+00:00             7.0
2017-10-29 01:00:00+00:00 2017-10-29 03:00:00+00:00 2017-10-29 07:00:00+00:00             8.0
2017-10-29 01:15:00+00:00 2017-10-29 03:30:00+00:00 2017-10-29 08:00:00+00:00             9.0
    """, pd.DataFrame(ts_aware.sort_index()))
    # Note: the columns are returned in alphabetical order

    ts = tsh.get(engine, 'ts_multi_aware',
                 from_value_date=pd.Timestamp(2017, 10, 29, 0).tz_localize('UTC'),
                 to_value_date=pd.Timestamp(2017, 10, 29, 1).tz_localize('UTC'))
    assert_df("""
a                          b                          c                        
2017-10-29 00:00:00+00:00  2017-10-29 01:00:00+00:00  2017-10-29 03:00:00+00:00    4.0
2017-10-29 00:15:00+00:00  2017-10-29 01:30:00+00:00  2017-10-29 04:00:00+00:00    5.0
2017-10-29 00:30:00+00:00  2017-10-29 02:00:00+00:00  2017-10-29 05:00:00+00:00    6.0
2017-10-29 00:45:00+00:00  2017-10-29 02:30:00+00:00  2017-10-29 06:00:00+00:00    7.0
2017-10-29 01:00:00+00:00  2017-10-29 03:00:00+00:00  2017-10-29 07:00:00+00:00    8.0
    """, ts)

    ts = genserie(datetime(2010, 1, 1), 'D', 10)
    with pytest.raises(Exception) as err:
        tsh.insert(engine, ts, 'ts_multi_aware', 'test')
    assert err.value.args[0] == 'Incompatible index types'

    ts = genserie(
        start=pd.Timestamp(
            2017, 10, 28, 23
        ).tz_localize('UTC').tz_convert('Europe/Paris'),
        freq=['15T', '30T'],
        repeat=10,
        tz='Europe/Paris',
        name='ts_multi_aware',
    )
    ts.index.rename(['a', 'b'], inplace=True)

    with pytest.raises(Exception) as err:
        tsh.insert(engine, ts, 'ts_multi_aware', 'test')
    assert err.value.args[0] == "Incompatible multi indexes: ['a', 'b', 'c'] vs ['a', 'b']"

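
# successive inserts of overlapping series are stored as diffs against the
# previous state; get() reconstructs the latest view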
def test_differential(engine, tsh):
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 10)
    tsh.insert(engine, ts_begin, 'ts_test', 'test')

    assert tsh.exists(engine, 'ts_test')
    assert not tsh.exists(engine, 'this_does_not_exist')

    assert_df("""
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    3.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
""", tsh.get(engine, 'ts_test'))

    # we should detect the emission of a message
    tsh.insert(engine, ts_begin, 'ts_test', 'babar')

    assert_df("""
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    3.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
""", tsh.get(engine, 'ts_test'))

    ts_slight_variation = ts_begin.copy()
    ts_slight_variation.iloc[3] = 0
    ts_slight_variation.iloc[6] = 0
    tsh.insert(engine, ts_slight_variation, 'ts_test', 'celeste')

    assert_df("""
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    0.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    0.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
""", tsh.get(engine, 'ts_test'))

    ts_longer = genserie(datetime(2010, 1, 3), 'D', 15)
    ts_longer.iloc[1] = 2.48
    ts_longer.iloc[3] = 3.14
    ts_longer.iloc[5] = ts_begin.iloc[7]

    tsh.insert(engine, ts_longer, 'ts_test', 'test')

    assert_df("""
2010-01-01     0.00
2010-01-02     1.00
2010-01-03     0.00
2010-01-04     2.48
2010-01-05     2.00
2010-01-06     3.14
2010-01-07     4.00
2010-01-08     7.00
2010-01-09     6.00
2010-01-10     7.00
2010-01-11     8.00
2010-01-12     9.00
2010-01-13    10.00
2010-01-14    11.00
2010-01-15    12.00
2010-01-16    13.00
2010-01-17    14.00
""", tsh.get(engine, 'ts_test'))

    # start testing manual overrides
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 5, initval=[2])
    ts_begin.loc['2010-01-04'] = -1
    tsh.insert(engine, ts_begin, 'ts_mixte', 'test')

    # -1 represents bogus upstream data
    assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
""", tsh.get(engine, 'ts_mixte'))

    # refresh all the period + 1 extra data point
    ts_more = genserie(datetime(2010, 1, 2), 'D', 5, [2])
    ts_more.loc['2010-01-04'] = -1
    tsh.insert(engine, ts_more, 'ts_mixte', 'test')

    assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
""", tsh.get(engine, 'ts_mixte'))

    # just append an extra data point
    # with no intersection with the previous ts
    ts_one_more = genserie(datetime(2010, 1, 7), 'D', 1, [3])
    tsh.insert(engine, ts_one_more, 'ts_mixte', 'test')

    assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
""", tsh.get(engine, 'ts_mixte'))

    with engine.connect() as cn:
        cn.execute('set search_path to "{0}.timeserie", {0}, public'.format(tsh.namespace))
        allts = pd.read_sql("select name, table_name from registry "
                            "where name in ('ts_test', 'ts_mixte')",
                            cn)

        assert_df("""
name              table_name
0   ts_test   {0}.timeserie.ts_test
1  ts_mixte  {0}.timeserie.ts_mixte
""".format(tsh.namespace), allts)
        assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
""", tsh.get(cn, 'ts_mixte',
             revision_date=datetime.now()))


def test_bad_import(engine, tsh):
    # the data were parsed as dates by pd.read_json()
    df_result = pd.read_csv(str(DATADIR / 'test_data.csv'))
    df_result['Gas Day'] = df_result['Gas Day'].apply(parser.parse, dayfirst=True, yearfirst=False)
    df_result.set_index('Gas Day', inplace=True)
    ts = df_result['SC']

    tsh.insert(engine, ts, 'SND_SC', 'test')
    result = tsh.get(engine, 'SND_SC')
    assert result.dtype == 'float64'

    # insertion of empty ts
    ts = pd.Series(name='truc', dtype='object')
    tsh.insert(engine, ts, 'empty_ts', 'test')
    assert tsh.get(engine, 'empty_ts') is None

    # nan in ts
    # all na
    ts = genserie(datetime(2010, 1, 10), 'D', 10, [np.nan], name='truc')
    tsh.insert(engine, ts, 'test_nan', 'test')
    assert tsh.get(engine, 'test_nan') is None

    # mixed na
    ts = pd.Series([np.nan] * 5 + [3] * 5,
                   index=pd.date_range(start=datetime(2010, 1, 10),
                                       freq='D', periods=10), name='truc')
    tsh.insert(engine, ts, 'test_nan', 'test')
    result = tsh.get(engine, 'test_nan')

    tsh.insert(engine, ts, 'test_nan', 'test')
    result = tsh.get(engine, 'test_nan')
    assert_df("""
2010-01-15    3.0
2010-01-16    3.0
2010-01-17    3.0
2010-01-18    3.0
2010-01-19    3.0
""", result)

    # get_ts with name not in database
    tsh.get(engine, 'inexisting_name', 'test')


def test_revision_date(engine, tsh):
    # we prepare a good joke for the end of the test
    ival = Snapshot._interval
    Snapshot._interval = 3

    for i in range(1, 5):
        with engine.connect() as cn:
            with tsh.newchangeset(cn, 'test',
                                  _insertion_date=utcdt(2016, 1, i)):
                tsh.insert(cn, genserie(datetime(2017, 1, i), 'D', 3, [i]), 'revdate')

    # end of prologue, now some real meat
    idate0 = pd.Timestamp('2015-1-1 00:00:00', tz='UTC')
    with tsh.newchangeset(engine, 'test', _insertion_date=idate0):
        ts = genserie(datetime(2010, 1, 4), 'D', 4, [0], name='truc')
        tsh.insert(engine, ts, 'ts_through_time')
        assert idate0 == tsh.latest_insertion_date(engine, 'ts_through_time')

    idate1 = pd.Timestamp('2015-1-1 15:45:23', tz='UTC')
    with tsh.newchangeset(engine, 'test', _insertion_date=idate1):
        ts = genserie(datetime(2010, 1, 4), 'D', 4, [1], name='truc')
        tsh.insert(engine, ts, 'ts_through_time')
        assert idate1 == tsh.latest_insertion_date(engine, 'ts_through_time')

    idate2 = pd.Timestamp('2015-1-2 15:43:23', tz='UTC')
    with tsh.newchangeset(engine, 'test', _insertion_date=idate2):
        ts = genserie(datetime(2010, 1, 4), 'D', 4, [2], name='truc')
        tsh.insert(engine, ts, 'ts_through_time')
        assert idate2 == tsh.latest_insertion_date(engine, 'ts_through_time')

    idate3 = pd.Timestamp('2015-1-3', tz='UTC')
    with tsh.newchangeset(engine, 'test', _insertion_date=idate3):
        ts = genserie(datetime(2010, 1, 4), 'D', 4, [3], name='truc')
        tsh.insert(engine, ts, 'ts_through_time')
        assert idate3 == tsh.latest_insertion_date(engine, 'ts_through_time')

    ts = tsh.get(engine, 'ts_through_time')

    assert_df("""
2010-01-04    3.0
2010-01-05    3.0
2010-01-06    3.0
2010-01-07    3.0
""", ts)

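    # revision_date reads the series as it was at that (past) insertion
    # moment; each query below lands on an earlier revision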
    ts = tsh.get(engine, 'ts_through_time',
                 revision_date=datetime(2015, 1, 2, 18, 43, 23))

    assert_df("""
2010-01-04    2.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    2.0
""", ts)

    ts = tsh.get(engine, 'ts_through_time',
                 revision_date=datetime(2015, 1, 1, 18, 43, 23))

    assert_df("""
2010-01-04    1.0
2010-01-05    1.0
2010-01-06    1.0
2010-01-07    1.0
""", ts)

    ts = tsh.get(engine, 'ts_through_time',
                 revision_date=datetime(2014, 1, 1, 18, 43, 23))

    assert ts is None

    # epilogue: back to the revdate issue
    assert_df("""
2017-01-01    1.0
2017-01-02    2.0
2017-01-03    3.0
2017-01-04    4.0
2017-01-05    4.0
2017-01-06    4.0
""", tsh.get(engine, 'revdate'))

    oldstate = tsh.get(engine, 'revdate', revision_date=datetime(2016, 1, 2))
    assert_df("""
2017-01-01    1.0
2017-01-02    2.0
2017-01-03    2.0
2017-01-04    2.0
""", oldstate)
    Snapshot._interval = ival


def test_snapshots(engine, tsh):
    baseinterval = Snapshot._interval
    Snapshot._interval = 4
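    # the interval is lowered so that snapshot chunks get created every few
    # insertions in the loop below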

    with engine.connect() as cn:
        for tscount in range(1, 11):
            ts = genserie(datetime(2015, 1, 1), 'D', tscount, [1])
            diff = tsh.insert(cn, ts, 'growing', 'babar')
            assert diff.index[0] == diff.index[-1] == ts.index[-1]

    diff = tsh.insert(engine, ts, 'growing', 'babar')
    assert diff is None

    with engine.connect() as cn:
        cn.execute('set search_path to "{}.snapshot"'.format(tsh.namespace))
#         df = pd.read_sql("select cset from growing",
#                          cn)
#         assert_df("""
# cset
# 0     1
# 1     4
# 2     8
# 3    10
# """, df)

        ts = tsh.get(cn, 'growing')
        assert_df("""
2015-01-01    1.0
2015-01-02    1.0
2015-01-03    1.0
2015-01-04    1.0
2015-01-05    1.0
2015-01-06    1.0
2015-01-07    1.0
2015-01-08    1.0
2015-01-09    1.0
2015-01-10    1.0
""", ts)

        df = pd.read_sql("select id, chunk from growing order by id", cn)
        df['chunk'] = df['chunk'].apply(lambda x: 0 if x is None else len(x))

        assert_df("""
id  chunk
0   1     35
1   4     47
2   8     59
3  10     67
""", df)

    # table = tsh._get_ts_table(engine, 'growing')
    _, snap = Snapshot(engine, tsh, 'growing').find()
    assert (ts == snap).all()
    Snapshot._interval = baseinterval


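# inserting nan over an existing point erases it; a first insertion made only
# of nans yields no series at all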
def test_deletion(engine, tsh):
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 11)
    ts_begin.iloc[-1] = np.nan
    tsh.insert(engine, ts_begin, 'ts_del', 'test')

    ts = Snapshot(engine, tsh, 'ts_del').build_upto()
    assert ts.iloc[-1] == 9.0

    ts_begin.iloc[0] = np.nan
    ts_begin.iloc[3] = np.nan

    tsh.insert(engine, ts_begin, 'ts_del', 'test')

    assert_df("""
2010-01-02    1.0
2010-01-03    2.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
""", tsh.get(engine, 'ts_del'))

    ts2 = tsh.get(engine, 'ts_del',
                 # force snapshot reconstruction feature
                 revision_date=datetime(2038, 1, 1))
    assert (tsh.get(engine, 'ts_del') == ts2).all()

    ts_begin.iloc[0] = 42
    ts_begin.iloc[3] = 23

    tsh.insert(engine, ts_begin, 'ts_del', 'test')

    assert_df("""
2010-01-01    42.0
2010-01-02     1.0
2010-01-03     2.0
2010-01-04    23.0
2010-01-05     4.0
2010-01-06     5.0
2010-01-07     6.0
2010-01-08     7.0
2010-01-09     8.0
2010-01-10     9.0
""", tsh.get(engine, 'ts_del'))

    # now with string!

    ts_string = genserie(datetime(2010, 1, 1), 'D', 10, ['machin'])
    tsh.insert(engine, ts_string, 'ts_string_del', 'test')

    ts_string[4] = None
    ts_string[5] = None

    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
    assert_df("""
2010-01-01    machin
2010-01-02    machin
2010-01-03    machin
2010-01-04    machin
2010-01-07    machin
2010-01-08    machin
2010-01-09    machin
2010-01-10    machin
""", tsh.get(engine, 'ts_string_del'))

    ts_string[4] = 'truc'
    ts_string[6] = 'truc'

    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
    assert_df("""
2010-01-01    machin
2010-01-02    machin
2010-01-03    machin
2010-01-04    machin
2010-01-05      truc
2010-01-07      truc
2010-01-08    machin
2010-01-09    machin
2010-01-10    machin
""", tsh.get(engine, 'ts_string_del'))

    ts_string[ts_string.index] = np.nan
    tsh.insert(engine, ts_string, 'ts_string_del', 'test')

    erased = tsh.get(engine, 'ts_string_del')
    assert len(erased) == 0

    # first insertion with only nan
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 10, [np.nan])
    tsh.insert(engine, ts_begin, 'ts_null', 'test')

    assert tsh.get(engine, 'ts_null') is None
    # exhibit issue with nans handling
    ts_repushed = genserie(datetime(2010, 1, 1), 'D', 11)
    ts_repushed[0:3] = np.nan

    assert_df("""
2010-01-01     NaN
2010-01-02     NaN
2010-01-03     NaN
2010-01-04     3.0
2010-01-05     4.0
2010-01-06     5.0
2010-01-07     6.0
2010-01-08     7.0
2010-01-09     8.0
2010-01-10     9.0
2010-01-11    10.0
Freq: D
""", ts_repushed)

    tsh.insert(engine, ts_repushed, 'ts_repushed', 'test')
    diff = tsh.insert(engine, ts_repushed, 'ts_repushed', 'test')
    assert diff is None

    # there is no difference
    assert 0 == len(tsh.diff(ts_repushed, ts_repushed))

    ts_add = genserie(datetime(2010, 1, 1), 'D', 15)
    ts_add.iloc[0] = np.nan
    ts_add.iloc[13:] = np.nan
    ts_add.iloc[8] = np.nan
    diff = tsh.diff(ts_repushed, ts_add)

    assert_df("""
2010-01-02     1.0
2010-01-03     2.0
2010-01-09     NaN
2010-01-12    11.0
2010-01-13    12.0""", diff.sort_index())
    # value on nan => value
    # nan on value => nan
    # nan on nan => Nothing
    # nan on nothing => Nothing

    # full erasing
    # numeric
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 4)
    tsh.insert(engine, ts_begin, 'ts_full_del', 'test')

    ts_begin.iloc[:] = np.nan
    tsh.insert(engine, ts_begin, 'ts_full_del', 'test')

    ts_end = genserie(datetime(2010, 1, 1), 'D', 4)
    tsh.insert(engine, ts_end, 'ts_full_del', 'test')

    # string

    ts_begin = genserie(datetime(2010, 1, 1), 'D', 4, ['text'])
    tsh.insert(engine, ts_begin, 'ts_full_del_str', 'test')

    ts_begin.iloc[:] = np.nan
    tsh.insert(engine, ts_begin, 'ts_full_del_str', 'test')

    ts_end = genserie(datetime(2010, 1, 1), 'D', 4, ['text'])
    tsh.insert(engine, ts_end, 'ts_full_del_str', 'test')


def test_multi_index(engine, tsh):
    appdate_0 = pd.DatetimeIndex(start=datetime(2015, 1, 1),
                                 end=datetime(2015, 1, 2),
                                 freq='D').values
    pubdate_0 = [pd.datetime(2015, 1, 11, 12, 0, 0)] * 2
    insertion_date_0 = [pd.datetime(2015, 1, 11, 12, 30, 0)] * 2

    multi = [
        appdate_0,
        np.array(pubdate_0),
        np.array(insertion_date_0)
    ]

    ts_multi = pd.Series(range(2), index=multi)
    ts_multi.index.rename(['b', 'c', 'a'], inplace=True)

    tsh.insert(engine, ts_multi, 'ts_multi_simple', 'test')

    ts = tsh.get(engine, 'ts_multi_simple')
    assert_df("""
                                                    ts_multi_simple
a                   b          c                                   
2015-01-11 12:30:00 2015-01-01 2015-01-11 12:00:00              0.0
                    2015-01-02 2015-01-11 12:00:00              1.0
""", pd.DataFrame(ts))

    diff = tsh.insert(engine, ts_multi, 'ts_multi_simple', 'test')
    assert diff is None

    ts_multi_2 = pd.Series([0, 2], index=multi)
    ts_multi_2.index.rename(['b', 'c', 'a'], inplace=True)

    tsh.insert(engine, ts_multi_2, 'ts_multi_simple', 'test')
    ts = tsh.get(engine, 'ts_multi_simple')

    assert_df("""
                                                    ts_multi_simple
a                   b          c                                   
2015-01-11 12:30:00 2015-01-01 2015-01-11 12:00:00              0.0
                    2015-01-02 2015-01-11 12:00:00              2.0
""", pd.DataFrame(ts))

    # bigger ts
    appdate_0 = pd.DatetimeIndex(start=datetime(2015, 1, 1),
                                 end=datetime(2015, 1, 4),
                                 freq='D').values
    pubdate_0 = [pd.datetime(2015, 1, 11, 12, 0, 0)] * 4
    insertion_date_0 = [pd.datetime(2015, 1, 11, 12, 30, 0)] * 4

    appdate_1 = pd.DatetimeIndex(start=datetime(2015, 1, 1),
                                 end=datetime(2015, 1, 4),
                                 freq='D').values

    pubdate_1 = [pd.datetime(2015, 1, 21, 12, 0, 0)] * 4
    insertion_date_1 = [pd.datetime(2015, 1, 21, 12, 30, 0)] * 4

    multi = [
        np.concatenate([appdate_0, appdate_1]),
        np.array(pubdate_0 + pubdate_1),
        np.array(insertion_date_0 + insertion_date_1)
    ]

    ts_multi = pd.Series(range(8), index=multi)
    ts_multi.index.rename(['a', 'c', 'b'], inplace=True)

    tsh.insert(engine, ts_multi, 'ts_multi', 'test')
    ts = tsh.get(engine, 'ts_multi')

    assert_df("""
                                                    ts_multi
a          b                   c                            
2015-01-01 2015-01-11 12:30:00 2015-01-11 12:00:00       0.0
           2015-01-21 12:30:00 2015-01-21 12:00:00       4.0
2015-01-02 2015-01-11 12:30:00 2015-01-11 12:00:00       1.0
           2015-01-21 12:30:00 2015-01-21 12:00:00       5.0
2015-01-03 2015-01-11 12:30:00 2015-01-11 12:00:00       2.0
           2015-01-21 12:30:00 2015-01-21 12:00:00       6.0
2015-01-04 2015-01-11 12:30:00 2015-01-11 12:00:00       3.0
           2015-01-21 12:30:00 2015-01-21 12:00:00       7.0
    """, pd.DataFrame(ts.sort_index()))
    # Note: the columns are returned in alphabetical order

    appdate_2 = pd.DatetimeIndex(start=datetime(2015, 1, 1),
                                 end=datetime(2015, 1, 4),
                                 freq='D').values
    pubdate_2 = [pd.datetime(2015, 1, 31, 12, 0, 0)] * 4
    insertion_date_2 = [pd.datetime(2015, 1, 31, 12, 30, 0)] * 4

    multi_2 = [
        np.concatenate([appdate_1, appdate_2]),
        np.array(pubdate_1 + pubdate_2),
        np.array(insertion_date_1 + insertion_date_2)
    ]

    ts_multi_2 = pd.Series([4] * 8, index=multi_2)
    ts_multi_2.index.rename(['a', 'c', 'b'], inplace=True)

    # A second ts is inserted with some index in common with the first
    # one: appdate_1, pubdate_1, and insertion_date_1. The value is set
    # to 4, which matches the previous value of the "2015-01-01" point.

    diff = tsh.insert(engine, ts_multi_2, 'ts_multi', 'test')
    assert_df("""
                                                    ts_multi
a          b                   c                            
2015-01-01 2015-01-31 12:30:00 2015-01-31 12:00:00       4.0
2015-01-02 2015-01-21 12:30:00 2015-01-21 12:00:00       4.0
           2015-01-31 12:30:00 2015-01-31 12:00:00       4.0
2015-01-03 2015-01-21 12:30:00 2015-01-21 12:00:00       4.0
           2015-01-31 12:30:00 2015-01-31 12:00:00       4.0
2015-01-04 2015-01-21 12:30:00 2015-01-21 12:00:00       4.0
           2015-01-31 12:30:00 2015-01-31 12:00:00       4.0
        """, pd.DataFrame(diff.sort_index()))
    # the differential skips a value for "2015-01-01"
    # which does not change from the previous ts

    ts = tsh.get(engine, 'ts_multi')
    assert_df("""
                                                    ts_multi
a          b                   c                            
2015-01-01 2015-01-11 12:30:00 2015-01-11 12:00:00       0.0
           2015-01-21 12:30:00 2015-01-21 12:00:00       4.0
           2015-01-31 12:30:00 2015-01-31 12:00:00       4.0
2015-01-02 2015-01-11 12:30:00 2015-01-11 12:00:00       1.0
           2015-01-21 12:30:00 2015-01-21 12:00:00       4.0
           2015-01-31 12:30:00 2015-01-31 12:00:00       4.0
2015-01-03 2015-01-11 12:30:00 2015-01-11 12:00:00       2.0
           2015-01-21 12:30:00 2015-01-21 12:00:00       4.0
           2015-01-31 12:30:00 2015-01-31 12:00:00       4.0
2015-01-04 2015-01-11 12:30:00 2015-01-11 12:00:00       3.0
           2015-01-21 12:30:00 2015-01-21 12:00:00       4.0
           2015-01-31 12:30:00 2015-01-31 12:00:00       4.0
        """, pd.DataFrame(ts.sort_index()))

    # the resulting ts now has 3 values for each point in 'a'


def test_multi_index_get_history(engine, tsh):
    appdate = pd.DatetimeIndex(
        start=datetime(2015, 1, 1),
        end=datetime(2015, 1, 2),
        freq='D'
    ).values
    forecast_date = [pd.Timestamp(2015, 1, 11, 12, 0, 0)] * 2
    multi = [
        appdate,
        np.array(forecast_date),
    ]

    ts_multi = pd.Series(range(2), index=multi)
    ts_multi.index.rename(['app_date', 'fc_date'], inplace=True)

    tsh.insert(engine, ts_multi, 'ts_mi', 'Babar',
               _insertion_date=utcdt(2015, 1, 11, 12, 30, 0))

    ts = tsh.get_history(engine, 'ts_mi')
    assert_df("""
insertion_date             app_date    fc_date            
2015-01-11 12:30:00+00:00  2015-01-01  2015-01-11 12:00:00    0.0
                           2015-01-02  2015-01-11 12:00:00    1.0
""", ts)

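    # diffmode=True yields, for each insertion date, only the points that
    # changed relative to the previous revision, not the full series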
    ts = tsh.get_history(engine, 'ts_mi', diffmode=True)

    assert_df("""
insertion_date             app_date    fc_date            
2015-01-11 12:30:00+00:00  2015-01-01  2015-01-11 12:00:00    0.0
                           2015-01-02  2015-01-11 12:00:00    1.0
""", ts)

    # new forecast
    appdate = pd.DatetimeIndex(
        start=datetime(2015, 1, 1),
        end=datetime(2015, 1, 2),
        freq='D'
    ).values
    forecast_date = [pd.Timestamp(2015, 1, 11, 13, 0, 0)] * 2
    multi = [
        appdate,
        np.array(forecast_date),
    ]

    ts_multi = pd.Series((x+.1 for x in range(2)), index=multi)
    ts_multi.index.rename(['app_date', 'fc_date'], inplace=True)

    tsh.insert(engine, ts_multi, 'ts_mi', 'Babar',
               _insertion_date=utcdt(2015, 1, 11, 13, 30, 0))

    ts = tsh.get_history(engine, 'ts_mi')
    assert_df("""
insertion_date             app_date    fc_date            
2015-01-11 12:30:00+00:00  2015-01-01  2015-01-11 12:00:00    0.0
                           2015-01-02  2015-01-11 12:00:00    1.0
2015-01-11 13:30:00+00:00  2015-01-01  2015-01-11 12:00:00    0.0
                                       2015-01-11 13:00:00    0.1
                           2015-01-02  2015-01-11 12:00:00    1.0
                                       2015-01-11 13:00:00    1.1
""", ts)

    ts = tsh.get_history(engine, 'ts_mi', diffmode=True)
    assert_df("""
insertion_date             app_date    fc_date            
2015-01-11 12:30:00+00:00  2015-01-01  2015-01-11 12:00:00    0.0
                           2015-01-02  2015-01-11 12:00:00    1.0
2015-01-11 13:30:00+00:00  2015-01-01  2015-01-11 13:00:00    0.1
                           2015-01-02  2015-01-11 13:00:00    1.1
""", ts)


def test_get_history(engine, tsh):
    for numserie in (1, 2, 3):
        with engine.connect() as cn:
            with tsh.newchangeset(cn, 'aurelien.campeas@pythonian.fr',
                                  _insertion_date=utcdt(2017, 2, numserie)):
                tsh.insert(cn, genserie(datetime(2017, 1, 1), 'D', numserie), 'smallserie')

    ts = tsh.get(engine, 'smallserie')
    assert_df("""
2017-01-01    0.0
2017-01-02    1.0
2017-01-03    2.0
""", ts)

    logs = tsh.log(engine, names=['smallserie'])
    assert [
        {'author': 'aurelien.campeas@pythonian.fr',
         'meta': {},
         'date': pd.Timestamp('2017-02-01 00:00:00+0000', tz='UTC'),
         'names': ['smallserie']
        },
        {'author': 'aurelien.campeas@pythonian.fr',
         'meta': {},
         'date': pd.Timestamp('2017-02-02 00:00:00+0000', tz='UTC'),
         'names': ['smallserie']
        },
        {'author': 'aurelien.campeas@pythonian.fr',
         'meta': {},
         'date': pd.Timestamp('2017-02-03 00:00:00+0000', tz='UTC'),
         'names': ['smallserie']
        }
    ] == [{k: v for k, v in log.items() if k != 'rev'}
          for log in logs]
    histts = tsh.get_history(engine, 'smallserie')
    assert