test_tsio.py 50.4 KB
Newer Older
1
from datetime import datetime, timedelta
2
3
from pathlib import Path
import pytz
4

5
from dateutil import parser
6
import pytest
7
8
import numpy as np
import pandas as pd
9

10
from tshistory.snapshot import Snapshot
11
12
from tshistory.util import rename_series, threadpool
from tshistory.tsio import TimeSerie
13
14
from tshistory.testutil import (
    assert_df,
15
16
    assert_hist,
    assert_hist_equals,
17
    assert_group_equals,
18
    assert_structures,
19
20
21
    genserie,
    tempattr
)
22

23
DATADIR = Path(__file__).parent / 'data'
24

Aurélien Campéas's avatar
Aurélien Campéas committed
25

26
27
28
29
def utcdt(*dt):
    """Return a UTC pandas Timestamp built from `datetime(*dt)` arguments."""
    naive = datetime(*dt)
    return pd.Timestamp(naive, tz='UTC')


30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
def test_in_tx(engine):
    """Check which connection-like objects `TimeSerie.insert` accepts.

    A bare engine and a connection from `engine.connect()` must both be
    refused with a TypeError; a connection from `engine.begin()` is used
    for a plain insertion afterwards.
    """
    refusal = 'You must use a transaction object'
    history = TimeSerie()

    # the engine itself is refused
    with pytest.raises(TypeError) as engine_err:
        history.insert(engine, 0, 0, 0)
    assert engine_err.value.args[0] == refusal

    # a connection obtained through engine.connect() is refused as well
    with engine.connect() as plain_cn:
        with pytest.raises(TypeError) as connect_err:
            history.insert(plain_cn, 0, 0, 0)
    assert connect_err.value.args[0] == refusal

    # a connection obtained through engine.begin() is used for the insertion
    hourly = genserie(datetime(2017, 10, 28, 23), 'H', 4, tz='UTC')
    with engine.begin() as tx_cn:
        history.insert(tx_cn, hourly, 'test_tx', 'Babar')


48
def test_tstamp_roundtrip(engine, tsh):
    """Round-trip a tz-aware series and check it is read back in UTC.

    The series is indexed in Europe/Paris time before insertion; the
    values, history and interval read back are all expected in UTC.
    """
    assert_structures(engine, tsh)

    # four hourly points, shown below going from +02:00 to +01:00
    paris = genserie(datetime(2017, 10, 28, 23), 'H', 4, tz='UTC')
    paris.index = paris.index.tz_convert('Europe/Paris')

    assert_df("""
2017-10-29 01:00:00+02:00    0
2017-10-29 02:00:00+02:00    1
2017-10-29 02:00:00+01:00    2
2017-10-29 03:00:00+01:00    3
Freq: H
    """, paris)

    tsh.insert(
        engine, paris, 'tztest', 'Babar',
        _insertion_date=utcdt(2018, 1, 1)
    )
    stored = tsh.get(engine, 'tztest')

    # the Paris-localized index comes back normalized to UTC
    assert_df("""
2017-10-28 23:00:00+00:00    0.0
2017-10-29 00:00:00+00:00    1.0
2017-10-29 01:00:00+00:00    2.0
2017-10-29 02:00:00+00:00    3.0
""", stored)

    # same instants, only the displayed timezone differs
    assert (paris.index == stored.index).all()
    assert str(stored.index.dtype) == 'datetime64[ns, UTC]'

    bounds = tsh.interval(engine, 'tztest')
    assert bounds.left == pd.Timestamp('2017-10-28 23:00:00+0000', tz='UTC')
    assert bounds.right == pd.Timestamp('2017-10-29 02:00:00+0000', tz='UTC')

    # second insertion, starting two hours later
    shifted = genserie(datetime(2017, 10, 29, 1), 'H', 4, tz='UTC')
    shifted.index = shifted.index.tz_convert('Europe/Paris')
    tsh.insert(
        engine, shifted, 'tztest', 'Celeste',
        _insertion_date=utcdt(2018, 1, 3)
    )

    merged = tsh.get(engine, 'tztest')
    assert_df("""
2017-10-28 23:00:00+00:00    0.0
2017-10-29 00:00:00+00:00    1.0
2017-10-29 01:00:00+00:00    0.0
2017-10-29 02:00:00+00:00    1.0
2017-10-29 03:00:00+00:00    2.0
2017-10-29 04:00:00+00:00    3.0
""", merged)

    full_history = tsh.get_history(engine, 'tztest')
    assert_hist("""
insertion_date             value_date               
2018-01-01 00:00:00+00:00  2017-10-28 23:00:00+00:00    0.0
                           2017-10-29 00:00:00+00:00    1.0
                           2017-10-29 01:00:00+00:00    2.0
                           2017-10-29 02:00:00+00:00    3.0
2018-01-03 00:00:00+00:00  2017-10-28 23:00:00+00:00    0.0
                           2017-10-29 00:00:00+00:00    1.0
                           2017-10-29 01:00:00+00:00    0.0
                           2017-10-29 02:00:00+00:00    1.0
                           2017-10-29 03:00:00+00:00    2.0
                           2017-10-29 04:00:00+00:00    3.0
""", full_history)

    # same history restricted to a window of value dates
    windowed_history = tsh.get_history(
        engine, 'tztest',
        from_value_date=utcdt(2017, 10, 29, 1),
        to_value_date=utcdt(2017, 10, 29, 3)
    )
    assert_hist("""
insertion_date             value_date               
2018-01-01 00:00:00+00:00  2017-10-29 01:00:00+00:00    2.0
                           2017-10-29 02:00:00+00:00    3.0
2018-01-03 00:00:00+00:00  2017-10-29 01:00:00+00:00    0.0
                           2017-10-29 02:00:00+00:00    1.0
                           2017-10-29 03:00:00+00:00    2.0
""", windowed_history)

    bounds = tsh.interval(engine, 'tztest')
    assert bounds.left == pd.Timestamp('2017-10-28 23:00:00+0000', tz='UTC')
    assert bounds.right == pd.Timestamp('2017-10-29 04:00:00+0000', tz='UTC')
    assert_structures(engine, tsh)
128

129

130
def test_differential(engine, tsh):
    """Insert successive versions of a series and check the stored result.

    Covers: first insertion, re-insertion of identical data, small
    variations, a longer overlapping series, then a second series
    ('ts_mixte') that is refreshed and appended to.  Finally checks the
    registry rows and a read at the current revision date.
    """
    assert_structures(engine, tsh)
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 10)
    tsh.insert(engine, ts_begin, 'ts_test', 'test')

    # the first changeset has no predecessor
    id1 = tsh.last_id(engine, 'ts_test')
    assert tsh._previous_cset(engine, 'ts_test', id1) is None

    assert tsh.exists(engine, 'ts_test')
    assert not tsh.exists(engine, 'this_does_not_exist')

    assert tsh.interval(engine, 'ts_test') == pd.Interval(
        datetime(2010, 1, 1, 0, 0), datetime(2010, 1, 10, 0, 0),
        closed='both'
    )

    # no `assert` on the call: pytest.raises already fails the test when
    # nothing is raised
    with pytest.raises(ValueError):
        tsh.interval(engine, 'nosuchts')

    fetched = tsh.get(engine, 'ts_test')
    assert_df("""
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    3.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
""", fetched)
    assert fetched.name == 'ts_test'

    # we should detect the emission of a message
    tsh.insert(engine, ts_begin, 'ts_test', 'babar')

    # re-inserting the same data leaves the series unchanged
    assert_df("""
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    3.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
""", tsh.get(engine, 'ts_test'))

    ts_slight_variation = ts_begin.copy()
    ts_slight_variation.iloc[3] = 0
    ts_slight_variation.iloc[6] = 0
    tsh.insert(engine, ts_slight_variation, 'ts_test', 'celeste')
    # the identical re-insertion above is not in the chain: id1 is still
    # the direct predecessor
    id2 = tsh.last_id(engine, 'ts_test')
    assert tsh._previous_cset(engine, 'ts_test', id2) == id1

    assert_df("""
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    0.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    0.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
""", tsh.get(engine, 'ts_test'))

    # a longer series overlapping the existing one from 2010-01-03 on
    ts_longer = genserie(datetime(2010, 1, 3), 'D', 15)
    ts_longer.iloc[1] = 2.48
    ts_longer.iloc[3] = 3.14
    ts_longer.iloc[5] = ts_begin.iloc[7]

    with engine.begin() as cn:
        tsh.insert(cn, ts_longer, 'ts_test', 'test')
    id3 = tsh.last_id(engine, 'ts_test')

    assert id1 < id2 < id3

    assert_df("""
2010-01-01     0.00
2010-01-02     1.00
2010-01-03     0.00
2010-01-04     2.48
2010-01-05     2.00
2010-01-06     3.14
2010-01-07     4.00
2010-01-08     7.00
2010-01-09     6.00
2010-01-10     7.00
2010-01-11     8.00
2010-01-12     9.00
2010-01-13    10.00
2010-01-14    11.00
2010-01-15    12.00
2010-01-16    13.00
2010-01-17    14.00
""", tsh.get(engine, 'ts_test'))

    assert tsh.interval(engine, 'ts_test') == pd.Interval(
        datetime(2010, 1, 1, 0, 0), datetime(2010, 1, 17, 0, 0),
        closed='both'
    )

    # start testing manual overrides
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 5, initval=[2])
    ts_begin.loc['2010-01-04'] = -1
    tsh.insert(engine, ts_begin, 'ts_mixte', 'test')

    # -1 represents bogus upstream data
    assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
""", tsh.get(engine, 'ts_mixte'))

    # refresh all the period + 1 extra data point
    ts_more = genserie(datetime(2010, 1, 2), 'D', 5, [2])
    ts_more.loc['2010-01-04'] = -1
    tsh.insert(engine, ts_more, 'ts_mixte', 'test')

    assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
""", tsh.get(engine, 'ts_mixte'))

    # just append an extra data point
    # with no intersection with the previous ts
    ts_one_more = genserie(datetime(2010, 1, 7), 'D', 1, [3])
    tsh.insert(engine, ts_one_more, 'ts_mixte', 'test')

    assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
""", tsh.get(engine, 'ts_mixte'))

    with engine.begin() as cn:
        # put the namespace schemas on the search path so that `registry`
        # can be queried unqualified
        cn.execute('set search_path to "{0}.timeserie", {0}, public'.format(tsh.namespace))
        allts = pd.read_sql("select seriename, table_name from registry "
                            "where seriename in ('ts_test', 'ts_mixte')",
                            cn)

        # the expected table has no placeholder, so it is passed verbatim
        assert_df("""
seriename table_name
0   ts_test    ts_test
1  ts_mixte   ts_mixte
""", allts)

        assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
""", tsh.get(cn, 'ts_mixte',
             revision_date=datetime.now()))

    assert_structures(engine, tsh)

304

305
306
307
308
309
310
def test_serie_metadata(engine, tsh):
    """Check a series' initial metadata and how `update_metadata` alters it.

    A 'topic' key can be added; trying to overwrite 'tzaware' must raise
    an AssertionError and leave the metadata untouched.
    """
    name = 'ts-metadata'
    # metadata expected right after the first insertion
    base_meta = {
        'index_dtype': '<M8[ns]',
        'index_names': [],
        'index_type': 'datetime64[ns]',
        'tzaware': False,
        'value_dtype': '<f8',
        'value_type': 'float64'
    }

    one_point = genserie(datetime(2010, 1, 1), 'D', 1, initval=[1])
    tsh.insert(engine, one_point, name, 'babar')

    initialmeta = tsh.metadata(engine, name)
    assert initialmeta == base_meta

    tsh.update_metadata(engine, name, {'topic': 'banana spot price'})
    assert tsh.metadata(engine, name)['topic'] == 'banana spot price'

    # the 'tzaware' key cannot be overridden this way
    with pytest.raises(AssertionError):
        tsh.update_metadata(engine, name, {'tzaware': True})

    # the refused update changed nothing; only 'topic' was added
    assert tsh.metadata(engine, name) == dict(
        base_meta, topic='banana spot price'
    )
336

337

338
339
def test_changeset_metadata(engine, tsh):
    """Metadata passed at insertion time can be read back by revision."""
    name = 'ts-cs-metadata'
    one_point = genserie(datetime(2010, 1, 1), 'D', 1, initval=[1])
    tsh.insert(
        engine, one_point, name, 'babar',
        {'foo': 'A', 'bar': 42}
    )

    # look the revision up in the log, then ask for its metadata
    first_entry = tsh.log(engine, names=[name])[0]
    stored_meta = tsh.changeset_metadata(engine, first_entry['rev'])
    assert stored_meta == {'foo': 'A', 'bar': 42}


348
def test_revision_date(engine, tsh):
    """Read a series as it was at various revision dates.

    Two series are involved: 'revdate' (filled in the prologue and
    checked in the epilogue) and 'ts_through_time', which receives four
    successive constant versions at explicit insertion dates.
    """
    # prologue: four overlapping insertions, one per day of January 2016
    for day in range(1, 5):
        with engine.begin() as cn:
            tsh.insert(
                cn, genserie(datetime(2017, 1, day), 'D', 3, [day]),
                'revdate', 'test',
                _insertion_date=utcdt(2016, 1, day)
            )

    # end of prologue, now some real meat
    # version N of the series is constant at N, inserted at the Nth stamp
    stamps = (
        '2015-1-1 00:00:00',
        '2015-1-1 15:45:23',
        '2015-1-2 15:43:23',
        '2015-1-3',
    )
    for value, stamp in enumerate(stamps):
        idate = pd.Timestamp(stamp, tz='UTC')
        version = genserie(datetime(2010, 1, 4), 'D', 4, [value], name='truc')
        tsh.insert(
            engine, version, 'ts_through_time', 'test',
            _insertion_date=idate
        )
        assert idate == tsh.latest_insertion_date(engine, 'ts_through_time')

    # without a revision date we get the latest version
    latest = tsh.get(engine, 'ts_through_time')

    assert_df("""
2010-01-04    3.0
2010-01-05    3.0
2010-01-06    3.0
2010-01-07    3.0
""", latest)

    # after the third insertion, before the fourth
    as_of_jan2 = tsh.get(
        engine, 'ts_through_time',
        revision_date=datetime(2015, 1, 2, 18, 43, 23)
    )

    assert_df("""
2010-01-04    2.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    2.0
""", as_of_jan2)

    # after the second insertion, before the third
    as_of_jan1 = tsh.get(
        engine, 'ts_through_time',
        revision_date=datetime(2015, 1, 1, 18, 43, 23)
    )

    assert_df("""
2010-01-04    1.0
2010-01-05    1.0
2010-01-06    1.0
2010-01-07    1.0
""", as_of_jan1)

    # before any insertion there is nothing to return
    before_all = tsh.get(
        engine, 'ts_through_time',
        revision_date=datetime(2014, 1, 1, 18, 43, 23)
    )

    assert before_all is None

    # epilogue: back to the revdate issue
    assert_df("""
2017-01-01    1.0
2017-01-02    2.0
2017-01-03    3.0
2017-01-04    4.0
2017-01-05    4.0
2017-01-06    4.0
""", tsh.get(engine, 'revdate'))

    oldstate = tsh.get(engine, 'revdate', revision_date=datetime(2016, 1, 2))
    assert_df("""
2017-01-01    1.0
2017-01-02    2.0
2017-01-03    2.0
2017-01-04    2.0
""", oldstate)
430

431

432
def test_point_deletion(engine, tsh):
    """Check that NaN/None values erase points and that values can return.

    Runs the scenario on a numeric series ('ts_del') and on a string
    series ('ts_string_del').  Erasing every point of the string series
    at once must raise a ValueError.
    """
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 11)
    ts_begin.iloc[-1] = np.nan
    tsh.insert(engine, ts_begin, 'ts_del', 'test')

    _, ts = Snapshot(engine, tsh, 'ts_del').find()
    assert ts.iloc[-2] == 8.0

    ts_begin.iloc[0] = np.nan
    ts_begin.iloc[3] = np.nan

    tsh.insert(engine, ts_begin, 'ts_del', 'test')

    # 2010-01-01 and 2010-01-04 are gone
    assert_df("""
2010-01-02    1.0
2010-01-03    2.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
""", tsh.get(engine, 'ts_del'))

    ts2 = tsh.get(engine, 'ts_del',
                 # force snapshot reconstruction feature
                 revision_date=datetime(2038, 1, 1))
    assert (tsh.get(engine, 'ts_del') == ts2).all()

    # give values back to the two erased points
    ts_begin.iloc[0] = 42
    ts_begin.iloc[3] = 23

    tsh.insert(engine, ts_begin, 'ts_del', 'test')

    assert_df("""
2010-01-01    42.0
2010-01-02     1.0
2010-01-03     2.0
2010-01-04    23.0
2010-01-05     4.0
2010-01-06     5.0
2010-01-07     6.0
2010-01-08     7.0
2010-01-09     8.0
2010-01-10     9.0
""", tsh.get(engine, 'ts_del'))

    # now with string!

    ts_string = genserie(datetime(2010, 1, 1), 'D', 10, ['machin'])
    tsh.insert(engine, ts_string, 'ts_string_del', 'test')

    # explicit positional access: a bare integer key on a date-indexed
    # series is deprecated in recent pandas
    ts_string.iloc[4] = None
    ts_string.iloc[5] = None

    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
    assert_df("""
2010-01-01    machin
2010-01-02    machin
2010-01-03    machin
2010-01-04    machin
2010-01-07    machin
2010-01-08    machin
2010-01-09    machin
2010-01-10    machin
""", tsh.get(engine, 'ts_string_del'))

    # position 5 (2010-01-06) stays None, so it remains absent below
    ts_string.iloc[4] = 'truc'
    ts_string.iloc[6] = 'truc'

    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
    assert_df("""
2010-01-01    machin
2010-01-02    machin
2010-01-03    machin
2010-01-04    machin
2010-01-05      truc
2010-01-07      truc
2010-01-08    machin
2010-01-09    machin
2010-01-10    machin
""", tsh.get(engine, 'ts_string_del'))

    # erasing every point at once is refused
    ts_string[ts_string.index] = np.nan
    with pytest.raises(ValueError):
        tsh.insert(engine, ts_string, 'ts_string_del', 'test')
518

519

520
521
def test_nan_first(engine, tsh):
    """A first insertion made only of NaN values returns None."""
    only_nans = genserie(datetime(2010, 1, 1), 'D', 10, [np.nan])
    outcome = tsh.insert(engine, only_nans, 'ts_null', 'test')
    assert outcome is None
524

525

526
def test_more_point_deletion(engine, tsh):
    """More erasure scenarios: leading NaNs, `diff` output, full erasure.

    Checks that re-inserting identical data yields no diff, what
    `tsh.diff` reports between two series containing NaNs, and that
    erasing a whole series (numeric or string) raises a ValueError.
    """
    leading_nans = genserie(datetime(2010, 1, 1), 'D', 11)
    leading_nans[0:3] = np.nan

    assert_df("""
2010-01-01     NaN
2010-01-02     NaN
2010-01-03     NaN
2010-01-04     3.0
2010-01-05     4.0
2010-01-06     5.0
2010-01-07     6.0
2010-01-08     7.0
2010-01-09     8.0
2010-01-10     9.0
2010-01-11    10.0
Freq: D
""", leading_nans)

    # the second, identical insertion produces nothing
    tsh.insert(engine, leading_nans, 'ts_repushed', 'test')
    second_push = tsh.insert(engine, leading_nans, 'ts_repushed', 'test')
    assert second_push is None

    # there is no difference
    assert 0 == len(tsh.diff(leading_nans, leading_nans))

    # a longer series with NaNs at positions 0, 8, 13 and 14
    with_holes = genserie(datetime(2010, 1, 1), 'D', 15)
    with_holes.iloc[0] = np.nan
    with_holes.iloc[13:] = np.nan
    with_holes.iloc[8] = np.nan
    delta = tsh.diff(leading_nans, with_holes)

    assert_df("""
2010-01-02     1.0
2010-01-03     2.0
2010-01-09     NaN
2010-01-12    11.0
2010-01-13    12.0""", delta.sort_index())
    # value on nan => value
    # nan on value => nan
    # nan on nan => Nothing
    # nan on nothing=> Nothing

    # full erasing
    # numeric
    numeric = genserie(datetime(2010, 1, 1), 'D', 4)
    tsh.insert(engine, numeric, 'ts_full_del', 'test')

    numeric.iloc[:] = np.nan
    with pytest.raises(ValueError):
        tsh.insert(engine, numeric, 'ts_full_del', 'test')

    # the series still accepts regular data afterwards
    numeric_again = genserie(datetime(2010, 1, 1), 'D', 4)
    tsh.insert(engine, numeric_again, 'ts_full_del', 'test')

    # string

    textual = genserie(datetime(2010, 1, 1), 'D', 4, ['text'])
    tsh.insert(engine, textual, 'ts_full_del_str', 'test')

    # same index, all values replaced with NaN
    erased = pd.Series(
        [np.nan] * 4,
        name='ts_full_del_str',
        index=textual.index
    )

    with pytest.raises(ValueError):
        tsh.insert(engine, erased, 'ts_full_del_str', 'test')

    textual_again = genserie(datetime(2010, 1, 1), 'D', 4, ['text'])
    tsh.insert(engine, textual_again, 'ts_full_del_str', 'test')
594

Aurélien Campéas's avatar
Aurélien Campéas committed
595

596
def test_deletion_over_horizon(engine, tsh):
    """Erase points with NaN ranges that extend past the series' bounds.

    The NaN series overlap the stored data only partially, first on the
    right side and then on the left; the interval reported afterwards
    must shrink accordingly.
    """
    def three_days(values, start):
        # daily series of three points beginning at `start`
        return pd.Series(
            values,
            index=pd.date_range(start, freq='D', periods=3)
        )

    name = 'delete_over_hz'
    idate = utcdt(2018, 2, 1)

    # initial data: 2018-01-01 .. 2018-01-03
    tsh.insert(
        engine, three_days([1, 2, 3], datetime(2018, 1, 1)), name, 'Babar',
        _insertion_date=idate
    )

    # NaNs over 2018-01-03 .. 2018-01-05: only the first one hits data
    right_erasure = three_days([np.nan, np.nan, np.nan], datetime(2018, 1, 3))
    tsh.insert(
        engine, right_erasure, name, 'Celeste',
        _insertion_date=idate.replace(day=2)
    )
    bounds = tsh.interval(engine, name)
    assert bounds.left == datetime(2018, 1, 1)
    assert bounds.right == datetime(2018, 1, 2)

    # NaNs over 2017-12-30 .. 2018-01-01: only the last one hits data
    left_erasure = three_days([np.nan, np.nan, np.nan], datetime(2017, 12, 30))
    tsh.insert(
        engine, left_erasure, name, 'Arthur',
        _insertion_date=idate.replace(day=3)
    )
    bounds = tsh.interval(engine, name)
    assert bounds.left == datetime(2018, 1, 2)
    assert bounds.right == datetime(2018, 1, 2)
627
628


629
def test_get_history(engine, tsh):
    """Exercise `get_history` and its insertion/value date filters.

    Three growing versions of 'smallserie' are inserted on three
    consecutive days.  The test then checks the log, the full and diff
    histories, a replay of the history into 'smallserie2', and every
    combination of date bounds used below.
    """
    author = 'aurelien.campeas@pythonian.fr'
    for size in (1, 2, 3):
        with engine.begin() as cn:
            tsh.insert(
                cn, genserie(datetime(2017, 1, 1), 'D', size), 'smallserie',
                author,
                _insertion_date=utcdt(2017, 2, size)
            )

    current = tsh.get(engine, 'smallserie')
    assert_df("""
2017-01-01    0.0
2017-01-02    1.0
2017-01-03    2.0
""", current)

    # compare the log entries, leaving the 'rev' key out
    entries = tsh.log(engine, names=['smallserie'])
    without_rev = [
        {key: val for key, val in entry.items() if key != 'rev'}
        for entry in entries
    ]
    assert without_rev == [
        {'author': 'aurelien.campeas@pythonian.fr',
         'meta': {},
         'date': pd.Timestamp('2017-02-01 00:00:00+0000', tz='UTC'),
         'names': ['smallserie']
        },
        {'author': 'aurelien.campeas@pythonian.fr',
         'meta': {},
         'date': pd.Timestamp('2017-02-02 00:00:00+0000', tz='UTC'),
         'names': ['smallserie']
        },
        {'author': 'aurelien.campeas@pythonian.fr',
         'meta': {},
         'date': pd.Timestamp('2017-02-03 00:00:00+0000', tz='UTC'),
         'names': ['smallserie']
        }
    ]
    histts = tsh.get_history(engine, 'smallserie')

    assert_hist("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
                           2017-01-03    2.0
""", histts)

    # in diff mode each insertion date only carries its new points
    diffs = tsh.get_history(engine, 'smallserie', diffmode=True)
    assert_hist("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-03    2.0
""", diffs)

    # replay each historical version into a second series
    for key in histts:
        with engine.begin() as cn:
            stamp = key.replace(tzinfo=pytz.timezone('UTC'))
            tsh.insert(
                cn, histts[stamp], 'smallserie2',
                'aurelien.campeas@pythonian.f', _insertion_date=stamp
            )

    # this is perfectly round-tripable
    assert (tsh.get(engine, 'smallserie2') == current).all()
    assert_hist_equals(tsh.get_history(engine, 'smallserie2'), histts)

    # get history ranges
    from_second = tsh.get_history(
        engine, 'smallserie',
        from_insertion_date=datetime(2017, 2, 2)
    )
    assert_hist("""
insertion_date             value_date
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
                           2017-01-03    2.0
""", from_second)

    up_to_second = tsh.get_history(
        engine, 'smallserie',
        to_insertion_date=datetime(2017, 2, 2)
    )
    assert_hist("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
""", up_to_second)

    # both bounds on the same insertion date
    only_second = tsh.get_history(
        engine, 'smallserie',
        from_insertion_date=datetime(2017, 2, 2),
        to_insertion_date=datetime(2017, 2, 2)
    )
    assert_hist("""
insertion_date             value_date
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
""", only_second)

    # a window after the last insertion is empty
    after_all = tsh.get_history(
        engine, 'smallserie',
        from_insertion_date=datetime(2017, 2, 4),
        to_insertion_date=datetime(2017, 2, 4)
    )
    assert after_all == {}

    # a window starting well before the first insertion
    wide_window = tsh.get_history(
        engine, 'smallserie',
        from_insertion_date=datetime(2016, 2, 1),
        to_insertion_date=datetime(2017, 2, 2)
    )
    assert_hist("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
""", wide_window)

    # a window entirely before the first insertion is empty
    before_all = tsh.get_history(
        engine, 'smallserie',
        from_insertion_date=datetime(2016, 2, 1),
        to_insertion_date=datetime(2016, 12, 31)
    )
    assert before_all == {}

    # restrictions on value dates
    value_window = tsh.get_history(
        engine, 'smallserie',
        from_value_date=datetime(2017, 1, 1),
        to_value_date=datetime(2017, 1, 2)
    )
    assert_hist("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
""", value_window)

    value_from = tsh.get_history(
        engine, 'smallserie',
        from_value_date=datetime(2017, 1, 2)
    )
    assert_hist("""
insertion_date             value_date
2017-02-02 00:00:00+00:00  2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-02    1.0
                           2017-01-03    2.0
""", value_from)

    value_to = tsh.get_history(
        engine, 'smallserie',
        to_value_date=datetime(2017, 1, 2)
    )
    assert_hist("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
""", value_to)

    # an unknown series has no history at all
    missing = tsh.get_history(engine, 'no-such-series')
    assert missing is None

778

779
780
781
782
783
784
785
def test_history_delta(engine, tsh):
    """Check the `deltabefore` / `deltaafter` arguments of `get_history`.

    Two six-hour series are inserted on two consecutive days, each one
    starting one hour before its own insertion date.
    """
    for day in range(1, 3):
        inserted_at = utcdt(2018, 1, day)
        hourly = genserie(
            inserted_at - timedelta(hours=1), 'H', 6, initval=[day]
        )
        tsh.insert(
            engine, hourly, 'hd', 'aurelien.campeas@pythonian.fr',
            _insertion_date=inserted_at
        )

    # unrestricted history
    everything = tsh.get_history(engine, 'hd')
    assert_hist("""
insertion_date             value_date               
2018-01-01 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
                           2018-01-01 03:00:00+00:00    1.0
                           2018-01-01 04:00:00+00:00    1.0
2018-01-02 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
                           2018-01-01 03:00:00+00:00    1.0
                           2018-01-01 04:00:00+00:00    1.0
                           2018-01-01 23:00:00+00:00    2.0
                           2018-01-02 00:00:00+00:00    2.0
                           2018-01-02 01:00:00+00:00    2.0
                           2018-01-02 02:00:00+00:00    2.0
                           2018-01-02 03:00:00+00:00    2.0
                           2018-01-02 04:00:00+00:00    2.0
    """, everything)

    # nothing later than two hours after each insertion date
    two_hours_after = tsh.get_history(
        engine, 'hd', deltaafter=timedelta(hours=2)
    )
    assert_hist("""
insertion_date             value_date               
2018-01-01 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
2018-01-02 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
                           2018-01-01 03:00:00+00:00    1.0
                           2018-01-01 04:00:00+00:00    1.0
                           2018-01-01 23:00:00+00:00    2.0
                           2018-01-02 00:00:00+00:00    2.0
                           2018-01-02 01:00:00+00:00    2.0
                           2018-01-02 02:00:00+00:00    2.0
""", two_hours_after)

    # both bounds: from the insertion date to one hour after it
    one_hour_window = tsh.get_history(
        engine, 'hd',
        deltabefore=timedelta(hours=0),
        deltaafter=timedelta(hours=1)
    )
    assert_hist("""
insertion_date             value_date               
2018-01-01 00:00:00+00:00  2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
2018-01-02 00:00:00+00:00  2018-01-02 00:00:00+00:00    2.0
                           2018-01-02 01:00:00+00:00    2.0
""", one_hour_window)


837
838
839
840
841
842
843
844
845
846
847
def test_nr_gethistory(engine, tsh):
    """Check `get_history` when both insertion and value dates are bounded.

    A first series is inserted without an explicit insertion date, then
    five scaled versions of a second one are inserted on consecutive
    days of January 2016.  The history is finally queried with four
    positional date bounds.
    """
    # pd.date_range replaces the removed DatetimeIndex(start=, end=, freq=)
    # constructor form
    s0 = pd.Series([-1, 0, 0, -1],
                   index=pd.date_range(start=datetime(2016, 12, 29),
                                       end=datetime(2017, 1, 1),
                                       freq='D'))
    tsh.insert(engine, s0, 'foo', 'zogzog')

    s1 = pd.Series([1, 0, 0, 1],
                   index=pd.date_range(start=datetime(2017, 1, 1),
                                       end=datetime(2017, 1, 4),
                                       freq='D'))
    idate = utcdt(2016, 1, 1)
    for i in range(5):
        with engine.begin() as cn:
            # version i holds s1 scaled by i, inserted at idate + i days
            tsh.insert(cn, s1 * i, 'foo',
                       'aurelien.campeas@pythonian.f',
                       _insertion_date=idate + timedelta(days=i))

    # NOTE(review): the positional bounds look like (from_insertion_date,
    # to_insertion_date, from_value_date, to_value_date), judging by the
    # expected output -- confirm against the get_history signature
    df = tsh.get_history(engine, 'foo',
                         datetime(2016, 1, 3),
                         datetime(2016, 1, 4),
                         datetime(2017, 1, 1),
                         datetime(2017, 1, 4))

    assert_hist("""
insertion_date             value_date
2016-01-03 00:00:00+00:00  2017-01-01    2.0
                           2017-01-02    0.0
                           2017-01-03    0.0
                           2017-01-04    2.0
2016-01-04 00:00:00+00:00  2017-01-01    3.0
                           2017-01-02    0.0
                           2017-01-03    0.0
                           2017-01-04    3.0
""", df)


874
def test_add_na(engine, tsh):
    """Inserting only-new NaN points yields no diff and stores nothing."""
    # a series of NaNs won't be inserted in base
    # in case of first insertion
    blank = genserie(datetime(2010, 1, 1), 'D', 5)
    every_row = [True] * len(blank)
    blank[every_row] = np.nan

    first_diff = tsh.insert(engine, blank, 'ts_add_na', 'test')
    assert first_diff is None

    # in case of insertion in existing data
    existing = genserie(datetime(2010, 1, 1), 'D', 5)
    tsh.insert(engine, existing, 'ts_add_na', 'test')

    # the known five points followed by five NaN points
    nan_tail = genserie(datetime(2010, 1, 6), 'D', 5)
    every_row = [True] * len(nan_tail)
    nan_tail[every_row] = np.nan
    extended = pd.concat([existing, nan_tail])

    second_diff = tsh.insert(engine, extended, 'ts_add_na', 'test')
    assert second_diff is None

    # the NaN tail did not add any point
    stored = tsh.get(engine, 'ts_add_na')
    assert len(stored) == 5
896
897


898
def test_dtype_mismatch(engine, tsh):
    """Check the error raised when a series changes dtype between inserts.

    Both directions are covered: string values first then numeric ones
    ('error1'), and numeric values first then string ones ('error2').
    """
    # 'error1' is created with string values ...
    tsh.insert(engine,
               genserie(datetime(2015, 1, 1), 'D', 11).astype('str'),
               'error1',
               'test')

    # ... hence numeric values must be rejected afterwards
    with pytest.raises(Exception) as excinfo:
        tsh.insert(engine,
                   genserie(datetime(2015, 1, 1), 'D', 11),
                   'error1',
                   'test')

    assert str(excinfo.value) == (
        'Type error when inserting error1, '
        'new type is float64, type in base is object'
    )

    # 'error2' is created with numeric values ...
    tsh.insert(engine,
               genserie(datetime(2015, 1, 1), 'D', 11),
               'error2',
               'test')

    # ... hence string values must be rejected afterwards
    with pytest.raises(Exception) as excinfo:
        tsh.insert(engine,
                   genserie(datetime(2015, 1, 1), 'D', 11).astype('str'),
                   'error2',
                   'test')

    assert str(excinfo.value) == (
        'Type error when inserting error2, '
        'new type is object, type in base is float64'
    )
924
925


926
927
928
929
930
931
def test_precision(engine, tsh):
    """Check how a float with many decimals survives a store/read roundtrip.

    The value read back is compared with a literal carrying one digit
    less than the inserted one, and neither the roundtripped series nor
    the original one may produce a diff when inserted again.
    """
    floaty = 0.123456789123456789
    ts = genserie(datetime(2015, 1, 1), 'D', 5, initval=[floaty])

    tsh.insert(engine, ts, 'precision', 'test')
    ts_round = tsh.get(engine, 'precision')
    assert 0.12345678912345678 == ts_round.iloc[0]

    # the roundtripped series does not produce a diff when reinserted
    diff = tsh.insert(engine, ts_round, 'precision', 'test')
    assert diff is None

    # neither does the original series
    diff = tsh.insert(engine, ts, 'precision', 'test')
    assert diff is None


941
942
943
944
945
946
947
948
def test_serie_deletion(engine, tsh):
    """Check that deleting a series leaves the other series untouched.

    Two series ('keepme' and 'deleteme') each receive two insertions,
    then 'deleteme' is deleted and the counters returned by
    `assert_structures` are compared before and after.
    """
    ts = genserie(datetime(2018, 1, 10), 'H', 10)
    tsh.insert(engine, ts, 'keepme', 'Babar')
    tsh.insert(engine, ts, 'deleteme', 'Celeste')
    ts = genserie(datetime(2018, 1, 12), 'H', 10)
    tsh.insert(engine, ts, 'keepme', 'Babar')
    tsh.insert(engine, ts, 'deleteme', 'Celeste')

    # counters before the deletion
    seriecount, csetcount, csetseriecount = assert_structures(engine, tsh)

    with engine.begin() as cn:
        tsh.delete(cn, 'deleteme')

    assert not tsh.exists(engine, 'deleteme')
    # only the entries authored for 'keepme' are left in the log
    log = [entry['author']
           for entry in tsh.log(engine, names=('keepme', 'deleteme'))]
    assert log == ['Babar', 'Babar']

    # counters after the deletion: the two insertions made into
    # 'deleteme' and the series itself are gone
    seriecount2, csetcount2, csetseriecount2 = assert_structures(engine, tsh)

    assert csetcount - csetcount2 == 2
    assert csetseriecount - csetseriecount2 == 2
    assert seriecount - seriecount2 == 1

    # deleting through a bare engine rather than a transaction is refused
    with pytest.raises(AssertionError) as werr:
        tsh.delete(engine, 'keepme')
    assert werr.value.args[0] == 'use a transaction object'

    # inserting again under the deleted name is attempted last
    # (nothing is asserted on its outcome)
    tsh.insert(engine, ts, 'deleteme', 'Celeste')
970

971

972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
def test_strip(engine, tsh):
    for i in range(1, 5):
        pubdate = utcdt(2017, 1, i)
        ts = genserie(datetime(2017, 1, 10), 'H', 1 + i)
        tsh.insert(engine, ts, 'xserie', 'babar', _insertion_date=pubdate)
        # also insert something completely unrelated
        tsh.insert(engine, genserie(datetime(2018, 1, 1), 'D', 1 + i), 'yserie', 'celeste')

    csida = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3))
    assert csida is not None
    csidb = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3, 1), mode='before')
    csidc = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3, 1), mode='after')
    assert csidb < csida < csidc

    log = tsh.log(engine, names=['xserie', 'yserie'])
    assert [(idx, l['author']) for idx, l in enumerate(log, start=1)
    ] == [
        (1, 'babar'),
        (2, 'celeste'),
        (3, 'babar'),
        (4, 'celeste'),
        (5, 'babar'),
        (6, 'celeste'),
        (7, 'babar'),
        (8, 'celeste')
    ]

    h = tsh.get_history(engine, 'xserie')
1000
    assert_hist("""
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
insertion_date             value_date         
2017-01-01 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
2017-01-02 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
                           2017-01-10 02:00:00    2.0
2017-01-03 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
                           2017-01-10 02:00:00    2.0
                           2017-01-10 03:00:00    3.0
2017-01-04 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
                           2017-01-10 02:00:00    2.0
                           2017-01-10 03:00:00    3.0
                           2017-01-10 04:00:00    4.0
""", h)

    csid = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3))
1019
    with engine.begin() as cn:
1020
1021
        tsh.strip(cn, 'xserie', csid)

1022
    assert_hist("""
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032