# test_tsio.py
from datetime import datetime, timedelta
from pathlib import Path

from dateutil import parser
import numpy as np
import pandas as pd
import pytest
import pytz

from tshistory.snapshot import Snapshot
from tshistory.util import rename_series, threadpool
from tshistory.tsio import TimeSerie
from tshistory.testutil import (
    assert_df,
    assert_hist,
    assert_hist_equals,
    assert_group_equals,
    assert_structures,
    genserie,
    tempattr
)
22

23
# 'data' directory located next to this test module
DATADIR = Path(__file__).parent / 'data'
24

Aurélien Campéas's avatar
Aurélien Campéas committed
25

26
27
28
29
def utcdt(*dt):
    """Return a UTC pandas Timestamp built from `datetime(*dt)` arguments."""
    naive = datetime(*dt)
    return pd.Timestamp(naive, tz='UTC')


30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
def test_in_tx(engine):
    """`insert` must refuse a bare engine or a plain connection.

    Both are expected to raise a TypeError carrying the same message,
    whereas a connection obtained through `engine.begin()` is used
    without any error being expected.
    """
    expected_message = 'You must use a transaction object'
    history = TimeSerie()

    # a bare engine is refused
    with pytest.raises(TypeError) as excinfo:
        history.insert(engine, 0, 0, 0)
    assert excinfo.value.args[0] == expected_message

    # so is a connection that was not opened through `begin()`
    with engine.connect() as cn:
        with pytest.raises(TypeError) as excinfo:
            history.insert(cn, 0, 0, 0)
    assert excinfo.value.args[0] == expected_message

    series = genserie(datetime(2017, 10, 28, 23), 'H', 4, tz='UTC')
    with engine.begin() as cn:
        history.insert(cn, series, 'test_tx', 'Babar')


48
def test_tstamp_roundtrip(engine, tsh):
    """Round-trip a tz-aware series and check it comes back in UTC.

    A series indexed in Europe/Paris time is inserted and read back; the
    result is compared against the same instants expressed in UTC. A
    second, overlapping insertion then exercises `get`, `get_history`
    (with and without value-date bounds) and `interval`.
    """
    assert_structures(engine, tsh)
    ts = genserie(datetime(2017, 10, 28, 23),
                  'H', 4, tz='UTC')
    ts.index = ts.index.tz_convert('Europe/Paris')

    assert_df("""
2017-10-29 01:00:00+02:00    0
2017-10-29 02:00:00+02:00    1
2017-10-29 02:00:00+01:00    2
2017-10-29 03:00:00+01:00    3
Freq: H
    """, ts)

    tsh.insert(engine, ts, 'tztest', 'Babar',
               _insertion_date=utcdt(2018, 1, 1))
    back = tsh.get(engine, 'tztest')

    # the series was inserted with a Europe/Paris index, yet it is
    # read back with a UTC index
    assert_df("""
2017-10-28 23:00:00+00:00    0.0
2017-10-29 00:00:00+00:00    1.0
2017-10-29 01:00:00+00:00    2.0
2017-10-29 02:00:00+00:00    3.0
""", back)

    assert (ts.index == back.index).all()
    assert str(back.index.dtype) == 'datetime64[ns, UTC]'

    ival = tsh.interval(engine, 'tztest')
    assert ival.left == pd.Timestamp('2017-10-28 23:00:00+0000', tz='UTC')
    assert ival.right == pd.Timestamp('2017-10-29 02:00:00+0000', tz='UTC')

    # second insertion, overlapping the last two points of the first one
    ts = genserie(datetime(2017, 10, 29, 1),
                  'H', 4, tz='UTC')
    ts.index = ts.index.tz_convert('Europe/Paris')
    tsh.insert(engine, ts, 'tztest', 'Celeste',
               _insertion_date=utcdt(2018, 1, 3))

    ts = tsh.get(engine, 'tztest')
    assert_df("""
2017-10-28 23:00:00+00:00    0.0
2017-10-29 00:00:00+00:00    1.0
2017-10-29 01:00:00+00:00    0.0
2017-10-29 02:00:00+00:00    1.0
2017-10-29 03:00:00+00:00    2.0
2017-10-29 04:00:00+00:00    3.0
""", ts)

    hist = tsh.get_history(engine, 'tztest')
    assert_hist("""
insertion_date             value_date               
2018-01-01 00:00:00+00:00  2017-10-28 23:00:00+00:00    0.0
                           2017-10-29 00:00:00+00:00    1.0
                           2017-10-29 01:00:00+00:00    2.0
                           2017-10-29 02:00:00+00:00    3.0
2018-01-03 00:00:00+00:00  2017-10-28 23:00:00+00:00    0.0
                           2017-10-29 00:00:00+00:00    1.0
                           2017-10-29 01:00:00+00:00    0.0
                           2017-10-29 02:00:00+00:00    1.0
                           2017-10-29 03:00:00+00:00    2.0
                           2017-10-29 04:00:00+00:00    3.0
""", hist)

    hist = tsh.get_history(engine, 'tztest',
                           from_value_date=utcdt(2017, 10, 29, 1),
                           to_value_date=utcdt(2017, 10, 29, 3))
    assert_hist("""
insertion_date             value_date               
2018-01-01 00:00:00+00:00  2017-10-29 01:00:00+00:00    2.0
                           2017-10-29 02:00:00+00:00    3.0
2018-01-03 00:00:00+00:00  2017-10-29 01:00:00+00:00    0.0
                           2017-10-29 02:00:00+00:00    1.0
                           2017-10-29 03:00:00+00:00    2.0
""", hist)

    ival = tsh.interval(engine, 'tztest')
    assert ival.left == pd.Timestamp('2017-10-28 23:00:00+0000', tz='UTC')
    assert ival.right == pd.Timestamp('2017-10-29 04:00:00+0000', tz='UTC')
    assert_structures(engine, tsh)
128

129

130
def test_differential(engine, tsh):
    """Exercise successive insertions on the same series.

    Covers: first insertion, identical re-insertion, a slight variation,
    a longer overlapping series, then a second series ('ts_mixte') that
    is refreshed and extended. Along the way the changeset ids, `exists`,
    `interval`, the registry content and `get` at a revision date are
    checked.
    """
    assert_structures(engine, tsh)
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 10)
    tsh.insert(engine, ts_begin, 'ts_test', 'test')

    id1 = tsh.last_id(engine, 'ts_test')
    assert tsh._previous_cset(engine, 'ts_test', id1) is None

    assert tsh.exists(engine, 'ts_test')
    assert not tsh.exists(engine, 'this_does_not_exist')

    assert tsh.interval(engine, 'ts_test') == pd.Interval(
        datetime(2010, 1, 1, 0, 0), datetime(2010, 1, 10, 0, 0),
        closed='both'
    )

    # asking for the interval of an unknown series must raise
    with pytest.raises(ValueError):
        tsh.interval(engine, 'nosuchts')

    fetched = tsh.get(engine, 'ts_test')
    assert_df("""
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    3.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
""", fetched)
    assert fetched.name == 'ts_test'

    # we should detect the emission of a message
    tsh.insert(engine, ts_begin, 'ts_test', 'babar')

    assert_df("""
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    3.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
""", tsh.get(engine, 'ts_test'))

    ts_slight_variation = ts_begin.copy()
    ts_slight_variation.iloc[3] = 0
    ts_slight_variation.iloc[6] = 0
    tsh.insert(engine, ts_slight_variation, 'ts_test', 'celeste')
    id2 = tsh.last_id(engine, 'ts_test')
    assert tsh._previous_cset(engine, 'ts_test', id2) == id1

    assert_df("""
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    0.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    0.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
""", tsh.get(engine, 'ts_test'))

    ts_longer = genserie(datetime(2010, 1, 3), 'D', 15)
    ts_longer.iloc[1] = 2.48
    ts_longer.iloc[3] = 3.14
    ts_longer.iloc[5] = ts_begin.iloc[7]

    with engine.begin() as cn:
        tsh.insert(cn, ts_longer, 'ts_test', 'test')
    id3 = tsh.last_id(engine, 'ts_test')

    assert id1 < id2 < id3

    assert_df("""
2010-01-01     0.00
2010-01-02     1.00
2010-01-03     0.00
2010-01-04     2.48
2010-01-05     2.00
2010-01-06     3.14
2010-01-07     4.00
2010-01-08     7.00
2010-01-09     6.00
2010-01-10     7.00
2010-01-11     8.00
2010-01-12     9.00
2010-01-13    10.00
2010-01-14    11.00
2010-01-15    12.00
2010-01-16    13.00
2010-01-17    14.00
""", tsh.get(engine, 'ts_test'))

    assert tsh.interval(engine, 'ts_test') == pd.Interval(
        datetime(2010, 1, 1, 0, 0), datetime(2010, 1, 17, 0, 0),
        closed='both'
    )

    # start testing manual overrides
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 5, initval=[2])
    ts_begin.loc['2010-01-04'] = -1
    tsh.insert(engine, ts_begin, 'ts_mixte', 'test')

    # -1 represents bogus upstream data
    assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
""", tsh.get(engine, 'ts_mixte'))

    # refresh all the period + 1 extra data point
    ts_more = genserie(datetime(2010, 1, 2), 'D', 5, [2])
    ts_more.loc['2010-01-04'] = -1
    tsh.insert(engine, ts_more, 'ts_mixte', 'test')

    assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
""", tsh.get(engine, 'ts_mixte'))

    # just append an extra data point
    # with no intersection with the previous ts
    ts_one_more = genserie(datetime(2010, 1, 7), 'D', 1, [3])
    tsh.insert(engine, ts_one_more, 'ts_mixte', 'test')

    assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
""", tsh.get(engine, 'ts_mixte'))

    with engine.begin() as cn:
        cn.execute('set search_path to "{0}.timeserie", {0}, public'.format(tsh.namespace))
        allts = pd.read_sql("select seriename, table_name from registry "
                            "where seriename in ('ts_test', 'ts_mixte')",
                            cn)

        assert_df("""
seriename table_name
0   ts_test    ts_test
1  ts_mixte   ts_mixte
""", allts)

        assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
""", tsh.get(cn, 'ts_mixte',
             revision_date=datetime.now()))

    assert_structures(engine, tsh)

304

305
306
307
308
309
310
def test_serie_metadata(engine, tsh):
    """Check the metadata recorded at insertion and its update rules.

    A free-form key ('topic') can be added through `update_metadata`,
    while an attempt to change 'tzaware' is expected to raise an
    AssertionError and leave the stored metadata untouched.
    """
    serie = genserie(datetime(2010, 1, 1), 'D', 1, initval=[1])
    tsh.insert(engine, serie, 'ts-metadata', 'babar')

    initialmeta = tsh.metadata(engine, 'ts-metadata')
    assert initialmeta == {
        'index_dtype': '<M8[ns]',
        'index_names': [],
        'index_type': 'datetime64[ns]',
        'tzaware': False,
        'value_dtype': '<f8',
        'value_type': 'float64'
    }

    tsh.update_metadata(
        engine, 'ts-metadata',
        {'topic': 'banana spot price'}
    )
    assert tsh.metadata(engine, 'ts-metadata')['topic'] == 'banana spot price'

    with pytest.raises(AssertionError):
        tsh.update_metadata(engine, 'ts-metadata', {'tzaware': True})

    assert tsh.metadata(engine, 'ts-metadata') == {
        'index_dtype': '<M8[ns]',
        'index_names': [],
        'index_type': 'datetime64[ns]',
        'topic': 'banana spot price',
        'tzaware': False,
        'value_dtype': '<f8',
        'value_type': 'float64'
    }
336

337

338
339
def test_changeset_metadata(engine, tsh):
    """Metadata passed to `insert` is retrievable from the changeset.

    The revision id is taken from the first entry of the series log.
    """
    serie = genserie(datetime(2010, 1, 1), 'D', 1, initval=[1])
    tsh.insert(engine, serie, 'ts-cs-metadata', 'babar',
               {'foo': 'A', 'bar': 42})

    log = tsh.log(engine, names=['ts-cs-metadata'])
    meta = tsh.changeset_metadata(engine, log[0]['rev'])
    assert meta == {'foo': 'A', 'bar': 42}


348
def test_revision_date(engine, tsh):
    """Check `get(..., revision_date=...)` and `latest_insertion_date`.

    Four versions of 'ts_through_time' are inserted at explicit
    insertion dates; the series is then read at several revision dates,
    including one earlier than any insertion (expected result: None).
    The 'revdate' series inserted in the prologue is checked the same way.
    """
    for i in range(1, 5):
        with engine.begin() as cn:
            tsh.insert(cn, genserie(datetime(2017, 1, i), 'D', 3, [i]), 'revdate',
                       'test', _insertion_date=utcdt(2016, 1, i))

    # end of prologue, now some real meat
    idate0 = pd.Timestamp('2015-1-1 00:00:00', tz='UTC')
    ts = genserie(datetime(2010, 1, 4), 'D', 4, [0], name='truc')
    tsh.insert(engine, ts, 'ts_through_time',
               'test', _insertion_date=idate0)
    assert idate0 == tsh.latest_insertion_date(engine, 'ts_through_time')

    idate1 = pd.Timestamp('2015-1-1 15:45:23', tz='UTC')
    ts = genserie(datetime(2010, 1, 4), 'D', 4, [1], name='truc')
    tsh.insert(engine, ts, 'ts_through_time',
               'test', _insertion_date=idate1)
    assert idate1 == tsh.latest_insertion_date(engine, 'ts_through_time')

    idate2 = pd.Timestamp('2015-1-2 15:43:23', tz='UTC')
    ts = genserie(datetime(2010, 1, 4), 'D', 4, [2], name='truc')
    tsh.insert(engine, ts, 'ts_through_time',
               'test', _insertion_date=idate2)
    assert idate2 == tsh.latest_insertion_date(engine, 'ts_through_time')

    idate3 = pd.Timestamp('2015-1-3', tz='UTC')
    ts = genserie(datetime(2010, 1, 4), 'D', 4, [3], name='truc')
    tsh.insert(engine, ts, 'ts_through_time',
               'test', _insertion_date=idate3)
    assert idate3 == tsh.latest_insertion_date(engine, 'ts_through_time')

    ts = tsh.get(engine, 'ts_through_time')

    assert_df("""
2010-01-04    3.0
2010-01-05    3.0
2010-01-06    3.0
2010-01-07    3.0
""", ts)

    ts = tsh.get(engine, 'ts_through_time',
                 revision_date=datetime(2015, 1, 2, 18, 43, 23))

    assert_df("""
2010-01-04    2.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    2.0
""", ts)

    ts = tsh.get(engine, 'ts_through_time',
                 revision_date=datetime(2015, 1, 1, 18, 43, 23))

    assert_df("""
2010-01-04    1.0
2010-01-05    1.0
2010-01-06    1.0
2010-01-07    1.0
""", ts)

    # this revision date is earlier than every insertion date used above
    ts = tsh.get(engine, 'ts_through_time',
                 revision_date=datetime(2014, 1, 1, 18, 43, 23))

    assert ts is None

    # epilogue: back to the revdate issue
    assert_df("""
2017-01-01    1.0
2017-01-02    2.0
2017-01-03    3.0
2017-01-04    4.0
2017-01-05    4.0
2017-01-06    4.0
""", tsh.get(engine, 'revdate'))

    oldstate = tsh.get(engine, 'revdate', revision_date=datetime(2016, 1, 2))
    assert_df("""
2017-01-01    1.0
2017-01-02    2.0
2017-01-03    2.0
2017-01-04    2.0
""", oldstate)
430

431

432
def test_point_deletion(engine, tsh):
    """Inserting NaN/None over existing points removes them from `get`.

    Checked first on a numeric series, then on a string series. In both
    cases points are erased and later re-filled. Finally, inserting a
    series made only of NaN over existing data must raise a ValueError.
    """
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 11)
    ts_begin.iloc[-1] = np.nan
    tsh.insert(engine, ts_begin, 'ts_del', 'test')

    _, ts = Snapshot(engine, tsh, 'ts_del').find()
    assert ts.iloc[-2] == 8.0

    ts_begin.iloc[0] = np.nan
    ts_begin.iloc[3] = np.nan

    tsh.insert(engine, ts_begin, 'ts_del', 'test')

    assert_df("""
2010-01-02    1.0
2010-01-03    2.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
""", tsh.get(engine, 'ts_del'))

    ts2 = tsh.get(engine, 'ts_del',
                 # force snapshot reconstruction feature
                 revision_date=datetime(2038, 1, 1))
    assert (tsh.get(engine, 'ts_del') == ts2).all()

    ts_begin.iloc[0] = 42
    ts_begin.iloc[3] = 23

    tsh.insert(engine, ts_begin, 'ts_del', 'test')

    assert_df("""
2010-01-01    42.0
2010-01-02     1.0
2010-01-03     2.0
2010-01-04    23.0
2010-01-05     4.0
2010-01-06     5.0
2010-01-07     6.0
2010-01-08     7.0
2010-01-09     8.0
2010-01-10     9.0
""", tsh.get(engine, 'ts_del'))

    # now with string!

    ts_string = genserie(datetime(2010, 1, 1), 'D', 10, ['machin'])
    tsh.insert(engine, ts_string, 'ts_string_del', 'test')

    # explicit positional access: integer keys through plain `[]` on a
    # datetime-indexed series rely on a positional fallback
    ts_string.iloc[4] = None
    ts_string.iloc[5] = None

    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
    assert_df("""
2010-01-01    machin
2010-01-02    machin
2010-01-03    machin
2010-01-04    machin
2010-01-07    machin
2010-01-08    machin
2010-01-09    machin
2010-01-10    machin
""", tsh.get(engine, 'ts_string_del'))

    ts_string.iloc[4] = 'truc'
    ts_string.iloc[6] = 'truc'

    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
    assert_df("""
2010-01-01    machin
2010-01-02    machin
2010-01-03    machin
2010-01-04    machin
2010-01-05      truc
2010-01-07      truc
2010-01-08    machin
2010-01-09    machin
2010-01-10    machin
""", tsh.get(engine, 'ts_string_del'))

    ts_string[ts_string.index] = np.nan
    with pytest.raises(ValueError):
        tsh.insert(engine, ts_string, 'ts_string_del', 'test')
518

519

520
521
def test_nan_first(engine, tsh):
    """A first insertion made only of NaN values returns None."""
    # first insertion with only nan
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 10, [np.nan])
    assert tsh.insert(engine, ts_begin, 'ts_null', 'test') is None
524

525

526
def test_more_point_deletion(engine, tsh):
    """Check diff computation with NaN values and full-erasure refusal.

    Re-inserting the same series yields no diff; `tsh.diff` between two
    series with NaN holes is checked against an expected table; finally,
    replacing a whole existing series (numeric, then string) by NaN must
    raise a ValueError while a normal re-insertion still goes through.
    """
    ts_repushed = genserie(datetime(2010, 1, 1), 'D', 11)
    ts_repushed[0:3] = np.nan

    assert_df("""
2010-01-01     NaN
2010-01-02     NaN
2010-01-03     NaN
2010-01-04     3.0
2010-01-05     4.0
2010-01-06     5.0
2010-01-07     6.0
2010-01-08     7.0
2010-01-09     8.0
2010-01-10     9.0
2010-01-11    10.0
Freq: D
""", ts_repushed)

    tsh.insert(engine, ts_repushed, 'ts_repushed', 'test')
    diff = tsh.insert(engine, ts_repushed, 'ts_repushed', 'test')
    assert diff is None

    # there is no difference
    assert 0 == len(tsh.diff(ts_repushed, ts_repushed))

    ts_add = genserie(datetime(2010, 1, 1), 'D', 15)
    ts_add.iloc[0] = np.nan
    ts_add.iloc[13:] = np.nan
    ts_add.iloc[8] = np.nan
    diff = tsh.diff(ts_repushed, ts_add)

    assert_df("""
2010-01-02     1.0
2010-01-03     2.0
2010-01-09     NaN
2010-01-12    11.0
2010-01-13    12.0""", diff.sort_index())
    # value on nan => value
    # nan on value => nan
    # nan on nan => Nothing
    # nan on nothing=> Nothing

    # full erasing
    # numeric
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 4)
    tsh.insert(engine, ts_begin, 'ts_full_del', 'test')

    ts_begin.iloc[:] = np.nan
    with pytest.raises(ValueError):
        tsh.insert(engine, ts_begin, 'ts_full_del', 'test')

    ts_end = genserie(datetime(2010, 1, 1), 'D', 4)
    tsh.insert(engine, ts_end, 'ts_full_del', 'test')

    # string

    ts_begin = genserie(datetime(2010, 1, 1), 'D', 4, ['text'])
    tsh.insert(engine, ts_begin, 'ts_full_del_str', 'test')

    ts_begin = pd.Series([np.nan] * 4, name='ts_full_del_str',
                         index=ts_begin.index)

    with pytest.raises(ValueError):
        tsh.insert(engine, ts_begin, 'ts_full_del_str', 'test')

    ts_end = genserie(datetime(2010, 1, 1), 'D', 4, ['text'])
    tsh.insert(engine, ts_end, 'ts_full_del_str', 'test')
594

Aurélien Campéas's avatar
Aurélien Campéas committed
595

596
def test_deletion_over_horizon(engine, tsh):
    """Check `interval` after NaN insertions overlapping the series ends.

    A three-point series is inserted, then NaN series overlapping its
    right end and then its left end are inserted; the bounds reported by
    `tsh.interval` are asserted after each step.
    """
    idate = utcdt(2018, 2, 1)
    ts = pd.Series(
        [1, 2, 3],
        index=pd.date_range(datetime(2018, 1, 1), freq='D', periods=3)
    )

    name = 'delete_over_hz'
    tsh.insert(engine, ts, name, 'Babar',
               _insertion_date=idate)

    # NaN values overlapping the last stored point and going beyond it
    ts = pd.Series(
        [np.nan, np.nan, np.nan],
        index=pd.date_range(datetime(2018, 1, 3), freq='D', periods=3)
    )

    tsh.insert(engine, ts, name, 'Celeste',
               _insertion_date=idate.replace(day=2))
    ival = tsh.interval(engine, name)
    assert ival.left == datetime(2018, 1, 1)
    assert ival.right == datetime(2018, 1, 2)

    # NaN values starting before the series and overlapping its first point
    ts = pd.Series(
        [np.nan, np.nan, np.nan],
        index=pd.date_range(datetime(2017, 12, 30), freq='D', periods=3)
    )
    tsh.insert(engine, ts, name, 'Arthur',
               _insertion_date=idate.replace(day=3))
    ival = tsh.interval(engine, name)
    assert ival.left == datetime(2018, 1, 2)
    assert ival.right == datetime(2018, 1, 2)
627
628


629
def test_get_history(engine, tsh):
    """Exercise `get_history` and its filtering parameters.

    Three growing versions of 'smallserie' are inserted at explicit
    insertion dates. The test then checks the log, the full history, the
    diff-mode history, a replay of the history into 'smallserie2', the
    insertion-date and value-date restrictions, the result for an
    unknown series and `insertion_dates`.
    """
    for numserie in (1, 2, 3):
        with engine.begin() as cn:
            tsh.insert(cn, genserie(datetime(2017, 1, 1), 'D', numserie), 'smallserie',
                       'aurelien.campeas@pythonian.fr',
                       _insertion_date=utcdt(2017, 2, numserie))

    ts = tsh.get(engine, 'smallserie')
    assert_df("""
2017-01-01    0.0
2017-01-02    1.0
2017-01-03    2.0
""", ts)

    logs = tsh.log(engine, names=['smallserie'])
    # the 'rev' key is left out of the comparison
    assert [
        {'author': 'aurelien.campeas@pythonian.fr',
         'meta': {},
         'date': pd.Timestamp('2017-02-01 00:00:00+0000', tz='UTC'),
         'names': ['smallserie']
        },
        {'author': 'aurelien.campeas@pythonian.fr',
         'meta': {},
         'date': pd.Timestamp('2017-02-02 00:00:00+0000', tz='UTC'),
         'names': ['smallserie']
        },
        {'author': 'aurelien.campeas@pythonian.fr',
         'meta': {},
         'date': pd.Timestamp('2017-02-03 00:00:00+0000', tz='UTC'),
         'names': ['smallserie']
        }
    ] == [{k: v for k, v in log.items() if k != 'rev'}
          for log in logs]
    histts = tsh.get_history(engine, 'smallserie')

    assert_hist("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
                           2017-01-03    2.0
""", histts)

    diffs = tsh.get_history(engine, 'smallserie', diffmode=True)
    assert_hist("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-03    2.0
""", diffs)

    # replay every version of the history into a second series
    for idate in histts:
        with engine.begin() as cn:
            idate = idate.replace(tzinfo=pytz.timezone('UTC'))
            tsh.insert(cn, histts[idate], 'smallserie2',
                       'aurelien.campeas@pythonian.f', _insertion_date=idate)

    # this is perfectly round-tripable
    assert (tsh.get(engine, 'smallserie2') == ts).all()
    assert_hist_equals(tsh.get_history(engine, 'smallserie2'), histts)

    # get history ranges
    tsa = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2017, 2, 2))
    assert_hist("""
insertion_date             value_date
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
                           2017-01-03    2.0
""", tsa)

    tsb = tsh.get_history(engine, 'smallserie',
                          to_insertion_date=datetime(2017, 2, 2))
    assert_hist("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
""", tsb)

    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2017, 2, 2),
                          to_insertion_date=datetime(2017, 2, 2))
    assert_hist("""
insertion_date             value_date
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
""", tsc)

    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2017, 2, 4),
                          to_insertion_date=datetime(2017, 2, 4))
    assert tsc == {}

    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2016, 2, 1),
                          to_insertion_date=datetime(2017, 2, 2))
    assert_hist("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
""", tsc)

    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2016, 2, 1),
                          to_insertion_date=datetime(2016, 12, 31))
    assert tsc == {}

    # restrictions on value dates
    tsc = tsh.get_history(engine, 'smallserie',
                          from_value_date=datetime(2017, 1, 1),
                          to_value_date=datetime(2017, 1, 2))
    assert_hist("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
""", tsc)

    tsc = tsh.get_history(engine, 'smallserie',
                          from_value_date=datetime(2017, 1, 2))
    assert_hist("""
insertion_date             value_date
2017-02-02 00:00:00+00:00  2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-02    1.0
                           2017-01-03    2.0
""", tsc)

    tsc = tsh.get_history(engine, 'smallserie',
                          to_value_date=datetime(2017, 1, 2))
    assert_hist("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
""", tsc)

    tsc = tsh.get_history(engine, 'no-such-series')
    assert tsc is None

    idates = tsh.insertion_dates(engine, 'smallserie')
    assert idates == [
        pd.Timestamp('2017-02-01 00:00:00+0000', tz='UTC'),
        pd.Timestamp('2017-02-02 00:00:00+0000', tz='UTC'),
        pd.Timestamp('2017-02-03 00:00:00+0000', tz='UTC')
    ]


786

787
788
789
790
791
792
793
def test_history_delta(engine, tsh):
    """Check the `deltabefore` / `deltaafter` parameters of `get_history`.

    Two hourly series are inserted, each starting one hour before its
    own insertion date. The full history is compared first, then the
    history with `deltaafter` alone, then with both deltas.
    """
    for d in range(1, 3):
        idate = utcdt(2018, 1, d)
        serie = genserie(idate - timedelta(hours=1), 'H', 6, initval=[d])
        tsh.insert(engine, serie, 'hd', 'aurelien.campeas@pythonian.fr',
                   _insertion_date=idate)

    assert_hist("""
insertion_date             value_date               
2018-01-01 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
                           2018-01-01 03:00:00+00:00    1.0
                           2018-01-01 04:00:00+00:00    1.0
2018-01-02 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
                           2018-01-01 03:00:00+00:00    1.0
                           2018-01-01 04:00:00+00:00    1.0
                           2018-01-01 23:00:00+00:00    2.0
                           2018-01-02 00:00:00+00:00    2.0
                           2018-01-02 01:00:00+00:00    2.0
                           2018-01-02 02:00:00+00:00    2.0
                           2018-01-02 03:00:00+00:00    2.0
                           2018-01-02 04:00:00+00:00    2.0
    """, tsh.get_history(engine, 'hd'))

    assert_hist("""
insertion_date             value_date               
2018-01-01 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
2018-01-02 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
                           2018-01-01 03:00:00+00:00    1.0
                           2018-01-01 04:00:00+00:00    1.0
                           2018-01-01 23:00:00+00:00    2.0
                           2018-01-02 00:00:00+00:00    2.0
                           2018-01-02 01:00:00+00:00    2.0
                           2018-01-02 02:00:00+00:00    2.0
""",  tsh.get_history(engine, 'hd', deltaafter=timedelta(hours=2)))

    assert_hist("""
insertion_date             value_date               
2018-01-01 00:00:00+00:00  2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
2018-01-02 00:00:00+00:00  2018-01-02 00:00:00+00:00    2.0
                           2018-01-02 01:00:00+00:00    2.0
""",  tsh.get_history(engine, 'hd',
                      deltabefore=timedelta(hours=0),
                      deltaafter=timedelta(hours=1)))


845
846
847
848
849
850
851
852
853
854
855
def test_nr_gethistory(engine, tsh):
    """Check `get_history` bounded on both insertion and value dates.

    A first series is inserted without an explicit insertion date, then
    five scaled versions of a second series are inserted at consecutive
    daily insertion dates. The history is requested with four positional
    date bounds and compared with the expected table.
    """
    # `pd.date_range` replaces the `pd.DatetimeIndex(start=, end=, freq=)`
    # constructor form, which recent pandas versions no longer accept
    s0 = pd.Series([-1, 0, 0, -1],
                   index=pd.date_range(start=datetime(2016, 12, 29),
                                       end=datetime(2017, 1, 1),
                                       freq='D'))
    tsh.insert(engine, s0, 'foo', 'zogzog')

    s1 = pd.Series([1, 0, 0, 1],
                   index=pd.date_range(start=datetime(2017, 1, 1),
                                       end=datetime(2017, 1, 4),
                                       freq='D'))
    idate = utcdt(2016, 1, 1)
    for i in range(5):
        with engine.begin() as cn:
            tsh.insert(cn, s1 * i, 'foo',
                       'aurelien.campeas@pythonian.f',
                       _insertion_date=idate + timedelta(days=i))

    df = tsh.get_history(engine, 'foo',
                         datetime(2016, 1, 3),
                         datetime(2016, 1, 4),
                         datetime(2017, 1, 1),
                         datetime(2017, 1, 4))

    assert_hist("""
insertion_date             value_date
2016-01-03 00:00:00+00:00  2017-01-01    2.0
                           2017-01-02    0.0
                           2017-01-03    0.0
                           2017-01-04    2.0
2016-01-04 00:00:00+00:00  2017-01-01    3.0
                           2017-01-02    0.0
                           2017-01-03    0.0
                           2017-01-04    3.0
""", df)


882
def test_add_na(engine, tsh):
    """Inserting NaN-only data yields no diff and adds no points.

    Checked for a first insertion made only of NaN, and for an insertion
    that repeats existing points and appends NaN after them.
    """
    # a series made only of NaNs is not inserted
    # in case of first insertion
    ts_nan = genserie(datetime(2010, 1, 1), 'D', 5)
    ts_nan[[True] * len(ts_nan)] = np.nan

    diff = tsh.insert(engine, ts_nan, 'ts_add_na', 'test')
    assert diff is None

    # in case of insertion in existing data
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 5)
    tsh.insert(engine, ts_begin, 'ts_add_na', 'test')

    ts_nan = genserie(datetime(2010, 1, 6), 'D', 5)
    ts_nan[[True] * len(ts_nan)] = np.nan
    ts_nan = pd.concat([ts_begin, ts_nan])

    diff = tsh.insert(engine, ts_nan, 'ts_add_na', 'test')
    assert diff is None

    result = tsh.get(engine, 'ts_add_na')
    assert len(result) == 5
904
905


906
def test_dtype_mismatch(engine, tsh):
    """Check that inserting with a dtype unlike the stored one is refused.

    Both directions are covered: numbers over a series stored as strings
    ('error1'), then strings over a series stored as numbers ('error2').
    Each refused insertion must raise with the exact message asserted
    below.
    """
    def numbers():
        # a fresh 11-point daily series on every call
        return genserie(datetime(2015, 1, 1), 'D', 11)

    def strings():
        # the same series, with its values cast to str
        return numbers().astype('str')

    # strings are stored first, numbers are then rejected
    tsh.insert(engine, strings(), 'error1', 'test')
    with pytest.raises(Exception) as excinfo:
        tsh.insert(engine, numbers(), 'error1', 'test')
    assert str(excinfo.value) == 'Type error when inserting error1, new type is float64, type in base is object'

    # numbers are stored first, strings are then rejected
    tsh.insert(engine, numbers(), 'error2', 'test')
    with pytest.raises(Exception) as excinfo:
        tsh.insert(engine, strings(), 'error2', 'test')
    assert str(excinfo.value) == 'Type error when inserting error2, new type is object, type in base is float64'
932
933


934
935
936
937
938
939
def test_precision(engine, tsh):
    """Check how a float with many digits survives a store/read roundtrip.

    The value read back must equal 0.12345678912345678, and re-inserting
    either the roundtripped series or the original one must return no
    diff.
    """
    long_float = 0.123456789123456789
    original = genserie(datetime(2015, 1, 1), 'D', 5, initval=[long_float])

    tsh.insert(engine, original, 'precision', 'test')
    roundtripped = tsh.get(engine, 'precision')
    assert roundtripped.iloc[0] == 0.12345678912345678

    # the roundtripped series does not produce a diff when reinserted
    assert tsh.insert(engine, roundtripped, 'precision', 'test') is None

    # neither does the original series
    assert tsh.insert(engine, original, 'precision', 'test') is None


949
950
951
952
953
954
955
956
def test_serie_deletion(engine, tsh):
    """Check the effects of deleting one series while keeping another.

    Two series ('keepme' by Babar, 'deleteme' by Celeste) each receive
    two insertions. After 'deleteme' is deleted within a transaction:

    * it no longer exists;
    * the log for both names only lists the two 'Babar' entries;
    * the counters returned by `assert_structures` drop by one series,
      two changesets and two changeset/series entries.

    Deleting with a plain engine must raise an AssertionError, and the
    deleted name can be inserted again afterwards.
    """
    owners = (('keepme', 'Babar'), ('deleteme', 'Celeste'))
    for day in (10, 12):
        ts = genserie(datetime(2018, 1, day), 'H', 10)
        for seriename, author in owners:
            tsh.insert(engine, ts, seriename, author)

    nbseries, nbcsets, nbcsetseries = assert_structures(engine, tsh)

    with engine.begin() as cn:
        tsh.delete(cn, 'deleteme')

    assert not tsh.exists(engine, 'deleteme')
    authors = []
    for entry in tsh.log(engine, names=('keepme', 'deleteme')):
        authors.append(entry['author'])
    assert authors == ['Babar', 'Babar']

    nbseries_after, nbcsets_after, nbcsetseries_after = assert_structures(
        engine, tsh
    )

    assert nbcsets - nbcsets_after == 2
    assert nbcsetseries - nbcsetseries_after == 2
    assert nbseries - nbseries_after == 1

    # a bare engine is refused: deletion wants a transaction
    with pytest.raises(AssertionError) as werr:
        tsh.delete(engine, 'keepme')
    assert werr.value.args[0] == 'use a transaction object'

    # the deleted name can be used again (ts is the 2018-01-12 series)
    tsh.insert(engine, ts, 'deleteme', 'Celeste')
978

979

980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
def test_strip(engine, tsh):
    for i in range(1, 5):
        pubdate = utcdt(2017, 1, i)
        ts = genserie(datetime(2017, 1, 10), 'H', 1 + i)
        tsh.insert(engine, ts, 'xserie', 'babar', _insertion_date=pubdate)
        # also insert something completely unrelated
        tsh.insert(engine, genserie(datetime(2018, 1, 1), 'D', 1 + i), 'yserie', 'celeste')

    csida = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3))
    assert csida is not None
    csidb = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3, 1), mode='before')
    csidc = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3, 1), mode='after')
    assert csidb < csida < csidc

    log = tsh.log(engine, names=['xserie', 'yserie'])
    assert [(idx, l['author']) for idx, l in enumerate(log, start=1)
    ] == [
        (1, 'babar'),
        (2, 'celeste'),
        (3, 'babar'),
        (4, 'celeste'),
        (5, 'babar'),
        (6, 'celeste'),
        (7, 'babar'),
        (8, 'celeste')
    ]

    h = tsh.get_history(engine, 'xserie')
1008
    assert_hist("""
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
insertion_date             value_date         
2017-01-01 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
2017-01-02 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
                           2017-01-10 02:00:00    2.0
2017-01-03 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
                           2017-01-10 02:00:00    2.0
                           2017-01-10 03:00:00    3.0
2017-01-04 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
                           2017-01-10 02:00:00    2.0
                           2017-01-10 03:00:00    3.0
                           2017-01-10 04:00:00    4.0
""", h)

    csid = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3))
1027
    with engine.begin() as cn:
1028
1029
        tsh.strip(cn, 'xserie', csid)