from datetime import datetime, timedelta
from pathlib import Path

from dateutil import parser
import numpy as np
import pandas as pd
import pytest
import pytz

from tshistory.snapshot import Snapshot
from tshistory.util import rename_series, threadpool
from tshistory.tsio import TimeSerie
from tshistory.testutil import (
    assert_df,
    assert_hist,
    assert_hist_equals,
    assert_group_equals,
    assert_structures,
    genserie,
    tempattr
)
22

23
# 'data' directory sitting next to this test module
DATADIR = Path(__file__).parent / 'data'
24

Aurélien Campéas's avatar
Aurélien Campéas committed
25

26
27
28
29
def utcdt(*dt):
    """Return a UTC-aware ``pd.Timestamp`` built from ``datetime(*dt)``.

    The positional arguments are forwarded unchanged to the
    ``datetime`` constructor (year, month, day, ...).
    """
    naive = datetime(*dt)
    return pd.Timestamp(naive, tz='UTC')


30
def test_tstamp_roundtrip(engine, tsh):
    """Check that tz-aware series come back with a UTC index.

    Two hourly series are built in UTC, converted to Europe/Paris (the
    first one spans the 2017-10-29 +02:00 -> +01:00 change) and inserted
    under the same name.  The stored values, the series interval and the
    history (full and restricted by value dates) are then compared to
    their expected UTC renderings.
    """
    assert_structures(engine, tsh)
    ts = genserie(datetime(2017, 10, 28, 23),
                  'H', 4, tz='UTC')
    ts.index = ts.index.tz_convert('Europe/Paris')

    assert_df("""
2017-10-29 01:00:00+02:00    0
2017-10-29 02:00:00+02:00    1
2017-10-29 02:00:00+01:00    2
2017-10-29 03:00:00+01:00    3
Freq: H
    """, ts)

    tsh.insert(engine, ts, 'tztest', 'Babar',
               _insertion_date=utcdt(2018, 1, 1))
    back = tsh.get(engine, 'tztest')

    # the series was inserted with a Europe/Paris index; what we read
    # back is expressed in UTC
    assert_df("""
2017-10-28 23:00:00+00:00    0.0
2017-10-29 00:00:00+00:00    1.0
2017-10-29 01:00:00+00:00    2.0
2017-10-29 02:00:00+00:00    3.0
""", back)

    # same instants, only the timezone of the index differs
    assert (ts.index == back.index).all()
    assert str(back.index.dtype) == 'datetime64[ns, UTC]'

    ival = tsh.interval(engine, 'tztest')
    assert ival.left == pd.Timestamp('2017-10-28 23:00:00+0000', tz='UTC')
    assert ival.right == pd.Timestamp('2017-10-29 02:00:00+0000', tz='UTC')

    # second insertion, shifted by two hours, overlapping the first one
    ts = genserie(datetime(2017, 10, 29, 1),
                  'H', 4, tz='UTC')
    ts.index = ts.index.tz_convert('Europe/Paris')
    tsh.insert(engine, ts, 'tztest', 'Celeste',
               _insertion_date=utcdt(2018, 1, 3))

    ts = tsh.get(engine, 'tztest')
    assert_df("""
2017-10-28 23:00:00+00:00    0.0
2017-10-29 00:00:00+00:00    1.0
2017-10-29 01:00:00+00:00    0.0
2017-10-29 02:00:00+00:00    1.0
2017-10-29 03:00:00+00:00    2.0
2017-10-29 04:00:00+00:00    3.0
""", ts)

    hist = tsh.get_history(engine, 'tztest')
    assert_hist("""
insertion_date             value_date               
2018-01-01 00:00:00+00:00  2017-10-28 23:00:00+00:00    0.0
                           2017-10-29 00:00:00+00:00    1.0
                           2017-10-29 01:00:00+00:00    2.0
                           2017-10-29 02:00:00+00:00    3.0
2018-01-03 00:00:00+00:00  2017-10-28 23:00:00+00:00    0.0
                           2017-10-29 00:00:00+00:00    1.0
                           2017-10-29 01:00:00+00:00    0.0
                           2017-10-29 02:00:00+00:00    1.0
                           2017-10-29 03:00:00+00:00    2.0
                           2017-10-29 04:00:00+00:00    3.0
""", hist)

    # history restricted to a value-date window
    hist = tsh.get_history(engine, 'tztest',
                           from_value_date=utcdt(2017, 10, 29, 1),
                           to_value_date=utcdt(2017, 10, 29, 3))
    assert_hist("""
insertion_date             value_date               
2018-01-01 00:00:00+00:00  2017-10-29 01:00:00+00:00    2.0
                           2017-10-29 02:00:00+00:00    3.0
2018-01-03 00:00:00+00:00  2017-10-29 01:00:00+00:00    0.0
                           2017-10-29 02:00:00+00:00    1.0
                           2017-10-29 03:00:00+00:00    2.0
""", hist)

    ival = tsh.interval(engine, 'tztest')
    assert ival.left == pd.Timestamp('2017-10-28 23:00:00+0000', tz='UTC')
    assert ival.right == pd.Timestamp('2017-10-29 04:00:00+0000', tz='UTC')
    assert_structures(engine, tsh)
110

111

112
def test_differential(engine, tsh):
    """Exercise successive insertions on the same series names.

    Covers: first insertion, re-insertion of identical data, insertion
    of a slight variation, insertion of a longer overlapping series,
    then a second series ('ts_mixte') receiving overlapping and purely
    appended points.  After each step the stored series (and, where
    relevant, changeset ids, interval and registry rows) are checked.
    """
    assert_structures(engine, tsh)
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 10)
    tsh.insert(engine, ts_begin, 'ts_test', 'test')

    id1 = tsh.last_id(engine, 'ts_test')
    assert tsh._previous_cset(engine, 'ts_test', id1) is None

    assert tsh.exists(engine, 'ts_test')
    assert not tsh.exists(engine, 'this_does_not_exist')

    assert tsh.interval(engine, 'ts_test') == pd.Interval(
        datetime(2010, 1, 1, 0, 0), datetime(2010, 1, 10, 0, 0),
        closed='both'
    )

    # asking the interval of an unknown series must raise
    with pytest.raises(ValueError):
        tsh.interval(engine, 'nosuchts')

    fetched = tsh.get(engine, 'ts_test')
    assert_df("""
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    3.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
""", fetched)
    assert fetched.name == 'ts_test'

    # re-insert the very same data under another author
    # NOTE(review): the original comment said "we should detect the
    # emission of a message" but nothing here checks for one
    tsh.insert(engine, ts_begin, 'ts_test', 'babar')

    assert_df("""
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    3.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
""", tsh.get(engine, 'ts_test'))

    ts_slight_variation = ts_begin.copy()
    ts_slight_variation.iloc[3] = 0
    ts_slight_variation.iloc[6] = 0
    tsh.insert(engine, ts_slight_variation, 'ts_test', 'celeste')
    id2 = tsh.last_id(engine, 'ts_test')
    assert tsh._previous_cset(engine, 'ts_test', id2) == id1

    assert_df("""
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    0.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    0.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
""", tsh.get(engine, 'ts_test'))

    ts_longer = genserie(datetime(2010, 1, 3), 'D', 15)
    ts_longer.iloc[1] = 2.48
    ts_longer.iloc[3] = 3.14
    ts_longer.iloc[5] = ts_begin.iloc[7]

    with engine.begin() as cn:
        tsh.insert(cn, ts_longer, 'ts_test', 'test')
    id3 = tsh.last_id(engine, 'ts_test')

    assert id1 < id2 < id3

    assert_df("""
2010-01-01     0.00
2010-01-02     1.00
2010-01-03     0.00
2010-01-04     2.48
2010-01-05     2.00
2010-01-06     3.14
2010-01-07     4.00
2010-01-08     7.00
2010-01-09     6.00
2010-01-10     7.00
2010-01-11     8.00
2010-01-12     9.00
2010-01-13    10.00
2010-01-14    11.00
2010-01-15    12.00
2010-01-16    13.00
2010-01-17    14.00
""", tsh.get(engine, 'ts_test'))

    assert tsh.interval(engine, 'ts_test') == pd.Interval(
        datetime(2010, 1, 1, 0, 0), datetime(2010, 1, 17, 0, 0),
        closed='both'
    )

    # start testing manual overrides
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 5, initval=[2])
    ts_begin.loc['2010-01-04'] = -1
    tsh.insert(engine, ts_begin, 'ts_mixte', 'test')

    # -1 represents bogus upstream data
    assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
""", tsh.get(engine, 'ts_mixte'))

    # refresh all the period + 1 extra data point
    ts_more = genserie(datetime(2010, 1, 2), 'D', 5, [2])
    ts_more.loc['2010-01-04'] = -1
    tsh.insert(engine, ts_more, 'ts_mixte', 'test')

    assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
""", tsh.get(engine, 'ts_mixte'))

    # just append an extra data point
    # with no intersection with the previous ts
    ts_one_more = genserie(datetime(2010, 1, 7), 'D', 1, [3])
    tsh.insert(engine, ts_one_more, 'ts_mixte', 'test')

    assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
""", tsh.get(engine, 'ts_mixte'))

    with engine.begin() as cn:
        # point the connection at the namespace schemas before reading
        # the registry table directly
        cn.execute('set search_path to "{0}.timeserie", {0}, public'.format(tsh.namespace))
        allts = pd.read_sql("select seriename, table_name from registry "
                            "where seriename in ('ts_test', 'ts_mixte')",
                            cn)

        assert_df("""
seriename table_name
0   ts_test    ts_test
1  ts_mixte   ts_mixte
""", allts)

        assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
""", tsh.get(cn, 'ts_mixte',
             revision_date=datetime.now()))

    assert_structures(engine, tsh)

286

287
288
289
290
291
292
def test_serie_metadata(engine, tsh):
    """Check the metadata attached to a series and its update rules.

    A one-point series is inserted, its initial metadata is compared to
    the expected dict, a free-form key ('topic') is added, and the
    'tzaware' key is shown to be rejected by a plain update (raising
    AssertionError) but accepted with ``internal=True``.
    """
    serie = genserie(datetime(2010, 1, 1), 'D', 1, initval=[1])
    tsh.insert(engine, serie, 'ts-metadata', 'babar')

    initialmeta = tsh.metadata(engine, 'ts-metadata')
    try:
        assert initialmeta == {
            'index_dtype': '<M8[ns]',
            'index_names': [],
            'index_type': 'datetime64[ns]',
            'tzaware': False,
            'value_dtype': '<f8',
            'value_type': 'float64'
        }

        tsh.update_metadata(engine, 'ts-metadata',
                            {'topic': 'banana spot price'}
        )
        assert tsh.metadata(engine, 'ts-metadata')['topic'] == 'banana spot price'

        with pytest.raises(AssertionError):
            tsh.update_metadata(engine, 'ts-metadata', {'tzaware': True})

        tsh.update_metadata(engine, 'ts-metadata', {'tzaware': True}, internal=True)
        assert tsh.metadata(engine, 'ts-metadata') == {
            'index_dtype': '<M8[ns]',
            'index_names': [],
            'index_type': 'datetime64[ns]',
            'topic': 'banana spot price',
            'tzaware': True,
            'value_dtype': '<f8',
            'value_type': 'float64'
        }
    finally:
        # unbreak the serie for the second test pass :o
        # (done in a finally so a failed assertion above cannot leave
        # the series flagged tz-aware for the next pass)
        tsh.update_metadata(engine, 'ts-metadata', initialmeta, internal=True)


324
325
def test_changeset_metadata(engine, tsh):
    """Check that metadata passed to ``insert`` is stored on the changeset.

    The dict given as last positional argument of ``insert`` must be
    returned by ``changeset_metadata`` for the revision found in the log.
    """
    serie = genserie(datetime(2010, 1, 1), 'D', 1, initval=[1])
    tsh.insert(engine, serie, 'ts-cs-metadata', 'babar',
               {'foo': 'A', 'bar': 42})

    log = tsh.log(engine, names=['ts-cs-metadata'])
    meta = tsh.changeset_metadata(engine, log[0]['rev'])
    assert meta == {'foo': 'A', 'bar': 42}


334
def test_revision_date(engine, tsh):
    """Check ``get(..., revision_date=...)`` and ``latest_insertion_date``.

    Two series are used: 'revdate' (four overlapping insertions, one
    per day of January 2016) and 'ts_through_time' (four insertions of
    constant values 0..3 at explicit insertion dates).  The series are
    then read back at several revision dates.
    """
    # prologue: four overlapping 3-day series, inserted on successive days
    for i in range(1, 5):
        with engine.begin() as cn:
            tsh.insert(cn, genserie(datetime(2017, 1, i), 'D', 3, [i]), 'revdate',
                       'test', _insertion_date=utcdt(2016, 1, i))

    # end of prologue, now some real meat
    idate0 = pd.Timestamp('2015-1-1 00:00:00', tz='UTC')
    ts = genserie(datetime(2010, 1, 4), 'D', 4, [0], name='truc')
    tsh.insert(engine, ts, 'ts_through_time',
               'test', _insertion_date=idate0)
    assert idate0 == tsh.latest_insertion_date(engine, 'ts_through_time')

    idate1 = pd.Timestamp('2015-1-1 15:45:23', tz='UTC')
    ts = genserie(datetime(2010, 1, 4), 'D', 4, [1], name='truc')
    tsh.insert(engine, ts, 'ts_through_time',
               'test', _insertion_date=idate1)
    assert idate1 == tsh.latest_insertion_date(engine, 'ts_through_time')

    idate2 = pd.Timestamp('2015-1-2 15:43:23', tz='UTC')
    ts = genserie(datetime(2010, 1, 4), 'D', 4, [2], name='truc')
    tsh.insert(engine, ts, 'ts_through_time',
               'test', _insertion_date=idate2)
    assert idate2 == tsh.latest_insertion_date(engine, 'ts_through_time')

    idate3 = pd.Timestamp('2015-1-3', tz='UTC')
    ts = genserie(datetime(2010, 1, 4), 'D', 4, [3], name='truc')
    tsh.insert(engine, ts, 'ts_through_time',
               'test', _insertion_date=idate3)
    assert idate3 == tsh.latest_insertion_date(engine, 'ts_through_time')

    # without revision date we get the latest state
    ts = tsh.get(engine, 'ts_through_time')

    assert_df("""
2010-01-04    3.0
2010-01-05    3.0
2010-01-06    3.0
2010-01-07    3.0
""", ts)

    # between idate2 and idate3
    ts = tsh.get(engine, 'ts_through_time',
                 revision_date=datetime(2015, 1, 2, 18, 43, 23))

    assert_df("""
2010-01-04    2.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    2.0
""", ts)

    # between idate1 and idate2
    ts = tsh.get(engine, 'ts_through_time',
                 revision_date=datetime(2015, 1, 1, 18, 43, 23))

    assert_df("""
2010-01-04    1.0
2010-01-05    1.0
2010-01-06    1.0
2010-01-07    1.0
""", ts)

    # before the first insertion: nothing to return
    ts = tsh.get(engine, 'ts_through_time',
                 revision_date=datetime(2014, 1, 1, 18, 43, 23))

    assert ts is None

    # epilogue: back to the revdate issue
    assert_df("""
2017-01-01    1.0
2017-01-02    2.0
2017-01-03    3.0
2017-01-04    4.0
2017-01-05    4.0
2017-01-06    4.0
""", tsh.get(engine, 'revdate'))

    oldstate = tsh.get(engine, 'revdate', revision_date=datetime(2016, 1, 2))
    assert_df("""
2017-01-01    1.0
2017-01-02    2.0
2017-01-03    2.0
2017-01-04    2.0
""", oldstate)
416

417

418
def test_point_deletion(engine, tsh):
    """Check that inserting NaN/None over existing points removes them.

    Done first on a numeric series ('ts_del'), then on a string series
    ('ts_string_del').  Points set to NaN/None disappear from the
    stored series, can be re-created by a later insertion, and an
    insertion made only of NaN over the string series raises ValueError.
    """
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 11)
    ts_begin.iloc[-1] = np.nan
    tsh.insert(engine, ts_begin, 'ts_del', 'test')

    _, ts = Snapshot(engine, tsh, 'ts_del').find()
    assert ts.iloc[-2] == 8.0

    ts_begin.iloc[0] = np.nan
    ts_begin.iloc[3] = np.nan

    tsh.insert(engine, ts_begin, 'ts_del', 'test')

    assert_df("""
2010-01-02    1.0
2010-01-03    2.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
""", tsh.get(engine, 'ts_del'))

    ts2 = tsh.get(engine, 'ts_del',
                 # force snapshot reconstruction feature
                 revision_date=datetime(2038, 1, 1))
    assert (tsh.get(engine, 'ts_del') == ts2).all()

    ts_begin.iloc[0] = 42
    ts_begin.iloc[3] = 23

    tsh.insert(engine, ts_begin, 'ts_del', 'test')

    assert_df("""
2010-01-01    42.0
2010-01-02     1.0
2010-01-03     2.0
2010-01-04    23.0
2010-01-05     4.0
2010-01-06     5.0
2010-01-07     6.0
2010-01-08     7.0
2010-01-09     8.0
2010-01-10     9.0
""", tsh.get(engine, 'ts_del'))

    # now with string!

    ts_string = genserie(datetime(2010, 1, 1), 'D', 10, ['machin'])
    tsh.insert(engine, ts_string, 'ts_string_del', 'test')

    # explicit positional access: integer keys through plain [] on a
    # datetime-indexed series rely on a deprecated positional fallback
    ts_string.iloc[4] = None
    ts_string.iloc[5] = None

    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
    assert_df("""
2010-01-01    machin
2010-01-02    machin
2010-01-03    machin
2010-01-04    machin
2010-01-07    machin
2010-01-08    machin
2010-01-09    machin
2010-01-10    machin
""", tsh.get(engine, 'ts_string_del'))

    ts_string.iloc[4] = 'truc'
    ts_string.iloc[6] = 'truc'

    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
    assert_df("""
2010-01-01    machin
2010-01-02    machin
2010-01-03    machin
2010-01-04    machin
2010-01-05      truc
2010-01-07      truc
2010-01-08    machin
2010-01-09    machin
2010-01-10    machin
""", tsh.get(engine, 'ts_string_del'))

    # erasing every point in one go is refused
    ts_string[ts_string.index] = np.nan
    with pytest.raises(ValueError):
        tsh.insert(engine, ts_string, 'ts_string_del', 'test')
504

505

506
507
def test_nan_first(engine, tsh):
    """A first insertion made only of NaN must make ``insert`` return None."""
    # first insertion with only nan
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 10, [np.nan])
    assert tsh.insert(engine, ts_begin, 'ts_null', 'test') is None
510

511

512
def test_more_point_deletion(engine, tsh):
    """Further NaN handling checks: re-push, ``diff`` and full erasure.

    * inserting the same series (with leading NaN) twice yields no diff;
    * ``tsh.diff`` between two series follows the rules listed in the
      comment below;
    * inserting a series made only of NaN over an existing series
      raises ValueError, for numeric and for string series alike.
    """
    ts_repushed = genserie(datetime(2010, 1, 1), 'D', 11)
    ts_repushed[0:3] = np.nan

    assert_df("""
2010-01-01     NaN
2010-01-02     NaN
2010-01-03     NaN
2010-01-04     3.0
2010-01-05     4.0
2010-01-06     5.0
2010-01-07     6.0
2010-01-08     7.0
2010-01-09     8.0
2010-01-10     9.0
2010-01-11    10.0
Freq: D
""", ts_repushed)

    tsh.insert(engine, ts_repushed, 'ts_repushed', 'test')
    diff = tsh.insert(engine, ts_repushed, 'ts_repushed', 'test')
    assert diff is None

    # there is no difference
    assert 0 == len(tsh.diff(ts_repushed, ts_repushed))

    ts_add = genserie(datetime(2010, 1, 1), 'D', 15)
    ts_add.iloc[0] = np.nan
    ts_add.iloc[13:] = np.nan
    ts_add.iloc[8] = np.nan
    diff = tsh.diff(ts_repushed, ts_add)

    assert_df("""
2010-01-02     1.0
2010-01-03     2.0
2010-01-09     NaN
2010-01-12    11.0
2010-01-13    12.0""", diff.sort_index())
    # value on nan => value
    # nan on value => nan
    # nan on nan => Nothing
    # nan on nothing=> Nothing

    # full erasing
    # numeric
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 4)
    tsh.insert(engine, ts_begin, 'ts_full_del', 'test')

    ts_begin.iloc[:] = np.nan
    with pytest.raises(ValueError):
        tsh.insert(engine, ts_begin, 'ts_full_del', 'test')

    ts_end = genserie(datetime(2010, 1, 1), 'D', 4)
    tsh.insert(engine, ts_end, 'ts_full_del', 'test')

    # string

    ts_begin = genserie(datetime(2010, 1, 1), 'D', 4, ['text'])
    tsh.insert(engine, ts_begin, 'ts_full_del_str', 'test')

    ts_begin = pd.Series([np.nan] * 4, name='ts_full_del_str',
                         index=ts_begin.index)

    with pytest.raises(ValueError):
        tsh.insert(engine, ts_begin, 'ts_full_del_str', 'test')

    ts_end = genserie(datetime(2010, 1, 1), 'D', 4, ['text'])
    tsh.insert(engine, ts_end, 'ts_full_del_str', 'test')
580

Aurélien Campéas's avatar
Aurélien Campéas committed
581

582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
def test_deletion_over_horizon(engine, tsh):
    """Check the series interval after NaN insertions overflowing its bounds.

    A 3-day series is inserted, then two all-NaN series: one starting
    on its last day and running past it, one starting before its first
    day and ending on it.  ``interval`` is checked after each.
    """
    ts = pd.Series(
        [1, 2, 3],
        index=pd.date_range(datetime(2018, 1, 1), freq='D', periods=3)
    )

    name = 'delete_over_hz'
    tsh.insert(engine, ts, name, 'Babar')

    # NaN from the last existing day onwards
    ts = pd.Series(
        [np.nan, np.nan, np.nan],
        index=pd.date_range(datetime(2018, 1, 3), freq='D', periods=3)
    )

    tsh.insert(engine, ts, name, 'Celeste')
    ival = tsh.interval(engine, name)
    assert ival.left == datetime(2018, 1, 1)
    assert ival.right == datetime(2018, 1, 2)

    # NaN from before the series start up to its first day
    ts = pd.Series(
        [np.nan, np.nan, np.nan],
        index=pd.date_range(datetime(2017, 12, 30), freq='D', periods=3)
    )
    tsh.insert(engine, ts, name, 'Arthur')
    ival = tsh.interval(engine, name)
    assert ival.left == datetime(2018, 1, 2)
    assert ival.right == datetime(2018, 1, 2)
609
610


611
def test_get_history(engine, tsh):
    """Check ``get_history`` output, its diff mode and its range filters.

    Three growing versions of 'smallserie' are inserted at explicit
    insertion dates.  The test then checks the log, the full history,
    the ``diffmode=True`` history, a replay of the history into
    'smallserie2', and the restrictions by insertion dates and by value
    dates.  An unknown series name yields None.
    """
    for numserie in (1, 2, 3):
        with engine.begin() as cn:
            tsh.insert(cn, genserie(datetime(2017, 1, 1), 'D', numserie), 'smallserie',
                       'aurelien.campeas@pythonian.fr',
                       _insertion_date=utcdt(2017, 2, numserie))

    ts = tsh.get(engine, 'smallserie')
    assert_df("""
2017-01-01    0.0
2017-01-02    1.0
2017-01-03    2.0
""", ts)

    logs = tsh.log(engine, names=['smallserie'])
    # the 'rev' key is left out of the comparison
    assert [
        {'author': 'aurelien.campeas@pythonian.fr',
         'meta': {},
         'date': pd.Timestamp('2017-02-01 00:00:00+0000', tz='UTC'),
         'names': ['smallserie']
        },
        {'author': 'aurelien.campeas@pythonian.fr',
         'meta': {},
         'date': pd.Timestamp('2017-02-02 00:00:00+0000', tz='UTC'),
         'names': ['smallserie']
        },
        {'author': 'aurelien.campeas@pythonian.fr',
         'meta': {},
         'date': pd.Timestamp('2017-02-03 00:00:00+0000', tz='UTC'),
         'names': ['smallserie']
        }
    ] == [{k: v for k, v in log.items() if k != 'rev'}
          for log in logs]
    histts = tsh.get_history(engine, 'smallserie')

    assert_hist("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
                           2017-01-03    2.0
""", histts)

    diffs = tsh.get_history(engine, 'smallserie', diffmode=True)
    assert_hist("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-03    2.0
""", diffs)

    # replay each historical version into a second series
    for idate in histts:
        with engine.begin() as cn:
            idate = idate.replace(tzinfo=pytz.UTC)
            tsh.insert(cn, histts[idate], 'smallserie2',
                       'aurelien.campeas@pythonian.f', _insertion_date=idate)

    # this is perfectly round-tripable
    assert (tsh.get(engine, 'smallserie2') == ts).all()
    assert_hist_equals(tsh.get_history(engine, 'smallserie2'), histts)

    # get history ranges
    tsa = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2017, 2, 2))
    assert_hist("""
insertion_date             value_date
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
                           2017-01-03    2.0
""", tsa)

    tsb = tsh.get_history(engine, 'smallserie',
                          to_insertion_date=datetime(2017, 2, 2))
    assert_hist("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
""", tsb)

    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2017, 2, 2),
                          to_insertion_date=datetime(2017, 2, 2))
    assert_hist("""
insertion_date             value_date
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
""", tsc)

    # window after the last insertion: empty history
    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2017, 2, 4),
                          to_insertion_date=datetime(2017, 2, 4))
    assert tsc == {}

    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2016, 2, 1),
                          to_insertion_date=datetime(2017, 2, 2))
    assert_hist("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
""", tsc)

    # window before the first insertion: empty history
    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2016, 2, 1),
                          to_insertion_date=datetime(2016, 12, 31))
    assert tsc == {}

    # restrictions on value dates
    tsc = tsh.get_history(engine, 'smallserie',
                          from_value_date=datetime(2017, 1, 1),
                          to_value_date=datetime(2017, 1, 2))
    assert_hist("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
""", tsc)

    tsc = tsh.get_history(engine, 'smallserie',
                          from_value_date=datetime(2017, 1, 2))
    assert_hist("""
insertion_date             value_date
2017-02-02 00:00:00+00:00  2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-02    1.0
                           2017-01-03    2.0
""", tsc)

    tsc = tsh.get_history(engine, 'smallserie',
                          to_value_date=datetime(2017, 1, 2))
    assert_hist("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
""", tsc)

    tsc = tsh.get_history(engine, 'no-such-series')
    assert tsc is None

760

761
762
763
764
765
766
767
def test_history_delta(engine, tsh):
    """Check the ``deltabefore`` / ``deltaafter`` options of ``get_history``.

    Two hourly series of 6 points, each starting one hour before its
    own insertion date, are inserted under 'hd'.  The history is then
    compared with and without the delta parameters.
    """
    for d in range(1, 3):
        idate = utcdt(2018, 1, d)
        serie = genserie(idate - timedelta(hours=1), 'H', 6, initval=[d])
        tsh.insert(engine, serie, 'hd', 'aurelien.campeas@pythonian.fr',
                   _insertion_date=idate)

    assert_hist("""
insertion_date             value_date               
2018-01-01 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
                           2018-01-01 03:00:00+00:00    1.0
                           2018-01-01 04:00:00+00:00    1.0
2018-01-02 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
                           2018-01-01 03:00:00+00:00    1.0
                           2018-01-01 04:00:00+00:00    1.0
                           2018-01-01 23:00:00+00:00    2.0
                           2018-01-02 00:00:00+00:00    2.0
                           2018-01-02 01:00:00+00:00    2.0
                           2018-01-02 02:00:00+00:00    2.0
                           2018-01-02 03:00:00+00:00    2.0
                           2018-01-02 04:00:00+00:00    2.0
    """, tsh.get_history(engine, 'hd'))

    # only an upper bound relative to each insertion date
    assert_hist("""
insertion_date             value_date               
2018-01-01 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
2018-01-02 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
                           2018-01-01 03:00:00+00:00    1.0
                           2018-01-01 04:00:00+00:00    1.0
                           2018-01-01 23:00:00+00:00    2.0
                           2018-01-02 00:00:00+00:00    2.0
                           2018-01-02 01:00:00+00:00    2.0
                           2018-01-02 02:00:00+00:00    2.0
""", tsh.get_history(engine, 'hd', deltaafter=timedelta(hours=2)))

    # both bounds relative to each insertion date
    assert_hist("""
insertion_date             value_date               
2018-01-01 00:00:00+00:00  2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
2018-01-02 00:00:00+00:00  2018-01-02 00:00:00+00:00    2.0
                           2018-01-02 01:00:00+00:00    2.0
""", tsh.get_history(engine, 'hd',
                     deltabefore=timedelta(hours=0),
                     deltaafter=timedelta(hours=1)))


819
820
821
822
823
824
825
826
827
828
829
def test_nr_gethistory(engine, tsh):
    """Check ``get_history`` with positional insertion/value date bounds.

    A first series is inserted without explicit insertion date, then
    five scaled versions (``s1 * i``) are inserted on successive days
    from 2016-01-01.  The history is requested with four positional
    date arguments and compared to the two expected versions.
    """
    # pd.date_range replaces the DatetimeIndex(start=, end=, freq=)
    # constructor form, which recent pandas versions no longer accept
    s0 = pd.Series([-1, 0, 0, -1],
                   index=pd.date_range(start=datetime(2016, 12, 29),
                                       end=datetime(2017, 1, 1),
                                       freq='D'))
    tsh.insert(engine, s0, 'foo', 'zogzog')

    s1 = pd.Series([1, 0, 0, 1],
                   index=pd.date_range(start=datetime(2017, 1, 1),
                                       end=datetime(2017, 1, 4),
                                       freq='D'))
    idate = utcdt(2016, 1, 1)
    for i in range(5):
        with engine.begin() as cn:
            tsh.insert(cn, s1 * i, 'foo',
                       'aurelien.campeas@pythonian.f',
                       _insertion_date=idate + timedelta(days=i))

    df = tsh.get_history(engine, 'foo',
                         datetime(2016, 1, 3),
                         datetime(2016, 1, 4),
                         datetime(2017, 1, 1),
                         datetime(2017, 1, 4))

    assert_hist("""
insertion_date             value_date
2016-01-03 00:00:00+00:00  2017-01-01    2.0
                           2017-01-02    0.0
                           2017-01-03    0.0
                           2017-01-04    2.0
2016-01-04 00:00:00+00:00  2017-01-01    3.0
                           2017-01-02    0.0
                           2017-01-03    0.0
                           2017-01-04    3.0
""", df)


856
def test_add_na(engine, tsh):
    """Check that NaN-only additions produce no diff and store nothing.

    Covers a first insertion made only of NaN, then the insertion of
    existing data extended with trailing NaN points.
    """
    # a serie of NaNs won't be insert in base
    # in case of first insertion
    ts_nan = genserie(datetime(2010, 1, 1), 'D', 5)
    ts_nan[[True] * len(ts_nan)] = np.nan

    diff = tsh.insert(engine, ts_nan, 'ts_add_na', 'test')
    assert diff is None

    # in case of insertion in existing data
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 5)
    tsh.insert(engine, ts_begin, 'ts_add_na', 'test')

    ts_nan = genserie(datetime(2010, 1, 6), 'D', 5)
    ts_nan[[True] * len(ts_nan)] = np.nan
    ts_nan = pd.concat([ts_begin, ts_nan])

    diff = tsh.insert(engine, ts_nan, 'ts_add_na', 'test')
    assert diff is None

    # only the five non-NaN points are stored
    result = tsh.get(engine, 'ts_add_na')
    assert len(result) == 5
878
879


880
def test_dtype_mismatch(engine, tsh):
    """Check the error raised when re-inserting with another value type.

    Tried both ways: float data over a string series ('error1') and
    string data over a float series ('error2'); the exact exception
    message is asserted in each case.
    """
    tsh.insert(engine,
               genserie(datetime(2015, 1, 1), 'D', 11).astype('str'),
               'error1',
               'test')

    with pytest.raises(Exception) as excinfo:
        tsh.insert(engine,
                   genserie(datetime(2015, 1, 1), 'D', 11),
                   'error1',
                   'test')

    assert 'Type error when inserting error1, new type is float64, type in base is object' == str(excinfo.value)

    tsh.insert(engine,
               genserie(datetime(2015, 1, 1), 'D', 11),
               'error2',
               'test')

    with pytest.raises(Exception) as excinfo:
        tsh.insert(engine,
                   genserie(datetime(2015, 1, 1), 'D', 11).astype('str'),
                   'error2',
                   'test')

    assert 'Type error when inserting error2, new type is object, type in base is float64' == str(excinfo.value)
906
907


908
909
910
911
912
913
def test_precision(engine, tsh):
    """Check the float value read back after an insert/get round trip.

    Also verifies that reinserting either the round-tripped series or
    the original one is seen as a no-op (``insert`` returns ``None``).
    """
    floaty = 0.123456789123456789
    ts = genserie(datetime(2015, 1, 1), 'D', 5, initval=[floaty])

    tsh.insert(engine, ts, 'precision', 'test')
    ts_round = tsh.get(engine, 'precision')
    assert 0.12345678912345678 == ts_round.iloc[0]

    # the round-tripped series does not produce a diff when reinserted
    diff = tsh.insert(engine, ts_round, 'precision', 'test')
    assert diff is None

    # neither does the original series
    diff = tsh.insert(engine, ts, 'precision', 'test')
    assert diff is None


923
924
925
926
927
928
929
930
def test_serie_deletion(engine, tsh):
    """Check that deleting a series leaves the other series untouched.

    Two series receive the same two insertions, then one of them is
    deleted within a transaction. The counters returned by
    ``assert_structures`` before and after the deletion are compared,
    and a deletion attempted with the bare engine must be refused.
    """
    ts = genserie(datetime(2018, 1, 10), 'H', 10)
    tsh.insert(engine, ts, 'keepme', 'Babar')
    tsh.insert(engine, ts, 'deleteme', 'Celeste')
    ts = genserie(datetime(2018, 1, 12), 'H', 10)
    tsh.insert(engine, ts, 'keepme', 'Babar')
    tsh.insert(engine, ts, 'deleteme', 'Celeste')

    seriecount, csetcount, csetseriecount = assert_structures(engine, tsh)

    with engine.begin() as cn:
        tsh.delete(cn, 'deleteme')

    assert not tsh.exists(engine, 'deleteme')
    # only the two insertions made on 'keepme' remain in the log
    log = [entry['author']
           for entry in tsh.log(engine, names=('keepme', 'deleteme'))]
    assert log == ['Babar', 'Babar']

    seriecount2, csetcount2, csetseriecount2 = assert_structures(engine, tsh)

    assert csetcount - csetcount2 == 2
    assert csetseriecount - csetseriecount2 == 2
    assert seriecount - seriecount2 == 1

    # deleting requires a connection obtained from a transaction
    with pytest.raises(AssertionError) as werr:
        tsh.delete(engine, 'keepme')
    assert werr.value.args[0] == 'use a transaction object'

    # the name of a deleted series can be used again
    tsh.insert(engine, ts, 'deleteme', 'Celeste')

953

954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
def test_strip(engine, tsh):
    """Check that ``strip`` removes the revisions from a changeset onward.

    Four revisions of 'xserie' are inserted, interleaved with unrelated
    insertions into 'yserie'. After stripping 'xserie' at the changeset
    found for 2017-01-03, only its first two revisions must remain in
    the history, in the current value and in the log, while the
    'yserie' log entries are all still there.
    """
    for i in range(1, 5):
        pubdate = utcdt(2017, 1, i)
        ts = genserie(datetime(2017, 1, 10), 'H', 1 + i)
        tsh.insert(engine, ts, 'xserie', 'babar', _insertion_date=pubdate)
        # also insert something completely unrelated
        tsh.insert(engine, genserie(datetime(2018, 1, 1), 'D', 1 + i), 'yserie', 'celeste')

    csida = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3))
    assert csida is not None
    csidb = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3, 1), mode='before')
    csidc = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3, 1), mode='after')
    assert csidb < csida < csidc

    log = tsh.log(engine, names=['xserie', 'yserie'])
    assert [(idx, entry['author']) for idx, entry in enumerate(log, start=1)
    ] == [
        (1, 'babar'),
        (2, 'celeste'),
        (3, 'babar'),
        (4, 'celeste'),
        (5, 'babar'),
        (6, 'celeste'),
        (7, 'babar'),
        (8, 'celeste')
    ]

    h = tsh.get_history(engine, 'xserie')
    assert_hist("""
insertion_date             value_date         
2017-01-01 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
2017-01-02 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
                           2017-01-10 02:00:00    2.0
2017-01-03 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
                           2017-01-10 02:00:00    2.0
                           2017-01-10 03:00:00    3.0
2017-01-04 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
                           2017-01-10 02:00:00    2.0
                           2017-01-10 03:00:00    3.0
                           2017-01-10 04:00:00    4.0
""", h)

    csid = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3))
    with engine.begin() as cn:
        tsh.strip(cn, 'xserie', csid)

    assert_hist("""
insertion_date             value_date         
2017-01-01 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
2017-01-02 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
                           2017-01-10 02:00:00    2.0
""", tsh.get_history(engine, 'xserie'))

    assert_df("""
2017-01-10 00:00:00    0.0
2017-01-10 01:00:00    1.0
2017-01-10 02:00:00    2.0
""", tsh.get(engine, 'xserie'))

    log = tsh.log(engine, names=['xserie', 'yserie'])
    # 5 and 7 have disappeared
    assert [entry['author'] for entry in log
    ] == ['babar', 'celeste', 'babar', 'celeste', 'celeste', 'celeste']

    log = tsh.log(engine, stripped=True, names=['xserie', 'yserie'])
    for entry in log:
        meta = entry['meta']
        if not meta:
            continue
        # only entries carrying a 'tshistory.info' note are checked
        stripinfo = meta.get('tshistory.info')
        if stripinfo:
            assert stripinfo.startswith('got stripped from')


1033
1034
1035
def test_long_name(engine, tsh):
    """Check that a series with a 64-character name can be stored and read."""
    serie = genserie(datetime(2010, 1, 1), 'D', 40)

    name = 'a' * 64
    tsh.insert(engine, serie, name, 'babar')
    assert tsh.get(engine, name) is not None


def test_get_delta(engine, tsh):
1042
1043
1044
    assert tsh.get_delta(engine, 'no-such-series',
                         delta=pd.Timedelta(days=2)) is None

1045
1046
1047
1048