test_tsio.py 55.3 KB
Newer Older
1
from datetime import datetime, timedelta
2
3
from pathlib import Path
import pytz
4

5
from dateutil import parser
6
import pytest
7
8
import numpy as np
import pandas as pd
9

10
from tshistory.snapshot import Snapshot
11
from tshistory.util import threadpool
12
from tshistory.tsio import timeseries
13
14
from tshistory.testutil import (
    assert_df,
15
16
    assert_hist,
    assert_hist_equals,
17
    assert_group_equals,
18
    assert_structures,
19
20
21
    genserie,
    tempattr
)
22

23
DATADIR = Path(__file__).parent / 'data'
24

Aurélien Campéas's avatar
Aurélien Campéas committed
25

26
27
28
29
def utcdt(*dt):
    return pd.Timestamp(datetime(*dt), tz='UTC')


30
def test_in_tx(tsh, engine):
31
32
    assert tsh.type(engine, 'foo') == 'primary'

33
    with pytest.raises(TypeError) as err:
34
        tsh.insert(engine.connect(), 0, 0, 0)
35
36
37
38
39
40
41
42
    assert err.value.args[0] == 'You must use a transaction object'

    ts = genserie(datetime(2017, 10, 28, 23),
                  'H', 4, tz='UTC')
    with engine.begin() as cn:
        tsh.insert(cn, ts, 'test_tx', 'Babar')


43
def test_tstamp_roundtrip(engine, tsh):
44
    assert_structures(engine, tsh)
45
46
    ts = genserie(datetime(2017, 10, 28, 23),
                  'H', 4, tz='UTC')
47
48
49
50
51
52
53
54
55
56
    ts.index = ts.index.tz_convert('Europe/Paris')

    assert_df("""
2017-10-29 01:00:00+02:00    0
2017-10-29 02:00:00+02:00    1
2017-10-29 02:00:00+01:00    2
2017-10-29 03:00:00+01:00    3
Freq: H
    """, ts)

57
58
    tsh.insert(engine, ts, 'tztest', 'Babar',
               _insertion_date=utcdt(2018, 1, 1))
59
    back = tsh.get(engine, 'tztest')
60
61
62

    # though un localized we understand it's been normalized to utc
    assert_df("""
63
64
65
66
2017-10-28 23:00:00+00:00    0.0
2017-10-29 00:00:00+00:00    1.0
2017-10-29 01:00:00+00:00    2.0
2017-10-29 02:00:00+00:00    3.0
67
68
69
""", back)

    assert (ts.index == back.index).all()
70
    assert str(back.index.dtype) == 'datetime64[ns, UTC]'
71

72
73
74
75
    ival = tsh.interval(engine, 'tztest')
    assert ival.left == pd.Timestamp('2017-10-28 23:00:00+0000', tz='UTC')
    assert ival.right == pd.Timestamp('2017-10-29 02:00:00+0000', tz='UTC')

76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
    ts = genserie(datetime(2017, 10, 29, 1),
                  'H', 4, tz='UTC')
    ts.index = ts.index.tz_convert('Europe/Paris')
    tsh.insert(engine, ts, 'tztest', 'Celeste',
                   _insertion_date=utcdt(2018, 1, 3))

    ts = tsh.get(engine, 'tztest')
    assert_df("""
2017-10-28 23:00:00+00:00    0.0
2017-10-29 00:00:00+00:00    1.0
2017-10-29 01:00:00+00:00    0.0
2017-10-29 02:00:00+00:00    1.0
2017-10-29 03:00:00+00:00    2.0
2017-10-29 04:00:00+00:00    3.0
""", ts)

92
    hist = tsh.history(engine, 'tztest')
93
94
95
96
97
98
99
100
101
102
103
104
105
106
    assert_hist("""
insertion_date             value_date               
2018-01-01 00:00:00+00:00  2017-10-28 23:00:00+00:00    0.0
                           2017-10-29 00:00:00+00:00    1.0
                           2017-10-29 01:00:00+00:00    2.0
                           2017-10-29 02:00:00+00:00    3.0
2018-01-03 00:00:00+00:00  2017-10-28 23:00:00+00:00    0.0
                           2017-10-29 00:00:00+00:00    1.0
                           2017-10-29 01:00:00+00:00    0.0
                           2017-10-29 02:00:00+00:00    1.0
                           2017-10-29 03:00:00+00:00    2.0
                           2017-10-29 04:00:00+00:00    3.0
""", hist)

107
108
109
    hist = tsh.history(engine, 'tztest',
                       from_value_date=utcdt(2017, 10, 29, 1),
                       to_value_date=utcdt(2017, 10, 29, 3))
110
111
112
113
114
115
116
117
118
    assert_hist("""
insertion_date             value_date               
2018-01-01 00:00:00+00:00  2017-10-29 01:00:00+00:00    2.0
                           2017-10-29 02:00:00+00:00    3.0
2018-01-03 00:00:00+00:00  2017-10-29 01:00:00+00:00    0.0
                           2017-10-29 02:00:00+00:00    1.0
                           2017-10-29 03:00:00+00:00    2.0
""", hist)

119
120
121
    ival = tsh.interval(engine, 'tztest')
    assert ival.left == pd.Timestamp('2017-10-28 23:00:00+0000', tz='UTC')
    assert ival.right == pd.Timestamp('2017-10-29 04:00:00+0000', tz='UTC')
122
    assert_structures(engine, tsh)
123

124

125
def test_differential(engine, tsh):
126
    assert_structures(engine, tsh)
127
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 10)
128
    tsh.insert(engine, ts_begin, 'ts_test', 'test')
129

130
    id1 = tsh.last_id(engine, 'ts_test')
131
    assert tsh._previous_cset(engine, 'ts_test', id1) is None
132

133
134
    assert tsh.exists(engine, 'ts_test')
    assert not tsh.exists(engine, 'this_does_not_exist')
135

136
137
138
139
140
141
142
143
    assert tsh.interval(engine, 'ts_test') == pd.Interval(
        datetime(2010, 1, 1, 0, 0), datetime(2010, 1, 10, 0, 0),
        closed='both'
    )

    with pytest.raises(ValueError):
        assert tsh.interval(engine, 'nosuchts')

144
    fetched = tsh.get(engine, 'ts_test')
145
    assert_df("""
146
147
148
149
150
151
152
153
154
155
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    3.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
156
157
""", fetched)
    assert fetched.name == 'ts_test'
158
159

    # we should detect the emission of a message
160
    tsh.insert(engine, ts_begin, 'ts_test', 'babar')
161

162
    assert_df("""
163
164
165
166
167
168
169
170
171
172
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    3.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
173
""", tsh.get(engine, 'ts_test'))
174
175
176
177

    ts_slight_variation = ts_begin.copy()
    ts_slight_variation.iloc[3] = 0
    ts_slight_variation.iloc[6] = 0
178
    tsh.insert(engine, ts_slight_variation, 'ts_test', 'celeste')
179
    id2 = tsh.last_id(engine, 'ts_test')
180
    assert tsh._previous_cset(engine, 'ts_test', id2) == id1
181

182
    assert_df("""
183
184
185
186
187
188
189
190
191
192
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    0.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    0.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
193
""", tsh.get(engine, 'ts_test'))
194

195
    ts_longer = genserie(datetime(2010, 1, 3), 'D', 15)
196
197
198
199
    ts_longer.iloc[1] = 2.48
    ts_longer.iloc[3] = 3.14
    ts_longer.iloc[5] = ts_begin.iloc[7]

200
    with engine.begin() as cn:
201
        tsh.insert(cn, ts_longer, 'ts_test', 'test')
202
203
204
    id3 = tsh.last_id(engine, 'ts_test')

    assert id1 < id2 < id3
205

206
    assert_df("""
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
2010-01-01     0.00
2010-01-02     1.00
2010-01-03     0.00
2010-01-04     2.48
2010-01-05     2.00
2010-01-06     3.14
2010-01-07     4.00
2010-01-08     7.00
2010-01-09     6.00
2010-01-10     7.00
2010-01-11     8.00
2010-01-12     9.00
2010-01-13    10.00
2010-01-14    11.00
2010-01-15    12.00
2010-01-16    13.00
2010-01-17    14.00
224
""", tsh.get(engine, 'ts_test'))
225

226
227
228
229
230
    assert tsh.interval(engine, 'ts_test') == pd.Interval(
        datetime(2010, 1, 1, 0, 0), datetime(2010, 1, 17, 0, 0),
        closed='both'
    )

231
    # insert single data, in override of previous one
232
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 5, initval=[2])
233
    ts_begin.loc['2010-01-04'] = -1
234
    tsh.insert(engine, ts_begin, 'ts_mixte', 'test')
235

236
    assert_df("""
237
238
239
240
241
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
242
""", tsh.get(engine, 'ts_mixte'))
243

244
    # add new series with one additional values
245
    ts_more = genserie(datetime(2010, 1, 2), 'D', 5, [2])
246
    ts_more.loc['2010-01-04'] = -1
247
    tsh.insert(engine, ts_more, 'ts_mixte', 'test')
248

249
    assert_df("""
250
251
252
253
254
255
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
256
""", tsh.get(engine, 'ts_mixte'))
257
258

    # just append an extra data point
259
260
    # with no intersection with the previous ts
    ts_one_more = genserie(datetime(2010, 1, 7), 'D', 1, [3])
261
    tsh.insert(engine, ts_one_more, 'ts_mixte', 'test')
262

263
    assert_df("""
264
265
266
267
268
269
270
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
271
""", tsh.get(engine, 'ts_mixte'))
272

273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
    assert_df("""
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
    """, tsh.get(engine, 'ts_mixte',
                 from_value_date=datetime(2010, 1, 3),
                 to_value_date=datetime(2010, 1, 6)
    ))

    assert_df("""
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
        """, tsh.get(engine, 'ts_mixte',
                     from_value_date=datetime(2010, 1, 4)
    ))

    assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
            """, tsh.get(engine, 'ts_mixte',
                         to_value_date=datetime(2010, 1, 5)
        ))

302
    with engine.begin() as cn:
303
        cn.execute(
304
            f'set search_path to "{tsh.namespace}.timeserie", "{tsh.namespace}", public'
305
        )
306
307
        allts = pd.read_sql("select seriename, table_name from registry "
                            "where seriename in ('ts_test', 'ts_mixte')",
308
                            cn)
309

310
        assert_df("""
311
312
313
seriename table_name
0   ts_test    ts_test
1  ts_mixte   ts_mixte
314
""".format(tsh.namespace), allts)
315

316
        assert_df("""
317
318
319
320
321
322
323
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
324
""", tsh.get(cn, 'ts_mixte',
325
             revision_date=datetime.now()))
326

327
328
    assert_structures(engine, tsh)

329

330
331
332
333
334
335
def test_serie_metadata(engine, tsh):
    serie = genserie(datetime(2010, 1, 1), 'D', 1, initval=[1])
    tsh.insert(engine, serie, 'ts-metadata', 'babar')

    initialmeta = tsh.metadata(engine, 'ts-metadata')
    assert initialmeta == {
336
        'index_dtype': '<M8[ns]',
337
        'index_type': 'datetime64[ns]',
338
339
340
        'tzaware': False,
        'value_dtype': '<f8',
        'value_type': 'float64'
341
342
343
344
345
346
347
348
349
350
351
    }

    tsh.update_metadata(engine, 'ts-metadata',
                        {'topic': 'banana spot price'}
    )
    assert tsh.metadata(engine, 'ts-metadata')['topic'] == 'banana spot price'

    with pytest.raises(AssertionError):
        tsh.update_metadata(engine, 'ts-metadata', {'tzaware': True})

    assert tsh.metadata(engine, 'ts-metadata') == {
352
353
354
        'index_dtype': '<M8[ns]',
        'index_type': 'datetime64[ns]',
        'topic': 'banana spot price',
355
        'tzaware': False,
356
357
        'value_dtype': '<f8',
        'value_type': 'float64'
358
    }
359

360

361
362
def test_changeset_metadata(engine, tsh):
    serie = genserie(datetime(2010, 1, 1), 'D', 1, initval=[1])
363
    tsh.insert(engine, serie, 'ts-cs-metadata', 'babar',
364
365
               {'foo': 'A', 'bar': 42})

366
    log = tsh.log(engine, names=['ts-cs-metadata'])
367
368
369
    meta = tsh.changeset_metadata(engine, log[0]['rev'])
    assert meta == {'foo': 'A', 'bar': 42}

370
371
372
    log = tsh.log(engine, names=['ts-cs-metadata'], limit=1)
    assert len(log) == 1

373

374
def test_revision_date(engine, tsh):
375
    for i in range(1, 5):
376
        with engine.begin() as cn:
377
378
            tsh.insert(cn, genserie(datetime(2017, 1, i), 'D', 3, [i]), 'revdate',
                       'test', _insertion_date=utcdt(2016, 1, i))
379
380

    # end of prologue, now some real meat
381
    idate0 = pd.Timestamp('2015-1-1 00:00:00', tz='UTC')
382
383
384
385
    ts = genserie(datetime(2010, 1, 4), 'D', 4, [0], name='truc')
    tsh.insert(engine, ts, 'ts_through_time',
               'test', _insertion_date=idate0)
    assert idate0 == tsh.latest_insertion_date(engine, 'ts_through_time')
386

387
    idate1 = pd.Timestamp('2015-1-1 15:45:23', tz='UTC')
388
389
390
391
    ts = genserie(datetime(2010, 1, 4), 'D', 4, [1], name='truc')
    tsh.insert(engine, ts, 'ts_through_time',
               'test', _insertion_date=idate1)
    assert idate1 == tsh.latest_insertion_date(engine, 'ts_through_time')
392

393
    idate2 = pd.Timestamp('2015-1-2 15:43:23', tz='UTC')
394
395
396
397
    ts = genserie(datetime(2010, 1, 4), 'D', 4, [2], name='truc')
    tsh.insert(engine, ts, 'ts_through_time',
               'test', _insertion_date=idate2)
    assert idate2 == tsh.latest_insertion_date(engine, 'ts_through_time')
398

399
    idate3 = pd.Timestamp('2015-1-3', tz='UTC')
400
401
402
403
    ts = genserie(datetime(2010, 1, 4), 'D', 4, [3], name='truc')
    tsh.insert(engine, ts, 'ts_through_time',
               'test', _insertion_date=idate3)
    assert idate3 == tsh.latest_insertion_date(engine, 'ts_through_time')
404

405
    ts = tsh.get(engine, 'ts_through_time')
406

407
    assert_df("""
408
409
410
411
2010-01-04    3.0
2010-01-05    3.0
2010-01-06    3.0
2010-01-07    3.0
412
""", ts)
413

414
    ts = tsh.get(engine, 'ts_through_time',
Aurélien Campéas's avatar
Aurélien Campéas committed
415
                 revision_date=datetime(2015, 1, 2, 18, 43, 23))
416

417
    assert_df("""
418
419
420
421
2010-01-04    2.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    2.0
422
""", ts)
423

424
    ts = tsh.get(engine, 'ts_through_time',
425
                 revision_date=datetime(2015, 1, 1, 18, 43, 23))
426

427
    assert_df("""
428
429
430
431
2010-01-04    1.0
2010-01-05    1.0
2010-01-06    1.0
2010-01-07    1.0
432
""", ts)
433

434
    ts = tsh.get(engine, 'ts_through_time',
435
                 revision_date=datetime(2014, 1, 1, 18, 43, 23))
436
437
438

    assert ts is None

439
440
441
442
443
444
445
446
447
448
    # epilogue: back to the revdate issue
    assert_df("""
2017-01-01    1.0
2017-01-02    2.0
2017-01-03    3.0
2017-01-04    4.0
2017-01-05    4.0
2017-01-06    4.0
""", tsh.get(engine, 'revdate'))

449
    oldstate = tsh.get(engine, 'revdate', revision_date=datetime(2016, 1, 2))
450
451
452
    assert_df("""
2017-01-01    1.0
2017-01-02    2.0
453
454
455
2017-01-03    2.0
2017-01-04    2.0
""", oldstate)
456

457

458
def test_point_deletion(engine, tsh):
459
460
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 11)
    ts_begin.iloc[-1] = np.nan
461
    tsh.insert(engine, ts_begin, 'ts_del', 'test')
462

463
    _, ts = Snapshot(engine, tsh, 'ts_del').find()
464
    assert ts.iloc[-2] == 8.0
465

466
    ts_begin.iloc[0] = np.nan
467
    ts_begin.iloc[3] = np.nan
468

469
    tsh.insert(engine, ts_begin, 'ts_del', 'test')
470

471
    assert_df("""
472
473
474
475
476
477
478
2010-01-02    1.0
2010-01-03    2.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
479
2010-01-10    9.0
480
""", tsh.get(engine, 'ts_del'))
481

482
    ts2 = tsh.get(engine, 'ts_del',
483
484
                 # force snapshot reconstruction feature
                 revision_date=datetime(2038, 1, 1))
485
    assert (tsh.get(engine, 'ts_del') == ts2).all()
486

487
488
489
    ts_begin.iloc[0] = 42
    ts_begin.iloc[3] = 23

490
    tsh.insert(engine, ts_begin, 'ts_del', 'test')
491

492
    assert_df("""
493
494
495
496
497
498
499
500
501
2010-01-01    42.0
2010-01-02     1.0
2010-01-03     2.0
2010-01-04    23.0
2010-01-05     4.0
2010-01-06     5.0
2010-01-07     6.0
2010-01-08     7.0
2010-01-09     8.0
502
2010-01-10     9.0
503
""", tsh.get(engine, 'ts_del'))
504
505
506

    # now with string!

507
    ts_string = genserie(datetime(2010, 1, 1), 'D', 10, ['machin'])
508
    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
509
510
511
512

    ts_string[4] = None
    ts_string[5] = None

513
    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
514
    assert_df("""
515
516
517
518
519
520
521
522
2010-01-01    machin
2010-01-02    machin
2010-01-03    machin
2010-01-04    machin
2010-01-07    machin
2010-01-08    machin
2010-01-09    machin
2010-01-10    machin
523
""", tsh.get(engine, 'ts_string_del'))
524
525
526
527

    ts_string[4] = 'truc'
    ts_string[6] = 'truc'

528
    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
529
    assert_df("""
530
531
532
533
534
535
536
537
2010-01-01    machin
2010-01-02    machin
2010-01-03    machin
2010-01-04    machin
2010-01-05      truc
2010-01-07      truc
2010-01-08    machin
2010-01-09    machin
538
2010-01-10    machin
539
""", tsh.get(engine, 'ts_string_del'))
540

541
    ts_string[ts_string.index] = np.nan
542
543
    with pytest.raises(ValueError):
        tsh.insert(engine, ts_string, 'ts_string_del', 'test')
544

545

546
547
def test_nan_first(engine, tsh):
    # first insertion with only nan
548
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 10, [np.nan])
549
    assert tsh.insert(engine, ts_begin, 'ts_null', 'test') is None
550

551

552
def test_more_point_deletion(engine, tsh):
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
    ts_repushed = genserie(datetime(2010, 1, 1), 'D', 11)
    ts_repushed[0:3] = np.nan

    assert_df("""
2010-01-01     NaN
2010-01-02     NaN
2010-01-03     NaN
2010-01-04     3.0
2010-01-05     4.0
2010-01-06     5.0
2010-01-07     6.0
2010-01-08     7.0
2010-01-09     8.0
2010-01-10     9.0
2010-01-11    10.0
Freq: D
""", ts_repushed)

571
572
    tsh.insert(engine, ts_repushed, 'ts_repushed', 'test')
    diff = tsh.insert(engine, ts_repushed, 'ts_repushed', 'test')
573
574
    assert diff is None

575
    # there is no difference
576
    assert 0 == len(tsh.diff(ts_repushed, ts_repushed))
577
578
579
580
581

    ts_add = genserie(datetime(2010, 1, 1), 'D', 15)
    ts_add.iloc[0] = np.nan
    ts_add.iloc[13:] = np.nan
    ts_add.iloc[8] = np.nan
582
    diff = tsh.diff(ts_repushed, ts_add)
583
584
585
586
587
588

    assert_df("""
2010-01-02     1.0
2010-01-03     2.0
2010-01-09     NaN
2010-01-12    11.0
589
2010-01-13    12.0""", diff.sort_index())
590
591
592
593
    # value on nan => value
    # nan on value => nan
    # nan on nan => Nothing
    # nan on nothing=> Nothing
594

Aurélien Campéas's avatar
Aurélien Campéas committed
595
    # full erasing
596
597
    # numeric
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 4)
598
    tsh.insert(engine, ts_begin, 'ts_full_del', 'test')
599

Aurélien Campéas's avatar
Aurélien Campéas committed
600
    ts_begin.iloc[:] = np.nan
601
602
    with pytest.raises(ValueError):
        tsh.insert(engine, ts_begin, 'ts_full_del', 'test')
603
604

    ts_end = genserie(datetime(2010, 1, 1), 'D', 4)
605
    tsh.insert(engine, ts_end, 'ts_full_del', 'test')
606
607
608
609

    # string

    ts_begin = genserie(datetime(2010, 1, 1), 'D', 4, ['text'])
610
    tsh.insert(engine, ts_begin, 'ts_full_del_str', 'test')
611

612
613
    ts_begin = pd.Series([np.nan] * 4, name='ts_full_del_str',
                         index=ts_begin.index)
614
615
616

    with pytest.raises(ValueError):
        tsh.insert(engine, ts_begin, 'ts_full_del_str', 'test')
617
618

    ts_end = genserie(datetime(2010, 1, 1), 'D', 4, ['text'])
619
    tsh.insert(engine, ts_end, 'ts_full_del_str', 'test')
620

Aurélien Campéas's avatar
Aurélien Campéas committed
621

622
def test_deletion_over_horizon(engine, tsh):
623
    idate = utcdt(2018, 2, 1)
624
625
626
627
628
629
    ts = pd.Series(
        [1, 2, 3],
        index=pd.date_range(datetime(2018, 1, 1), freq='D', periods=3)
    )

    name = 'delete_over_hz'
630
631
    tsh.insert(engine, ts, name, 'Babar',
               _insertion_date=idate)
632
633
634
635
636
637

    ts = pd.Series(
        [np.nan, np.nan, np.nan],
        index=pd.date_range(datetime(2018, 1, 3), freq='D', periods=3)
    )

638
639
    tsh.insert(engine, ts, name, 'Celeste',
               _insertion_date=idate.replace(day=2))
640
641
    ival = tsh.interval(engine, name)
    assert ival.left == datetime(2018, 1, 1)
642
    assert ival.right == datetime(2018, 1, 2)
643
644
645
646
647

    ts = pd.Series(
        [np.nan, np.nan, np.nan],
        index=pd.date_range(datetime(2017, 12, 30), freq='D', periods=3)
    )
648
649
    tsh.insert(engine, ts, name, 'Arthur',
               _insertion_date=idate.replace(day=3))
650
    ival = tsh.interval(engine, name)
651
652
    assert ival.left == datetime(2018, 1, 2)
    assert ival.right == datetime(2018, 1, 2)
653
654


655
def test_history(engine, tsh):
656
    for numserie in (1, 2, 3):
657
        with engine.begin() as cn:
658
659
660
            tsh.insert(cn, genserie(datetime(2017, 1, 1), 'D', numserie), 'smallserie',
                       'aurelien.campeas@pythonian.fr',
                       _insertion_date=utcdt(2017, 2, numserie))
661
662
663
664
665
666
667
668
669
670
671

    ts = tsh.get(engine, 'smallserie')
    assert_df("""
2017-01-01    0.0
2017-01-02    1.0
2017-01-03    2.0
""", ts)

    logs = tsh.log(engine, names=['smallserie'])
    assert [
        {'author': 'aurelien.campeas@pythonian.fr',
672
         'meta': {},
673
         'date': pd.Timestamp('2017-02-01 00:00:00+0000', tz='UTC'),
674
         'name': 'smallserie'
675
676
        },
        {'author': 'aurelien.campeas@pythonian.fr',
677
         'meta': {},
678
         'date': pd.Timestamp('2017-02-02 00:00:00+0000', tz='UTC'),
679
         'name': 'smallserie'
680
681
        },
        {'author': 'aurelien.campeas@pythonian.fr',
682
         'meta': {},
683
         'date': pd.Timestamp('2017-02-03 00:00:00+0000', tz='UTC'),
684
         'name': 'smallserie'
685
686
687
        }
    ] == [{k: v for k, v in log.items() if k != 'rev'}
          for log in logs]
688
    histts = tsh.history(engine, 'smallserie')
689

690
    assert_hist("""
691
692
693
694
695
696
697
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
                           2017-01-03    2.0
698
699
""", histts)

700
    diffs = tsh.history(engine, 'smallserie', diffmode=True)
701
    assert_hist("""
702
703
704
705
706
707
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-03    2.0
""", diffs)

708
    for idate in histts:
709
        with engine.begin() as cn:
710
            idate = idate.replace(tzinfo=pytz.timezone('UTC'))
711
712
            tsh.insert(cn, histts[idate], 'smallserie2',
                       'aurelien.campeas@pythonian.f', _insertion_date=idate)
713
714
715

    # this is perfectly round-tripable
    assert (tsh.get(engine, 'smallserie2') == ts).all()
716
    assert_hist_equals(tsh.history(engine, 'smallserie2'), histts)
717
718

    # get history ranges
719
720
    tsa = tsh.history(engine, 'smallserie',
                      from_insertion_date=datetime(2017, 2, 2))
721
    assert_hist("""
722
723
724
725
726
727
insertion_date             value_date
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
                           2017-01-03    2.0
728
729
""", tsa)

730
731
    tsb = tsh.history(engine, 'smallserie',
                      to_insertion_date=datetime(2017, 2, 2))
732
    assert_hist("""
733
734
735
736
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
737
738
""", tsb)

739
740
741
    tsc = tsh.history(engine, 'smallserie',
                      from_insertion_date=datetime(2017, 2, 2),
                      to_insertion_date=datetime(2017, 2, 2))
742
    assert_hist("""
743
744
745
insertion_date             value_date
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
746
747
""", tsc)

748
749
750
    tsc = tsh.history(engine, 'smallserie',
                      from_insertion_date=datetime(2017, 2, 4),
                      to_insertion_date=datetime(2017, 2, 4))
751
    assert tsc == {}
752

753
754
755
    tsc = tsh.history(engine, 'smallserie',
                      from_insertion_date=datetime(2016, 2, 1),
                      to_insertion_date=datetime(2017, 2, 2))
756
    assert_hist("""
757
758
759
760
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
761
762
""", tsc)

763
764
765
    tsc = tsh.history(engine, 'smallserie',
                      from_insertion_date=datetime(2016, 2, 1),
                      to_insertion_date=datetime(2016, 12, 31))
766
    assert tsc == {}
767

768
    # restrictions on value dates
769
770
771
    tsc = tsh.history(engine, 'smallserie',
                      from_value_date=datetime(2017, 1, 1),
                      to_value_date=datetime(2017, 1, 2))
772
    assert_hist("""
773
774
775
776
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
777
778
""", tsc)

779
780
    tsc = tsh.history(engine, 'smallserie',
                      from_value_date=datetime(2017, 1, 2))
781
    assert_hist("""
782
783
784
785
insertion_date             value_date
2017-02-02 00:00:00+00:00  2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-02    1.0
                           2017-01-03    2.0
786
787
""", tsc)

788
789
    tsc = tsh.history(engine, 'smallserie',
                      to_value_date=datetime(2017, 1, 2))
790
    assert_hist("""
791
792
793
794
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
795
796
""", tsc)

797
    tsc = tsh.history(engine, 'no-such-series')
798
799
    assert tsc is None

800
801
802
803
804
805
    idates = tsh.insertion_dates(engine, 'smallserie')
    assert idates == [
        pd.Timestamp('2017-02-01 00:00:00+0000', tz='UTC'),
        pd.Timestamp('2017-02-02 00:00:00+0000', tz='UTC'),
        pd.Timestamp('2017-02-03 00:00:00+0000', tz='UTC')
    ]
806
807
808
809
810
811
812
813
    idates = tsh.insertion_dates(
        engine, 'smallserie',
        fromdate=utcdt(2017, 2, 2),
        todate=utcdt(2017, 2, 2)
    )
    assert idates == [
        pd.Timestamp('2017-02-02 00:00:00+0000', tz='UTC'),
    ]
814
815


816

817
818
819
820
821
822
823
def test_history_delta(engine, tsh):
    for d in range(1, 3):
        idate = utcdt(2018, 1, d)
        serie = genserie(idate - timedelta(hours=1), 'H', 6, initval=[d])
        tsh.insert(engine, serie, 'hd', 'aurelien.campeas@pythonian.fr',
                   _insertion_date=idate)

824
    assert_hist("""
825
insertion_date             value_date               
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
2018-01-01 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
                           2018-01-01 03:00:00+00:00    1.0
                           2018-01-01 04:00:00+00:00    1.0
2018-01-02 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
                           2018-01-01 03:00:00+00:00    1.0
                           2018-01-01 04:00:00+00:00    1.0
                           2018-01-01 23:00:00+00:00    2.0
                           2018-01-02 00:00:00+00:00    2.0
                           2018-01-02 01:00:00+00:00    2.0
                           2018-01-02 02:00:00+00:00    2.0
                           2018-01-02 03:00:00+00:00    2.0
                           2018-01-02 04:00:00+00:00    2.0
844
    """, tsh.history(engine, 'hd'))
845

846
    assert_hist("""
847
insertion_date             value_date               
848
849
2018-01-01 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
850
851
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
852
853
854
855
856
857
858
859
2018-01-02 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
                           2018-01-01 03:00:00+00:00    1.0
                           2018-01-01 04:00:00+00:00    1.0
                           2018-01-01 23:00:00+00:00    2.0
                           2018-01-02 00:00:00+00:00    2.0
860
861
                           2018-01-02 01:00:00+00:00    2.0
                           2018-01-02 02:00:00+00:00    2.0
862
""",  tsh.history(engine, 'hd', deltaafter=timedelta(hours=2)))
863

864
    assert_hist("""
865
insertion_date             value_date               
866
2018-01-01 00:00:00+00:00  2018-01-01 00:00:00+00:00    1.0
867
                           2018-01-01 01:00:00+00:00    1.0
868
2018-01-02 00:00:00+00:00  2018-01-02 00:00:00+00:00    2.0
869
                           2018-01-02 01:00:00+00:00    2.0
870
871
872
""",  tsh.history(engine, 'hd',
                  deltabefore=timedelta(hours=0),
                  deltaafter=timedelta(hours=1)))
873
874


875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
def test_delta_na(engine, tsh):
    ldates = (
        utcdt(2015, 1, 20),
        utcdt(2015, 1, 21),
        utcdt(2015, 1, 22)
    )

    for idx, idate in enumerate(ldates):
        ts = pd.Series([idx] * 3, index = ldates)
        tsh.insert(engine, ts, 'without_na', 'arnaud',
                   _insertion_date=idate)

    assert_df("""
2015-01-20 00:00:00+00:00    2.0
2015-01-21 00:00:00+00:00    2.0
2015-01-22 00:00:00+00:00    2.0
""", tsh.get(engine, 'without_na'))

    assert_df("""
2015-01-20 00:00:00+00:00    0.0
2015-01-21 00:00:00+00:00    1.0
2015-01-22 00:00:00+00:00    2.0
897
    """, tsh.staircase(engine, 'without_na', delta=timedelta(hours=0)))
898
899
900
901
902
903
904
905
906
907
908
909
910
    #as expected

    assert_hist("""
insertion_date             value_date               
2015-01-20 00:00:00+00:00  2015-01-20 00:00:00+00:00    0.0
                           2015-01-21 00:00:00+00:00    0.0
                           2015-01-22 00:00:00+00:00    0.0
2015-01-21 00:00:00+00:00  2015-01-20 00:00:00+00:00    1.0
                           2015-01-21 00:00:00+00:00    1.0
                           2015-01-22 00:00:00+00:00    1.0
2015-01-22 00:00:00+00:00  2015-01-20 00:00:00+00:00    2.0
                           2015-01-21 00:00:00+00:00    2.0
                           2015-01-22 00:00:00+00:00    2.0
911
    """, tsh.history(engine, 'without_na'))
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932

    # now, the last insertion has Na as last value
    ldates = (
        utcdt(2015, 1, 20),
        utcdt(2015, 1, 21),
        utcdt(2015, 1, 22)
    )

    for idx, idate in enumerate(ldates):
        serie = pd.Series([float(idx)] * 3, index = ldates)
        if idx == 2:
            serie[-1] = np.nan
        tsh.insert(engine, serie, 'with_na', 'arnaud',
                   _insertion_date=idate)

    # the value at 2015-01-22 is hidden by the inserted nan
    assert_df("""
2015-01-20 00:00:00+00:00    2.0
2015-01-21 00:00:00+00:00    2.0
    """, tsh.get(engine, 'with_na'))

933
    # the last value is also correctly hidden
934
935
936
    assert_df("""
2015-01-20 00:00:00+00:00    0.0
2015-01-21 00:00:00+00:00    1.0
937
    """, tsh.staircase(engine, 'with_na', delta=timedelta(hours=0)))
938

939
    # the value gathered by staircase at value date 2015-01-22 is a
940
    # nan, so it masks the previous ones at the same date
941
942
943
944
945
946
947
948
949
950
    assert_hist("""
insertion_date             value_date               
2015-01-20 00:00:00+00:00  2015-01-20 00:00:00+00:00    0.0
                           2015-01-21 00:00:00+00:00    0.0
                           2015-01-22 00:00:00+00:00    0.0
2015-01-21 00:00:00+00:00  2015-01-20 00:00:00+00:00    1.0
                           2015-01-21 00:00:00+00:00    1.0
                           2015-01-22 00:00:00+00:00    1.0
2015-01-22 00:00:00+00:00  2015-01-20 00:00:00+00:00    2.0
                           2015-01-21 00:00:00+00:00    2.0
951
                           2015-01-22 00:00:00+00:00    NaN
952
        """, tsh.history(engine, 'with_na', _keep_nans=True))
953
954


955
956
def test_nr_gethistory(engine, tsh):
    s0 = pd.Series([-1, 0, 0, -1],
957
958
959
                   index=pd.date_range(start=datetime(2016, 12, 29),
                                       end=datetime(2017, 1, 1),
                                       freq='D'))
960
961
962
    tsh.insert(engine, s0, 'foo', 'zogzog')

    s1 = pd.Series([1, 0, 0, 1],
963
964
965
                   index=pd.date_range(start=datetime(2017, 1, 1),
                                       end=datetime(2017, 1, 4),
                                       freq='D'))
966
    idate = utcdt(2016, 1, 1)
967
    for i in range(5):
968
        with engine.begin() as cn:
969
970
971
            tsh.insert(cn, s1 * i, 'foo',
                       'aurelien.campeas@pythonian.f',
                       _insertion_date=idate + timedelta(days=i))
972

973
974
975
976
977
    df = tsh.history(engine, 'foo',
                     datetime(2016, 1, 3),
                     datetime(2016, 1, 4),
                     datetime(2017, 1, 1),
                     datetime(2017, 1, 4))
978

979
    assert_hist("""
980
981
982
983
984
985
986
987
988
insertion_date             value_date
2016-01-03 00:00:00+00:00  2017-01-01    2.0
                           2017-01-02    0.0
                           2017-01-03    0.0
                           2017-01-04    2.0
2016-01-04 00:00:00+00:00  2017-01-01    3.0
                           2017-01-02    0.0
                           2017-01-03    0.0
                           2017-01-04    3.0
989
990
991
""", df)


992
def test_add_na(engine, tsh):
993
994
995
996
997
    # a serie of NaNs won't be insert in base
    # in case of first insertion
    ts_nan = genserie(datetime(2010, 1, 1), 'D', 5)
    ts_nan[[True] * len(ts_nan)] = np.nan

998
    diff = tsh.insert(engine, ts_nan, 'ts_add_na', 'test')
999
    assert diff is None
1000
1001
1002

    # in case of insertion in existing data
    ts_begin = genserie(datetime(