test_tsio.py 55.5 KB
Newer Older
1
from datetime import datetime, timedelta
2
3
from pathlib import Path
import pytz
4

5
from dateutil import parser
6
import pytest
7
8
import numpy as np
import pandas as pd
9

10
from tshistory.snapshot import Snapshot
11
from tshistory.util import threadpool
12
from tshistory.tsio import timeseries
13
14
from tshistory.testutil import (
    assert_df,
15
16
    assert_hist,
    assert_hist_equals,
17
    assert_group_equals,
18
    assert_structures,
19
20
21
    genserie,
    tempattr
)
22

23
DATADIR = Path(__file__).parent / 'data'
24

Aurélien Campéas's avatar
Aurélien Campéas committed
25

26
27
28
29
def utcdt(*dt):
    return pd.Timestamp(datetime(*dt), tz='UTC')


30
def test_in_tx(tsh, engine):
31
32
    assert tsh.type(engine, 'foo') == 'primary'

33
    with pytest.raises(TypeError) as err:
34
        tsh.insert(engine.connect(), 0, 0, 0)
35
36
37
38
39
40
41
42
    assert err.value.args[0] == 'You must use a transaction object'

    ts = genserie(datetime(2017, 10, 28, 23),
                  'H', 4, tz='UTC')
    with engine.begin() as cn:
        tsh.insert(cn, ts, 'test_tx', 'Babar')


43
def test_tstamp_roundtrip(engine, tsh):
44
    assert_structures(engine, tsh)
45
46
    ts = genserie(datetime(2017, 10, 28, 23),
                  'H', 4, tz='UTC')
47
48
49
50
51
52
53
54
55
56
    ts.index = ts.index.tz_convert('Europe/Paris')

    assert_df("""
2017-10-29 01:00:00+02:00    0
2017-10-29 02:00:00+02:00    1
2017-10-29 02:00:00+01:00    2
2017-10-29 03:00:00+01:00    3
Freq: H
    """, ts)

57
58
    tsh.insert(engine, ts, 'tztest', 'Babar',
               _insertion_date=utcdt(2018, 1, 1))
59
    back = tsh.get(engine, 'tztest')
60
61
62

    # though un localized we understand it's been normalized to utc
    assert_df("""
63
64
65
66
2017-10-28 23:00:00+00:00    0.0
2017-10-29 00:00:00+00:00    1.0
2017-10-29 01:00:00+00:00    2.0
2017-10-29 02:00:00+00:00    3.0
67
68
69
""", back)

    assert (ts.index == back.index).all()
70
    assert str(back.index.dtype) == 'datetime64[ns, UTC]'
71

72
73
74
75
    ival = tsh.interval(engine, 'tztest')
    assert ival.left == pd.Timestamp('2017-10-28 23:00:00+0000', tz='UTC')
    assert ival.right == pd.Timestamp('2017-10-29 02:00:00+0000', tz='UTC')

76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
    ts = genserie(datetime(2017, 10, 29, 1),
                  'H', 4, tz='UTC')
    ts.index = ts.index.tz_convert('Europe/Paris')
    tsh.insert(engine, ts, 'tztest', 'Celeste',
                   _insertion_date=utcdt(2018, 1, 3))

    ts = tsh.get(engine, 'tztest')
    assert_df("""
2017-10-28 23:00:00+00:00    0.0
2017-10-29 00:00:00+00:00    1.0
2017-10-29 01:00:00+00:00    0.0
2017-10-29 02:00:00+00:00    1.0
2017-10-29 03:00:00+00:00    2.0
2017-10-29 04:00:00+00:00    3.0
""", ts)

92
    hist = tsh.history(engine, 'tztest')
93
94
95
96
97
98
99
100
101
102
103
104
105
106
    assert_hist("""
insertion_date             value_date               
2018-01-01 00:00:00+00:00  2017-10-28 23:00:00+00:00    0.0
                           2017-10-29 00:00:00+00:00    1.0
                           2017-10-29 01:00:00+00:00    2.0
                           2017-10-29 02:00:00+00:00    3.0
2018-01-03 00:00:00+00:00  2017-10-28 23:00:00+00:00    0.0
                           2017-10-29 00:00:00+00:00    1.0
                           2017-10-29 01:00:00+00:00    0.0
                           2017-10-29 02:00:00+00:00    1.0
                           2017-10-29 03:00:00+00:00    2.0
                           2017-10-29 04:00:00+00:00    3.0
""", hist)

107
108
109
    hist = tsh.history(engine, 'tztest',
                       from_value_date=utcdt(2017, 10, 29, 1),
                       to_value_date=utcdt(2017, 10, 29, 3))
110
111
112
113
114
115
116
117
118
    assert_hist("""
insertion_date             value_date               
2018-01-01 00:00:00+00:00  2017-10-29 01:00:00+00:00    2.0
                           2017-10-29 02:00:00+00:00    3.0
2018-01-03 00:00:00+00:00  2017-10-29 01:00:00+00:00    0.0
                           2017-10-29 02:00:00+00:00    1.0
                           2017-10-29 03:00:00+00:00    2.0
""", hist)

119
120
121
    ival = tsh.interval(engine, 'tztest')
    assert ival.left == pd.Timestamp('2017-10-28 23:00:00+0000', tz='UTC')
    assert ival.right == pd.Timestamp('2017-10-29 04:00:00+0000', tz='UTC')
122
    assert_structures(engine, tsh)
123

124

125
def test_differential(engine, tsh):
126
    assert_structures(engine, tsh)
127
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 10)
128
    tsh.insert(engine, ts_begin, 'ts_test', 'test')
129

130
    id1 = tsh.last_id(engine, 'ts_test')
131
    assert tsh._previous_cset(engine, 'ts_test', id1) is None
132

133
134
    assert tsh.exists(engine, 'ts_test')
    assert not tsh.exists(engine, 'this_does_not_exist')
135

136
137
138
139
140
141
142
143
    assert tsh.interval(engine, 'ts_test') == pd.Interval(
        datetime(2010, 1, 1, 0, 0), datetime(2010, 1, 10, 0, 0),
        closed='both'
    )

    with pytest.raises(ValueError):
        assert tsh.interval(engine, 'nosuchts')

144
    fetched = tsh.get(engine, 'ts_test')
145
    assert_df("""
146
147
148
149
150
151
152
153
154
155
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    3.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
156
157
""", fetched)
    assert fetched.name == 'ts_test'
158
159

    # we should detect the emission of a message
160
    tsh.insert(engine, ts_begin, 'ts_test', 'babar')
161

162
    assert_df("""
163
164
165
166
167
168
169
170
171
172
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    3.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
173
""", tsh.get(engine, 'ts_test'))
174
175
176
177

    ts_slight_variation = ts_begin.copy()
    ts_slight_variation.iloc[3] = 0
    ts_slight_variation.iloc[6] = 0
178
    tsh.insert(engine, ts_slight_variation, 'ts_test', 'celeste')
179
    id2 = tsh.last_id(engine, 'ts_test')
180
    assert tsh._previous_cset(engine, 'ts_test', id2) == id1
181

182
    assert_df("""
183
184
185
186
187
188
189
190
191
192
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    0.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    0.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
193
""", tsh.get(engine, 'ts_test'))
194

195
    ts_longer = genserie(datetime(2010, 1, 3), 'D', 15)
196
197
198
199
    ts_longer.iloc[1] = 2.48
    ts_longer.iloc[3] = 3.14
    ts_longer.iloc[5] = ts_begin.iloc[7]

200
    with engine.begin() as cn:
201
        tsh.insert(cn, ts_longer, 'ts_test', 'test')
202
203
204
    id3 = tsh.last_id(engine, 'ts_test')

    assert id1 < id2 < id3
205

206
    assert_df("""
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
2010-01-01     0.00
2010-01-02     1.00
2010-01-03     0.00
2010-01-04     2.48
2010-01-05     2.00
2010-01-06     3.14
2010-01-07     4.00
2010-01-08     7.00
2010-01-09     6.00
2010-01-10     7.00
2010-01-11     8.00
2010-01-12     9.00
2010-01-13    10.00
2010-01-14    11.00
2010-01-15    12.00
2010-01-16    13.00
2010-01-17    14.00
224
""", tsh.get(engine, 'ts_test'))
225

226
227
228
229
230
    assert tsh.interval(engine, 'ts_test') == pd.Interval(
        datetime(2010, 1, 1, 0, 0), datetime(2010, 1, 17, 0, 0),
        closed='both'
    )

231
    # insert single data, in override of previous one
232
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 5, initval=[2])
233
    ts_begin.loc['2010-01-04'] = -1
234
    tsh.insert(engine, ts_begin, 'ts_mixte', 'test')
235

236
    assert_df("""
237
238
239
240
241
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
242
""", tsh.get(engine, 'ts_mixte'))
243

244
    # add new series with one additional values
245
    ts_more = genserie(datetime(2010, 1, 2), 'D', 5, [2])
246
    ts_more.loc['2010-01-04'] = -1
247
    tsh.insert(engine, ts_more, 'ts_mixte', 'test')
248

249
    assert_df("""
250
251
252
253
254
255
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
256
""", tsh.get(engine, 'ts_mixte'))
257
258

    # just append an extra data point
259
260
    # with no intersection with the previous ts
    ts_one_more = genserie(datetime(2010, 1, 7), 'D', 1, [3])
261
    tsh.insert(engine, ts_one_more, 'ts_mixte', 'test')
262

263
    assert_df("""
264
265
266
267
268
269
270
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
271
""", tsh.get(engine, 'ts_mixte'))
272

273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
    assert_df("""
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
    """, tsh.get(engine, 'ts_mixte',
                 from_value_date=datetime(2010, 1, 3),
                 to_value_date=datetime(2010, 1, 6)
    ))

    assert_df("""
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
        """, tsh.get(engine, 'ts_mixte',
                     from_value_date=datetime(2010, 1, 4)
    ))

    assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
            """, tsh.get(engine, 'ts_mixte',
                         to_value_date=datetime(2010, 1, 5)
        ))

302
    with engine.begin() as cn:
303
304
305
        cn.execute(
            f'set search_path to "{tsh.namespace}.timeserie", {tsh.namespace}, public'
        )
306
307
        allts = pd.read_sql("select seriename, table_name from registry "
                            "where seriename in ('ts_test', 'ts_mixte')",
308
                            cn)
309

310
        assert_df("""
311
312
313
seriename table_name
0   ts_test    ts_test
1  ts_mixte   ts_mixte
314
""".format(tsh.namespace), allts)
315

316
        assert_df("""
317
318
319
320
321
322
323
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
324
""", tsh.get(cn, 'ts_mixte',
325
             revision_date=datetime.now()))
326

327
328
    assert_structures(engine, tsh)

329

330
331
332
333
334
335
def test_serie_metadata(engine, tsh):
    serie = genserie(datetime(2010, 1, 1), 'D', 1, initval=[1])
    tsh.insert(engine, serie, 'ts-metadata', 'babar')

    initialmeta = tsh.metadata(engine, 'ts-metadata')
    assert initialmeta == {
336
337
        'index_dtype': '<M8[ns]',
        'index_names': [],
338
        'index_type': 'datetime64[ns]',
339
340
341
        'tzaware': False,
        'value_dtype': '<f8',
        'value_type': 'float64'
342
343
344
345
346
347
348
349
350
351
352
    }

    tsh.update_metadata(engine, 'ts-metadata',
                        {'topic': 'banana spot price'}
    )
    assert tsh.metadata(engine, 'ts-metadata')['topic'] == 'banana spot price'

    with pytest.raises(AssertionError):
        tsh.update_metadata(engine, 'ts-metadata', {'tzaware': True})

    assert tsh.metadata(engine, 'ts-metadata') == {
353
        'index_dtype': '<M8[ns]',
354
        'index_names': [],
355
356
        'index_type': 'datetime64[ns]',
        'topic': 'banana spot price',
357
        'tzaware': False,
358
359
        'value_dtype': '<f8',
        'value_type': 'float64'
360
    }
361

362

363
364
def test_changeset_metadata(engine, tsh):
    serie = genserie(datetime(2010, 1, 1), 'D', 1, initval=[1])
365
    tsh.insert(engine, serie, 'ts-cs-metadata', 'babar',
366
367
               {'foo': 'A', 'bar': 42})

368
    log = tsh.log(engine, names=['ts-cs-metadata'])
369
370
371
372
    meta = tsh.changeset_metadata(engine, log[0]['rev'])
    assert meta == {'foo': 'A', 'bar': 42}


373
def test_revision_date(engine, tsh):
374
    for i in range(1, 5):
375
        with engine.begin() as cn:
376
377
            tsh.insert(cn, genserie(datetime(2017, 1, i), 'D', 3, [i]), 'revdate',
                       'test', _insertion_date=utcdt(2016, 1, i))
378
379

    # end of prologue, now some real meat
380
    idate0 = pd.Timestamp('2015-1-1 00:00:00', tz='UTC')
381
382
383
384
    ts = genserie(datetime(2010, 1, 4), 'D', 4, [0], name='truc')
    tsh.insert(engine, ts, 'ts_through_time',
               'test', _insertion_date=idate0)
    assert idate0 == tsh.latest_insertion_date(engine, 'ts_through_time')
385

386
    idate1 = pd.Timestamp('2015-1-1 15:45:23', tz='UTC')
387
388
389
390
    ts = genserie(datetime(2010, 1, 4), 'D', 4, [1], name='truc')
    tsh.insert(engine, ts, 'ts_through_time',
               'test', _insertion_date=idate1)
    assert idate1 == tsh.latest_insertion_date(engine, 'ts_through_time')
391

392
    idate2 = pd.Timestamp('2015-1-2 15:43:23', tz='UTC')
393
394
395
396
    ts = genserie(datetime(2010, 1, 4), 'D', 4, [2], name='truc')
    tsh.insert(engine, ts, 'ts_through_time',
               'test', _insertion_date=idate2)
    assert idate2 == tsh.latest_insertion_date(engine, 'ts_through_time')
397

398
    idate3 = pd.Timestamp('2015-1-3', tz='UTC')
399
400
401
402
    ts = genserie(datetime(2010, 1, 4), 'D', 4, [3], name='truc')
    tsh.insert(engine, ts, 'ts_through_time',
               'test', _insertion_date=idate3)
    assert idate3 == tsh.latest_insertion_date(engine, 'ts_through_time')
403

404
    ts = tsh.get(engine, 'ts_through_time')
405

406
    assert_df("""
407
408
409
410
2010-01-04    3.0
2010-01-05    3.0
2010-01-06    3.0
2010-01-07    3.0
411
""", ts)
412

413
    ts = tsh.get(engine, 'ts_through_time',
Aurélien Campéas's avatar
Aurélien Campéas committed
414
                 revision_date=datetime(2015, 1, 2, 18, 43, 23))
415

416
    assert_df("""
417
418
419
420
2010-01-04    2.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    2.0
421
""", ts)
422

423
    ts = tsh.get(engine, 'ts_through_time',
424
                 revision_date=datetime(2015, 1, 1, 18, 43, 23))
425

426
    assert_df("""
427
428
429
430
2010-01-04    1.0
2010-01-05    1.0
2010-01-06    1.0
2010-01-07    1.0
431
""", ts)
432

433
    ts = tsh.get(engine, 'ts_through_time',
434
                 revision_date=datetime(2014, 1, 1, 18, 43, 23))
435
436
437

    assert ts is None

438
439
440
441
442
443
444
445
446
447
    # epilogue: back to the revdate issue
    assert_df("""
2017-01-01    1.0
2017-01-02    2.0
2017-01-03    3.0
2017-01-04    4.0
2017-01-05    4.0
2017-01-06    4.0
""", tsh.get(engine, 'revdate'))

448
    oldstate = tsh.get(engine, 'revdate', revision_date=datetime(2016, 1, 2))
449
450
451
    assert_df("""
2017-01-01    1.0
2017-01-02    2.0
452
453
454
2017-01-03    2.0
2017-01-04    2.0
""", oldstate)
455

456

457
def test_point_deletion(engine, tsh):
458
459
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 11)
    ts_begin.iloc[-1] = np.nan
460
    tsh.insert(engine, ts_begin, 'ts_del', 'test')
461

462
    _, ts = Snapshot(engine, tsh, 'ts_del').find()
463
    assert ts.iloc[-2] == 8.0
464

465
    ts_begin.iloc[0] = np.nan
466
    ts_begin.iloc[3] = np.nan
467

468
    tsh.insert(engine, ts_begin, 'ts_del', 'test')
469

470
    assert_df("""
471
472
473
474
475
476
477
2010-01-02    1.0
2010-01-03    2.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
478
2010-01-10    9.0
479
""", tsh.get(engine, 'ts_del'))
480

481
    ts2 = tsh.get(engine, 'ts_del',
482
483
                 # force snapshot reconstruction feature
                 revision_date=datetime(2038, 1, 1))
484
    assert (tsh.get(engine, 'ts_del') == ts2).all()
485

486
487
488
    ts_begin.iloc[0] = 42
    ts_begin.iloc[3] = 23

489
    tsh.insert(engine, ts_begin, 'ts_del', 'test')
490

491
    assert_df("""
492
493
494
495
496
497
498
499
500
2010-01-01    42.0
2010-01-02     1.0
2010-01-03     2.0
2010-01-04    23.0
2010-01-05     4.0
2010-01-06     5.0
2010-01-07     6.0
2010-01-08     7.0
2010-01-09     8.0
501
2010-01-10     9.0
502
""", tsh.get(engine, 'ts_del'))
503
504
505

    # now with string!

506
    ts_string = genserie(datetime(2010, 1, 1), 'D', 10, ['machin'])
507
    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
508
509
510
511

    ts_string[4] = None
    ts_string[5] = None

512
    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
513
    assert_df("""
514
515
516
517
518
519
520
521
2010-01-01    machin
2010-01-02    machin
2010-01-03    machin
2010-01-04    machin
2010-01-07    machin
2010-01-08    machin
2010-01-09    machin
2010-01-10    machin
522
""", tsh.get(engine, 'ts_string_del'))
523
524
525
526

    ts_string[4] = 'truc'
    ts_string[6] = 'truc'

527
    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
528
    assert_df("""
529
530
531
532
533
534
535
536
2010-01-01    machin
2010-01-02    machin
2010-01-03    machin
2010-01-04    machin
2010-01-05      truc
2010-01-07      truc
2010-01-08    machin
2010-01-09    machin
537
2010-01-10    machin
538
""", tsh.get(engine, 'ts_string_del'))
539

540
    ts_string[ts_string.index] = np.nan
541
542
    with pytest.raises(ValueError):
        tsh.insert(engine, ts_string, 'ts_string_del', 'test')
543

544

545
546
def test_nan_first(engine, tsh):
    # first insertion with only nan
547
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 10, [np.nan])
548
    assert tsh.insert(engine, ts_begin, 'ts_null', 'test') is None
549

550

551
def test_more_point_deletion(engine, tsh):
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
    ts_repushed = genserie(datetime(2010, 1, 1), 'D', 11)
    ts_repushed[0:3] = np.nan

    assert_df("""
2010-01-01     NaN
2010-01-02     NaN
2010-01-03     NaN
2010-01-04     3.0
2010-01-05     4.0
2010-01-06     5.0
2010-01-07     6.0
2010-01-08     7.0
2010-01-09     8.0
2010-01-10     9.0
2010-01-11    10.0
Freq: D
""", ts_repushed)

570
571
    tsh.insert(engine, ts_repushed, 'ts_repushed', 'test')
    diff = tsh.insert(engine, ts_repushed, 'ts_repushed', 'test')
572
573
    assert diff is None

574
    # there is no difference
575
    assert 0 == len(tsh.diff(ts_repushed, ts_repushed))
576
577
578
579
580

    ts_add = genserie(datetime(2010, 1, 1), 'D', 15)
    ts_add.iloc[0] = np.nan
    ts_add.iloc[13:] = np.nan
    ts_add.iloc[8] = np.nan
581
    diff = tsh.diff(ts_repushed, ts_add)
582
583
584
585
586
587

    assert_df("""
2010-01-02     1.0
2010-01-03     2.0
2010-01-09     NaN
2010-01-12    11.0
588
2010-01-13    12.0""", diff.sort_index())
589
590
591
592
    # value on nan => value
    # nan on value => nan
    # nan on nan => Nothing
    # nan on nothing=> Nothing
593

Aurélien Campéas's avatar
Aurélien Campéas committed
594
    # full erasing
595
596
    # numeric
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 4)
597
    tsh.insert(engine, ts_begin, 'ts_full_del', 'test')
598

Aurélien Campéas's avatar
Aurélien Campéas committed
599
    ts_begin.iloc[:] = np.nan
600
601
    with pytest.raises(ValueError):
        tsh.insert(engine, ts_begin, 'ts_full_del', 'test')
602
603

    ts_end = genserie(datetime(2010, 1, 1), 'D', 4)
604
    tsh.insert(engine, ts_end, 'ts_full_del', 'test')
605
606
607
608

    # string

    ts_begin = genserie(datetime(2010, 1, 1), 'D', 4, ['text'])
609
    tsh.insert(engine, ts_begin, 'ts_full_del_str', 'test')
610

611
612
    ts_begin = pd.Series([np.nan] * 4, name='ts_full_del_str',
                         index=ts_begin.index)
613
614
615

    with pytest.raises(ValueError):
        tsh.insert(engine, ts_begin, 'ts_full_del_str', 'test')
616
617

    ts_end = genserie(datetime(2010, 1, 1), 'D', 4, ['text'])
618
    tsh.insert(engine, ts_end, 'ts_full_del_str', 'test')
619

Aurélien Campéas's avatar
Aurélien Campéas committed
620

621
def test_deletion_over_horizon(engine, tsh):
622
    idate = utcdt(2018, 2, 1)
623
624
625
626
627
628
    ts = pd.Series(
        [1, 2, 3],
        index=pd.date_range(datetime(2018, 1, 1), freq='D', periods=3)
    )

    name = 'delete_over_hz'
629
630
    tsh.insert(engine, ts, name, 'Babar',
               _insertion_date=idate)
631
632
633
634
635
636

    ts = pd.Series(
        [np.nan, np.nan, np.nan],
        index=pd.date_range(datetime(2018, 1, 3), freq='D', periods=3)
    )

637
638
    tsh.insert(engine, ts, name, 'Celeste',
               _insertion_date=idate.replace(day=2))
639
640
    ival = tsh.interval(engine, name)
    assert ival.left == datetime(2018, 1, 1)
641
    assert ival.right == datetime(2018, 1, 2)
642
643
644
645
646

    ts = pd.Series(
        [np.nan, np.nan, np.nan],
        index=pd.date_range(datetime(2017, 12, 30), freq='D', periods=3)
    )
647
648
    tsh.insert(engine, ts, name, 'Arthur',
               _insertion_date=idate.replace(day=3))
649
    ival = tsh.interval(engine, name)
650
651
    assert ival.left == datetime(2018, 1, 2)
    assert ival.right == datetime(2018, 1, 2)
652
653


654
def test_history(engine, tsh):
655
    for numserie in (1, 2, 3):
656
        with engine.begin() as cn:
657
658
659
            tsh.insert(cn, genserie(datetime(2017, 1, 1), 'D', numserie), 'smallserie',
                       'aurelien.campeas@pythonian.fr',
                       _insertion_date=utcdt(2017, 2, numserie))
660
661
662
663
664
665
666
667
668
669
670

    ts = tsh.get(engine, 'smallserie')
    assert_df("""
2017-01-01    0.0
2017-01-02    1.0
2017-01-03    2.0
""", ts)

    logs = tsh.log(engine, names=['smallserie'])
    assert [
        {'author': 'aurelien.campeas@pythonian.fr',
671
         'meta': {},
672
         'date': pd.Timestamp('2017-02-01 00:00:00+0000', tz='UTC'),
673
674
675
         'names': ['smallserie']
        },
        {'author': 'aurelien.campeas@pythonian.fr',
676
         'meta': {},
677
         'date': pd.Timestamp('2017-02-02 00:00:00+0000', tz='UTC'),
678
679
680
         'names': ['smallserie']
        },
        {'author': 'aurelien.campeas@pythonian.fr',
681
         'meta': {},
682
         'date': pd.Timestamp('2017-02-03 00:00:00+0000', tz='UTC'),
683
684
685
686
         'names': ['smallserie']
        }
    ] == [{k: v for k, v in log.items() if k != 'rev'}
          for log in logs]
687
    histts = tsh.history(engine, 'smallserie')
688

689
    assert_hist("""
690
691
692
693
694
695
696
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
                           2017-01-03    2.0
697
698
""", histts)

699
    diffs = tsh.history(engine, 'smallserie', diffmode=True)
700
    assert_hist("""
701
702
703
704
705
706
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-03    2.0
""", diffs)

707
    for idate in histts:
708
        with engine.begin() as cn:
709
            idate = idate.replace(tzinfo=pytz.timezone('UTC'))
710
711
            tsh.insert(cn, histts[idate], 'smallserie2',
                       'aurelien.campeas@pythonian.f', _insertion_date=idate)
712
713
714

    # this is perfectly round-tripable
    assert (tsh.get(engine, 'smallserie2') == ts).all()
715
    assert_hist_equals(tsh.history(engine, 'smallserie2'), histts)
716
717

    # get history ranges
718
719
    tsa = tsh.history(engine, 'smallserie',
                      from_insertion_date=datetime(2017, 2, 2))
720
    assert_hist("""
721
722
723
724
725
726
insertion_date             value_date
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
                           2017-01-03    2.0
727
728
""", tsa)

729
730
    tsb = tsh.history(engine, 'smallserie',
                      to_insertion_date=datetime(2017, 2, 2))
731
    assert_hist("""
732
733
734
735
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
736
737
""", tsb)

738
739
740
    tsc = tsh.history(engine, 'smallserie',
                      from_insertion_date=datetime(2017, 2, 2),
                      to_insertion_date=datetime(2017, 2, 2))
741
    assert_hist("""
742
743
744
insertion_date             value_date
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
745
746
""", tsc)

747
748
749
    tsc = tsh.history(engine, 'smallserie',
                      from_insertion_date=datetime(2017, 2, 4),
                      to_insertion_date=datetime(2017, 2, 4))
750
    assert tsc == {}
751

752
753
754
    tsc = tsh.history(engine, 'smallserie',
                      from_insertion_date=datetime(2016, 2, 1),
                      to_insertion_date=datetime(2017, 2, 2))
755
    assert_hist("""
756
757
758
759
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
760
761
""", tsc)

762
763
764
    tsc = tsh.history(engine, 'smallserie',
                      from_insertion_date=datetime(2016, 2, 1),
                      to_insertion_date=datetime(2016, 12, 31))
765
    assert tsc == {}
766

767
    # restrictions on value dates
768
769
770
    tsc = tsh.history(engine, 'smallserie',
                      from_value_date=datetime(2017, 1, 1),
                      to_value_date=datetime(2017, 1, 2))
771
    assert_hist("""
772
773
774
775
776
777
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
778
779
""", tsc)

780
781
    tsc = tsh.history(engine, 'smallserie',
                      from_value_date=datetime(2017, 1, 2))
782
    assert_hist("""
783
784
785
786
insertion_date             value_date
2017-02-02 00:00:00+00:00  2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-02    1.0
                           2017-01-03    2.0
787
788
""", tsc)

789
790
    tsc = tsh.history(engine, 'smallserie',
                      to_value_date=datetime(2017, 1, 2))
791
    assert_hist("""
792
793
794
795
796
797
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
798
799
""", tsc)

800
    tsc = tsh.history(engine, 'no-such-series')
801
802
    assert tsc is None

803
804
805
806
807
808
    idates = tsh.insertion_dates(engine, 'smallserie')
    assert idates == [
        pd.Timestamp('2017-02-01 00:00:00+0000', tz='UTC'),
        pd.Timestamp('2017-02-02 00:00:00+0000', tz='UTC'),
        pd.Timestamp('2017-02-03 00:00:00+0000', tz='UTC')
    ]
809
810
811
812
813
814
815
816
    idates = tsh.insertion_dates(
        engine, 'smallserie',
        fromdate=utcdt(2017, 2, 2),
        todate=utcdt(2017, 2, 2)
    )
    assert idates == [
        pd.Timestamp('2017-02-02 00:00:00+0000', tz='UTC'),
    ]
817
818


819

820
821
822
823
824
825
826
def test_history_delta(engine, tsh):
    for d in range(1, 3):
        idate = utcdt(2018, 1, d)
        serie = genserie(idate - timedelta(hours=1), 'H', 6, initval=[d])
        tsh.insert(engine, serie, 'hd', 'aurelien.campeas@pythonian.fr',
                   _insertion_date=idate)

827
    assert_hist("""
828
insertion_date             value_date               
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
2018-01-01 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
                           2018-01-01 03:00:00+00:00    1.0
                           2018-01-01 04:00:00+00:00    1.0
2018-01-02 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
                           2018-01-01 03:00:00+00:00    1.0
                           2018-01-01 04:00:00+00:00    1.0
                           2018-01-01 23:00:00+00:00    2.0
                           2018-01-02 00:00:00+00:00    2.0
                           2018-01-02 01:00:00+00:00    2.0
                           2018-01-02 02:00:00+00:00    2.0
                           2018-01-02 03:00:00+00:00    2.0
                           2018-01-02 04:00:00+00:00    2.0
847
    """, tsh.history(engine, 'hd'))
848

849
    assert_hist("""
850
insertion_date             value_date               
851
852
2018-01-01 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
853
854
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
855
856
857
858
859
860
861
862
2018-01-02 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
                           2018-01-01 03:00:00+00:00    1.0
                           2018-01-01 04:00:00+00:00    1.0
                           2018-01-01 23:00:00+00:00    2.0
                           2018-01-02 00:00:00+00:00    2.0
863
864
                           2018-01-02 01:00:00+00:00    2.0
                           2018-01-02 02:00:00+00:00    2.0
865
""",  tsh.history(engine, 'hd', deltaafter=timedelta(hours=2)))
866

867
    assert_hist("""
868
insertion_date             value_date               
869
2018-01-01 00:00:00+00:00  2018-01-01 00:00:00+00:00    1.0
870
                           2018-01-01 01:00:00+00:00    1.0
871
2018-01-02 00:00:00+00:00  2018-01-02 00:00:00+00:00    2.0
872
                           2018-01-02 01:00:00+00:00    2.0
873
874
875
""",  tsh.history(engine, 'hd',
                  deltabefore=timedelta(hours=0),
                  deltaafter=timedelta(hours=1)))
876
877


878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
def test_delta_na(engine, tsh):
    ldates = (
        utcdt(2015, 1, 20),
        utcdt(2015, 1, 21),
        utcdt(2015, 1, 22)
    )

    for idx, idate in enumerate(ldates):
        ts = pd.Series([idx] * 3, index = ldates)
        tsh.insert(engine, ts, 'without_na', 'arnaud',
                   _insertion_date=idate)

    assert_df("""
2015-01-20 00:00:00+00:00    2.0
2015-01-21 00:00:00+00:00    2.0
2015-01-22 00:00:00+00:00    2.0
""", tsh.get(engine, 'without_na'))

    assert_df("""
2015-01-20 00:00:00+00:00    0.0
2015-01-21 00:00:00+00:00    1.0
2015-01-22 00:00:00+00:00    2.0
900
    """, tsh.staircase(engine, 'without_na', delta=timedelta(hours=0)))
901
902
903
904
905
906
907
908
909
910
911
912
913
    #as expected

    assert_hist("""
insertion_date             value_date               
2015-01-20 00:00:00+00:00  2015-01-20 00:00:00+00:00    0.0
                           2015-01-21 00:00:00+00:00    0.0
                           2015-01-22 00:00:00+00:00    0.0
2015-01-21 00:00:00+00:00  2015-01-20 00:00:00+00:00    1.0
                           2015-01-21 00:00:00+00:00    1.0
                           2015-01-22 00:00:00+00:00    1.0
2015-01-22 00:00:00+00:00  2015-01-20 00:00:00+00:00    2.0
                           2015-01-21 00:00:00+00:00    2.0
                           2015-01-22 00:00:00+00:00    2.0
914
    """, tsh.history(engine, 'without_na'))
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935

    # now, the last insertion has Na as last value
    ldates = (
        utcdt(2015, 1, 20),
        utcdt(2015, 1, 21),
        utcdt(2015, 1, 22)
    )

    for idx, idate in enumerate(ldates):
        serie = pd.Series([float(idx)] * 3, index = ldates)
        if idx == 2:
            serie[-1] = np.nan
        tsh.insert(engine, serie, 'with_na', 'arnaud',
                   _insertion_date=idate)

    # the value at 2015-01-22 is hidden by the inserted nan
    assert_df("""
2015-01-20 00:00:00+00:00    2.0
2015-01-21 00:00:00+00:00    2.0
    """, tsh.get(engine, 'with_na'))

936
    # the last value is also correctly hidden
937
938
939
    assert_df("""
2015-01-20 00:00:00+00:00    0.0
2015-01-21 00:00:00+00:00    1.0
940
    """, tsh.staircase(engine, 'with_na', delta=timedelta(hours=0)))
941

942
    # the value gathered by staircase at value date 2015-01-22 is a
943
    # nan, so it masks the previous ones at the same date
944
945
946
947
948
949
950
951
952
953
    assert_hist("""
insertion_date             value_date               
2015-01-20 00:00:00+00:00  2015-01-20 00:00:00+00:00    0.0
                           2015-01-21 00:00:00+00:00    0.0
                           2015-01-22 00:00:00+00:00    0.0
2015-01-21 00:00:00+00:00  2015-01-20 00:00:00+00:00    1.0
                           2015-01-21 00:00:00+00:00    1.0
                           2015-01-22 00:00:00+00:00    1.0
2015-01-22 00:00:00+00:00  2015-01-20 00:00:00+00:00    2.0
                           2015-01-21 00:00:00+00:00    2.0
954
                           2015-01-22 00:00:00+00:00    NaN
955
        """, tsh.history(engine, 'with_na', _keep_nans=True))
956
957


958
959
960
961
962
963
964
965
966
967
968
def test_nr_gethistory(engine, tsh):
    s0 = pd.Series([-1, 0, 0, -1],
                   index=pd.DatetimeIndex(start=datetime(2016, 12, 29),
                                          end=datetime(2017, 1, 1),
                                          freq='D'))
    tsh.insert(engine, s0, 'foo', 'zogzog')

    s1 = pd.Series([1, 0, 0, 1],
                   index=pd.DatetimeIndex(start=datetime(2017, 1, 1),
                                          end=datetime(2017, 1, 4),
                                          freq='D'))
969
    idate = utcdt(2016, 1, 1)
970
    for i in range(5):
971
        with engine.begin() as cn:
972
973
974
            tsh.insert(cn, s1 * i, 'foo',
                       'aurelien.campeas@pythonian.f',
                       _insertion_date=idate + timedelta(days=i))
975

976
977
978
979
980
    df = tsh.history(engine, 'foo',
                     datetime(2016, 1, 3),
                     datetime(2016, 1, 4),
                     datetime(2017, 1, 1),
                     datetime(2017, 1, 4))
981

982
    assert_hist("""
983
984
985
986
987
988
989
990
991
insertion_date             value_date
2016-01-03 00:00:00+00:00  2017-01-01    2.0
                           2017-01-02    0.0
                           2017-01-03    0.0
                           2017-01-04    2.0
2016-01-04 00:00:00+00:00  2017-01-01    3.0
                           2017-01-02    0.0
                           2017-01-03    0.0
                           2017-01-04    3.0
992
993
994
""", df)


995
def test_add_na(engine, tsh):
996
997
998
999
1000
    # a serie of NaNs won't be insert in base
    # in case of first insertion
    ts_nan = genserie(datetime(2010, 1, 1), 'D', 5)
    ts_nan[[True] * len(ts_nan)] = np.nan

1001
    diff = tsh.insert(engine, ts_nan, 'ts_add_na', 'test')
1002
    assert diff is None
1003
1004
1005

    # in case of insertion in existing data
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 5)
1006
    tsh.insert(engine, ts_begin, 'ts_add_na', 'test')
1007
1008
1009
1010
1011

    ts_nan = genserie(datetime(2010, 1, 6), 'D', 5)
    ts_nan[[True] * len(ts_nan)] = np.nan
    ts_nan = pd.concat([ts_begin, ts_nan])

1012
    diff = tsh.insert(engine, ts_nan, 'ts_add_na', 'test')
1013
1014
    assert diff is None