test_tsio.py 55.6 KB
Newer Older
1
from datetime import datetime, timedelta
2
3
from pathlib import Path
import pytz
4

5
from dateutil import parser
6
import pytest
7
8
import numpy as np
import pandas as pd
9

10
from tshistory.snapshot import Snapshot
11
from tshistory.util import threadpool
12
from tshistory.tsio import TimeSerie
13
14
from tshistory.testutil import (
    assert_df,
15
16
    assert_hist,
    assert_hist_equals,
17
    assert_group_equals,
18
    assert_structures,
19
20
21
    genserie,
    tempattr
)
22

23
DATADIR = Path(__file__).parent / 'data'
24

Aurélien Campéas's avatar
Aurélien Campéas committed
25

26
27
28
29
def utcdt(*dt):
    return pd.Timestamp(datetime(*dt), tz='UTC')


30
def test_in_tx(tsh, engine):
31
32
    assert tsh.type(engine, 'foo') == 'primary'

33
    with pytest.raises(TypeError) as err:
34
        tsh.insert(engine.connect(), 0, 0, 0)
35
36
37
38
39
40
41
42
    assert err.value.args[0] == 'You must use a transaction object'

    ts = genserie(datetime(2017, 10, 28, 23),
                  'H', 4, tz='UTC')
    with engine.begin() as cn:
        tsh.insert(cn, ts, 'test_tx', 'Babar')


43
def test_tstamp_roundtrip(engine, tsh):
44
    assert_structures(engine, tsh)
45
46
    ts = genserie(datetime(2017, 10, 28, 23),
                  'H', 4, tz='UTC')
47
48
49
50
51
52
53
54
55
56
    ts.index = ts.index.tz_convert('Europe/Paris')

    assert_df("""
2017-10-29 01:00:00+02:00    0
2017-10-29 02:00:00+02:00    1
2017-10-29 02:00:00+01:00    2
2017-10-29 03:00:00+01:00    3
Freq: H
    """, ts)

57
58
    tsh.insert(engine, ts, 'tztest', 'Babar',
               _insertion_date=utcdt(2018, 1, 1))
59
    back = tsh.get(engine, 'tztest')
60
61
62

    # though un localized we understand it's been normalized to utc
    assert_df("""
63
64
65
66
2017-10-28 23:00:00+00:00    0.0
2017-10-29 00:00:00+00:00    1.0
2017-10-29 01:00:00+00:00    2.0
2017-10-29 02:00:00+00:00    3.0
67
68
69
""", back)

    assert (ts.index == back.index).all()
70
    assert str(back.index.dtype) == 'datetime64[ns, UTC]'
71

72
73
74
75
    ival = tsh.interval(engine, 'tztest')
    assert ival.left == pd.Timestamp('2017-10-28 23:00:00+0000', tz='UTC')
    assert ival.right == pd.Timestamp('2017-10-29 02:00:00+0000', tz='UTC')

76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
    ts = genserie(datetime(2017, 10, 29, 1),
                  'H', 4, tz='UTC')
    ts.index = ts.index.tz_convert('Europe/Paris')
    tsh.insert(engine, ts, 'tztest', 'Celeste',
                   _insertion_date=utcdt(2018, 1, 3))

    ts = tsh.get(engine, 'tztest')
    assert_df("""
2017-10-28 23:00:00+00:00    0.0
2017-10-29 00:00:00+00:00    1.0
2017-10-29 01:00:00+00:00    0.0
2017-10-29 02:00:00+00:00    1.0
2017-10-29 03:00:00+00:00    2.0
2017-10-29 04:00:00+00:00    3.0
""", ts)

    hist = tsh.get_history(engine, 'tztest')
    assert_hist("""
insertion_date             value_date               
2018-01-01 00:00:00+00:00  2017-10-28 23:00:00+00:00    0.0
                           2017-10-29 00:00:00+00:00    1.0
                           2017-10-29 01:00:00+00:00    2.0
                           2017-10-29 02:00:00+00:00    3.0
2018-01-03 00:00:00+00:00  2017-10-28 23:00:00+00:00    0.0
                           2017-10-29 00:00:00+00:00    1.0
                           2017-10-29 01:00:00+00:00    0.0
                           2017-10-29 02:00:00+00:00    1.0
                           2017-10-29 03:00:00+00:00    2.0
                           2017-10-29 04:00:00+00:00    3.0
""", hist)

    hist = tsh.get_history(engine, 'tztest',
                           from_value_date=utcdt(2017, 10, 29, 1),
                           to_value_date=utcdt(2017, 10, 29, 3))
    assert_hist("""
insertion_date             value_date               
2018-01-01 00:00:00+00:00  2017-10-29 01:00:00+00:00    2.0
                           2017-10-29 02:00:00+00:00    3.0
2018-01-03 00:00:00+00:00  2017-10-29 01:00:00+00:00    0.0
                           2017-10-29 02:00:00+00:00    1.0
                           2017-10-29 03:00:00+00:00    2.0
""", hist)

119
120
121
    ival = tsh.interval(engine, 'tztest')
    assert ival.left == pd.Timestamp('2017-10-28 23:00:00+0000', tz='UTC')
    assert ival.right == pd.Timestamp('2017-10-29 04:00:00+0000', tz='UTC')
122
    assert_structures(engine, tsh)
123

124

125
def test_differential(engine, tsh):
126
    assert_structures(engine, tsh)
127
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 10)
128
    tsh.insert(engine, ts_begin, 'ts_test', 'test')
129

130
    id1 = tsh.last_id(engine, 'ts_test')
131
    assert tsh._previous_cset(engine, 'ts_test', id1) is None
132

133
134
    assert tsh.exists(engine, 'ts_test')
    assert not tsh.exists(engine, 'this_does_not_exist')
135

136
137
138
139
140
141
142
143
    assert tsh.interval(engine, 'ts_test') == pd.Interval(
        datetime(2010, 1, 1, 0, 0), datetime(2010, 1, 10, 0, 0),
        closed='both'
    )

    with pytest.raises(ValueError):
        assert tsh.interval(engine, 'nosuchts')

144
    fetched = tsh.get(engine, 'ts_test')
145
    assert_df("""
146
147
148
149
150
151
152
153
154
155
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    3.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
156
157
""", fetched)
    assert fetched.name == 'ts_test'
158
159

    # we should detect the emission of a message
160
    tsh.insert(engine, ts_begin, 'ts_test', 'babar')
161

162
    assert_df("""
163
164
165
166
167
168
169
170
171
172
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    3.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
173
""", tsh.get(engine, 'ts_test'))
174
175
176
177

    ts_slight_variation = ts_begin.copy()
    ts_slight_variation.iloc[3] = 0
    ts_slight_variation.iloc[6] = 0
178
    tsh.insert(engine, ts_slight_variation, 'ts_test', 'celeste')
179
    id2 = tsh.last_id(engine, 'ts_test')
180
    assert tsh._previous_cset(engine, 'ts_test', id2) == id1
181

182
    assert_df("""
183
184
185
186
187
188
189
190
191
192
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    0.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    0.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
193
""", tsh.get(engine, 'ts_test'))
194

195
    ts_longer = genserie(datetime(2010, 1, 3), 'D', 15)
196
197
198
199
    ts_longer.iloc[1] = 2.48
    ts_longer.iloc[3] = 3.14
    ts_longer.iloc[5] = ts_begin.iloc[7]

200
    with engine.begin() as cn:
201
        tsh.insert(cn, ts_longer, 'ts_test', 'test')
202
203
204
    id3 = tsh.last_id(engine, 'ts_test')

    assert id1 < id2 < id3
205

206
    assert_df("""
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
2010-01-01     0.00
2010-01-02     1.00
2010-01-03     0.00
2010-01-04     2.48
2010-01-05     2.00
2010-01-06     3.14
2010-01-07     4.00
2010-01-08     7.00
2010-01-09     6.00
2010-01-10     7.00
2010-01-11     8.00
2010-01-12     9.00
2010-01-13    10.00
2010-01-14    11.00
2010-01-15    12.00
2010-01-16    13.00
2010-01-17    14.00
224
""", tsh.get(engine, 'ts_test'))
225

226
227
228
229
230
    assert tsh.interval(engine, 'ts_test') == pd.Interval(
        datetime(2010, 1, 1, 0, 0), datetime(2010, 1, 17, 0, 0),
        closed='both'
    )

231
    # insert single data, in override of previous one
232
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 5, initval=[2])
233
    ts_begin.loc['2010-01-04'] = -1
234
    tsh.insert(engine, ts_begin, 'ts_mixte', 'test')
235

236
    assert_df("""
237
238
239
240
241
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
242
""", tsh.get(engine, 'ts_mixte'))
243

244
    # add new series with one additional values
245
    ts_more = genserie(datetime(2010, 1, 2), 'D', 5, [2])
246
    ts_more.loc['2010-01-04'] = -1
247
    tsh.insert(engine, ts_more, 'ts_mixte', 'test')
248

249
    assert_df("""
250
251
252
253
254
255
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
256
""", tsh.get(engine, 'ts_mixte'))
257
258

    # just append an extra data point
259
260
    # with no intersection with the previous ts
    ts_one_more = genserie(datetime(2010, 1, 7), 'D', 1, [3])
261
    tsh.insert(engine, ts_one_more, 'ts_mixte', 'test')
262

263
    assert_df("""
264
265
266
267
268
269
270
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
271
""", tsh.get(engine, 'ts_mixte'))
272

273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
    assert_df("""
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
    """, tsh.get(engine, 'ts_mixte',
                 from_value_date=datetime(2010, 1, 3),
                 to_value_date=datetime(2010, 1, 6)
    ))

    assert_df("""
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
        """, tsh.get(engine, 'ts_mixte',
                     from_value_date=datetime(2010, 1, 4)
    ))

    assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
            """, tsh.get(engine, 'ts_mixte',
                         to_value_date=datetime(2010, 1, 5)
        ))

302
    with engine.begin() as cn:
303
        cn.execute('set search_path to "{0}.timeserie", {0}, public'.format(tsh.namespace))
304
305
        allts = pd.read_sql("select seriename, table_name from registry "
                            "where seriename in ('ts_test', 'ts_mixte')",
306
                            cn)
307

308
        assert_df("""
309
310
311
seriename table_name
0   ts_test    ts_test
1  ts_mixte   ts_mixte
312
""".format(tsh.namespace), allts)
313

314
        assert_df("""
315
316
317
318
319
320
321
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
322
""", tsh.get(cn, 'ts_mixte',
323
             revision_date=datetime.now()))
324

325
326
    assert_structures(engine, tsh)

327

328
329
330
331
332
333
def test_serie_metadata(engine, tsh):
    serie = genserie(datetime(2010, 1, 1), 'D', 1, initval=[1])
    tsh.insert(engine, serie, 'ts-metadata', 'babar')

    initialmeta = tsh.metadata(engine, 'ts-metadata')
    assert initialmeta == {
334
335
        'index_dtype': '<M8[ns]',
        'index_names': [],
336
        'index_type': 'datetime64[ns]',
337
338
339
        'tzaware': False,
        'value_dtype': '<f8',
        'value_type': 'float64'
340
341
342
343
344
345
346
347
348
349
350
    }

    tsh.update_metadata(engine, 'ts-metadata',
                        {'topic': 'banana spot price'}
    )
    assert tsh.metadata(engine, 'ts-metadata')['topic'] == 'banana spot price'

    with pytest.raises(AssertionError):
        tsh.update_metadata(engine, 'ts-metadata', {'tzaware': True})

    assert tsh.metadata(engine, 'ts-metadata') == {
351
        'index_dtype': '<M8[ns]',
352
        'index_names': [],
353
354
        'index_type': 'datetime64[ns]',
        'topic': 'banana spot price',
355
        'tzaware': False,
356
357
        'value_dtype': '<f8',
        'value_type': 'float64'
358
    }
359

360

361
362
def test_changeset_metadata(engine, tsh):
    serie = genserie(datetime(2010, 1, 1), 'D', 1, initval=[1])
363
    tsh.insert(engine, serie, 'ts-cs-metadata', 'babar',
364
365
               {'foo': 'A', 'bar': 42})

366
    log = tsh.log(engine, names=['ts-cs-metadata'])
367
368
369
370
    meta = tsh.changeset_metadata(engine, log[0]['rev'])
    assert meta == {'foo': 'A', 'bar': 42}


371
def test_revision_date(engine, tsh):
372
    for i in range(1, 5):
373
        with engine.begin() as cn:
374
375
            tsh.insert(cn, genserie(datetime(2017, 1, i), 'D', 3, [i]), 'revdate',
                       'test', _insertion_date=utcdt(2016, 1, i))
376
377

    # end of prologue, now some real meat
378
    idate0 = pd.Timestamp('2015-1-1 00:00:00', tz='UTC')
379
380
381
382
    ts = genserie(datetime(2010, 1, 4), 'D', 4, [0], name='truc')
    tsh.insert(engine, ts, 'ts_through_time',
               'test', _insertion_date=idate0)
    assert idate0 == tsh.latest_insertion_date(engine, 'ts_through_time')
383

384
    idate1 = pd.Timestamp('2015-1-1 15:45:23', tz='UTC')
385
386
387
388
    ts = genserie(datetime(2010, 1, 4), 'D', 4, [1], name='truc')
    tsh.insert(engine, ts, 'ts_through_time',
               'test', _insertion_date=idate1)
    assert idate1 == tsh.latest_insertion_date(engine, 'ts_through_time')
389

390
    idate2 = pd.Timestamp('2015-1-2 15:43:23', tz='UTC')
391
392
393
394
    ts = genserie(datetime(2010, 1, 4), 'D', 4, [2], name='truc')
    tsh.insert(engine, ts, 'ts_through_time',
               'test', _insertion_date=idate2)
    assert idate2 == tsh.latest_insertion_date(engine, 'ts_through_time')
395

396
    idate3 = pd.Timestamp('2015-1-3', tz='UTC')
397
398
399
400
    ts = genserie(datetime(2010, 1, 4), 'D', 4, [3], name='truc')
    tsh.insert(engine, ts, 'ts_through_time',
               'test', _insertion_date=idate3)
    assert idate3 == tsh.latest_insertion_date(engine, 'ts_through_time')
401

402
    ts = tsh.get(engine, 'ts_through_time')
403

404
    assert_df("""
405
406
407
408
2010-01-04    3.0
2010-01-05    3.0
2010-01-06    3.0
2010-01-07    3.0
409
""", ts)
410

411
    ts = tsh.get(engine, 'ts_through_time',
Aurélien Campéas's avatar
Aurélien Campéas committed
412
                 revision_date=datetime(2015, 1, 2, 18, 43, 23))
413

414
    assert_df("""
415
416
417
418
2010-01-04    2.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    2.0
419
""", ts)
420

421
    ts = tsh.get(engine, 'ts_through_time',
422
                 revision_date=datetime(2015, 1, 1, 18, 43, 23))
423

424
    assert_df("""
425
426
427
428
2010-01-04    1.0
2010-01-05    1.0
2010-01-06    1.0
2010-01-07    1.0
429
""", ts)
430

431
    ts = tsh.get(engine, 'ts_through_time',
432
                 revision_date=datetime(2014, 1, 1, 18, 43, 23))
433
434
435

    assert ts is None

436
437
438
439
440
441
442
443
444
445
    # epilogue: back to the revdate issue
    assert_df("""
2017-01-01    1.0
2017-01-02    2.0
2017-01-03    3.0
2017-01-04    4.0
2017-01-05    4.0
2017-01-06    4.0
""", tsh.get(engine, 'revdate'))

446
    oldstate = tsh.get(engine, 'revdate', revision_date=datetime(2016, 1, 2))
447
448
449
    assert_df("""
2017-01-01    1.0
2017-01-02    2.0
450
451
452
2017-01-03    2.0
2017-01-04    2.0
""", oldstate)
453

454

455
def test_point_deletion(engine, tsh):
456
457
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 11)
    ts_begin.iloc[-1] = np.nan
458
    tsh.insert(engine, ts_begin, 'ts_del', 'test')
459

460
    _, ts = Snapshot(engine, tsh, 'ts_del').find()
461
    assert ts.iloc[-2] == 8.0
462

463
    ts_begin.iloc[0] = np.nan
464
    ts_begin.iloc[3] = np.nan
465

466
    tsh.insert(engine, ts_begin, 'ts_del', 'test')
467

468
    assert_df("""
469
470
471
472
473
474
475
2010-01-02    1.0
2010-01-03    2.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
476
2010-01-10    9.0
477
""", tsh.get(engine, 'ts_del'))
478

479
    ts2 = tsh.get(engine, 'ts_del',
480
481
                 # force snapshot reconstruction feature
                 revision_date=datetime(2038, 1, 1))
482
    assert (tsh.get(engine, 'ts_del') == ts2).all()
483

484
485
486
    ts_begin.iloc[0] = 42
    ts_begin.iloc[3] = 23

487
    tsh.insert(engine, ts_begin, 'ts_del', 'test')
488

489
    assert_df("""
490
491
492
493
494
495
496
497
498
2010-01-01    42.0
2010-01-02     1.0
2010-01-03     2.0
2010-01-04    23.0
2010-01-05     4.0
2010-01-06     5.0
2010-01-07     6.0
2010-01-08     7.0
2010-01-09     8.0
499
2010-01-10     9.0
500
""", tsh.get(engine, 'ts_del'))
501
502
503

    # now with string!

504
    ts_string = genserie(datetime(2010, 1, 1), 'D', 10, ['machin'])
505
    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
506
507
508
509

    ts_string[4] = None
    ts_string[5] = None

510
    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
511
    assert_df("""
512
513
514
515
516
517
518
519
2010-01-01    machin
2010-01-02    machin
2010-01-03    machin
2010-01-04    machin
2010-01-07    machin
2010-01-08    machin
2010-01-09    machin
2010-01-10    machin
520
""", tsh.get(engine, 'ts_string_del'))
521
522
523
524

    ts_string[4] = 'truc'
    ts_string[6] = 'truc'

525
    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
526
    assert_df("""
527
528
529
530
531
532
533
534
2010-01-01    machin
2010-01-02    machin
2010-01-03    machin
2010-01-04    machin
2010-01-05      truc
2010-01-07      truc
2010-01-08    machin
2010-01-09    machin
535
2010-01-10    machin
536
""", tsh.get(engine, 'ts_string_del'))
537

538
    ts_string[ts_string.index] = np.nan
539
540
    with pytest.raises(ValueError):
        tsh.insert(engine, ts_string, 'ts_string_del', 'test')
541

542

543
544
def test_nan_first(engine, tsh):
    # first insertion with only nan
545
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 10, [np.nan])
546
    assert tsh.insert(engine, ts_begin, 'ts_null', 'test') is None
547

548

549
def test_more_point_deletion(engine, tsh):
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
    ts_repushed = genserie(datetime(2010, 1, 1), 'D', 11)
    ts_repushed[0:3] = np.nan

    assert_df("""
2010-01-01     NaN
2010-01-02     NaN
2010-01-03     NaN
2010-01-04     3.0
2010-01-05     4.0
2010-01-06     5.0
2010-01-07     6.0
2010-01-08     7.0
2010-01-09     8.0
2010-01-10     9.0
2010-01-11    10.0
Freq: D
""", ts_repushed)

568
569
    tsh.insert(engine, ts_repushed, 'ts_repushed', 'test')
    diff = tsh.insert(engine, ts_repushed, 'ts_repushed', 'test')
570
571
    assert diff is None

572
    # there is no difference
573
    assert 0 == len(tsh.diff(ts_repushed, ts_repushed))
574
575
576
577
578

    ts_add = genserie(datetime(2010, 1, 1), 'D', 15)
    ts_add.iloc[0] = np.nan
    ts_add.iloc[13:] = np.nan
    ts_add.iloc[8] = np.nan
579
    diff = tsh.diff(ts_repushed, ts_add)
580
581
582
583
584
585

    assert_df("""
2010-01-02     1.0
2010-01-03     2.0
2010-01-09     NaN
2010-01-12    11.0
586
2010-01-13    12.0""", diff.sort_index())
587
588
589
590
    # value on nan => value
    # nan on value => nan
    # nan on nan => Nothing
    # nan on nothing=> Nothing
591

Aurélien Campéas's avatar
Aurélien Campéas committed
592
    # full erasing
593
594
    # numeric
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 4)
595
    tsh.insert(engine, ts_begin, 'ts_full_del', 'test')
596

Aurélien Campéas's avatar
Aurélien Campéas committed
597
    ts_begin.iloc[:] = np.nan
598
599
    with pytest.raises(ValueError):
        tsh.insert(engine, ts_begin, 'ts_full_del', 'test')
600
601

    ts_end = genserie(datetime(2010, 1, 1), 'D', 4)
602
    tsh.insert(engine, ts_end, 'ts_full_del', 'test')
603
604
605
606

    # string

    ts_begin = genserie(datetime(2010, 1, 1), 'D', 4, ['text'])
607
    tsh.insert(engine, ts_begin, 'ts_full_del_str', 'test')
608

609
610
    ts_begin = pd.Series([np.nan] * 4, name='ts_full_del_str',
                         index=ts_begin.index)
611
612
613

    with pytest.raises(ValueError):
        tsh.insert(engine, ts_begin, 'ts_full_del_str', 'test')
614
615

    ts_end = genserie(datetime(2010, 1, 1), 'D', 4, ['text'])
616
    tsh.insert(engine, ts_end, 'ts_full_del_str', 'test')
617

Aurélien Campéas's avatar
Aurélien Campéas committed
618

619
def test_deletion_over_horizon(engine, tsh):
620
    idate = utcdt(2018, 2, 1)
621
622
623
624
625
626
    ts = pd.Series(
        [1, 2, 3],
        index=pd.date_range(datetime(2018, 1, 1), freq='D', periods=3)
    )

    name = 'delete_over_hz'
627
628
    tsh.insert(engine, ts, name, 'Babar',
               _insertion_date=idate)
629
630
631
632
633
634

    ts = pd.Series(
        [np.nan, np.nan, np.nan],
        index=pd.date_range(datetime(2018, 1, 3), freq='D', periods=3)
    )

635
636
    tsh.insert(engine, ts, name, 'Celeste',
               _insertion_date=idate.replace(day=2))
637
638
    ival = tsh.interval(engine, name)
    assert ival.left == datetime(2018, 1, 1)
639
    assert ival.right == datetime(2018, 1, 2)
640
641
642
643
644

    ts = pd.Series(
        [np.nan, np.nan, np.nan],
        index=pd.date_range(datetime(2017, 12, 30), freq='D', periods=3)
    )
645
646
    tsh.insert(engine, ts, name, 'Arthur',
               _insertion_date=idate.replace(day=3))
647
    ival = tsh.interval(engine, name)
648
649
    assert ival.left == datetime(2018, 1, 2)
    assert ival.right == datetime(2018, 1, 2)
650
651


652
def test_get_history(engine, tsh):
653
    for numserie in (1, 2, 3):
654
        with engine.begin() as cn:
655
656
657
            tsh.insert(cn, genserie(datetime(2017, 1, 1), 'D', numserie), 'smallserie',
                       'aurelien.campeas@pythonian.fr',
                       _insertion_date=utcdt(2017, 2, numserie))
658
659
660
661
662
663
664
665
666
667
668

    ts = tsh.get(engine, 'smallserie')
    assert_df("""
2017-01-01    0.0
2017-01-02    1.0
2017-01-03    2.0
""", ts)

    logs = tsh.log(engine, names=['smallserie'])
    assert [
        {'author': 'aurelien.campeas@pythonian.fr',
669
         'meta': {},
670
         'date': pd.Timestamp('2017-02-01 00:00:00+0000', tz='UTC'),
671
672
673
         'names': ['smallserie']
        },
        {'author': 'aurelien.campeas@pythonian.fr',
674
         'meta': {},
675
         'date': pd.Timestamp('2017-02-02 00:00:00+0000', tz='UTC'),
676
677
678
         'names': ['smallserie']
        },
        {'author': 'aurelien.campeas@pythonian.fr',
679
         'meta': {},
680
         'date': pd.Timestamp('2017-02-03 00:00:00+0000', tz='UTC'),
681
682
683
684
685
686
         'names': ['smallserie']
        }
    ] == [{k: v for k, v in log.items() if k != 'rev'}
          for log in logs]
    histts = tsh.get_history(engine, 'smallserie')

687
    assert_hist("""
688
689
690
691
692
693
694
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
                           2017-01-03    2.0
695
696
""", histts)

697
    diffs = tsh.get_history(engine, 'smallserie', diffmode=True)
698
    assert_hist("""
699
700
701
702
703
704
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-03    2.0
""", diffs)

705
    for idate in histts:
706
        with engine.begin() as cn:
707
            idate = idate.replace(tzinfo=pytz.timezone('UTC'))
708
709
            tsh.insert(cn, histts[idate], 'smallserie2',
                       'aurelien.campeas@pythonian.f', _insertion_date=idate)
710
711
712

    # this is perfectly round-tripable
    assert (tsh.get(engine, 'smallserie2') == ts).all()
713
    assert_hist_equals(tsh.get_history(engine, 'smallserie2'), histts)
714
715
716
717

    # get history ranges
    tsa = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2017, 2, 2))
718
    assert_hist("""
719
720
721
722
723
724
insertion_date             value_date
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
                           2017-01-03    2.0
725
726
727
728
""", tsa)

    tsb = tsh.get_history(engine, 'smallserie',
                          to_insertion_date=datetime(2017, 2, 2))
729
    assert_hist("""
730
731
732
733
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
734
735
736
737
738
""", tsb)

    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2017, 2, 2),
                          to_insertion_date=datetime(2017, 2, 2))
739
    assert_hist("""
740
741
742
insertion_date             value_date
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
743
744
""", tsc)

745
746
747
    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2017, 2, 4),
                          to_insertion_date=datetime(2017, 2, 4))
748
    assert tsc == {}
749
750
751
752

    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2016, 2, 1),
                          to_insertion_date=datetime(2017, 2, 2))
753
    assert_hist("""
754
755
756
757
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
758
759
760
761
762
""", tsc)

    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2016, 2, 1),
                          to_insertion_date=datetime(2016, 12, 31))
763
    assert tsc == {}
764

765
766
767
768
    # restrictions on value dates
    tsc = tsh.get_history(engine, 'smallserie',
                          from_value_date=datetime(2017, 1, 1),
                          to_value_date=datetime(2017, 1, 2))
769
    assert_hist("""
770
771
772
773
774
775
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
776
777
778
779
""", tsc)

    tsc = tsh.get_history(engine, 'smallserie',
                          from_value_date=datetime(2017, 1, 2))
780
    assert_hist("""
781
782
783
784
insertion_date             value_date
2017-02-02 00:00:00+00:00  2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-02    1.0
                           2017-01-03    2.0
785
786
787
788
""", tsc)

    tsc = tsh.get_history(engine, 'smallserie',
                          to_value_date=datetime(2017, 1, 2))
789
    assert_hist("""
790
791
792
793
794
795
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
796
797
""", tsc)

798
799
800
    tsc = tsh.get_history(engine, 'no-such-series')
    assert tsc is None

801
802
803
804
805
806
    idates = tsh.insertion_dates(engine, 'smallserie')
    assert idates == [
        pd.Timestamp('2017-02-01 00:00:00+0000', tz='UTC'),
        pd.Timestamp('2017-02-02 00:00:00+0000', tz='UTC'),
        pd.Timestamp('2017-02-03 00:00:00+0000', tz='UTC')
    ]
807
808
809
810
811
812
813
814
    idates = tsh.insertion_dates(
        engine, 'smallserie',
        fromdate=utcdt(2017, 2, 2),
        todate=utcdt(2017, 2, 2)
    )
    assert idates == [
        pd.Timestamp('2017-02-02 00:00:00+0000', tz='UTC'),
    ]
815
816


817

818
819
820
821
822
823
824
def test_history_delta(engine, tsh):
    for d in range(1, 3):
        idate = utcdt(2018, 1, d)
        serie = genserie(idate - timedelta(hours=1), 'H', 6, initval=[d])
        tsh.insert(engine, serie, 'hd', 'aurelien.campeas@pythonian.fr',
                   _insertion_date=idate)

825
    assert_hist("""
826
insertion_date             value_date               
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
2018-01-01 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
                           2018-01-01 03:00:00+00:00    1.0
                           2018-01-01 04:00:00+00:00    1.0
2018-01-02 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
                           2018-01-01 03:00:00+00:00    1.0
                           2018-01-01 04:00:00+00:00    1.0
                           2018-01-01 23:00:00+00:00    2.0
                           2018-01-02 00:00:00+00:00    2.0
                           2018-01-02 01:00:00+00:00    2.0
                           2018-01-02 02:00:00+00:00    2.0
                           2018-01-02 03:00:00+00:00    2.0
                           2018-01-02 04:00:00+00:00    2.0
    """, tsh.get_history(engine, 'hd'))

847
    assert_hist("""
848
insertion_date             value_date               
849
850
2018-01-01 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
851
852
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
853
854
855
856
857
858
859
860
2018-01-02 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
                           2018-01-01 03:00:00+00:00    1.0
                           2018-01-01 04:00:00+00:00    1.0
                           2018-01-01 23:00:00+00:00    2.0
                           2018-01-02 00:00:00+00:00    2.0
861
862
863
864
                           2018-01-02 01:00:00+00:00    2.0
                           2018-01-02 02:00:00+00:00    2.0
""",  tsh.get_history(engine, 'hd', deltaafter=timedelta(hours=2)))

865
    assert_hist("""
866
insertion_date             value_date               
867
2018-01-01 00:00:00+00:00  2018-01-01 00:00:00+00:00    1.0
868
                           2018-01-01 01:00:00+00:00    1.0
869
2018-01-02 00:00:00+00:00  2018-01-02 00:00:00+00:00    2.0
870
871
                           2018-01-02 01:00:00+00:00    2.0
""",  tsh.get_history(engine, 'hd',
872
                      deltabefore=timedelta(hours=0),
873
874
875
                      deltaafter=timedelta(hours=1)))


876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
def test_delta_na(engine, tsh):
    ldates = (
        utcdt(2015, 1, 20),
        utcdt(2015, 1, 21),
        utcdt(2015, 1, 22)
    )

    for idx, idate in enumerate(ldates):
        ts = pd.Series([idx] * 3, index = ldates)
        tsh.insert(engine, ts, 'without_na', 'arnaud',
                   _insertion_date=idate)

    assert_df("""
2015-01-20 00:00:00+00:00    2.0
2015-01-21 00:00:00+00:00    2.0
2015-01-22 00:00:00+00:00    2.0
""", tsh.get(engine, 'without_na'))

    assert_df("""
2015-01-20 00:00:00+00:00    0.0
2015-01-21 00:00:00+00:00    1.0
2015-01-22 00:00:00+00:00    2.0
    """, tsh.get_delta(engine, 'without_na', delta=timedelta(hours=0)))
    #as expected

    assert_hist("""
insertion_date             value_date               
2015-01-20 00:00:00+00:00  2015-01-20 00:00:00+00:00    0.0
                           2015-01-21 00:00:00+00:00    0.0
                           2015-01-22 00:00:00+00:00    0.0
2015-01-21 00:00:00+00:00  2015-01-20 00:00:00+00:00    1.0
                           2015-01-21 00:00:00+00:00    1.0
                           2015-01-22 00:00:00+00:00    1.0
2015-01-22 00:00:00+00:00  2015-01-20 00:00:00+00:00    2.0
                           2015-01-21 00:00:00+00:00    2.0
                           2015-01-22 00:00:00+00:00    2.0
    """, tsh.get_history(engine, 'without_na'))

    # now, the last insertion has Na as last value
    ldates = (
        utcdt(2015, 1, 20),
        utcdt(2015, 1, 21),
        utcdt(2015, 1, 22)
    )

    for idx, idate in enumerate(ldates):
        serie = pd.Series([float(idx)] * 3, index = ldates)
        if idx == 2:
            serie[-1] = np.nan
        tsh.insert(engine, serie, 'with_na', 'arnaud',
                   _insertion_date=idate)

    # the value at 2015-01-22 is hidden by the inserted nan
    assert_df("""
2015-01-20 00:00:00+00:00    2.0
2015-01-21 00:00:00+00:00    2.0
    """, tsh.get(engine, 'with_na'))

934
    # the last value is also correctly hidden
935
936
937
938
939
    assert_df("""
2015-01-20 00:00:00+00:00    0.0
2015-01-21 00:00:00+00:00    1.0
    """, tsh.get_delta(engine, 'with_na', delta=timedelta(hours=0)))

940
941
    # the value gathered by get_delta at value date 2015-01-22 is a
    # nan, so it masks the previous ones at the same date
942
943
944
945
946
947
948
949
950
951
    assert_hist("""
insertion_date             value_date               
2015-01-20 00:00:00+00:00  2015-01-20 00:00:00+00:00    0.0
                           2015-01-21 00:00:00+00:00    0.0
                           2015-01-22 00:00:00+00:00    0.0
2015-01-21 00:00:00+00:00  2015-01-20 00:00:00+00:00    1.0
                           2015-01-21 00:00:00+00:00    1.0
                           2015-01-22 00:00:00+00:00    1.0
2015-01-22 00:00:00+00:00  2015-01-20 00:00:00+00:00    2.0
                           2015-01-21 00:00:00+00:00    2.0
952
953
                           2015-01-22 00:00:00+00:00    NaN
        """, tsh.get_history(engine, 'with_na', _keep_nans=True))
954
955


956
957
958
959
960
961
962
963
964
965
966
def test_nr_gethistory(engine, tsh):
    s0 = pd.Series([-1, 0, 0, -1],
                   index=pd.DatetimeIndex(start=datetime(2016, 12, 29),
                                          end=datetime(2017, 1, 1),
                                          freq='D'))
    tsh.insert(engine, s0, 'foo', 'zogzog')

    s1 = pd.Series([1, 0, 0, 1],
                   index=pd.DatetimeIndex(start=datetime(2017, 1, 1),
                                          end=datetime(2017, 1, 4),
                                          freq='D'))
967
    idate = utcdt(2016, 1, 1)
968
    for i in range(5):
969
        with engine.begin() as cn:
970
971
972
            tsh.insert(cn, s1 * i, 'foo',
                       'aurelien.campeas@pythonian.f',
                       _insertion_date=idate + timedelta(days=i))
973
974
975
976
977
978
979

    df = tsh.get_history(engine, 'foo',
                         datetime(2016, 1, 3),
                         datetime(2016, 1, 4),
                         datetime(2017, 1, 1),
                         datetime(2017, 1, 4))

980
    assert_hist("""
981
982
983
984
985
986
987
988
989
insertion_date             value_date
2016-01-03 00:00:00+00:00  2017-01-01    2.0
                           2017-01-02    0.0
                           2017-01-03    0.0
                           2017-01-04    2.0
2016-01-04 00:00:00+00:00  2017-01-01    3.0
                           2017-01-02    0.0
                           2017-01-03    0.0
                           2017-01-04    3.0
990
991
992
""", df)


993
def test_add_na(engine, tsh):
994
995
996
997
998
    # a serie of NaNs won't be insert in base
    # in case of first insertion
    ts_nan = genserie(datetime(2010, 1, 1), 'D', 5)
    ts_nan[[True] * len(ts_nan)] = np.nan

999
    diff = tsh.insert(engine, ts_nan, 'ts_add_na', 'test')
1000
    assert diff is None
1001
1002
1003

    # in case of insertion in existing data
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 5)
1004
    tsh.insert(engine, ts_begin, 'ts_add_na', 'test')
1005
1006
1007
1008
1009

    ts_nan = genserie(datetime(2010, 1, 6), 'D', 5)
    ts_nan[[True] * len(ts_nan)] = np.nan
    ts_nan = pd.concat([ts_begin, ts_nan])

1010
    diff = tsh.insert(engine, ts_nan, 'ts_add_na', 'test')
1011
1012
    assert diff is None

1013
    result = tsh.get(engine, 'ts_add_na')
1014
    assert len(result) == 5
1015
1016


1017
def test_dtype_mismatch(engine, tsh):
1018
    tsh.insert(engine,
1019
1020
1021
1022
1023
               genserie(datetime(2015, 1, 1), 'D', 11).astype('str'),
               'error1',
               'test')

    with pytest.raises(Exception) as excinfo:
1024
        tsh.insert(engine,
1025
1026
1027
1028
1029
1030
                   genserie(datetime(2015, 1, 1), 'D', 11),
                   'error1',
                   'test')

    assert 'Type error when inserting error1, new type is float64, type in base is object' == str(excinfo.value)

1031
    tsh.insert(engine,
1032
1033
1034
1035
1036
               genserie(datetime(2015, 1, 1), 'D', 11),
               'error2',
               'test')

    with pytest.raises(Exception) as excinfo:
1037
        tsh.insert(engine,
1038
1039
1040
1041
1042
                   genserie(datetime(2015, 1, 1), 'D', 11).astype('str'),
                   'error2',
                   'test')

    assert 'Type error when inserting error2, new type is object, type in base is float64' == str(excinfo.value)
1043
1044


1045
1046
1047
1048
1049
1050
def test_precision(engine, tsh):
    floaty = 0.123456789123456789
    ts = genserie(datetime(2015, 1, 1), 'D', 5, initval=[floaty])

    tsh.insert(engine, ts, 'precision', 'test')
    ts_round = tsh.get(engine, 'precision')
1051
    assert 0.12345678912345678 == ts_round.iloc[0]
1052
1053

    diff = tsh.insert(engine, ts_round, 'precision', 'test')
Aurélien Campéas's avatar
Aurélien Campéas committed
1054
    assert diff is None  # the roundtriped series does not produce a diff when reinserted
Arnaud Campeas's avatar