test_tsio.py 51.9 KB
Newer Older
1
from datetime import datetime, timedelta
2
3
from pathlib import Path
import pytz
4

5
from dateutil import parser
6
import pytest
7
8
import numpy as np
import pandas as pd
9

10
from tshistory.snapshot import Snapshot
11
12
from tshistory.util import rename_series, threadpool
from tshistory.tsio import TimeSerie
13
14
from tshistory.testutil import (
    assert_df,
15
16
    assert_hist,
    assert_hist_equals,
17
    assert_group_equals,
18
    assert_structures,
19
20
21
    genserie,
    tempattr
)
22

23
DATADIR = Path(__file__).parent / 'data'
24

Aurélien Campéas's avatar
Aurélien Campéas committed
25

26
27
28
29
def utcdt(*dt):
    return pd.Timestamp(datetime(*dt), tz='UTC')


30
31
32
def test_in_tx(engine):
    tsh = TimeSerie()

33
34
    assert tsh.type(engine, 'foo') == 'primary'

35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
    with pytest.raises(TypeError) as err:
        tsh.insert(engine, 0, 0, 0)
    assert err.value.args[0] == 'You must use a transaction object'

    with engine.connect() as cn:
        with pytest.raises(TypeError) as err:
            tsh.insert(cn, 0, 0, 0)
    assert err.value.args[0] == 'You must use a transaction object'

    ts = genserie(datetime(2017, 10, 28, 23),
                  'H', 4, tz='UTC')
    with engine.begin() as cn:
        tsh.insert(cn, ts, 'test_tx', 'Babar')


50
def test_tstamp_roundtrip(engine, tsh):
51
    assert_structures(engine, tsh)
52
53
    ts = genserie(datetime(2017, 10, 28, 23),
                  'H', 4, tz='UTC')
54
55
56
57
58
59
60
61
62
63
    ts.index = ts.index.tz_convert('Europe/Paris')

    assert_df("""
2017-10-29 01:00:00+02:00    0
2017-10-29 02:00:00+02:00    1
2017-10-29 02:00:00+01:00    2
2017-10-29 03:00:00+01:00    3
Freq: H
    """, ts)

64
65
    tsh.insert(engine, ts, 'tztest', 'Babar',
               _insertion_date=utcdt(2018, 1, 1))
66
    back = tsh.get(engine, 'tztest')
67
68
69

    # though un localized we understand it's been normalized to utc
    assert_df("""
70
71
72
73
2017-10-28 23:00:00+00:00    0.0
2017-10-29 00:00:00+00:00    1.0
2017-10-29 01:00:00+00:00    2.0
2017-10-29 02:00:00+00:00    3.0
74
75
76
""", back)

    assert (ts.index == back.index).all()
77
    assert str(back.index.dtype) == 'datetime64[ns, UTC]'
78

79
80
81
82
    ival = tsh.interval(engine, 'tztest')
    assert ival.left == pd.Timestamp('2017-10-28 23:00:00+0000', tz='UTC')
    assert ival.right == pd.Timestamp('2017-10-29 02:00:00+0000', tz='UTC')

83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
    ts = genserie(datetime(2017, 10, 29, 1),
                  'H', 4, tz='UTC')
    ts.index = ts.index.tz_convert('Europe/Paris')
    tsh.insert(engine, ts, 'tztest', 'Celeste',
                   _insertion_date=utcdt(2018, 1, 3))

    ts = tsh.get(engine, 'tztest')
    assert_df("""
2017-10-28 23:00:00+00:00    0.0
2017-10-29 00:00:00+00:00    1.0
2017-10-29 01:00:00+00:00    0.0
2017-10-29 02:00:00+00:00    1.0
2017-10-29 03:00:00+00:00    2.0
2017-10-29 04:00:00+00:00    3.0
""", ts)

    hist = tsh.get_history(engine, 'tztest')
    assert_hist("""
insertion_date             value_date               
2018-01-01 00:00:00+00:00  2017-10-28 23:00:00+00:00    0.0
                           2017-10-29 00:00:00+00:00    1.0
                           2017-10-29 01:00:00+00:00    2.0
                           2017-10-29 02:00:00+00:00    3.0
2018-01-03 00:00:00+00:00  2017-10-28 23:00:00+00:00    0.0
                           2017-10-29 00:00:00+00:00    1.0
                           2017-10-29 01:00:00+00:00    0.0
                           2017-10-29 02:00:00+00:00    1.0
                           2017-10-29 03:00:00+00:00    2.0
                           2017-10-29 04:00:00+00:00    3.0
""", hist)

    hist = tsh.get_history(engine, 'tztest',
                           from_value_date=utcdt(2017, 10, 29, 1),
                           to_value_date=utcdt(2017, 10, 29, 3))
    assert_hist("""
insertion_date             value_date               
2018-01-01 00:00:00+00:00  2017-10-29 01:00:00+00:00    2.0
                           2017-10-29 02:00:00+00:00    3.0
2018-01-03 00:00:00+00:00  2017-10-29 01:00:00+00:00    0.0
                           2017-10-29 02:00:00+00:00    1.0
                           2017-10-29 03:00:00+00:00    2.0
""", hist)

126
127
128
    ival = tsh.interval(engine, 'tztest')
    assert ival.left == pd.Timestamp('2017-10-28 23:00:00+0000', tz='UTC')
    assert ival.right == pd.Timestamp('2017-10-29 04:00:00+0000', tz='UTC')
129
    assert_structures(engine, tsh)
130

131

132
def test_differential(engine, tsh):
133
    assert_structures(engine, tsh)
134
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 10)
135
    tsh.insert(engine, ts_begin, 'ts_test', 'test')
136

137
    id1 = tsh.last_id(engine, 'ts_test')
138
    assert tsh._previous_cset(engine, 'ts_test', id1) is None
139

140
141
    assert tsh.exists(engine, 'ts_test')
    assert not tsh.exists(engine, 'this_does_not_exist')
142

143
144
145
146
147
148
149
150
    assert tsh.interval(engine, 'ts_test') == pd.Interval(
        datetime(2010, 1, 1, 0, 0), datetime(2010, 1, 10, 0, 0),
        closed='both'
    )

    with pytest.raises(ValueError):
        assert tsh.interval(engine, 'nosuchts')

151
    fetched = tsh.get(engine, 'ts_test')
152
    assert_df("""
153
154
155
156
157
158
159
160
161
162
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    3.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
163
164
""", fetched)
    assert fetched.name == 'ts_test'
165
166

    # we should detect the emission of a message
167
    tsh.insert(engine, ts_begin, 'ts_test', 'babar')
168

169
    assert_df("""
170
171
172
173
174
175
176
177
178
179
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    3.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
180
""", tsh.get(engine, 'ts_test'))
181
182
183
184

    ts_slight_variation = ts_begin.copy()
    ts_slight_variation.iloc[3] = 0
    ts_slight_variation.iloc[6] = 0
185
    tsh.insert(engine, ts_slight_variation, 'ts_test', 'celeste')
186
    id2 = tsh.last_id(engine, 'ts_test')
187
    assert tsh._previous_cset(engine, 'ts_test', id2) == id1
188

189
    assert_df("""
190
191
192
193
194
195
196
197
198
199
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    0.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    0.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
200
""", tsh.get(engine, 'ts_test'))
201

202
    ts_longer = genserie(datetime(2010, 1, 3), 'D', 15)
203
204
205
206
    ts_longer.iloc[1] = 2.48
    ts_longer.iloc[3] = 3.14
    ts_longer.iloc[5] = ts_begin.iloc[7]

207
    with engine.begin() as cn:
208
        tsh.insert(cn, ts_longer, 'ts_test', 'test')
209
210
211
    id3 = tsh.last_id(engine, 'ts_test')

    assert id1 < id2 < id3
212

213
    assert_df("""
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
2010-01-01     0.00
2010-01-02     1.00
2010-01-03     0.00
2010-01-04     2.48
2010-01-05     2.00
2010-01-06     3.14
2010-01-07     4.00
2010-01-08     7.00
2010-01-09     6.00
2010-01-10     7.00
2010-01-11     8.00
2010-01-12     9.00
2010-01-13    10.00
2010-01-14    11.00
2010-01-15    12.00
2010-01-16    13.00
2010-01-17    14.00
231
""", tsh.get(engine, 'ts_test'))
232

233
234
235
236
237
    assert tsh.interval(engine, 'ts_test') == pd.Interval(
        datetime(2010, 1, 1, 0, 0), datetime(2010, 1, 17, 0, 0),
        closed='both'
    )

238
    # insert single data, in override of previous one
239
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 5, initval=[2])
240
    ts_begin.loc['2010-01-04'] = -1
241
    tsh.insert(engine, ts_begin, 'ts_mixte', 'test')
242

243
    assert_df("""
244
245
246
247
248
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
249
""", tsh.get(engine, 'ts_mixte'))
250

251
    # add new series with one additional values
252
    ts_more = genserie(datetime(2010, 1, 2), 'D', 5, [2])
253
    ts_more.loc['2010-01-04'] = -1
254
    tsh.insert(engine, ts_more, 'ts_mixte', 'test')
255

256
    assert_df("""
257
258
259
260
261
262
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
263
""", tsh.get(engine, 'ts_mixte'))
264
265

    # just append an extra data point
266
267
    # with no intersection with the previous ts
    ts_one_more = genserie(datetime(2010, 1, 7), 'D', 1, [3])
268
    tsh.insert(engine, ts_one_more, 'ts_mixte', 'test')
269

270
    assert_df("""
271
272
273
274
275
276
277
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
278
""", tsh.get(engine, 'ts_mixte'))
279

280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
    assert_df("""
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
    """, tsh.get(engine, 'ts_mixte',
                 from_value_date=datetime(2010, 1, 3),
                 to_value_date=datetime(2010, 1, 6)
    ))

    assert_df("""
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
        """, tsh.get(engine, 'ts_mixte',
                     from_value_date=datetime(2010, 1, 4)
    ))

    assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
            """, tsh.get(engine, 'ts_mixte',
                         to_value_date=datetime(2010, 1, 5)
        ))

309
    with engine.begin() as cn:
310
        cn.execute('set search_path to "{0}.timeserie", {0}, public'.format(tsh.namespace))
311
312
        allts = pd.read_sql("select seriename, table_name from registry "
                            "where seriename in ('ts_test', 'ts_mixte')",
313
                            cn)
314

315
        assert_df("""
316
317
318
seriename table_name
0   ts_test    ts_test
1  ts_mixte   ts_mixte
319
""".format(tsh.namespace), allts)
320

321
        assert_df("""
322
323
324
325
326
327
328
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
329
""", tsh.get(cn, 'ts_mixte',
330
             revision_date=datetime.now()))
331

332
333
    assert_structures(engine, tsh)

334

335
336
337
338
339
340
def test_serie_metadata(engine, tsh):
    serie = genserie(datetime(2010, 1, 1), 'D', 1, initval=[1])
    tsh.insert(engine, serie, 'ts-metadata', 'babar')

    initialmeta = tsh.metadata(engine, 'ts-metadata')
    assert initialmeta == {
341
342
        'index_dtype': '<M8[ns]',
        'index_names': [],
343
        'index_type': 'datetime64[ns]',
344
345
346
        'tzaware': False,
        'value_dtype': '<f8',
        'value_type': 'float64'
347
348
349
350
351
352
353
354
355
356
357
    }

    tsh.update_metadata(engine, 'ts-metadata',
                        {'topic': 'banana spot price'}
    )
    assert tsh.metadata(engine, 'ts-metadata')['topic'] == 'banana spot price'

    with pytest.raises(AssertionError):
        tsh.update_metadata(engine, 'ts-metadata', {'tzaware': True})

    assert tsh.metadata(engine, 'ts-metadata') == {
358
        'index_dtype': '<M8[ns]',
359
        'index_names': [],
360
361
        'index_type': 'datetime64[ns]',
        'topic': 'banana spot price',
362
        'tzaware': False,
363
364
        'value_dtype': '<f8',
        'value_type': 'float64'
365
    }
366

367

368
369
def test_changeset_metadata(engine, tsh):
    serie = genserie(datetime(2010, 1, 1), 'D', 1, initval=[1])
370
    tsh.insert(engine, serie, 'ts-cs-metadata', 'babar',
371
372
               {'foo': 'A', 'bar': 42})

373
    log = tsh.log(engine, names=['ts-cs-metadata'])
374
375
376
377
    meta = tsh.changeset_metadata(engine, log[0]['rev'])
    assert meta == {'foo': 'A', 'bar': 42}


378
def test_revision_date(engine, tsh):
379
    for i in range(1, 5):
380
        with engine.begin() as cn:
381
382
            tsh.insert(cn, genserie(datetime(2017, 1, i), 'D', 3, [i]), 'revdate',
                       'test', _insertion_date=utcdt(2016, 1, i))
383
384

    # end of prologue, now some real meat
385
    idate0 = pd.Timestamp('2015-1-1 00:00:00', tz='UTC')
386
387
388
389
    ts = genserie(datetime(2010, 1, 4), 'D', 4, [0], name='truc')
    tsh.insert(engine, ts, 'ts_through_time',
               'test', _insertion_date=idate0)
    assert idate0 == tsh.latest_insertion_date(engine, 'ts_through_time')
390

391
    idate1 = pd.Timestamp('2015-1-1 15:45:23', tz='UTC')
392
393
394
395
    ts = genserie(datetime(2010, 1, 4), 'D', 4, [1], name='truc')
    tsh.insert(engine, ts, 'ts_through_time',
               'test', _insertion_date=idate1)
    assert idate1 == tsh.latest_insertion_date(engine, 'ts_through_time')
396

397
    idate2 = pd.Timestamp('2015-1-2 15:43:23', tz='UTC')
398
399
400
401
    ts = genserie(datetime(2010, 1, 4), 'D', 4, [2], name='truc')
    tsh.insert(engine, ts, 'ts_through_time',
               'test', _insertion_date=idate2)
    assert idate2 == tsh.latest_insertion_date(engine, 'ts_through_time')
402

403
    idate3 = pd.Timestamp('2015-1-3', tz='UTC')
404
405
406
407
    ts = genserie(datetime(2010, 1, 4), 'D', 4, [3], name='truc')
    tsh.insert(engine, ts, 'ts_through_time',
               'test', _insertion_date=idate3)
    assert idate3 == tsh.latest_insertion_date(engine, 'ts_through_time')
408

409
    ts = tsh.get(engine, 'ts_through_time')
410

411
    assert_df("""
412
413
414
415
2010-01-04    3.0
2010-01-05    3.0
2010-01-06    3.0
2010-01-07    3.0
416
""", ts)
417

418
    ts = tsh.get(engine, 'ts_through_time',
Aurélien Campéas's avatar
Aurélien Campéas committed
419
                 revision_date=datetime(2015, 1, 2, 18, 43, 23))
420

421
    assert_df("""
422
423
424
425
2010-01-04    2.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    2.0
426
""", ts)
427

428
    ts = tsh.get(engine, 'ts_through_time',
429
                 revision_date=datetime(2015, 1, 1, 18, 43, 23))
430

431
    assert_df("""
432
433
434
435
2010-01-04    1.0
2010-01-05    1.0
2010-01-06    1.0
2010-01-07    1.0
436
""", ts)
437

438
    ts = tsh.get(engine, 'ts_through_time',
439
                 revision_date=datetime(2014, 1, 1, 18, 43, 23))
440
441
442

    assert ts is None

443
444
445
446
447
448
449
450
451
452
    # epilogue: back to the revdate issue
    assert_df("""
2017-01-01    1.0
2017-01-02    2.0
2017-01-03    3.0
2017-01-04    4.0
2017-01-05    4.0
2017-01-06    4.0
""", tsh.get(engine, 'revdate'))

453
    oldstate = tsh.get(engine, 'revdate', revision_date=datetime(2016, 1, 2))
454
455
456
    assert_df("""
2017-01-01    1.0
2017-01-02    2.0
457
458
459
2017-01-03    2.0
2017-01-04    2.0
""", oldstate)
460

461

462
def test_point_deletion(engine, tsh):
463
464
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 11)
    ts_begin.iloc[-1] = np.nan
465
    tsh.insert(engine, ts_begin, 'ts_del', 'test')
466

467
    _, ts = Snapshot(engine, tsh, 'ts_del').find()
468
    assert ts.iloc[-2] == 8.0
469

470
    ts_begin.iloc[0] = np.nan
471
    ts_begin.iloc[3] = np.nan
472

473
    tsh.insert(engine, ts_begin, 'ts_del', 'test')
474

475
    assert_df("""
476
477
478
479
480
481
482
2010-01-02    1.0
2010-01-03    2.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
483
2010-01-10    9.0
484
""", tsh.get(engine, 'ts_del'))
485

486
    ts2 = tsh.get(engine, 'ts_del',
487
488
                 # force snapshot reconstruction feature
                 revision_date=datetime(2038, 1, 1))
489
    assert (tsh.get(engine, 'ts_del') == ts2).all()
490

491
492
493
    ts_begin.iloc[0] = 42
    ts_begin.iloc[3] = 23

494
    tsh.insert(engine, ts_begin, 'ts_del', 'test')
495

496
    assert_df("""
497
498
499
500
501
502
503
504
505
2010-01-01    42.0
2010-01-02     1.0
2010-01-03     2.0
2010-01-04    23.0
2010-01-05     4.0
2010-01-06     5.0
2010-01-07     6.0
2010-01-08     7.0
2010-01-09     8.0
506
2010-01-10     9.0
507
""", tsh.get(engine, 'ts_del'))
508
509
510

    # now with string!

511
    ts_string = genserie(datetime(2010, 1, 1), 'D', 10, ['machin'])
512
    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
513
514
515
516

    ts_string[4] = None
    ts_string[5] = None

517
    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
518
    assert_df("""
519
520
521
522
523
524
525
526
2010-01-01    machin
2010-01-02    machin
2010-01-03    machin
2010-01-04    machin
2010-01-07    machin
2010-01-08    machin
2010-01-09    machin
2010-01-10    machin
527
""", tsh.get(engine, 'ts_string_del'))
528
529
530
531

    ts_string[4] = 'truc'
    ts_string[6] = 'truc'

532
    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
533
    assert_df("""
534
535
536
537
538
539
540
541
2010-01-01    machin
2010-01-02    machin
2010-01-03    machin
2010-01-04    machin
2010-01-05      truc
2010-01-07      truc
2010-01-08    machin
2010-01-09    machin
542
2010-01-10    machin
543
""", tsh.get(engine, 'ts_string_del'))
544

545
    ts_string[ts_string.index] = np.nan
546
547
    with pytest.raises(ValueError):
        tsh.insert(engine, ts_string, 'ts_string_del', 'test')
548

549

550
551
def test_nan_first(engine, tsh):
    # first insertion with only nan
552
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 10, [np.nan])
553
    assert tsh.insert(engine, ts_begin, 'ts_null', 'test') is None
554

555

556
def test_more_point_deletion(engine, tsh):
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
    ts_repushed = genserie(datetime(2010, 1, 1), 'D', 11)
    ts_repushed[0:3] = np.nan

    assert_df("""
2010-01-01     NaN
2010-01-02     NaN
2010-01-03     NaN
2010-01-04     3.0
2010-01-05     4.0
2010-01-06     5.0
2010-01-07     6.0
2010-01-08     7.0
2010-01-09     8.0
2010-01-10     9.0
2010-01-11    10.0
Freq: D
""", ts_repushed)

575
576
    tsh.insert(engine, ts_repushed, 'ts_repushed', 'test')
    diff = tsh.insert(engine, ts_repushed, 'ts_repushed', 'test')
577
578
    assert diff is None

579
    # there is no difference
580
    assert 0 == len(tsh.diff(ts_repushed, ts_repushed))
581
582
583
584
585

    ts_add = genserie(datetime(2010, 1, 1), 'D', 15)
    ts_add.iloc[0] = np.nan
    ts_add.iloc[13:] = np.nan
    ts_add.iloc[8] = np.nan
586
    diff = tsh.diff(ts_repushed, ts_add)
587
588
589
590
591
592

    assert_df("""
2010-01-02     1.0
2010-01-03     2.0
2010-01-09     NaN
2010-01-12    11.0
593
2010-01-13    12.0""", diff.sort_index())
594
595
596
597
    # value on nan => value
    # nan on value => nan
    # nan on nan => Nothing
    # nan on nothing=> Nothing
598

Aurélien Campéas's avatar
Aurélien Campéas committed
599
    # full erasing
600
601
    # numeric
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 4)
602
    tsh.insert(engine, ts_begin, 'ts_full_del', 'test')
603

Aurélien Campéas's avatar
Aurélien Campéas committed
604
    ts_begin.iloc[:] = np.nan
605
606
    with pytest.raises(ValueError):
        tsh.insert(engine, ts_begin, 'ts_full_del', 'test')
607
608

    ts_end = genserie(datetime(2010, 1, 1), 'D', 4)
609
    tsh.insert(engine, ts_end, 'ts_full_del', 'test')
610
611
612
613

    # string

    ts_begin = genserie(datetime(2010, 1, 1), 'D', 4, ['text'])
614
    tsh.insert(engine, ts_begin, 'ts_full_del_str', 'test')
615

616
617
    ts_begin = pd.Series([np.nan] * 4, name='ts_full_del_str',
                         index=ts_begin.index)
618
619
620

    with pytest.raises(ValueError):
        tsh.insert(engine, ts_begin, 'ts_full_del_str', 'test')
621
622

    ts_end = genserie(datetime(2010, 1, 1), 'D', 4, ['text'])
623
    tsh.insert(engine, ts_end, 'ts_full_del_str', 'test')
624

Aurélien Campéas's avatar
Aurélien Campéas committed
625

626
def test_deletion_over_horizon(engine, tsh):
627
    idate = utcdt(2018, 2, 1)
628
629
630
631
632
633
    ts = pd.Series(
        [1, 2, 3],
        index=pd.date_range(datetime(2018, 1, 1), freq='D', periods=3)
    )

    name = 'delete_over_hz'
634
635
    tsh.insert(engine, ts, name, 'Babar',
               _insertion_date=idate)
636
637
638
639
640
641

    ts = pd.Series(
        [np.nan, np.nan, np.nan],
        index=pd.date_range(datetime(2018, 1, 3), freq='D', periods=3)
    )

642
643
    tsh.insert(engine, ts, name, 'Celeste',
               _insertion_date=idate.replace(day=2))
644
645
    ival = tsh.interval(engine, name)
    assert ival.left == datetime(2018, 1, 1)
646
    assert ival.right == datetime(2018, 1, 2)
647
648
649
650
651

    ts = pd.Series(
        [np.nan, np.nan, np.nan],
        index=pd.date_range(datetime(2017, 12, 30), freq='D', periods=3)
    )
652
653
    tsh.insert(engine, ts, name, 'Arthur',
               _insertion_date=idate.replace(day=3))
654
    ival = tsh.interval(engine, name)
655
656
    assert ival.left == datetime(2018, 1, 2)
    assert ival.right == datetime(2018, 1, 2)
657
658


659
def test_get_history(engine, tsh):
660
    for numserie in (1, 2, 3):
661
        with engine.begin() as cn:
662
663
664
            tsh.insert(cn, genserie(datetime(2017, 1, 1), 'D', numserie), 'smallserie',
                       'aurelien.campeas@pythonian.fr',
                       _insertion_date=utcdt(2017, 2, numserie))
665
666
667
668
669
670
671
672
673
674
675

    ts = tsh.get(engine, 'smallserie')
    assert_df("""
2017-01-01    0.0
2017-01-02    1.0
2017-01-03    2.0
""", ts)

    logs = tsh.log(engine, names=['smallserie'])
    assert [
        {'author': 'aurelien.campeas@pythonian.fr',
676
         'meta': {},
677
         'date': pd.Timestamp('2017-02-01 00:00:00+0000', tz='UTC'),
678
679
680
         'names': ['smallserie']
        },
        {'author': 'aurelien.campeas@pythonian.fr',
681
         'meta': {},
682
         'date': pd.Timestamp('2017-02-02 00:00:00+0000', tz='UTC'),
683
684
685
         'names': ['smallserie']
        },
        {'author': 'aurelien.campeas@pythonian.fr',
686
         'meta': {},
687
         'date': pd.Timestamp('2017-02-03 00:00:00+0000', tz='UTC'),
688
689
690
691
692
693
         'names': ['smallserie']
        }
    ] == [{k: v for k, v in log.items() if k != 'rev'}
          for log in logs]
    histts = tsh.get_history(engine, 'smallserie')

694
    assert_hist("""
695
696
697
698
699
700
701
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
                           2017-01-03    2.0
702
703
""", histts)

704
    diffs = tsh.get_history(engine, 'smallserie', diffmode=True)
705
    assert_hist("""
706
707
708
709
710
711
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-03    2.0
""", diffs)

712
    for idate in histts:
713
        with engine.begin() as cn:
714
            idate = idate.replace(tzinfo=pytz.timezone('UTC'))
715
716
            tsh.insert(cn, histts[idate], 'smallserie2',
                       'aurelien.campeas@pythonian.f', _insertion_date=idate)
717
718
719

    # this is perfectly round-tripable
    assert (tsh.get(engine, 'smallserie2') == ts).all()
720
    assert_hist_equals(tsh.get_history(engine, 'smallserie2'), histts)
721
722
723
724

    # get history ranges
    tsa = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2017, 2, 2))
725
    assert_hist("""
726
727
728
729
730
731
insertion_date             value_date
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
                           2017-01-03    2.0
732
733
734
735
""", tsa)

    tsb = tsh.get_history(engine, 'smallserie',
                          to_insertion_date=datetime(2017, 2, 2))
736
    assert_hist("""
737
738
739
740
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
741
742
743
744
745
""", tsb)

    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2017, 2, 2),
                          to_insertion_date=datetime(2017, 2, 2))
746
    assert_hist("""
747
748
749
insertion_date             value_date
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
750
751
""", tsc)

752
753
754
    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2017, 2, 4),
                          to_insertion_date=datetime(2017, 2, 4))
755
    assert tsc == {}
756
757
758
759

    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2016, 2, 1),
                          to_insertion_date=datetime(2017, 2, 2))
760
    assert_hist("""
761
762
763
764
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
765
766
767
768
769
""", tsc)

    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2016, 2, 1),
                          to_insertion_date=datetime(2016, 12, 31))
770
    assert tsc == {}
771

772
773
774
775
    # restrictions on value dates
    tsc = tsh.get_history(engine, 'smallserie',
                          from_value_date=datetime(2017, 1, 1),
                          to_value_date=datetime(2017, 1, 2))
776
    assert_hist("""
777
778
779
780
781
782
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
783
784
785
786
""", tsc)

    tsc = tsh.get_history(engine, 'smallserie',
                          from_value_date=datetime(2017, 1, 2))
787
    assert_hist("""
788
789
790
791
insertion_date             value_date
2017-02-02 00:00:00+00:00  2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-02    1.0
                           2017-01-03    2.0
792
793
794
795
""", tsc)

    tsc = tsh.get_history(engine, 'smallserie',
                          to_value_date=datetime(2017, 1, 2))
796
    assert_hist("""
797
798
799
800
801
802
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
803
804
""", tsc)

805
806
807
    tsc = tsh.get_history(engine, 'no-such-series')
    assert tsc is None

808
809
810
811
812
813
    idates = tsh.insertion_dates(engine, 'smallserie')
    assert idates == [
        pd.Timestamp('2017-02-01 00:00:00+0000', tz='UTC'),
        pd.Timestamp('2017-02-02 00:00:00+0000', tz='UTC'),
        pd.Timestamp('2017-02-03 00:00:00+0000', tz='UTC')
    ]
814
815
816
817
818
819
820
821
    idates = tsh.insertion_dates(
        engine, 'smallserie',
        fromdate=utcdt(2017, 2, 2),
        todate=utcdt(2017, 2, 2)
    )
    assert idates == [
        pd.Timestamp('2017-02-02 00:00:00+0000', tz='UTC'),
    ]
822
823


824

825
826
827
828
829
830
831
def test_history_delta(engine, tsh):
    for d in range(1, 3):
        idate = utcdt(2018, 1, d)
        serie = genserie(idate - timedelta(hours=1), 'H', 6, initval=[d])
        tsh.insert(engine, serie, 'hd', 'aurelien.campeas@pythonian.fr',
                   _insertion_date=idate)

832
    assert_hist("""
833
insertion_date             value_date               
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
2018-01-01 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
                           2018-01-01 03:00:00+00:00    1.0
                           2018-01-01 04:00:00+00:00    1.0
2018-01-02 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
                           2018-01-01 03:00:00+00:00    1.0
                           2018-01-01 04:00:00+00:00    1.0
                           2018-01-01 23:00:00+00:00    2.0
                           2018-01-02 00:00:00+00:00    2.0
                           2018-01-02 01:00:00+00:00    2.0
                           2018-01-02 02:00:00+00:00    2.0
                           2018-01-02 03:00:00+00:00    2.0
                           2018-01-02 04:00:00+00:00    2.0
    """, tsh.get_history(engine, 'hd'))

854
    assert_hist("""
855
insertion_date             value_date               
856
857
2018-01-01 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
858
859
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
860
861
862
863
864
865
866
867
2018-01-02 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
                           2018-01-01 03:00:00+00:00    1.0
                           2018-01-01 04:00:00+00:00    1.0
                           2018-01-01 23:00:00+00:00    2.0
                           2018-01-02 00:00:00+00:00    2.0
868
869
870
871
                           2018-01-02 01:00:00+00:00    2.0
                           2018-01-02 02:00:00+00:00    2.0
""",  tsh.get_history(engine, 'hd', deltaafter=timedelta(hours=2)))

872
    assert_hist("""
873
insertion_date             value_date               
874
2018-01-01 00:00:00+00:00  2018-01-01 00:00:00+00:00    1.0
875
                           2018-01-01 01:00:00+00:00    1.0
876
2018-01-02 00:00:00+00:00  2018-01-02 00:00:00+00:00    2.0
877
878
                           2018-01-02 01:00:00+00:00    2.0
""",  tsh.get_history(engine, 'hd',
879
                      deltabefore=timedelta(hours=0),
880
881
882
                      deltaafter=timedelta(hours=1)))


883
884
885
886
887
888
889
890
891
892
893
def test_nr_gethistory(engine, tsh):
    s0 = pd.Series([-1, 0, 0, -1],
                   index=pd.DatetimeIndex(start=datetime(2016, 12, 29),
                                          end=datetime(2017, 1, 1),
                                          freq='D'))
    tsh.insert(engine, s0, 'foo', 'zogzog')

    s1 = pd.Series([1, 0, 0, 1],
                   index=pd.DatetimeIndex(start=datetime(2017, 1, 1),
                                          end=datetime(2017, 1, 4),
                                          freq='D'))
894
    idate = utcdt(2016, 1, 1)
895
    for i in range(5):
896
        with engine.begin() as cn:
897
898
899
            tsh.insert(cn, s1 * i, 'foo',
                       'aurelien.campeas@pythonian.f',
                       _insertion_date=idate + timedelta(days=i))
900
901
902
903
904
905
906

    df = tsh.get_history(engine, 'foo',
                         datetime(2016, 1, 3),
                         datetime(2016, 1, 4),
                         datetime(2017, 1, 1),
                         datetime(2017, 1, 4))

907
    assert_hist("""
908
909
910
911
912
913
914
915
916
insertion_date             value_date
2016-01-03 00:00:00+00:00  2017-01-01    2.0
                           2017-01-02    0.0
                           2017-01-03    0.0
                           2017-01-04    2.0
2016-01-04 00:00:00+00:00  2017-01-01    3.0
                           2017-01-02    0.0
                           2017-01-03    0.0
                           2017-01-04    3.0
917
918
919
""", df)


920
def test_add_na(engine, tsh):
921
922
923
924
925
    # a serie of NaNs won't be insert in base
    # in case of first insertion
    ts_nan = genserie(datetime(2010, 1, 1), 'D', 5)
    ts_nan[[True] * len(ts_nan)] = np.nan

926
    diff = tsh.insert(engine, ts_nan, 'ts_add_na', 'test')
927
    assert diff is None
928
929
930

    # in case of insertion in existing data
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 5)
931
    tsh.insert(engine, ts_begin, 'ts_add_na', 'test')
932
933
934
935
936

    ts_nan = genserie(datetime(2010, 1, 6), 'D', 5)
    ts_nan[[True] * len(ts_nan)] = np.nan
    ts_nan = pd.concat([ts_begin, ts_nan])

937
    diff = tsh.insert(engine, ts_nan, 'ts_add_na', 'test')
938
939
    assert diff is None

940
    result = tsh.get(engine, 'ts_add_na')
941
    assert len(result) == 5
942
943


944
def test_dtype_mismatch(engine, tsh):
945
    tsh.insert(engine,
946
947
948
949
950
               genserie(datetime(2015, 1, 1), 'D', 11).astype('str'),
               'error1',
               'test')

    with pytest.raises(Exception) as excinfo:
951
        tsh.insert(engine,
952
953
954
955
956
957
                   genserie(datetime(2015, 1, 1), 'D', 11),
                   'error1',
                   'test')

    assert 'Type error when inserting error1, new type is float64, type in base is object' == str(excinfo.value)

958
    tsh.insert(engine,
959
960
961
962
963
               genserie(datetime(2015, 1, 1), 'D', 11),
               'error2',
               'test')

    with pytest.raises(Exception) as excinfo:
964
        tsh.insert(engine,
965
966
967
968
969
                   genserie(datetime(2015, 1, 1), 'D', 11).astype('str'),
                   'error2',
                   'test')

    assert 'Type error when inserting error2, new type is object, type in base is float64' == str(excinfo.value)
970
971


972
973
974
975
976
977
def test_precision(engine, tsh):
    floaty = 0.123456789123456789
    ts = genserie(datetime(2015, 1, 1), 'D', 5, initval=[floaty])

    tsh.insert(engine, ts, 'precision', 'test')
    ts_round = tsh.get(engine, 'precision')
978
    assert 0.12345678912345678 == ts_round.iloc[0]
979
980

    diff = tsh.insert(engine, ts_round, 'precision', 'test')
Aurélien Campéas's avatar
Aurélien Campéas committed
981
    assert diff is None  # the roundtriped series does not produce a diff when reinserted
982

Aurélien Campéas's avatar
Aurélien Campéas committed
983
    diff = tsh.insert(engine, ts, 'precision', 'test')  # neither does the original series
984
985
986
    assert diff is None


987
988
989
990
991
992
993
994
def test_serie_deletion(engine, tsh):
    ts = genserie(datetime(2018, 1, 10), 'H', 10)
    tsh.insert(engine, ts, 'keepme', 'Babar')
    tsh.insert(engine, ts, 'deleteme', 'Celeste')
    ts = genserie(datetime(2018, 1, 12), 'H', 10)
    tsh.insert(engine, ts, 'keepme', 'Babar')
    tsh.insert(engine, ts, 'deleteme', 'Celeste')

995
    seriecount, csetcount, csetseriecount = assert_structures(engine, tsh)
996

997
    with engine.begin() as cn:
998
999
1000
1001
1002
1003
1004
        tsh.delete(cn, 'deleteme')

    assert not tsh.exists(engine, 'deleteme')
    log = [entry['author']
           for entry in tsh.log(engine, names=('keepme', 'deleteme'))]
    assert log == ['Babar', 'Babar']

1005
1006
1007
1008
    seriecount2, csetcount2, csetseriecount2 = assert_structures(engine, tsh)

    assert csetcount - csetcount2  == 2
    assert csetseriecount - csetseriecount2 == 2
1009
1010
1011
1012
1013
1014
    assert seriecount - seriecount2 == 1

    with pytest.raises(AssertionError) as werr:
        tsh.delete(engine, 'keepme')
    assert werr.value.args[0] == 'use a transaction object'

1015
    tsh.insert(engine, ts, 'deleteme', 'Celeste')
1016

1017

1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044