test_tsio.py 51.7 KB
Newer Older
1
from datetime import datetime, timedelta
2
3
from pathlib import Path
import pytz
4

5
from dateutil import parser
6
import pytest
7
8
import numpy as np
import pandas as pd
9

10
from tshistory.snapshot import Snapshot
11
12
from tshistory.util import rename_series, threadpool
from tshistory.tsio import TimeSerie
13
14
from tshistory.testutil import (
    assert_df,
15
16
    assert_hist,
    assert_hist_equals,
17
    assert_group_equals,
18
    assert_structures,
19
20
21
    genserie,
    tempattr
)
22

23
DATADIR = Path(__file__).parent / 'data'
24

Aurélien Campéas's avatar
Aurélien Campéas committed
25

26
27
28
29
def utcdt(*dt):
    return pd.Timestamp(datetime(*dt), tz='UTC')


30
31
32
def test_in_tx(engine):
    tsh = TimeSerie()

33
34
    assert tsh.type(engine, 'foo') == 'primary'

35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
    with pytest.raises(TypeError) as err:
        tsh.insert(engine, 0, 0, 0)
    assert err.value.args[0] == 'You must use a transaction object'

    with engine.connect() as cn:
        with pytest.raises(TypeError) as err:
            tsh.insert(cn, 0, 0, 0)
    assert err.value.args[0] == 'You must use a transaction object'

    ts = genserie(datetime(2017, 10, 28, 23),
                  'H', 4, tz='UTC')
    with engine.begin() as cn:
        tsh.insert(cn, ts, 'test_tx', 'Babar')


50
def test_tstamp_roundtrip(engine, tsh):
51
    assert_structures(engine, tsh)
52
53
    ts = genserie(datetime(2017, 10, 28, 23),
                  'H', 4, tz='UTC')
54
55
56
57
58
59
60
61
62
63
    ts.index = ts.index.tz_convert('Europe/Paris')

    assert_df("""
2017-10-29 01:00:00+02:00    0
2017-10-29 02:00:00+02:00    1
2017-10-29 02:00:00+01:00    2
2017-10-29 03:00:00+01:00    3
Freq: H
    """, ts)

64
65
    tsh.insert(engine, ts, 'tztest', 'Babar',
               _insertion_date=utcdt(2018, 1, 1))
66
    back = tsh.get(engine, 'tztest')
67
68
69

    # though un localized we understand it's been normalized to utc
    assert_df("""
70
71
72
73
2017-10-28 23:00:00+00:00    0.0
2017-10-29 00:00:00+00:00    1.0
2017-10-29 01:00:00+00:00    2.0
2017-10-29 02:00:00+00:00    3.0
74
75
76
""", back)

    assert (ts.index == back.index).all()
77
    assert str(back.index.dtype) == 'datetime64[ns, UTC]'
78

79
80
81
82
    ival = tsh.interval(engine, 'tztest')
    assert ival.left == pd.Timestamp('2017-10-28 23:00:00+0000', tz='UTC')
    assert ival.right == pd.Timestamp('2017-10-29 02:00:00+0000', tz='UTC')

83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
    ts = genserie(datetime(2017, 10, 29, 1),
                  'H', 4, tz='UTC')
    ts.index = ts.index.tz_convert('Europe/Paris')
    tsh.insert(engine, ts, 'tztest', 'Celeste',
                   _insertion_date=utcdt(2018, 1, 3))

    ts = tsh.get(engine, 'tztest')
    assert_df("""
2017-10-28 23:00:00+00:00    0.0
2017-10-29 00:00:00+00:00    1.0
2017-10-29 01:00:00+00:00    0.0
2017-10-29 02:00:00+00:00    1.0
2017-10-29 03:00:00+00:00    2.0
2017-10-29 04:00:00+00:00    3.0
""", ts)

    hist = tsh.get_history(engine, 'tztest')
    assert_hist("""
insertion_date             value_date               
2018-01-01 00:00:00+00:00  2017-10-28 23:00:00+00:00    0.0
                           2017-10-29 00:00:00+00:00    1.0
                           2017-10-29 01:00:00+00:00    2.0
                           2017-10-29 02:00:00+00:00    3.0
2018-01-03 00:00:00+00:00  2017-10-28 23:00:00+00:00    0.0
                           2017-10-29 00:00:00+00:00    1.0
                           2017-10-29 01:00:00+00:00    0.0
                           2017-10-29 02:00:00+00:00    1.0
                           2017-10-29 03:00:00+00:00    2.0
                           2017-10-29 04:00:00+00:00    3.0
""", hist)

    hist = tsh.get_history(engine, 'tztest',
                           from_value_date=utcdt(2017, 10, 29, 1),
                           to_value_date=utcdt(2017, 10, 29, 3))
    assert_hist("""
insertion_date             value_date               
2018-01-01 00:00:00+00:00  2017-10-29 01:00:00+00:00    2.0
                           2017-10-29 02:00:00+00:00    3.0
2018-01-03 00:00:00+00:00  2017-10-29 01:00:00+00:00    0.0
                           2017-10-29 02:00:00+00:00    1.0
                           2017-10-29 03:00:00+00:00    2.0
""", hist)

126
127
128
    ival = tsh.interval(engine, 'tztest')
    assert ival.left == pd.Timestamp('2017-10-28 23:00:00+0000', tz='UTC')
    assert ival.right == pd.Timestamp('2017-10-29 04:00:00+0000', tz='UTC')
129
    assert_structures(engine, tsh)
130

131

132
def test_differential(engine, tsh):
133
    assert_structures(engine, tsh)
134
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 10)
135
    tsh.insert(engine, ts_begin, 'ts_test', 'test')
136

137
    id1 = tsh.last_id(engine, 'ts_test')
138
    assert tsh._previous_cset(engine, 'ts_test', id1) is None
139

140
141
    assert tsh.exists(engine, 'ts_test')
    assert not tsh.exists(engine, 'this_does_not_exist')
142

143
144
145
146
147
148
149
150
    assert tsh.interval(engine, 'ts_test') == pd.Interval(
        datetime(2010, 1, 1, 0, 0), datetime(2010, 1, 10, 0, 0),
        closed='both'
    )

    with pytest.raises(ValueError):
        assert tsh.interval(engine, 'nosuchts')

151
    fetched = tsh.get(engine, 'ts_test')
152
    assert_df("""
153
154
155
156
157
158
159
160
161
162
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    3.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
163
164
""", fetched)
    assert fetched.name == 'ts_test'
165
166

    # we should detect the emission of a message
167
    tsh.insert(engine, ts_begin, 'ts_test', 'babar')
168

169
    assert_df("""
170
171
172
173
174
175
176
177
178
179
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    3.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
180
""", tsh.get(engine, 'ts_test'))
181
182
183
184

    ts_slight_variation = ts_begin.copy()
    ts_slight_variation.iloc[3] = 0
    ts_slight_variation.iloc[6] = 0
185
    tsh.insert(engine, ts_slight_variation, 'ts_test', 'celeste')
186
    id2 = tsh.last_id(engine, 'ts_test')
187
    assert tsh._previous_cset(engine, 'ts_test', id2) == id1
188

189
    assert_df("""
190
191
192
193
194
195
196
197
198
199
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    0.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    0.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
200
""", tsh.get(engine, 'ts_test'))
201

202
    ts_longer = genserie(datetime(2010, 1, 3), 'D', 15)
203
204
205
206
    ts_longer.iloc[1] = 2.48
    ts_longer.iloc[3] = 3.14
    ts_longer.iloc[5] = ts_begin.iloc[7]

207
    with engine.begin() as cn:
208
        tsh.insert(cn, ts_longer, 'ts_test', 'test')
209
210
211
    id3 = tsh.last_id(engine, 'ts_test')

    assert id1 < id2 < id3
212

213
    assert_df("""
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
2010-01-01     0.00
2010-01-02     1.00
2010-01-03     0.00
2010-01-04     2.48
2010-01-05     2.00
2010-01-06     3.14
2010-01-07     4.00
2010-01-08     7.00
2010-01-09     6.00
2010-01-10     7.00
2010-01-11     8.00
2010-01-12     9.00
2010-01-13    10.00
2010-01-14    11.00
2010-01-15    12.00
2010-01-16    13.00
2010-01-17    14.00
231
""", tsh.get(engine, 'ts_test'))
232

233
234
235
236
237
    assert tsh.interval(engine, 'ts_test') == pd.Interval(
        datetime(2010, 1, 1, 0, 0), datetime(2010, 1, 17, 0, 0),
        closed='both'
    )

238
    # insert single data, in override of previous one
239
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 5, initval=[2])
240
    ts_begin.loc['2010-01-04'] = -1
241
    tsh.insert(engine, ts_begin, 'ts_mixte', 'test')
242

243
    assert_df("""
244
245
246
247
248
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
249
""", tsh.get(engine, 'ts_mixte'))
250

251
    # add new series with one additional values
252
    ts_more = genserie(datetime(2010, 1, 2), 'D', 5, [2])
253
    ts_more.loc['2010-01-04'] = -1
254
    tsh.insert(engine, ts_more, 'ts_mixte', 'test')
255

256
    assert_df("""
257
258
259
260
261
262
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
263
""", tsh.get(engine, 'ts_mixte'))
264
265

    # just append an extra data point
266
267
    # with no intersection with the previous ts
    ts_one_more = genserie(datetime(2010, 1, 7), 'D', 1, [3])
268
    tsh.insert(engine, ts_one_more, 'ts_mixte', 'test')
269

270
    assert_df("""
271
272
273
274
275
276
277
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
278
""", tsh.get(engine, 'ts_mixte'))
279

280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
    assert_df("""
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
    """, tsh.get(engine, 'ts_mixte',
                 from_value_date=datetime(2010, 1, 3),
                 to_value_date=datetime(2010, 1, 6)
    ))

    assert_df("""
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
        """, tsh.get(engine, 'ts_mixte',
                     from_value_date=datetime(2010, 1, 4)
    ))

    assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
            """, tsh.get(engine, 'ts_mixte',
                         to_value_date=datetime(2010, 1, 5)
        ))

309
    with engine.begin() as cn:
310
        cn.execute('set search_path to "{0}.timeserie", {0}, public'.format(tsh.namespace))
311
312
        allts = pd.read_sql("select seriename, table_name from registry "
                            "where seriename in ('ts_test', 'ts_mixte')",
313
                            cn)
314

315
        assert_df("""
316
317
318
seriename table_name
0   ts_test    ts_test
1  ts_mixte   ts_mixte
319
""".format(tsh.namespace), allts)
320

321
        assert_df("""
322
323
324
325
326
327
328
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
329
""", tsh.get(cn, 'ts_mixte',
330
             revision_date=datetime.now()))
331

332
333
    assert_structures(engine, tsh)

334

335
336
337
338
339
340
def test_serie_metadata(engine, tsh):
    serie = genserie(datetime(2010, 1, 1), 'D', 1, initval=[1])
    tsh.insert(engine, serie, 'ts-metadata', 'babar')

    initialmeta = tsh.metadata(engine, 'ts-metadata')
    assert initialmeta == {
341
342
        'index_dtype': '<M8[ns]',
        'index_names': [],
343
        'index_type': 'datetime64[ns]',
344
345
346
        'tzaware': False,
        'value_dtype': '<f8',
        'value_type': 'float64'
347
348
349
350
351
352
353
354
355
356
357
    }

    tsh.update_metadata(engine, 'ts-metadata',
                        {'topic': 'banana spot price'}
    )
    assert tsh.metadata(engine, 'ts-metadata')['topic'] == 'banana spot price'

    with pytest.raises(AssertionError):
        tsh.update_metadata(engine, 'ts-metadata', {'tzaware': True})

    assert tsh.metadata(engine, 'ts-metadata') == {
358
        'index_dtype': '<M8[ns]',
359
        'index_names': [],
360
361
        'index_type': 'datetime64[ns]',
        'topic': 'banana spot price',
362
        'tzaware': False,
363
364
        'value_dtype': '<f8',
        'value_type': 'float64'
365
    }
366

367

368
369
def test_changeset_metadata(engine, tsh):
    serie = genserie(datetime(2010, 1, 1), 'D', 1, initval=[1])
370
    tsh.insert(engine, serie, 'ts-cs-metadata', 'babar',
371
372
               {'foo': 'A', 'bar': 42})

373
    log = tsh.log(engine, names=['ts-cs-metadata'])
374
375
376
377
    meta = tsh.changeset_metadata(engine, log[0]['rev'])
    assert meta == {'foo': 'A', 'bar': 42}


378
def test_revision_date(engine, tsh):
379
    for i in range(1, 5):
380
        with engine.begin() as cn:
381
382
            tsh.insert(cn, genserie(datetime(2017, 1, i), 'D', 3, [i]), 'revdate',
                       'test', _insertion_date=utcdt(2016, 1, i))
383
384

    # end of prologue, now some real meat
385
    idate0 = pd.Timestamp('2015-1-1 00:00:00', tz='UTC')
386
387
388
389
    ts = genserie(datetime(2010, 1, 4), 'D', 4, [0], name='truc')
    tsh.insert(engine, ts, 'ts_through_time',
               'test', _insertion_date=idate0)
    assert idate0 == tsh.latest_insertion_date(engine, 'ts_through_time')
390

391
    idate1 = pd.Timestamp('2015-1-1 15:45:23', tz='UTC')
392
393
394
395
    ts = genserie(datetime(2010, 1, 4), 'D', 4, [1], name='truc')
    tsh.insert(engine, ts, 'ts_through_time',
               'test', _insertion_date=idate1)
    assert idate1 == tsh.latest_insertion_date(engine, 'ts_through_time')
396

397
    idate2 = pd.Timestamp('2015-1-2 15:43:23', tz='UTC')
398
399
400
401
    ts = genserie(datetime(2010, 1, 4), 'D', 4, [2], name='truc')
    tsh.insert(engine, ts, 'ts_through_time',
               'test', _insertion_date=idate2)
    assert idate2 == tsh.latest_insertion_date(engine, 'ts_through_time')
402

403
    idate3 = pd.Timestamp('2015-1-3', tz='UTC')
404
405
406
407
    ts = genserie(datetime(2010, 1, 4), 'D', 4, [3], name='truc')
    tsh.insert(engine, ts, 'ts_through_time',
               'test', _insertion_date=idate3)
    assert idate3 == tsh.latest_insertion_date(engine, 'ts_through_time')
408

409
    ts = tsh.get(engine, 'ts_through_time')
410

411
    assert_df("""
412
413
414
415
2010-01-04    3.0
2010-01-05    3.0
2010-01-06    3.0
2010-01-07    3.0
416
""", ts)
417

418
    ts = tsh.get(engine, 'ts_through_time',
Aurélien Campéas's avatar
Aurélien Campéas committed
419
                 revision_date=datetime(2015, 1, 2, 18, 43, 23))
420

421
    assert_df("""
422
423
424
425
2010-01-04    2.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    2.0
426
""", ts)
427

428
    ts = tsh.get(engine, 'ts_through_time',
429
                 revision_date=datetime(2015, 1, 1, 18, 43, 23))
430

431
    assert_df("""
432
433
434
435
2010-01-04    1.0
2010-01-05    1.0
2010-01-06    1.0
2010-01-07    1.0
436
""", ts)
437

438
    ts = tsh.get(engine, 'ts_through_time',
439
                 revision_date=datetime(2014, 1, 1, 18, 43, 23))
440
441
442

    assert ts is None

443
444
445
446
447
448
449
450
451
452
    # epilogue: back to the revdate issue
    assert_df("""
2017-01-01    1.0
2017-01-02    2.0
2017-01-03    3.0
2017-01-04    4.0
2017-01-05    4.0
2017-01-06    4.0
""", tsh.get(engine, 'revdate'))

453
    oldstate = tsh.get(engine, 'revdate', revision_date=datetime(2016, 1, 2))
454
455
456
    assert_df("""
2017-01-01    1.0
2017-01-02    2.0
457
458
459
2017-01-03    2.0
2017-01-04    2.0
""", oldstate)
460

461

462
def test_point_deletion(engine, tsh):
463
464
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 11)
    ts_begin.iloc[-1] = np.nan
465
    tsh.insert(engine, ts_begin, 'ts_del', 'test')
466

467
    _, ts = Snapshot(engine, tsh, 'ts_del').find()
468
    assert ts.iloc[-2] == 8.0
469

470
    ts_begin.iloc[0] = np.nan
471
    ts_begin.iloc[3] = np.nan
472

473
    tsh.insert(engine, ts_begin, 'ts_del', 'test')
474

475
    assert_df("""
476
477
478
479
480
481
482
2010-01-02    1.0
2010-01-03    2.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
483
2010-01-10    9.0
484
""", tsh.get(engine, 'ts_del'))
485

486
    ts2 = tsh.get(engine, 'ts_del',
487
488
                 # force snapshot reconstruction feature
                 revision_date=datetime(2038, 1, 1))
489
    assert (tsh.get(engine, 'ts_del') == ts2).all()
490

491
492
493
    ts_begin.iloc[0] = 42
    ts_begin.iloc[3] = 23

494
    tsh.insert(engine, ts_begin, 'ts_del', 'test')
495

496
    assert_df("""
497
498
499
500
501
502
503
504
505
2010-01-01    42.0
2010-01-02     1.0
2010-01-03     2.0
2010-01-04    23.0
2010-01-05     4.0
2010-01-06     5.0
2010-01-07     6.0
2010-01-08     7.0
2010-01-09     8.0
506
2010-01-10     9.0
507
""", tsh.get(engine, 'ts_del'))
508
509
510

    # now with string!

511
    ts_string = genserie(datetime(2010, 1, 1), 'D', 10, ['machin'])
512
    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
513
514
515
516

    ts_string[4] = None
    ts_string[5] = None

517
    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
518
    assert_df("""
519
520
521
522
523
524
525
526
2010-01-01    machin
2010-01-02    machin
2010-01-03    machin
2010-01-04    machin
2010-01-07    machin
2010-01-08    machin
2010-01-09    machin
2010-01-10    machin
527
""", tsh.get(engine, 'ts_string_del'))
528
529
530
531

    ts_string[4] = 'truc'
    ts_string[6] = 'truc'

532
    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
533
    assert_df("""
534
535
536
537
538
539
540
541
2010-01-01    machin
2010-01-02    machin
2010-01-03    machin
2010-01-04    machin
2010-01-05      truc
2010-01-07      truc
2010-01-08    machin
2010-01-09    machin
542
2010-01-10    machin
543
""", tsh.get(engine, 'ts_string_del'))
544

545
    ts_string[ts_string.index] = np.nan
546
547
    with pytest.raises(ValueError):
        tsh.insert(engine, ts_string, 'ts_string_del', 'test')
548

549

550
551
def test_nan_first(engine, tsh):
    # first insertion with only nan
552
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 10, [np.nan])
553
    assert tsh.insert(engine, ts_begin, 'ts_null', 'test') is None
554

555

556
def test_more_point_deletion(engine, tsh):
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
    ts_repushed = genserie(datetime(2010, 1, 1), 'D', 11)
    ts_repushed[0:3] = np.nan

    assert_df("""
2010-01-01     NaN
2010-01-02     NaN
2010-01-03     NaN
2010-01-04     3.0
2010-01-05     4.0
2010-01-06     5.0
2010-01-07     6.0
2010-01-08     7.0
2010-01-09     8.0
2010-01-10     9.0
2010-01-11    10.0
Freq: D
""", ts_repushed)

575
576
    tsh.insert(engine, ts_repushed, 'ts_repushed', 'test')
    diff = tsh.insert(engine, ts_repushed, 'ts_repushed', 'test')
577
578
    assert diff is None

579
    # there is no difference
580
    assert 0 == len(tsh.diff(ts_repushed, ts_repushed))
581
582
583
584
585

    ts_add = genserie(datetime(2010, 1, 1), 'D', 15)
    ts_add.iloc[0] = np.nan
    ts_add.iloc[13:] = np.nan
    ts_add.iloc[8] = np.nan
586
    diff = tsh.diff(ts_repushed, ts_add)
587
588
589
590
591
592

    assert_df("""
2010-01-02     1.0
2010-01-03     2.0
2010-01-09     NaN
2010-01-12    11.0
593
2010-01-13    12.0""", diff.sort_index())
594
595
596
597
    # value on nan => value
    # nan on value => nan
    # nan on nan => Nothing
    # nan on nothing=> Nothing
598

Aurélien Campéas's avatar
Aurélien Campéas committed
599
    # full erasing
600
601
    # numeric
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 4)
602
    tsh.insert(engine, ts_begin, 'ts_full_del', 'test')
603

Aurélien Campéas's avatar
Aurélien Campéas committed
604
    ts_begin.iloc[:] = np.nan
605
606
    with pytest.raises(ValueError):
        tsh.insert(engine, ts_begin, 'ts_full_del', 'test')
607
608

    ts_end = genserie(datetime(2010, 1, 1), 'D', 4)
609
    tsh.insert(engine, ts_end, 'ts_full_del', 'test')
610
611
612
613

    # string

    ts_begin = genserie(datetime(2010, 1, 1), 'D', 4, ['text'])
614
    tsh.insert(engine, ts_begin, 'ts_full_del_str', 'test')
615

616
617
    ts_begin = pd.Series([np.nan] * 4, name='ts_full_del_str',
                         index=ts_begin.index)
618
619
620

    with pytest.raises(ValueError):
        tsh.insert(engine, ts_begin, 'ts_full_del_str', 'test')
621
622

    ts_end = genserie(datetime(2010, 1, 1), 'D', 4, ['text'])
623
    tsh.insert(engine, ts_end, 'ts_full_del_str', 'test')
624

Aurélien Campéas's avatar
Aurélien Campéas committed
625

626
def test_deletion_over_horizon(engine, tsh):
627
    idate = utcdt(2018, 2, 1)
628
629
630
631
632
633
    ts = pd.Series(
        [1, 2, 3],
        index=pd.date_range(datetime(2018, 1, 1), freq='D', periods=3)
    )

    name = 'delete_over_hz'
634
635
    tsh.insert(engine, ts, name, 'Babar',
               _insertion_date=idate)
636
637
638
639
640
641

    ts = pd.Series(
        [np.nan, np.nan, np.nan],
        index=pd.date_range(datetime(2018, 1, 3), freq='D', periods=3)
    )

642
643
    tsh.insert(engine, ts, name, 'Celeste',
               _insertion_date=idate.replace(day=2))
644
645
    ival = tsh.interval(engine, name)
    assert ival.left == datetime(2018, 1, 1)
646
    assert ival.right == datetime(2018, 1, 2)
647
648
649
650
651

    ts = pd.Series(
        [np.nan, np.nan, np.nan],
        index=pd.date_range(datetime(2017, 12, 30), freq='D', periods=3)
    )
652
653
    tsh.insert(engine, ts, name, 'Arthur',
               _insertion_date=idate.replace(day=3))
654
    ival = tsh.interval(engine, name)
655
656
    assert ival.left == datetime(2018, 1, 2)
    assert ival.right == datetime(2018, 1, 2)
657
658


659
def test_get_history(engine, tsh):
660
    for numserie in (1, 2, 3):
661
        with engine.begin() as cn:
662
663
664
            tsh.insert(cn, genserie(datetime(2017, 1, 1), 'D', numserie), 'smallserie',
                       'aurelien.campeas@pythonian.fr',
                       _insertion_date=utcdt(2017, 2, numserie))
665
666
667
668
669
670
671
672
673
674
675

    ts = tsh.get(engine, 'smallserie')
    assert_df("""
2017-01-01    0.0
2017-01-02    1.0
2017-01-03    2.0
""", ts)

    logs = tsh.log(engine, names=['smallserie'])
    assert [
        {'author': 'aurelien.campeas@pythonian.fr',
676
         'meta': {},
677
         'date': pd.Timestamp('2017-02-01 00:00:00+0000', tz='UTC'),
678
679
680
         'names': ['smallserie']
        },
        {'author': 'aurelien.campeas@pythonian.fr',
681
         'meta': {},
682
         'date': pd.Timestamp('2017-02-02 00:00:00+0000', tz='UTC'),
683
684
685
         'names': ['smallserie']
        },
        {'author': 'aurelien.campeas@pythonian.fr',
686
         'meta': {},
687
         'date': pd.Timestamp('2017-02-03 00:00:00+0000', tz='UTC'),
688
689
690
691
692
693
         'names': ['smallserie']
        }
    ] == [{k: v for k, v in log.items() if k != 'rev'}
          for log in logs]
    histts = tsh.get_history(engine, 'smallserie')

694
    assert_hist("""
695
696
697
698
699
700
701
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
                           2017-01-03    2.0
702
703
""", histts)

704
    diffs = tsh.get_history(engine, 'smallserie', diffmode=True)
705
    assert_hist("""
706
707
708
709
710
711
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-03    2.0
""", diffs)

712
    for idate in histts:
713
        with engine.begin() as cn:
714
            idate = idate.replace(tzinfo=pytz.timezone('UTC'))
715
716
            tsh.insert(cn, histts[idate], 'smallserie2',
                       'aurelien.campeas@pythonian.f', _insertion_date=idate)
717
718
719

    # this is perfectly round-tripable
    assert (tsh.get(engine, 'smallserie2') == ts).all()
720
    assert_hist_equals(tsh.get_history(engine, 'smallserie2'), histts)
721
722
723
724

    # get history ranges
    tsa = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2017, 2, 2))
725
    assert_hist("""
726
727
728
729
730
731
insertion_date             value_date
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
                           2017-01-03    2.0
732
733
734
735
""", tsa)

    tsb = tsh.get_history(engine, 'smallserie',
                          to_insertion_date=datetime(2017, 2, 2))
736
    assert_hist("""
737
738
739
740
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
741
742
743
744
745
""", tsb)

    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2017, 2, 2),
                          to_insertion_date=datetime(2017, 2, 2))
746
    assert_hist("""
747
748
749
insertion_date             value_date
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
750
751
""", tsc)

752
753
754
    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2017, 2, 4),
                          to_insertion_date=datetime(2017, 2, 4))
755
    assert tsc == {}
756
757
758
759

    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2016, 2, 1),
                          to_insertion_date=datetime(2017, 2, 2))
760
    assert_hist("""
761
762
763
764
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
765
766
767
768
769
""", tsc)

    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2016, 2, 1),
                          to_insertion_date=datetime(2016, 12, 31))
770
    assert tsc == {}
771

772
773
774
775
    # restrictions on value dates
    tsc = tsh.get_history(engine, 'smallserie',
                          from_value_date=datetime(2017, 1, 1),
                          to_value_date=datetime(2017, 1, 2))
776
    assert_hist("""
777
778
779
780
781
782
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
783
784
785
786
""", tsc)

    tsc = tsh.get_history(engine, 'smallserie',
                          from_value_date=datetime(2017, 1, 2))
787
    assert_hist("""
788
789
790
791
insertion_date             value_date
2017-02-02 00:00:00+00:00  2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-02    1.0
                           2017-01-03    2.0
792
793
794
795
""", tsc)

    tsc = tsh.get_history(engine, 'smallserie',
                          to_value_date=datetime(2017, 1, 2))
796
    assert_hist("""
797
798
799
800
801
802
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
803
804
""", tsc)

805
806
807
    tsc = tsh.get_history(engine, 'no-such-series')
    assert tsc is None

808
809
810
811
812
813
814
815
    idates = tsh.insertion_dates(engine, 'smallserie')
    assert idates == [
        pd.Timestamp('2017-02-01 00:00:00+0000', tz='UTC'),
        pd.Timestamp('2017-02-02 00:00:00+0000', tz='UTC'),
        pd.Timestamp('2017-02-03 00:00:00+0000', tz='UTC')
    ]


816

817
818
819
820
821
822
823
def test_history_delta(engine, tsh):
    for d in range(1, 3):
        idate = utcdt(2018, 1, d)
        serie = genserie(idate - timedelta(hours=1), 'H', 6, initval=[d])
        tsh.insert(engine, serie, 'hd', 'aurelien.campeas@pythonian.fr',
                   _insertion_date=idate)

824
    assert_hist("""
825
insertion_date             value_date               
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
2018-01-01 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
                           2018-01-01 03:00:00+00:00    1.0
                           2018-01-01 04:00:00+00:00    1.0
2018-01-02 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
                           2018-01-01 03:00:00+00:00    1.0
                           2018-01-01 04:00:00+00:00    1.0
                           2018-01-01 23:00:00+00:00    2.0
                           2018-01-02 00:00:00+00:00    2.0
                           2018-01-02 01:00:00+00:00    2.0
                           2018-01-02 02:00:00+00:00    2.0
                           2018-01-02 03:00:00+00:00    2.0
                           2018-01-02 04:00:00+00:00    2.0
    """, tsh.get_history(engine, 'hd'))

846
    assert_hist("""
847
insertion_date             value_date               
848
849
2018-01-01 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
850
851
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
852
853
854
855
856
857
858
859
2018-01-02 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
                           2018-01-01 03:00:00+00:00    1.0
                           2018-01-01 04:00:00+00:00    1.0
                           2018-01-01 23:00:00+00:00    2.0
                           2018-01-02 00:00:00+00:00    2.0
860
861
862
863
                           2018-01-02 01:00:00+00:00    2.0
                           2018-01-02 02:00:00+00:00    2.0
""",  tsh.get_history(engine, 'hd', deltaafter=timedelta(hours=2)))

864
    assert_hist("""
865
insertion_date             value_date               
866
2018-01-01 00:00:00+00:00  2018-01-01 00:00:00+00:00    1.0
867
                           2018-01-01 01:00:00+00:00    1.0
868
2018-01-02 00:00:00+00:00  2018-01-02 00:00:00+00:00    2.0
869
870
                           2018-01-02 01:00:00+00:00    2.0
""",  tsh.get_history(engine, 'hd',
871
                      deltabefore=timedelta(hours=0),
872
873
874
                      deltaafter=timedelta(hours=1)))


875
876
877
878
879
880
881
882
883
884
885
def test_nr_gethistory(engine, tsh):
    s0 = pd.Series([-1, 0, 0, -1],
                   index=pd.DatetimeIndex(start=datetime(2016, 12, 29),
                                          end=datetime(2017, 1, 1),
                                          freq='D'))
    tsh.insert(engine, s0, 'foo', 'zogzog')

    s1 = pd.Series([1, 0, 0, 1],
                   index=pd.DatetimeIndex(start=datetime(2017, 1, 1),
                                          end=datetime(2017, 1, 4),
                                          freq='D'))
886
    idate = utcdt(2016, 1, 1)
887
    for i in range(5):
888
        with engine.begin() as cn:
889
890
891
            tsh.insert(cn, s1 * i, 'foo',
                       'aurelien.campeas@pythonian.f',
                       _insertion_date=idate + timedelta(days=i))
892
893
894
895
896
897
898

    df = tsh.get_history(engine, 'foo',
                         datetime(2016, 1, 3),
                         datetime(2016, 1, 4),
                         datetime(2017, 1, 1),
                         datetime(2017, 1, 4))

899
    assert_hist("""
900
901
902
903
904
905
906
907
908
insertion_date             value_date
2016-01-03 00:00:00+00:00  2017-01-01    2.0
                           2017-01-02    0.0
                           2017-01-03    0.0
                           2017-01-04    2.0
2016-01-04 00:00:00+00:00  2017-01-01    3.0
                           2017-01-02    0.0
                           2017-01-03    0.0
                           2017-01-04    3.0
909
910
911
""", df)


912
def test_add_na(engine, tsh):
913
914
915
916
917
    # a serie of NaNs won't be insert in base
    # in case of first insertion
    ts_nan = genserie(datetime(2010, 1, 1), 'D', 5)
    ts_nan[[True] * len(ts_nan)] = np.nan

918
    diff = tsh.insert(engine, ts_nan, 'ts_add_na', 'test')
919
    assert diff is None
920
921
922

    # in case of insertion in existing data
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 5)
923
    tsh.insert(engine, ts_begin, 'ts_add_na', 'test')
924
925
926
927
928

    ts_nan = genserie(datetime(2010, 1, 6), 'D', 5)
    ts_nan[[True] * len(ts_nan)] = np.nan
    ts_nan = pd.concat([ts_begin, ts_nan])

929
    diff = tsh.insert(engine, ts_nan, 'ts_add_na', 'test')
930
931
    assert diff is None

932
    result = tsh.get(engine, 'ts_add_na')
933
    assert len(result) == 5
934
935


936
def test_dtype_mismatch(engine, tsh):
937
    tsh.insert(engine,
938
939
940
941
942
               genserie(datetime(2015, 1, 1), 'D', 11).astype('str'),
               'error1',
               'test')

    with pytest.raises(Exception) as excinfo:
943
        tsh.insert(engine,
944
945
946
947
948
949
                   genserie(datetime(2015, 1, 1), 'D', 11),
                   'error1',
                   'test')

    assert 'Type error when inserting error1, new type is float64, type in base is object' == str(excinfo.value)

950
    tsh.insert(engine,
951
952
953
954
955
               genserie(datetime(2015, 1, 1), 'D', 11),
               'error2',
               'test')

    with pytest.raises(Exception) as excinfo:
956
        tsh.insert(engine,
957
958
959
960
961
                   genserie(datetime(2015, 1, 1), 'D', 11).astype('str'),
                   'error2',
                   'test')

    assert 'Type error when inserting error2, new type is object, type in base is float64' == str(excinfo.value)
962
963


964
965
966
967
968
969
def test_precision(engine, tsh):
    floaty = 0.123456789123456789
    ts = genserie(datetime(2015, 1, 1), 'D', 5, initval=[floaty])

    tsh.insert(engine, ts, 'precision', 'test')
    ts_round = tsh.get(engine, 'precision')
970
    assert 0.12345678912345678 == ts_round.iloc[0]
971
972

    diff = tsh.insert(engine, ts_round, 'precision', 'test')
Aurélien Campéas's avatar
Aurélien Campéas committed
973
    assert diff is None  # the roundtriped series does not produce a diff when reinserted
974

Aurélien Campéas's avatar
Aurélien Campéas committed
975
    diff = tsh.insert(engine, ts, 'precision', 'test')  # neither does the original series
976
977
978
    assert diff is None


979
980
981
982
983
984
985
986
def test_serie_deletion(engine, tsh):
    ts = genserie(datetime(2018, 1, 10), 'H', 10)
    tsh.insert(engine, ts, 'keepme', 'Babar')
    tsh.insert(engine, ts, 'deleteme', 'Celeste')
    ts = genserie(datetime(2018, 1, 12), 'H', 10)
    tsh.insert(engine, ts, 'keepme', 'Babar')
    tsh.insert(engine, ts, 'deleteme', 'Celeste')

987
    seriecount, csetcount, csetseriecount = assert_structures(engine, tsh)
988

989
    with engine.begin() as cn:
990
991
992
993
994
995
996
        tsh.delete(cn, 'deleteme')

    assert not tsh.exists(engine, 'deleteme')
    log = [entry['author']
           for entry in tsh.log(engine, names=('keepme', 'deleteme'))]
    assert log == ['Babar', 'Babar']

997
998
999
1000
    seriecount2, csetcount2, csetseriecount2 = assert_structures(engine, tsh)

    assert csetcount - csetcount2  == 2
    assert csetseriecount - csetseriecount2 == 2
1001
1002
1003
1004
1005
1006
    assert seriecount - seriecount2 == 1

    with pytest.raises(AssertionError) as werr:
        tsh.delete(engine, 'keepme')
    assert werr.value.args[0] == 'use a transaction object'

1007
    tsh.insert(engine, ts, 'deleteme', 'Celeste')
1008

1009

1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
def test_strip(engine, tsh):
    for i in range(1, 5):
        pubdate = utcdt(2017, 1, i)
        ts = genserie(datetime(2017, 1, 10), 'H', 1 + i)
        tsh.insert(engine, ts, 'xserie', 'babar', _insertion_date=pubdate)
        # also insert something completely unrelated
        tsh.insert(engine, genserie(datetime(2018, 1, 1), 'D', 1 + i), 'yserie', 'celeste')

    csida = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3))
    assert csida is not None
    csidb = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3, 1), mode='before')
    csidc = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3, 1), mode='after')
    assert csidb < csida < csidc

    log = tsh.log(engine, names=['xserie', 'yserie'])
    assert [(i