# test_tsio.py
1
from datetime import datetime, timedelta
2
3
from pathlib import Path
import pytz
4

5
from dateutil import parser
6
import pytest
7
8
import numpy as np
import pandas as pd
9

10
from tshistory.snapshot import Snapshot
11
12
from tshistory.util import rename_series, threadpool
from tshistory.tsio import TimeSerie
13
14
from tshistory.testutil import (
    assert_df,
15
16
    assert_hist,
    assert_hist_equals,
17
    assert_group_equals,
18
    assert_structures,
19
20
21
    genserie,
    tempattr
)
22

23
DATADIR = Path(__file__).parent / 'data'
24

Aurélien Campéas's avatar
Aurélien Campéas committed
25

26
27
28
29
def utcdt(*dt):
    """Return a UTC ``pd.Timestamp`` built from ``datetime`` constructor arguments."""
    naive = datetime(*dt)
    return pd.Timestamp(naive, tz='UTC')


30
def test_tstamp_roundtrip(engine, tsh):
    """Insert Europe/Paris-indexed series around the 2017-10-29 DST switch
    and check that values, interval and history come back with UTC stamps.
    """
    assert_structures(engine, tsh)
    ts = genserie(datetime(2017, 10, 28, 23),
                  'H', 4, tz='UTC')
    ts.index = ts.index.tz_convert('Europe/Paris')

    assert_df("""
2017-10-29 01:00:00+02:00    0
2017-10-29 02:00:00+02:00    1
2017-10-29 02:00:00+01:00    2
2017-10-29 03:00:00+01:00    3
Freq: H
    """, ts)

    tsh.insert(engine, ts, 'tztest', 'Babar',
               _insertion_date=utcdt(2018, 1, 1))
    back = tsh.get(engine, 'tztest')

    # though un localized we understand it's been normalized to utc
    assert_df("""
2017-10-28 23:00:00+00:00    0.0
2017-10-29 00:00:00+00:00    1.0
2017-10-29 01:00:00+00:00    2.0
2017-10-29 02:00:00+00:00    3.0
""", back)

    # same instants, whatever the timezone they are expressed in
    assert (ts.index == back.index).all()
    assert str(back.index.dtype) == 'datetime64[ns, UTC]'

    ival = tsh.interval(engine, 'tztest')
    assert ival.left == pd.Timestamp('2017-10-28 23:00:00+0000', tz='UTC')
    assert ival.right == pd.Timestamp('2017-10-29 02:00:00+0000', tz='UTC')

    # second revision, shifted by two hours
    ts = genserie(datetime(2017, 10, 29, 1),
                  'H', 4, tz='UTC')
    ts.index = ts.index.tz_convert('Europe/Paris')
    tsh.insert(engine, ts, 'tztest', 'Celeste',
               _insertion_date=utcdt(2018, 1, 3))

    ts = tsh.get(engine, 'tztest')
    assert_df("""
2017-10-28 23:00:00+00:00    0.0
2017-10-29 00:00:00+00:00    1.0
2017-10-29 01:00:00+00:00    0.0
2017-10-29 02:00:00+00:00    1.0
2017-10-29 03:00:00+00:00    2.0
2017-10-29 04:00:00+00:00    3.0
""", ts)

    hist = tsh.get_history(engine, 'tztest')
    assert_hist("""
insertion_date             value_date               
2018-01-01 00:00:00+00:00  2017-10-28 23:00:00+00:00    0.0
                           2017-10-29 00:00:00+00:00    1.0
                           2017-10-29 01:00:00+00:00    2.0
                           2017-10-29 02:00:00+00:00    3.0
2018-01-03 00:00:00+00:00  2017-10-28 23:00:00+00:00    0.0
                           2017-10-29 00:00:00+00:00    1.0
                           2017-10-29 01:00:00+00:00    0.0
                           2017-10-29 02:00:00+00:00    1.0
                           2017-10-29 03:00:00+00:00    2.0
                           2017-10-29 04:00:00+00:00    3.0
""", hist)

    hist = tsh.get_history(engine, 'tztest',
                           from_value_date=utcdt(2017, 10, 29, 1),
                           to_value_date=utcdt(2017, 10, 29, 3))
    assert_hist("""
insertion_date             value_date               
2018-01-01 00:00:00+00:00  2017-10-29 01:00:00+00:00    2.0
                           2017-10-29 02:00:00+00:00    3.0
2018-01-03 00:00:00+00:00  2017-10-29 01:00:00+00:00    0.0
                           2017-10-29 02:00:00+00:00    1.0
                           2017-10-29 03:00:00+00:00    2.0
""", hist)

    ival = tsh.interval(engine, 'tztest')
    assert ival.left == pd.Timestamp('2017-10-28 23:00:00+0000', tz='UTC')
    assert ival.right == pd.Timestamp('2017-10-29 04:00:00+0000', tz='UTC')
    assert_structures(engine, tsh)
110

111

112
def test_differential(engine, tsh):
    """Exercise successive insertions on 'ts_test' and 'ts_mixte' and check
    the series read back, their changeset ids, intervals and registry rows.
    """
    assert_structures(engine, tsh)
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 10)
    tsh.insert(engine, ts_begin, 'ts_test', 'test')

    id1 = tsh.last_id(engine, 'ts_test')
    assert tsh._previous_cset(engine, 'ts_test', id1) is None

    assert tsh.exists(engine, 'ts_test')
    assert not tsh.exists(engine, 'this_does_not_exist')

    assert tsh.interval(engine, 'ts_test') == pd.Interval(
        datetime(2010, 1, 1, 0, 0), datetime(2010, 1, 10, 0, 0),
        closed='both'
    )

    with pytest.raises(ValueError):
        tsh.interval(engine, 'nosuchts')

    fetched = tsh.get(engine, 'ts_test')
    assert_df("""
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    3.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
""", fetched)
    assert fetched.name == 'ts_test'

    # we should detect the emission of a message
    tsh.insert(engine, ts_begin, 'ts_test', 'babar')

    # re-inserting the same data leaves the series unchanged
    assert_df("""
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    3.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
""", tsh.get(engine, 'ts_test'))

    ts_slight_variation = ts_begin.copy()
    ts_slight_variation.iloc[3] = 0
    ts_slight_variation.iloc[6] = 0
    tsh.insert(engine, ts_slight_variation, 'ts_test', 'celeste')
    id2 = tsh.last_id(engine, 'ts_test')
    assert tsh._previous_cset(engine, 'ts_test', id2) == id1

    assert_df("""
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    0.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    0.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
""", tsh.get(engine, 'ts_test'))

    ts_longer = genserie(datetime(2010, 1, 3), 'D', 15)
    ts_longer.iloc[1] = 2.48
    ts_longer.iloc[3] = 3.14
    ts_longer.iloc[5] = ts_begin.iloc[7]

    with engine.begin() as cn:
        tsh.insert(cn, ts_longer, 'ts_test', 'test')
    id3 = tsh.last_id(engine, 'ts_test')

    assert id1 < id2 < id3

    assert_df("""
2010-01-01     0.00
2010-01-02     1.00
2010-01-03     0.00
2010-01-04     2.48
2010-01-05     2.00
2010-01-06     3.14
2010-01-07     4.00
2010-01-08     7.00
2010-01-09     6.00
2010-01-10     7.00
2010-01-11     8.00
2010-01-12     9.00
2010-01-13    10.00
2010-01-14    11.00
2010-01-15    12.00
2010-01-16    13.00
2010-01-17    14.00
""", tsh.get(engine, 'ts_test'))

    assert tsh.interval(engine, 'ts_test') == pd.Interval(
        datetime(2010, 1, 1, 0, 0), datetime(2010, 1, 17, 0, 0),
        closed='both'
    )

    # start testing manual overrides
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 5, initval=[2])
    ts_begin.loc['2010-01-04'] = -1
    tsh.insert(engine, ts_begin, 'ts_mixte', 'test')

    # -1 represents bogus upstream data
    assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
""", tsh.get(engine, 'ts_mixte'))

    # refresh all the period + 1 extra data point
    ts_more = genserie(datetime(2010, 1, 2), 'D', 5, [2])
    ts_more.loc['2010-01-04'] = -1
    tsh.insert(engine, ts_more, 'ts_mixte', 'test')

    assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
""", tsh.get(engine, 'ts_mixte'))

    # just append an extra data point
    # with no intersection with the previous ts
    ts_one_more = genserie(datetime(2010, 1, 7), 'D', 1, [3])
    tsh.insert(engine, ts_one_more, 'ts_mixte', 'test')

    assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
""", tsh.get(engine, 'ts_mixte'))

    with engine.begin() as cn:
        # make the registry table of the tested namespace reachable unqualified
        cn.execute('set search_path to "{0}.timeserie", {0}, public'.format(tsh.namespace))
        allts = pd.read_sql("select seriename, table_name from registry "
                            "where seriename in ('ts_test', 'ts_mixte')",
                            cn)

        assert_df("""
seriename table_name
0   ts_test    ts_test
1  ts_mixte   ts_mixte
""", allts)

        assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
""", tsh.get(cn, 'ts_mixte',
             revision_date=datetime.now()))

    assert_structures(engine, tsh)

286

287
288
289
290
291
292
def test_serie_metadata(engine, tsh):
    """Check the initial metadata of a series, then user-level and
    ``internal=True`` metadata updates.
    """
    serie = genserie(datetime(2010, 1, 1), 'D', 1, initval=[1])
    tsh.insert(engine, serie, 'ts-metadata', 'babar')

    initialmeta = tsh.metadata(engine, 'ts-metadata')
    assert initialmeta == {
        'index_dtype': '<M8[ns]',
        'index_names': [],
        'index_type': 'datetime64[ns]',
        'tzaware': False,
        'value_dtype': '<f8',
        'value_type': 'float64'
    }

    tsh.update_metadata(engine, 'ts-metadata',
                        {'topic': 'banana spot price'}
    )
    assert tsh.metadata(engine, 'ts-metadata')['topic'] == 'banana spot price'

    # 'tzaware' cannot be overwritten through a plain update ...
    with pytest.raises(AssertionError):
        tsh.update_metadata(engine, 'ts-metadata', {'tzaware': True})

    # ... but it can with internal=True
    tsh.update_metadata(engine, 'ts-metadata', {'tzaware': True}, internal=True)
    assert tsh.metadata(engine, 'ts-metadata') == {
        'index_dtype': '<M8[ns]',
        'index_names': [],
        'index_type': 'datetime64[ns]',
        'topic': 'banana spot price',
        'tzaware': True,
        'value_dtype': '<f8',
        'value_type': 'float64'
    }

    # unbreak the serie for the second test pass :o
    tsh.update_metadata(engine, 'ts-metadata', initialmeta, internal=True)


324
325
def test_changeset_metadata(engine, tsh):
    """Check that the metadata dict given to ``insert`` is returned by
    ``changeset_metadata`` for the matching log revision.
    """
    serie = genserie(datetime(2010, 1, 1), 'D', 1, initval=[1])
    tsh.insert(engine, serie, 'ts-cs-metadata', 'babar',
               {'foo': 'A', 'bar': 42})

    log = tsh.log(engine, names=['ts-cs-metadata'])
    meta = tsh.changeset_metadata(engine, log[0]['rev'])
    assert meta == {'foo': 'A', 'bar': 42}


334
def test_bad_import(engine, tsh):
    """Insert the 'SC' column of data/test_data.csv and check it is read
    back as float64.
    """
    # the data were parsed as date by pd.read_json()
    # NOTE(review): the code below reads a csv, not json -- confirm this
    # comment is still relevant
    df_result = pd.read_csv(str(DATADIR / 'test_data.csv'))
    df_result['Gas Day'] = df_result['Gas Day'].apply(parser.parse, dayfirst=True, yearfirst=False)
    df_result.set_index('Gas Day', inplace=True)
    ts = df_result['SC']

    tsh.insert(engine, ts, 'SND_SC', 'test')
    result = tsh.get(engine, 'SND_SC')
    assert result.dtype == 'float64'
344
345


346
def test_revision_date(engine, tsh):
    """Check ``get(..., revision_date=...)`` and ``latest_insertion_date``
    on series inserted with explicit insertion dates.
    """
    for i in range(1, 5):
        with engine.begin() as cn:
            tsh.insert(cn, genserie(datetime(2017, 1, i), 'D', 3, [i]), 'revdate',
                       'test', _insertion_date=utcdt(2016, 1, i))

    # end of prologue, now some real meat
    idate0 = pd.Timestamp('2015-1-1 00:00:00', tz='UTC')
    ts = genserie(datetime(2010, 1, 4), 'D', 4, [0], name='truc')
    tsh.insert(engine, ts, 'ts_through_time',
               'test', _insertion_date=idate0)
    assert idate0 == tsh.latest_insertion_date(engine, 'ts_through_time')

    idate1 = pd.Timestamp('2015-1-1 15:45:23', tz='UTC')
    ts = genserie(datetime(2010, 1, 4), 'D', 4, [1], name='truc')
    tsh.insert(engine, ts, 'ts_through_time',
               'test', _insertion_date=idate1)
    assert idate1 == tsh.latest_insertion_date(engine, 'ts_through_time')

    idate2 = pd.Timestamp('2015-1-2 15:43:23', tz='UTC')
    ts = genserie(datetime(2010, 1, 4), 'D', 4, [2], name='truc')
    tsh.insert(engine, ts, 'ts_through_time',
               'test', _insertion_date=idate2)
    assert idate2 == tsh.latest_insertion_date(engine, 'ts_through_time')

    idate3 = pd.Timestamp('2015-1-3', tz='UTC')
    ts = genserie(datetime(2010, 1, 4), 'D', 4, [3], name='truc')
    tsh.insert(engine, ts, 'ts_through_time',
               'test', _insertion_date=idate3)
    assert idate3 == tsh.latest_insertion_date(engine, 'ts_through_time')

    # without a revision date we get the latest state
    ts = tsh.get(engine, 'ts_through_time')

    assert_df("""
2010-01-04    3.0
2010-01-05    3.0
2010-01-06    3.0
2010-01-07    3.0
""", ts)

    ts = tsh.get(engine, 'ts_through_time',
                 revision_date=datetime(2015, 1, 2, 18, 43, 23))

    assert_df("""
2010-01-04    2.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    2.0
""", ts)

    ts = tsh.get(engine, 'ts_through_time',
                 revision_date=datetime(2015, 1, 1, 18, 43, 23))

    assert_df("""
2010-01-04    1.0
2010-01-05    1.0
2010-01-06    1.0
2010-01-07    1.0
""", ts)

    # a revision date before the first insertion yields nothing
    ts = tsh.get(engine, 'ts_through_time',
                 revision_date=datetime(2014, 1, 1, 18, 43, 23))

    assert ts is None

    # epilogue: back to the revdate issue
    assert_df("""
2017-01-01    1.0
2017-01-02    2.0
2017-01-03    3.0
2017-01-04    4.0
2017-01-05    4.0
2017-01-06    4.0
""", tsh.get(engine, 'revdate'))

    oldstate = tsh.get(engine, 'revdate', revision_date=datetime(2016, 1, 2))
    assert_df("""
2017-01-01    1.0
2017-01-02    2.0
2017-01-03    2.0
2017-01-04    2.0
""", oldstate)
428

429

430
def test_point_deletion(engine, tsh):
    """Check how NaN/None values inserted over existing points are handled,
    for numeric and string series, including full erasure and ``tsh.diff``.
    """
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 11)
    ts_begin.iloc[-1] = np.nan
    tsh.insert(engine, ts_begin, 'ts_del', 'test')

    _, ts = Snapshot(engine, tsh, 'ts_del').find()
    assert ts.iloc[-2] == 9.0

    ts_begin.iloc[0] = np.nan
    ts_begin.iloc[3] = np.nan

    tsh.insert(engine, ts_begin, 'ts_del', 'test')

    assert_df("""
2010-01-02    1.0
2010-01-03    2.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
""", tsh.get(engine, 'ts_del'))

    ts2 = tsh.get(engine, 'ts_del',
                 # force snapshot reconstruction feature
                 revision_date=datetime(2038, 1, 1))
    assert (tsh.get(engine, 'ts_del') == ts2).all()

    ts_begin.iloc[0] = 42
    ts_begin.iloc[3] = 23

    tsh.insert(engine, ts_begin, 'ts_del', 'test')

    assert_df("""
2010-01-01    42.0
2010-01-02     1.0
2010-01-03     2.0
2010-01-04    23.0
2010-01-05     4.0
2010-01-06     5.0
2010-01-07     6.0
2010-01-08     7.0
2010-01-09     8.0
2010-01-10     9.0
""", tsh.get(engine, 'ts_del'))

    # now with string!

    ts_string = genserie(datetime(2010, 1, 1), 'D', 10, ['machin'])
    tsh.insert(engine, ts_string, 'ts_string_del', 'test')

    ts_string[4] = None
    ts_string[5] = None

    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
    assert_df("""
2010-01-01    machin
2010-01-02    machin
2010-01-03    machin
2010-01-04    machin
2010-01-07    machin
2010-01-08    machin
2010-01-09    machin
2010-01-10    machin
""", tsh.get(engine, 'ts_string_del'))

    ts_string[4] = 'truc'
    ts_string[6] = 'truc'

    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
    assert_df("""
2010-01-01    machin
2010-01-02    machin
2010-01-03    machin
2010-01-04    machin
2010-01-05      truc
2010-01-07      truc
2010-01-08    machin
2010-01-09    machin
2010-01-10    machin
""", tsh.get(engine, 'ts_string_del'))

    ts_string[ts_string.index] = np.nan
    tsh.insert(engine, ts_string, 'ts_string_del', 'test')

    erased = tsh.get(engine, 'ts_string_del')
    assert len(erased) == 0

    # first insertion with only nan

    ts_begin = genserie(datetime(2010, 1, 1), 'D', 10, [np.nan])
    tsh.insert(engine, ts_begin, 'ts_null', 'test')

    assert len(tsh.get(engine, 'ts_null')) == 0

    ts_repushed = genserie(datetime(2010, 1, 1), 'D', 11)
    ts_repushed[0:3] = np.nan

    assert_df("""
2010-01-01     NaN
2010-01-02     NaN
2010-01-03     NaN
2010-01-04     3.0
2010-01-05     4.0
2010-01-06     5.0
2010-01-07     6.0
2010-01-08     7.0
2010-01-09     8.0
2010-01-10     9.0
2010-01-11    10.0
Freq: D
""", ts_repushed)

    tsh.insert(engine, ts_repushed, 'ts_repushed', 'test')
    diff = tsh.insert(engine, ts_repushed, 'ts_repushed', 'test')
    assert diff is None

    # there is no difference
    assert 0 == len(tsh.diff(ts_repushed, ts_repushed))

    ts_add = genserie(datetime(2010, 1, 1), 'D', 15)
    ts_add.iloc[0] = np.nan
    ts_add.iloc[13:] = np.nan
    ts_add.iloc[8] = np.nan
    diff = tsh.diff(ts_repushed, ts_add)

    assert_df("""
2010-01-02     1.0
2010-01-03     2.0
2010-01-09     NaN
2010-01-12    11.0
2010-01-13    12.0""", diff.sort_index())
    # value on nan => value
    # nan on value => nan
    # nan on nan => Nothing
    # nan on nothing=> Nothing

    # full erasing
    # numeric
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 4)
    tsh.insert(engine, ts_begin, 'ts_full_del', 'test')

    ts_begin.iloc[:] = np.nan
    tsh.insert(engine, ts_begin, 'ts_full_del', 'test')

    ts_end = genserie(datetime(2010, 1, 1), 'D', 4)
    tsh.insert(engine, ts_end, 'ts_full_del', 'test')

    # string

    ts_begin = genserie(datetime(2010, 1, 1), 'D', 4, ['text'])
    tsh.insert(engine, ts_begin, 'ts_full_del_str', 'test')

    ts_begin = pd.Series([np.nan] * 4, name='ts_full_del_str',
                         index=ts_begin.index)
    tsh.insert(engine, ts_begin, 'ts_full_del_str', 'test')

    ts_end = genserie(datetime(2010, 1, 1), 'D', 4, ['text'])
    tsh.insert(engine, ts_end, 'ts_full_del_str', 'test')
590

Aurélien Campéas's avatar
Aurélien Campéas committed
591

592
def test_get_history(engine, tsh):
    """Check ``log`` and ``get_history`` (full, diff mode, insertion-date and
    value-date restrictions) on a series built from three revisions.
    """
    for numserie in (1, 2, 3):
        with engine.begin() as cn:
            tsh.insert(cn, genserie(datetime(2017, 1, 1), 'D', numserie), 'smallserie',
                       'aurelien.campeas@pythonian.fr',
                       _insertion_date=utcdt(2017, 2, numserie))

    ts = tsh.get(engine, 'smallserie')
    assert_df("""
2017-01-01    0.0
2017-01-02    1.0
2017-01-03    2.0
""", ts)

    logs = tsh.log(engine, names=['smallserie'])
    # the 'rev' entry is left out of the comparison
    assert [
        {'author': 'aurelien.campeas@pythonian.fr',
         'meta': {},
         'date': pd.Timestamp('2017-02-01 00:00:00+0000', tz='UTC'),
         'names': ['smallserie']
        },
        {'author': 'aurelien.campeas@pythonian.fr',
         'meta': {},
         'date': pd.Timestamp('2017-02-02 00:00:00+0000', tz='UTC'),
         'names': ['smallserie']
        },
        {'author': 'aurelien.campeas@pythonian.fr',
         'meta': {},
         'date': pd.Timestamp('2017-02-03 00:00:00+0000', tz='UTC'),
         'names': ['smallserie']
        }
    ] == [{k: v for k, v in log.items() if k != 'rev'}
          for log in logs]
    histts = tsh.get_history(engine, 'smallserie')

    assert_hist("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
                           2017-01-03    2.0
""", histts)

    diffs = tsh.get_history(engine, 'smallserie', diffmode=True)
    assert_hist("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-03    2.0
""", diffs)

    # replay every revision of the history into a second series
    for idate in histts:
        with engine.begin() as cn:
            idate = idate.replace(tzinfo=pytz.timezone('UTC'))
            tsh.insert(cn, histts[idate], 'smallserie2',
                       'aurelien.campeas@pythonian.f', _insertion_date=idate)

    # this is perfectly round-tripable
    assert (tsh.get(engine, 'smallserie2') == ts).all()
    assert_hist_equals(tsh.get_history(engine, 'smallserie2'), histts)

    # get history ranges
    tsa = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2017, 2, 2))
    assert_hist("""
insertion_date             value_date
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
                           2017-01-03    2.0
""", tsa)

    tsb = tsh.get_history(engine, 'smallserie',
                          to_insertion_date=datetime(2017, 2, 2))
    assert_hist("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
""", tsb)

    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2017, 2, 2),
                          to_insertion_date=datetime(2017, 2, 2))
    assert_hist("""
insertion_date             value_date
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
""", tsc)

    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2017, 2, 4),
                          to_insertion_date=datetime(2017, 2, 4))
    assert tsc is None

    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2016, 2, 1),
                          to_insertion_date=datetime(2017, 2, 2))
    assert_hist("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
""", tsc)

    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2016, 2, 1),
                          to_insertion_date=datetime(2016, 12, 31))
    assert tsc is None

    # restrictions on value dates
    tsc = tsh.get_history(engine, 'smallserie',
                          from_value_date=datetime(2017, 1, 1),
                          to_value_date=datetime(2017, 1, 2))
    assert_hist("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
""", tsc)

    tsc = tsh.get_history(engine, 'smallserie',
                          from_value_date=datetime(2017, 1, 2))
    assert_hist("""
insertion_date             value_date
2017-02-02 00:00:00+00:00  2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-02    1.0
                           2017-01-03    2.0
""", tsc)

    tsc = tsh.get_history(engine, 'smallserie',
                          to_value_date=datetime(2017, 1, 2))
    assert_hist("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
""", tsc)

738

739
740
741
742
743
744
745
def test_history_delta(engine, tsh):
    """Check ``get_history`` with the ``deltabefore`` / ``deltaafter``
    parameters on an hourly series inserted at two insertion dates.
    """
    for d in range(1, 3):
        idate = utcdt(2018, 1, d)
        # each revision starts one hour before its own insertion date
        serie = genserie(idate - timedelta(hours=1), 'H', 6, initval=[d])
        tsh.insert(engine, serie, 'hd', 'aurelien.campeas@pythonian.fr',
                   _insertion_date=idate)

    assert_hist("""
insertion_date             value_date               
2018-01-01 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
                           2018-01-01 03:00:00+00:00    1.0
                           2018-01-01 04:00:00+00:00    1.0
2018-01-02 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
                           2018-01-01 03:00:00+00:00    1.0
                           2018-01-01 04:00:00+00:00    1.0
                           2018-01-01 23:00:00+00:00    2.0
                           2018-01-02 00:00:00+00:00    2.0
                           2018-01-02 01:00:00+00:00    2.0
                           2018-01-02 02:00:00+00:00    2.0
                           2018-01-02 03:00:00+00:00    2.0
                           2018-01-02 04:00:00+00:00    2.0
    """, tsh.get_history(engine, 'hd'))

    assert_hist("""
insertion_date             value_date               
2018-01-01 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
2018-01-02 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
                           2018-01-01 03:00:00+00:00    1.0
                           2018-01-01 04:00:00+00:00    1.0
                           2018-01-01 23:00:00+00:00    2.0
                           2018-01-02 00:00:00+00:00    2.0
                           2018-01-02 01:00:00+00:00    2.0
                           2018-01-02 02:00:00+00:00    2.0
""", tsh.get_history(engine, 'hd', deltaafter=timedelta(hours=2)))

    assert_hist("""
insertion_date             value_date               
2018-01-01 00:00:00+00:00  2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
2018-01-02 00:00:00+00:00  2018-01-02 00:00:00+00:00    2.0
                           2018-01-02 01:00:00+00:00    2.0
""", tsh.get_history(engine, 'hd',
                     deltabefore=timedelta(hours=0),
                     deltaafter=timedelta(hours=1)))


797
798
799
800
801
802
803
804
805
806
807
def test_nr_gethistory(engine, tsh):
    """Check ``get_history`` restricted on both insertion and value dates
    (given positionally) on a series with several revisions.
    """
    # pd.date_range replaces the DatetimeIndex(start=, end=, freq=)
    # constructor form, which is no longer available in recent pandas
    s0 = pd.Series([-1, 0, 0, -1],
                   index=pd.date_range(start=datetime(2016, 12, 29),
                                       end=datetime(2017, 1, 1),
                                       freq='D'))
    tsh.insert(engine, s0, 'foo', 'zogzog')

    s1 = pd.Series([1, 0, 0, 1],
                   index=pd.date_range(start=datetime(2017, 1, 1),
                                       end=datetime(2017, 1, 4),
                                       freq='D'))
    idate = utcdt(2016, 1, 1)
    for i in range(5):
        with engine.begin() as cn:
            tsh.insert(cn, s1 * i, 'foo',
                       'aurelien.campeas@pythonian.f',
                       _insertion_date=idate + timedelta(days=i))

    df = tsh.get_history(engine, 'foo',
                         datetime(2016, 1, 3),
                         datetime(2016, 1, 4),
                         datetime(2017, 1, 1),
                         datetime(2017, 1, 4))

    assert_hist("""
insertion_date             value_date
2016-01-03 00:00:00+00:00  2017-01-01    2.0
                           2017-01-02    0.0
                           2017-01-03    0.0
                           2017-01-04    2.0
2016-01-04 00:00:00+00:00  2017-01-01    3.0
                           2017-01-02    0.0
                           2017-01-03    0.0
                           2017-01-04    3.0
""", df)


834
def test_add_na(engine, tsh):
    """Check the handling of NaN-only insertions, first on a new series and
    then as a trailing extension of existing data.
    """
    # first insertion made only of NaNs: a plain get() yields an empty
    # series, the NaNs only show up with _keep_nans=True
    ts_nan = genserie(datetime(2010, 1, 1), 'D', 5)
    ts_nan[[True] * len(ts_nan)] = np.nan

    diff = tsh.insert(engine, ts_nan, 'ts_add_na', 'test')
    assert len(diff) == 5
    result = tsh.get(engine, 'ts_add_na')
    assert len(result) == 0

    result = tsh.get(engine, 'ts_add_na', _keep_nans=True)
    assert_df("""
2010-01-01   NaN
2010-01-02   NaN
2010-01-03   NaN
2010-01-04   NaN
2010-01-05   NaN
""", result)

    # in case of insertion in existing data
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 5)
    tsh.insert(engine, ts_begin, 'ts_add_na', 'test')

    ts_nan = genserie(datetime(2010, 1, 6), 'D', 5)
    ts_nan[[True] * len(ts_nan)] = np.nan
    ts_nan = pd.concat([ts_begin, ts_nan])

    # known values followed by new NaN points: nothing to record
    diff = tsh.insert(engine, ts_nan, 'ts_add_na', 'test')
    assert diff is None

    result = tsh.get(engine, 'ts_add_na')
    assert len(result) == 5
867
868


869
def test_dtype_mismatch(engine, tsh):
    """Check that inserting a series whose value type differs from the stored
    one raises, with an explicit message, in both directions.
    """
    # stored as str (object), then float64 pushed
    tsh.insert(engine,
               genserie(datetime(2015, 1, 1), 'D', 11).astype('str'),
               'error1',
               'test')

    with pytest.raises(Exception) as excinfo:
        tsh.insert(engine,
                   genserie(datetime(2015, 1, 1), 'D', 11),
                   'error1',
                   'test')

    assert 'Type error when inserting error1, new type is float64, type in base is object' == str(excinfo.value)

    # stored as float64, then str (object) pushed
    tsh.insert(engine,
               genserie(datetime(2015, 1, 1), 'D', 11),
               'error2',
               'test')

    with pytest.raises(Exception) as excinfo:
        tsh.insert(engine,
                   genserie(datetime(2015, 1, 1), 'D', 11).astype('str'),
                   'error2',
                   'test')

    assert 'Type error when inserting error2, new type is object, type in base is float64' == str(excinfo.value)
895
896


897
898
899
900
901
902
def test_precision(engine, tsh):
    """Check that a stored value round-trips without loss and yields no diff.

    A series built from a literal with more digits than a float can hold
    is inserted and read back.  The value read back must equal the
    17-significant-digit literal below, and re-inserting either the
    read-back series or the original one must return ``None`` (no diff).
    """
    floaty = 0.123456789123456789
    ts = genserie(datetime(2015, 1, 1), 'D', 5, initval=[floaty])

    tsh.insert(engine, ts, 'precision', 'test')
    ts_round = tsh.get(engine, 'precision')
    assert 0.12345678912345678 == ts_round.iloc[0]

    # the round-tripped series does not produce a diff when reinserted
    diff = tsh.insert(engine, ts_round, 'precision', 'test')
    assert diff is None

    # neither does the original series
    diff = tsh.insert(engine, ts, 'precision', 'test')
    assert diff is None


912
913
914
915
916
917
918
919
def test_serie_deletion(engine, tsh):
    """Check that deleting a series removes it and its changesets only.

    Two series ('keepme' by Babar, 'deleteme' by Celeste) each receive
    two insertions.  After ``tsh.delete`` on 'deleteme' within a
    transaction the test expects:

    * 'deleteme' no longer exists;
    * the log for both names only contains Babar's two entries;
    * the counters returned by ``assert_structures`` dropped by
      1 (series), 2 (changesets) and 2 (changeset/series links);
    * calling ``delete`` with the bare engine fails with the assertion
      message 'use a transaction object';
    * inserting again under the deleted name does not raise.
    """
    ts = genserie(datetime(2018, 1, 10), 'H', 10)
    tsh.insert(engine, ts, 'keepme', 'Babar')
    tsh.insert(engine, ts, 'deleteme', 'Celeste')
    ts = genserie(datetime(2018, 1, 12), 'H', 10)
    tsh.insert(engine, ts, 'keepme', 'Babar')
    tsh.insert(engine, ts, 'deleteme', 'Celeste')

    # counters before the deletion
    seriecount, csetcount, csetseriecount = assert_structures(engine, tsh)

    with engine.begin() as cn:
        tsh.delete(cn, 'deleteme')

    assert not tsh.exists(engine, 'deleteme')
    authors = [
        entry['author']
        for entry in tsh.log(engine, names=('keepme', 'deleteme'))
    ]
    assert authors == ['Babar', 'Babar']

    # counters after the deletion
    seriecount2, csetcount2, csetseriecount2 = assert_structures(engine, tsh)

    assert csetcount - csetcount2 == 2
    assert csetseriecount - csetseriecount2 == 2
    assert seriecount - seriecount2 == 1

    # deletion is refused when not given a transaction object
    with pytest.raises(AssertionError) as werr:
        tsh.delete(engine, 'keepme')
    assert werr.value.args[0] == 'use a transaction object'

    # the deleted name can be used again
    tsh.insert(engine, ts, 'deleteme', 'Celeste')
941

942

943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
def test_strip(engine, tsh):
    """Check ``changeset_at`` lookups and the effect of ``strip`` on a series.

    'xserie' receives four insertions dated 2017-01-01 .. 2017-01-04,
    interleaved with unrelated insertions into 'yserie'.  The test then
    strips 'xserie' from the changeset found at 2017-01-03 and expects:

    * the history and current value of 'xserie' to only reflect the
      first two insertions;
    * the plain log to have lost the two stripped 'xserie' entries while
      keeping every 'yserie' entry;
    * in the ``stripped=True`` log, any 'tshistory.info' metadata to
      start with 'got stripped from'.
    """
    for i in range(1, 5):
        pubdate = utcdt(2017, 1, i)
        ts = genserie(datetime(2017, 1, 10), 'H', 1 + i)
        tsh.insert(engine, ts, 'xserie', 'babar', _insertion_date=pubdate)
        # also insert something completely unrelated
        tsh.insert(engine, genserie(datetime(2018, 1, 1), 'D', 1 + i), 'yserie', 'celeste')

    # exact / before / after lookups around the third insertion
    csida = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3))
    assert csida is not None
    csidb = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3, 1), mode='before')
    csidc = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3, 1), mode='after')
    assert csidb < csida < csidc

    log = tsh.log(engine, names=['xserie', 'yserie'])
    numbered_authors = [
        (idx, entry['author'])
        for idx, entry in enumerate(log, start=1)
    ]
    assert numbered_authors == [
        (1, 'babar'),
        (2, 'celeste'),
        (3, 'babar'),
        (4, 'celeste'),
        (5, 'babar'),
        (6, 'celeste'),
        (7, 'babar'),
        (8, 'celeste')
    ]

    h = tsh.get_history(engine, 'xserie')
    assert_hist("""
insertion_date             value_date         
2017-01-01 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
2017-01-02 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
                           2017-01-10 02:00:00    2.0
2017-01-03 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
                           2017-01-10 02:00:00    2.0
                           2017-01-10 03:00:00    3.0
2017-01-04 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
                           2017-01-10 02:00:00    2.0
                           2017-01-10 03:00:00    3.0
                           2017-01-10 04:00:00    4.0
""", h)

    # strip from the changeset of the third insertion onwards
    csid = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3))
    with engine.begin() as cn:
        tsh.strip(cn, 'xserie', csid)

    assert_hist("""
insertion_date             value_date         
2017-01-01 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
2017-01-02 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
                           2017-01-10 02:00:00    2.0
""", tsh.get_history(engine, 'xserie'))

    assert_df("""
2017-01-10 00:00:00    0.0
2017-01-10 01:00:00    1.0
2017-01-10 02:00:00    2.0
""", tsh.get(engine, 'xserie'))

    log = tsh.log(engine, names=['xserie', 'yserie'])
    # 5 and 7 have disappeared
    assert [entry['author'] for entry in log] == [
        'babar', 'celeste', 'babar', 'celeste', 'celeste', 'celeste'
    ]

    log = tsh.log(engine, stripped=True, names=['xserie', 'yserie'])
    for entry in log:
        if entry['meta']:
            meta = entry['meta']
            stripinfo = meta.get('tshistory.info')
            if stripinfo:
                assert stripinfo.startswith('got stripped from')
1020
1021


1022
1023
1024
def test_long_name(engine, tsh):
    """Check that a series with a 64-character name can be stored and read."""
    serie = genserie(datetime(2010, 1, 1), 'D', 40)

    name = 'a' * 64
    tsh.insert(engine, serie, name, 'babar')
    assert tsh.get(engine, name) is not None
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038


def test_get_delta(engine, tsh):
    for idate in pd.DatetimeIndex(start=utcdt(2015, 1, 1),
                                  end=utcdt(2015, 1, 1, 3),
                                  freq='H'):
        ts = genserie(start=idate, freq='H', repeat=7)
        tsh.insert(engine, ts, 'republication', 'test',
                   _insertion_date=idate)

    hist = tsh.get_history(engine, 'republication')
1039
    assert_hist("""
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056