test_tsio.py 36.5 KB
Newer Older
1
from datetime import datetime, timedelta
2
3
from pathlib import Path
import pytz
4

5
from dateutil import parser
6
import pytest
7
8
import numpy as np
import pandas as pd
9

10
from tshistory.snapshot import Snapshot
11
12
13
14
15
16
from tshistory.testutil import (
    assert_df,
    assert_group_equals,
    genserie,
    tempattr
)
17

18
# 'data' directory sitting next to this test module; the tests read
# their fixture files (e.g. test_data.csv) from it
DATADIR = Path(__file__).parent / 'data'
19

Aurélien Campéas's avatar
Aurélien Campéas committed
20

21
22
23
24
def utcdt(*dt):
    """Return a UTC ``pd.Timestamp`` built from ``datetime(*dt)``.

    The positional arguments are forwarded unchanged to the
    ``datetime`` constructor (year, month, day, ...).
    """
    naive = datetime(*dt)
    return pd.Timestamp(naive, tz='UTC')


25
def test_tstamp_roundtrip(engine, tsh):
    """A tz-aware series must come back with the same instants.

    The series is generated in UTC, re-expressed in Europe/Paris
    (the dump below shows the 2017-10-29 offset change), inserted,
    then read back and compared with what was sent.
    """
    start = datetime(2017, 10, 28, 23)
    paris = genserie(start, 'H', 4, tz='UTC')
    paris.index = paris.index.tz_convert('Europe/Paris')

    expected_paris = """
2017-10-29 01:00:00+02:00    0
2017-10-29 02:00:00+02:00    1
2017-10-29 02:00:00+01:00    2
2017-10-29 03:00:00+01:00    3
Freq: H
    """
    assert_df(expected_paris, paris)

    tsh.insert(engine, paris, 'tztest', 'Babar')
    stored = tsh.get(engine, 'tztest')

    # what we read back is expressed in utc
    expected_utc = """
2017-10-28 23:00:00+00:00    0.0
2017-10-29 00:00:00+00:00    1.0
2017-10-29 01:00:00+00:00    2.0
2017-10-29 02:00:00+00:00    3.0
"""
    assert_df(expected_utc, stored)

    same_instants = paris.index == stored.index
    assert same_instants.all()
    assert str(stored.index.dtype) == 'datetime64[ns, UTC]'
51
52


53
def test_differential(engine, tsh):
    """Successive insertions on a series only change what differs.

    Covers: first insertion, identical re-insertion, point edits,
    an overlapping longer series, then a second series ('ts_mixte')
    that is refreshed and extended, and finally the registry content.
    """
    base = genserie(datetime(2010, 1, 1), 'D', 10)
    tsh.insert(engine, base, 'ts_test', 'test')

    assert tsh.exists(engine, 'ts_test')
    assert not tsh.exists(engine, 'this_does_not_exist')

    untouched = """
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    3.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
"""
    assert_df(untouched, tsh.get(engine, 'ts_test'))

    # re-inserting the very same data must leave the series as it was
    tsh.insert(engine, base, 'ts_test', 'babar')
    assert_df(untouched, tsh.get(engine, 'ts_test'))

    # zero out two points
    edited = base.copy()
    for position in (3, 6):
        edited.iloc[position] = 0
    tsh.insert(engine, edited, 'ts_test', 'celeste')

    assert_df("""
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    0.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    0.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
""", tsh.get(engine, 'ts_test'))

    # a longer series, overlapping the stored one from 2010-01-03 on
    longer = genserie(datetime(2010, 1, 3), 'D', 15)
    longer.iloc[1] = 2.48
    longer.iloc[3] = 3.14
    longer.iloc[5] = base.iloc[7]
    tsh.insert(engine, longer, 'ts_test', 'test')

    assert_df("""
2010-01-01     0.00
2010-01-02     1.00
2010-01-03     0.00
2010-01-04     2.48
2010-01-05     2.00
2010-01-06     3.14
2010-01-07     4.00
2010-01-08     7.00
2010-01-09     6.00
2010-01-10     7.00
2010-01-11     8.00
2010-01-12     9.00
2010-01-13    10.00
2010-01-14    11.00
2010-01-15    12.00
2010-01-16    13.00
2010-01-17    14.00
""", tsh.get(engine, 'ts_test'))

    # start testing manual overrides
    mixte = genserie(datetime(2010, 1, 1), 'D', 5, initval=[2])
    mixte.loc['2010-01-04'] = -1
    tsh.insert(engine, mixte, 'ts_mixte', 'test')

    # -1 represents bogus upstream data
    assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
""", tsh.get(engine, 'ts_mixte'))

    # refresh all the period + 1 extra data point
    refreshed = genserie(datetime(2010, 1, 2), 'D', 5, [2])
    refreshed.loc['2010-01-04'] = -1
    tsh.insert(engine, refreshed, 'ts_mixte', 'test')

    assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
""", tsh.get(engine, 'ts_mixte'))

    # just append an extra data point
    # with no intersection with the previous ts
    appended = genserie(datetime(2010, 1, 7), 'D', 1, [3])
    tsh.insert(engine, appended, 'ts_mixte', 'test')

    mixte_final = """
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
"""
    assert_df(mixte_final, tsh.get(engine, 'ts_mixte'))

    with engine.connect() as cn:
        search_path = 'set search_path to "{0}.timeserie", {0}, public'.format(tsh.namespace)
        cn.execute(search_path)
        query = ("select name, table_name from registry "
                 "where name in ('ts_test', 'ts_mixte')")
        allts = pd.read_sql(query, cn)

        assert_df("""
name              table_name
0   ts_test   {0}.timeserie.ts_test
1  ts_mixte  {0}.timeserie.ts_mixte
""".format(tsh.namespace), allts)

        # asking for the state "as of now" yields the latest version
        current = tsh.get(cn, 'ts_mixte', revision_date=datetime.now())
        assert_df(mixte_final, current)
199
200


201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
def test_serie_metadata(engine, tsh):
    """Series metadata: initial content, user update, protected keys."""
    name = 'ts-metadata'
    one_point = genserie(datetime(2010, 1, 1), 'D', 1, initval=[1])
    tsh.insert(engine, one_point, name, 'babar')

    base_meta = {
        'tzaware': False,
        'index_type': 'datetime64[ns]',
        'value_type': 'float64',
        'index_names': []
    }
    initialmeta = tsh.metadata(engine, name)
    assert initialmeta == base_meta

    # a free-form key can be added
    tsh.update_metadata(engine, name, {'topic': 'banana spot price'})
    assert tsh.metadata(engine, name)['topic'] == 'banana spot price'

    # 'tzaware' is refused unless the update is flagged internal
    with pytest.raises(AssertionError):
        tsh.update_metadata(engine, name, {'tzaware': True})

    tsh.update_metadata(engine, name, {'tzaware': True}, internal=True)
    expected = dict(base_meta, tzaware=True, topic='banana spot price')
    assert tsh.metadata(engine, name) == expected

    # unbreak the serie for the second test pass :o
    tsh.update_metadata(engine, name, initialmeta, internal=True)


233
234
def test_changeset_metadata(engine, tsh):
    """Metadata passed to ``insert`` can be read back from the changeset."""
    one_point = genserie(datetime(2010, 1, 1), 'D', 1, initval=[1])
    tsh.insert(engine, one_point, 'ts-cs-metadata', 'babar',
               {'foo': 'A', 'bar': 42})

    first_entry = tsh.log(engine, names=['ts-cs-metadata'])[0]
    stored = tsh.changeset_metadata(engine, first_entry['rev'])
    assert stored == {'foo': 'A', 'bar': 42}


243
def test_bad_import(engine, tsh):
    """Awkward inputs: csv-sourced data, empty series, NaN-laden series."""
    # the 'Gas Day' column comes out of the csv as text: parse it
    # (day first) and use it as the index
    frame = pd.read_csv(str(DATADIR / 'test_data.csv'))
    frame['Gas Day'] = frame['Gas Day'].apply(
        parser.parse, dayfirst=True, yearfirst=False
    )
    frame = frame.set_index('Gas Day')

    tsh.insert(engine, frame['SC'], 'SND_SC', 'test')
    stored = tsh.get(engine, 'SND_SC')
    assert stored.dtype == 'float64'

    # insertion of empty ts
    empty = pd.Series(name='truc', dtype='object')
    tsh.insert(engine, empty, 'empty_ts', 'test')
    assert tsh.get(engine, 'empty_ts') is None

    # nan in ts
    # all na
    all_nan = genserie(datetime(2010, 1, 10), 'D', 10, [np.nan], name='truc')
    tsh.insert(engine, all_nan, 'test_nan', 'test')
    assert len(tsh.get(engine, 'test_nan')) == 0
    assert len(tsh.get(engine, 'test_nan', _keep_nans=True)) == 10

    # mixed na
    days = pd.date_range(start=datetime(2010, 1, 10), freq='D', periods=10)
    half_nan = pd.Series([np.nan] * 5 + [3] * 5, index=days, name='truc')

    tsh.insert(engine, half_nan, 'test_nan', 'test')
    result = tsh.get(engine, 'test_nan')

    # NOTE(review): the same series is inserted a second time --
    # presumably to check re-insertion changes nothing; confirm
    tsh.insert(engine, half_nan, 'test_nan', 'test')
    result = tsh.get(engine, 'test_nan')
    assert_df("""
2010-01-15    3.0
2010-01-16    3.0
2010-01-17    3.0
2010-01-18    3.0
2010-01-19    3.0
""", result)

    # get_ts with name not in database
    assert tsh.get(engine, 'inexisting_name', 'test') is None
285
286


287
def test_revision_date(engine, tsh):
    """``get`` with a ``revision_date`` returns the state as of that date."""
    # prologue: four overlapping insertions on 'revdate', checked
    # again in the epilogue at the end of the test
    for day in range(1, 5):
        with engine.connect() as cn:
            shifted = genserie(datetime(2017, 1, day), 'D', 3, [day])
            tsh.insert(cn, shifted, 'revdate', 'test',
                       _insertion_date=utcdt(2016, 1, day))

    # end of prologue, now some real meat: four versions of the same
    # four days, each one stamped with its own insertion date
    insertion_stamps = (
        '2015-1-1 00:00:00',
        '2015-1-1 15:45:23',
        '2015-1-2 15:43:23',
        '2015-1-3',
    )
    for value, stamp in enumerate(insertion_stamps):
        idate = pd.Timestamp(stamp, tz='UTC')
        version = genserie(datetime(2010, 1, 4), 'D', 4, [value], name='truc')
        tsh.insert(engine, version, 'ts_through_time',
                   'test', _insertion_date=idate)
        assert idate == tsh.latest_insertion_date(engine, 'ts_through_time')

    # no revision date: latest version
    latest = tsh.get(engine, 'ts_through_time')
    assert_df("""
2010-01-04    3.0
2010-01-05    3.0
2010-01-06    3.0
2010-01-07    3.0
""", latest)

    as_of_jan2 = tsh.get(engine, 'ts_through_time',
                         revision_date=datetime(2015, 1, 2, 18, 43, 23))
    assert_df("""
2010-01-04    2.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    2.0
""", as_of_jan2)

    as_of_jan1 = tsh.get(engine, 'ts_through_time',
                         revision_date=datetime(2015, 1, 1, 18, 43, 23))
    assert_df("""
2010-01-04    1.0
2010-01-05    1.0
2010-01-06    1.0
2010-01-07    1.0
""", as_of_jan1)

    # before the first insertion there is nothing to return
    before_all = tsh.get(engine, 'ts_through_time',
                         revision_date=datetime(2014, 1, 1, 18, 43, 23))
    assert before_all is None

    # epilogue: back to the revdate issue
    assert_df("""
2017-01-01    1.0
2017-01-02    2.0
2017-01-03    3.0
2017-01-04    4.0
2017-01-05    4.0
2017-01-06    4.0
""", tsh.get(engine, 'revdate'))

    oldstate = tsh.get(engine, 'revdate', revision_date=datetime(2016, 1, 2))
    assert_df("""
2017-01-01    1.0
2017-01-02    2.0
2017-01-03    2.0
2017-01-04    2.0
""", oldstate)
373

374

375
def test_deletion(engine, tsh):
    """Inserting NaN/None over stored points erases them.

    Exercises numeric and string series: partial erasure, restoring
    erased points, full erasure followed by re-insertion, a first
    insertion made only of NaNs, and ``tsh.diff`` in presence of NaNs.
    """
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 11)
    ts_begin.iloc[-1] = np.nan
    tsh.insert(engine, ts_begin, 'ts_del', 'test')

    _, ts = Snapshot(engine, tsh, 'ts_del').find()
    assert ts.iloc[-2] == 9.0

    ts_begin.iloc[0] = np.nan
    ts_begin.iloc[3] = np.nan

    tsh.insert(engine, ts_begin, 'ts_del', 'test')

    assert_df("""
2010-01-02    1.0
2010-01-03    2.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
""", tsh.get(engine, 'ts_del'))

    ts2 = tsh.get(engine, 'ts_del',
                 # force snapshot reconstruction feature
                 revision_date=datetime(2038, 1, 1))
    assert (tsh.get(engine, 'ts_del') == ts2).all()

    # give a value back to the erased points
    ts_begin.iloc[0] = 42
    ts_begin.iloc[3] = 23

    tsh.insert(engine, ts_begin, 'ts_del', 'test')

    assert_df("""
2010-01-01    42.0
2010-01-02     1.0
2010-01-03     2.0
2010-01-04    23.0
2010-01-05     4.0
2010-01-06     5.0
2010-01-07     6.0
2010-01-08     7.0
2010-01-09     8.0
2010-01-10     9.0
""", tsh.get(engine, 'ts_del'))

    # now with string!

    ts_string = genserie(datetime(2010, 1, 1), 'D', 10, ['machin'])
    tsh.insert(engine, ts_string, 'ts_string_del', 'test')

    # explicit positional access: an integer key given to [] on a
    # datetime-indexed series only works through a deprecated
    # positional fallback
    ts_string.iloc[4] = None
    ts_string.iloc[5] = None

    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
    assert_df("""
2010-01-01    machin
2010-01-02    machin
2010-01-03    machin
2010-01-04    machin
2010-01-07    machin
2010-01-08    machin
2010-01-09    machin
2010-01-10    machin
""", tsh.get(engine, 'ts_string_del'))

    ts_string.iloc[4] = 'truc'
    ts_string.iloc[6] = 'truc'

    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
    assert_df("""
2010-01-01    machin
2010-01-02    machin
2010-01-03    machin
2010-01-04    machin
2010-01-05      truc
2010-01-07      truc
2010-01-08    machin
2010-01-09    machin
2010-01-10    machin
""", tsh.get(engine, 'ts_string_del'))

    ts_string[ts_string.index] = np.nan
    tsh.insert(engine, ts_string, 'ts_string_del', 'test')

    erased = tsh.get(engine, 'ts_string_del')
    assert len(erased) == 0

    # first insertion with only nan

    ts_begin = genserie(datetime(2010, 1, 1), 'D', 10, [np.nan])
    tsh.insert(engine, ts_begin, 'ts_null', 'test')

    assert len(tsh.get(engine, 'ts_null')) == 0

    # exhibit issue with nans handling
    ts_repushed = genserie(datetime(2010, 1, 1), 'D', 11)
    ts_repushed[0:3] = np.nan

    assert_df("""
2010-01-01     NaN
2010-01-02     NaN
2010-01-03     NaN
2010-01-04     3.0
2010-01-05     4.0
2010-01-06     5.0
2010-01-07     6.0
2010-01-08     7.0
2010-01-09     8.0
2010-01-10     9.0
2010-01-11    10.0
Freq: D
""", ts_repushed)

    # pushing the same data twice: the second insertion reports nothing
    tsh.insert(engine, ts_repushed, 'ts_repushed', 'test')
    diff = tsh.insert(engine, ts_repushed, 'ts_repushed', 'test')
    assert diff is None

    # there is no difference
    assert 0 == len(tsh.diff(ts_repushed, ts_repushed))

    ts_add = genserie(datetime(2010, 1, 1), 'D', 15)
    ts_add.iloc[0] = np.nan
    ts_add.iloc[13:] = np.nan
    ts_add.iloc[8] = np.nan
    diff = tsh.diff(ts_repushed, ts_add)

    assert_df("""
2010-01-02     1.0
2010-01-03     2.0
2010-01-09     NaN
2010-01-12    11.0
2010-01-13    12.0""", diff.sort_index())
    # value on nan => value
    # nan on value => nan
    # nan on nan => Nothing
    # nan on nothing=> Nothing

    # full erasing
    # numeric
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 4)
    tsh.insert(engine, ts_begin, 'ts_full_del', 'test')

    ts_begin.iloc[:] = np.nan
    tsh.insert(engine, ts_begin, 'ts_full_del', 'test')

    ts_end = genserie(datetime(2010, 1, 1), 'D', 4)
    tsh.insert(engine, ts_end, 'ts_full_del', 'test')

    # string

    ts_begin = genserie(datetime(2010, 1, 1), 'D', 4, ['text'])
    tsh.insert(engine, ts_begin, 'ts_full_del_str', 'test')

    ts_begin.iloc[:] = np.nan
    tsh.insert(engine, ts_begin, 'ts_full_del_str', 'test')

    ts_end = genserie(datetime(2010, 1, 1), 'D', 4, ['text'])
    tsh.insert(engine, ts_end, 'ts_full_del_str', 'test')
535

Aurélien Campéas's avatar
Aurélien Campéas committed
536

537
def test_get_history(engine, tsh):
    """``get_history`` output, its round trip, and its date filters."""
    author = 'aurelien.campeas@pythonian.fr'
    for size in (1, 2, 3):
        with engine.connect() as cn:
            growing = genserie(datetime(2017, 1, 1), 'D', size)
            tsh.insert(cn, growing, 'smallserie', author,
                       _insertion_date=utcdt(2017, 2, size))

    ts = tsh.get(engine, 'smallserie')
    assert_df("""
2017-01-01    0.0
2017-01-02    1.0
2017-01-03    2.0
""", ts)

    # one log entry per insertion (the 'rev' key is left out of the
    # comparison)
    expected_logs = [
        {'author': 'aurelien.campeas@pythonian.fr',
         'meta': {},
         'date': pd.Timestamp(stamp, tz='UTC'),
         'names': ['smallserie']
        }
        for stamp in ('2017-02-01 00:00:00+0000',
                      '2017-02-02 00:00:00+0000',
                      '2017-02-03 00:00:00+0000')
    ]
    logs = tsh.log(engine, names=['smallserie'])
    found_logs = []
    for entry in logs:
        found_logs.append(
            {key: val for key, val in entry.items() if key != 'rev'}
        )
    assert expected_logs == found_logs

    histts = tsh.get_history(engine, 'smallserie')
    assert histts.name == 'smallserie'

    assert_df("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
                           2017-01-03    2.0
""", histts)

    # replay each historical version into a second series
    insertion_dates = histts.index.get_level_values('insertion_date').unique()
    for idate in insertion_dates:
        with engine.connect() as cn:
            idate = idate.replace(tzinfo=pytz.timezone('UTC'))
            tsh.insert(cn, histts[idate], 'smallserie2',
                       'aurelien.campeas@pythonian.f', _insertion_date=idate)

    # this is perfectly round-tripable
    assert (tsh.get(engine, 'smallserie2') == ts).all()
    assert (tsh.get_history(engine, 'smallserie2') == histts).all()

    feb2 = datetime(2017, 2, 2)

    # get history ranges
    tsa = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=feb2)
    assert_df("""
insertion_date             value_date
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
                           2017-01-03    2.0
""", tsa)

    inserted_upto_feb2 = """
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
"""
    tsb = tsh.get_history(engine, 'smallserie',
                          to_insertion_date=feb2)
    assert_df(inserted_upto_feb2, tsb)

    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=feb2,
                          to_insertion_date=feb2)
    assert_df("""
insertion_date             value_date
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
""", tsc)

    # a window after the last insertion is empty
    feb4 = datetime(2017, 2, 4)
    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=feb4,
                          to_insertion_date=feb4)
    assert tsc is None

    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2016, 2, 1),
                          to_insertion_date=feb2)
    assert_df(inserted_upto_feb2, tsc)

    # a window before the first insertion is empty too
    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2016, 2, 1),
                          to_insertion_date=datetime(2016, 12, 31))
    assert tsc is None

    # restrictions on value dates
    values_upto_jan2 = """
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
"""
    tsc = tsh.get_history(engine, 'smallserie',
                          from_value_date=datetime(2017, 1, 1),
                          to_value_date=datetime(2017, 1, 2))
    assert_df(values_upto_jan2, tsc)

    tsc = tsh.get_history(engine, 'smallserie',
                          from_value_date=datetime(2017, 1, 2))
    assert_df("""
insertion_date             value_date
2017-02-02 00:00:00+00:00  2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-02    1.0
                           2017-01-03    2.0
""", tsc)

    tsc = tsh.get_history(engine, 'smallserie',
                          to_value_date=datetime(2017, 1, 2))
    assert_df(values_upto_jan2, tsc)

676

677
678
679
680
681
682
683
684
685
def test_history_delta(engine, tsh):
    """``get_history`` with ``deltabefore`` / ``deltaafter`` windows.

    Two hourly series are inserted one day apart, each starting one
    hour before its own insertion date.
    """
    for day in (1, 2):
        stamp = utcdt(2018, 1, day)
        hourly = genserie(stamp - timedelta(hours=1), 'H', 6, initval=[day])
        tsh.insert(engine, hourly, 'hd', 'aurelien.campeas@pythonian.fr',
                   _insertion_date=stamp)

    full_history = tsh.get_history(engine, 'hd')
    assert_df("""
insertion_date             value_date               
2018-01-01 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
                           2018-01-01 03:00:00+00:00    1.0
                           2018-01-01 04:00:00+00:00    1.0
2018-01-02 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
                           2018-01-01 03:00:00+00:00    1.0
                           2018-01-01 04:00:00+00:00    1.0
                           2018-01-01 23:00:00+00:00    2.0
                           2018-01-02 00:00:00+00:00    2.0
                           2018-01-02 01:00:00+00:00    2.0
                           2018-01-02 02:00:00+00:00    2.0
                           2018-01-02 03:00:00+00:00    2.0
                           2018-01-02 04:00:00+00:00    2.0
    """, full_history)

    # only an upper bound, two hours after each insertion date
    capped_after = tsh.get_history(engine, 'hd', deltaafter=timedelta(hours=2))
    assert_df("""
insertion_date             value_date               
2018-01-01 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
2018-01-02 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
                           2018-01-01 03:00:00+00:00    1.0
                           2018-01-01 04:00:00+00:00    1.0
                           2018-01-01 23:00:00+00:00    2.0
                           2018-01-02 00:00:00+00:00    2.0
                           2018-01-02 01:00:00+00:00    2.0
                           2018-01-02 02:00:00+00:00    2.0
""", capped_after)

    # both bounds: from the insertion date up to one hour after it
    windowed = tsh.get_history(engine, 'hd',
                               deltabefore=timedelta(hours=0),
                               deltaafter=timedelta(hours=1))
    assert_df("""
insertion_date             value_date               
2018-01-01 00:00:00+00:00  2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
2018-01-02 00:00:00+00:00  2018-01-02 00:00:00+00:00    2.0
                           2018-01-02 01:00:00+00:00    2.0
""", windowed)


735
736
737
738
739
740
741
742
743
744
745
def test_nr_gethistory(engine, tsh):
    """``get_history`` restricted by both insertion and value dates.

    A first series is inserted on 'foo' without an explicit insertion
    date, then five scaled versions of a second one are stamped on
    consecutive days from 2016-01-01; only the versions and value
    dates inside the requested bounds must show up.
    """
    # pd.date_range replaces the DatetimeIndex(start=, end=, freq=)
    # constructor form, which pandas deprecated and later removed
    s0 = pd.Series([-1, 0, 0, -1],
                   index=pd.date_range(start=datetime(2016, 12, 29),
                                       end=datetime(2017, 1, 1),
                                       freq='D'))
    tsh.insert(engine, s0, 'foo', 'zogzog')

    s1 = pd.Series([1, 0, 0, 1],
                   index=pd.date_range(start=datetime(2017, 1, 1),
                                       end=datetime(2017, 1, 4),
                                       freq='D'))
    idate = utcdt(2016, 1, 1)
    for i in range(5):
        with engine.connect() as cn:
            tsh.insert(cn, s1 * i, 'foo',
                       'aurelien.campeas@pythonian.f',
                       _insertion_date=idate + timedelta(days=i))

    # NOTE(review): the four positional dates look like the
    # from/to insertion dates followed by the from/to value dates
    # (this matches the expected output below) -- confirm against
    # the get_history signature
    df = tsh.get_history(engine, 'foo',
                         datetime(2016, 1, 3),
                         datetime(2016, 1, 4),
                         datetime(2017, 1, 1),
                         datetime(2017, 1, 4))

    assert_df("""
insertion_date             value_date
2016-01-03 00:00:00+00:00  2017-01-01    2.0
                           2017-01-02    0.0
                           2017-01-03    0.0
                           2017-01-04    2.0
2016-01-04 00:00:00+00:00  2017-01-01    3.0
                           2017-01-02    0.0
                           2017-01-03    0.0
                           2017-01-04    3.0
""", df)


772
def test_add_na(engine, tsh):
    """Behaviour of NaN-only data, alone and appended to real data."""
    # first insertion made only of NaNs: a diff is reported, a plain
    # read returns nothing, and the NaNs show up with _keep_nans
    only_nans = genserie(datetime(2010, 1, 1), 'D', 5)
    everything = [True] * len(only_nans)
    only_nans[everything] = np.nan

    diff = tsh.insert(engine, only_nans, 'ts_add_na', 'test')
    assert len(diff) == 5
    visible = tsh.get(engine, 'ts_add_na')
    assert len(visible) == 0

    with_nans = tsh.get(engine, 'ts_add_na', _keep_nans=True)
    assert_df("""
2010-01-01   NaN
2010-01-02   NaN
2010-01-03   NaN
2010-01-04   NaN
2010-01-05   NaN
""", with_nans)

    # in case of insertion in existing data
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 5)
    tsh.insert(engine, ts_begin, 'ts_add_na', 'test')

    nan_tail = genserie(datetime(2010, 1, 6), 'D', 5)
    everything = [True] * len(nan_tail)
    nan_tail[everything] = np.nan
    extended = pd.concat([ts_begin, nan_tail])

    # the known values followed by NaNs bring nothing new
    diff = tsh.insert(engine, extended, 'ts_add_na', 'test')
    assert diff is None

    visible = tsh.get(engine, 'ts_add_na')
    assert len(visible) == 5
805
806


807
def test_dtype_mismatch(engine, tsh):
    """Inserting values of another dtype into a series is refused."""
    def fresh_numbers():
        # a brand new series for each insertion
        return genserie(datetime(2015, 1, 1), 'D', 11)

    # strings stored first, numbers pushed afterwards
    tsh.insert(engine, fresh_numbers().astype('str'), 'error1', 'test')

    with pytest.raises(Exception) as excinfo:
        tsh.insert(engine, fresh_numbers(), 'error1', 'test')

    message = str(excinfo.value)
    assert 'Type error when inserting error1, new type is float64, type in base is object' == message

    # numbers stored first, strings pushed afterwards
    tsh.insert(engine, fresh_numbers(), 'error2', 'test')

    with pytest.raises(Exception) as excinfo:
        tsh.insert(engine, fresh_numbers().astype('str'), 'error2', 'test')

    message = str(excinfo.value)
    assert 'Type error when inserting error2, new type is object, type in base is float64' == message
833
834


835
836
837
838
839
840
841
842
843
def test_precision(engine, tsh):
    """Stored floats are rounded and the rounding yields no diff."""
    floaty = 0.123456789123456789
    original = genserie(datetime(2015, 1, 1), 'D', 5, initval=[floaty])

    tsh.insert(engine, original, 'precision', 'test')
    roundtripped = tsh.get(engine, 'precision')
    assert 0.12345678912346 == roundtripped.iloc[0]

    # the roundtriped series does not produce a diff when reinserted
    assert tsh.insert(engine, roundtripped, 'precision', 'test') is None

    # neither does the original series
    assert tsh.insert(engine, original, 'precision', 'test') is None


850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
def test_strip(engine, tsh):
    """Check that `strip` removes the revisions of one series from a changeset on.

    Four revisions of 'xserie' (author 'babar') are inserted on
    2017-01-01..04, interleaved with unrelated 'yserie' insertions (author
    'celeste'). After stripping 'xserie' at the changeset found for
    2017-01-03, only the first two 'xserie' revisions must remain in the
    history and in the log, while the 'yserie' entries stay untouched.
    """
    for i in range(1, 5):
        pubdate = utcdt(2017, 1, i)
        ts = genserie(datetime(2017, 1, 10), 'H', 1 + i)
        tsh.insert(engine, ts, 'xserie', 'babar', _insertion_date=pubdate)
        # also insert something completely unrelated
        tsh.insert(engine, genserie(datetime(2018, 1, 1), 'D', 1 + i), 'yserie', 'celeste')

    # changeset lookup: exact date, then the 'before' and 'after' modes
    csida = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3))
    assert csida is not None
    csidb = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3, 1), mode='before')
    csidc = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3, 1), mode='after')
    assert csidb < csida < csidc

    log = tsh.log(engine, names=['xserie', 'yserie'])
    assert [(idx, entry['author']) for idx, entry in enumerate(log, start=1)
    ] == [
        (1, 'babar'),
        (2, 'celeste'),
        (3, 'babar'),
        (4, 'celeste'),
        (5, 'babar'),
        (6, 'celeste'),
        (7, 'babar'),
        (8, 'celeste')
    ]

    h = tsh.get_history(engine, 'xserie')
    assert_df("""
insertion_date             value_date         
2017-01-01 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
2017-01-02 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
                           2017-01-10 02:00:00    2.0
2017-01-03 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
                           2017-01-10 02:00:00    2.0
                           2017-01-10 03:00:00    3.0
2017-01-04 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
                           2017-01-10 02:00:00    2.0
                           2017-01-10 03:00:00    3.0
                           2017-01-10 04:00:00    4.0
""", h)

    # strip everything from the 2017-01-03 changeset onwards
    csid = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3))
    with engine.connect() as cn:
        tsh.strip(cn, 'xserie', csid)

    assert_df("""
insertion_date             value_date         
2017-01-01 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
2017-01-02 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
                           2017-01-10 02:00:00    2.0
""", tsh.get_history(engine, 'xserie'))

    assert_df("""
2017-01-10 00:00:00    0.0
2017-01-10 01:00:00    1.0
2017-01-10 02:00:00    2.0
""", tsh.get(engine, 'xserie'))

    log = tsh.log(engine, names=['xserie', 'yserie'])
    # 5 and 7 have disappeared
    assert [entry['author'] for entry in log
    ] == ['babar', 'celeste', 'babar', 'celeste', 'celeste', 'celeste']

    log = tsh.log(engine, stripped=True, names=['xserie', 'yserie'])
    # NOTE(review): this loop passes vacuously when no entry carries a
    # 'tshistory.info' metadata item -- consider asserting that at least
    # one stripped entry is reported.
    for entry in log:
        meta = entry['meta']
        if not meta:
            continue
        stripinfo = meta.get('tshistory.info')
        if stripinfo:
            assert stripinfo.startswith('got stripped from')
927
928
929
930
931
932
933
934
935
936
937
938


def test_prepend(engine, tsh):
    """Insert the tail of a series, then the full series, and check lengths.

    After the first insertion (all points but the two leading ones) the
    stored series must hold 38 points; after inserting the complete
    40-point series it must hold 40.
    """
    full = genserie(datetime(2010, 1, 1), 'D', 40)
    assert len(full) == 40

    # start with everything except the first two points
    tail = full[2:]
    tsh.insert(engine, tail, 'prepend', 'test')
    stored = tsh.get(engine, 'prepend')
    assert len(stored) == 38

    # the full series brings the two missing leading points
    tsh.insert(engine, full, 'prepend', 'test')
    stored = tsh.get(engine, 'prepend')
    assert len(stored) == 40
939
940
941
942
943


def test_long_name(engine, tsh):
    """Check that a series with a 64-character name can be stored and read back."""
    serie = genserie(datetime(2010, 1, 1), 'D', 40)

    name = 'a' * 64
    tsh.insert(engine, serie, name, 'babar')
    assert tsh.get(engine, name) is not None
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089


def test_get_delta(engine, tsh):
    """Exercise `get_delta` and the delta filters of `get_history`.

    Two scenarios are built:

    * 'republication': four hourly insertions, each publishing 7 hourly
      points starting at its own insertion date;
    * 'repu2': four daily insertions, each publishing 7 hourly points
      starting at its own insertion date.

    Judging by the expected frames below, `get_delta(delta=d)` yields, for
    each value date, the value coming from the latest insertion made at
    least `d` before that value date, and `get_history` with
    `deltabefore`/`deltaafter` restricts each revision to a window of
    value dates relative to its insertion date.
    """
    # pd.date_range replaces the DatetimeIndex(start=, end=, freq=)
    # constructor form, which recent pandas versions no longer accept
    for idate in pd.date_range(start=utcdt(2015, 1, 1),
                               end=utcdt(2015, 1, 1, 3),
                               freq='H'):
        ts = genserie(start=idate, freq='H', repeat=7)
        tsh.insert(engine, ts, 'republication', 'test',
                   _insertion_date=idate)

    hist = tsh.get_history(engine, 'republication')
    assert_df("""
insertion_date             value_date               
2015-01-01 00:00:00+00:00  2015-01-01 00:00:00+00:00    0.0
                           2015-01-01 01:00:00+00:00    1.0
                           2015-01-01 02:00:00+00:00    2.0
                           2015-01-01 03:00:00+00:00    3.0
                           2015-01-01 04:00:00+00:00    4.0
                           2015-01-01 05:00:00+00:00    5.0
                           2015-01-01 06:00:00+00:00    6.0
2015-01-01 01:00:00+00:00  2015-01-01 00:00:00+00:00    0.0
                           2015-01-01 01:00:00+00:00    0.0
                           2015-01-01 02:00:00+00:00    1.0
                           2015-01-01 03:00:00+00:00    2.0
                           2015-01-01 04:00:00+00:00    3.0
                           2015-01-01 05:00:00+00:00    4.0
                           2015-01-01 06:00:00+00:00    5.0
                           2015-01-01 07:00:00+00:00    6.0
2015-01-01 02:00:00+00:00  2015-01-01 00:00:00+00:00    0.0
                           2015-01-01 01:00:00+00:00    0.0
                           2015-01-01 02:00:00+00:00    0.0
                           2015-01-01 03:00:00+00:00    1.0
                           2015-01-01 04:00:00+00:00    2.0
                           2015-01-01 05:00:00+00:00    3.0
                           2015-01-01 06:00:00+00:00    4.0
                           2015-01-01 07:00:00+00:00    5.0
                           2015-01-01 08:00:00+00:00    6.0
2015-01-01 03:00:00+00:00  2015-01-01 00:00:00+00:00    0.0
                           2015-01-01 01:00:00+00:00    0.0
                           2015-01-01 02:00:00+00:00    0.0
                           2015-01-01 03:00:00+00:00    0.0
                           2015-01-01 04:00:00+00:00    1.0
                           2015-01-01 05:00:00+00:00    2.0
                           2015-01-01 06:00:00+00:00    3.0
                           2015-01-01 07:00:00+00:00    4.0
                           2015-01-01 08:00:00+00:00    5.0
                           2015-01-01 09:00:00+00:00    6.0
""", hist)

    deltas = tsh.get_delta(engine, 'republication', delta=timedelta(hours=3))

    assert_df("""
2015-01-01 03:00:00+00:00    3.0
2015-01-01 04:00:00+00:00    3.0
2015-01-01 05:00:00+00:00    3.0
2015-01-01 06:00:00+00:00    3.0
2015-01-01 07:00:00+00:00    4.0
2015-01-01 08:00:00+00:00    5.0
2015-01-01 09:00:00+00:00    6.0
""", deltas)

    deltas = tsh.get_delta(engine, 'republication', delta=timedelta(hours=5))
    assert_df("""
2015-01-01 05:00:00+00:00    5.0
2015-01-01 06:00:00+00:00    5.0
2015-01-01 07:00:00+00:00    5.0
2015-01-01 08:00:00+00:00    5.0
2015-01-01 09:00:00+00:00    6.0
""", deltas)

    hist = tsh.get_history(engine, 'republication',
                           deltabefore=-timedelta(hours=3),
                           deltaafter=timedelta(hours=3))
    assert_df("""
insertion_date             value_date               
2015-01-01 00:00:00+00:00  2015-01-01 03:00:00+00:00    3.0
2015-01-01 01:00:00+00:00  2015-01-01 04:00:00+00:00    3.0
2015-01-01 02:00:00+00:00  2015-01-01 05:00:00+00:00    3.0
2015-01-01 03:00:00+00:00  2015-01-01 06:00:00+00:00    3.0
""", hist)

    hist = tsh.get_history(engine, 'republication',
                           deltabefore=-timedelta(hours=5),
                           deltaafter=timedelta(hours=5))

    assert_df("""
insertion_date             value_date               
2015-01-01 00:00:00+00:00  2015-01-01 05:00:00+00:00    5.0
2015-01-01 01:00:00+00:00  2015-01-01 06:00:00+00:00    5.0
2015-01-01 02:00:00+00:00  2015-01-01 07:00:00+00:00    5.0
2015-01-01 03:00:00+00:00  2015-01-01 08:00:00+00:00    5.0
""", hist)

    # maybe a more interesting example: each day we insert 7 data points
    for idate in pd.date_range(start=utcdt(2015, 1, 1),
                               end=utcdt(2015, 1, 4),
                               freq='D'):
        ts = genserie(start=idate, freq='H', repeat=7)
        tsh.insert(engine, ts, 'repu2', 'test', _insertion_date=idate)

    deltas = tsh.get_delta(engine, 'repu2', delta=timedelta(hours=3))
    assert_df("""
2015-01-01 03:00:00+00:00    3.0
2015-01-01 04:00:00+00:00    4.0
2015-01-01 05:00:00+00:00    5.0
2015-01-01 06:00:00+00:00    6.0
2015-01-02 03:00:00+00:00    3.0
2015-01-02 04:00:00+00:00    4.0
2015-01-02 05:00:00+00:00    5.0
2015-01-02 06:00:00+00:00    6.0
2015-01-03 03:00:00+00:00    3.0
2015-01-03 04:00:00+00:00    4.0
2015-01-03 05:00:00+00:00    5.0
2015-01-03 06:00:00+00:00    6.0
2015-01-04 03:00:00+00:00    3.0
2015-01-04 04:00:00+00:00    4.0
2015-01-04 05:00:00+00:00    5.0
2015-01-04 06:00:00+00:00    6.0
""", deltas)

    # which is basically the same as below
    hist = tsh.get_history(engine, 'repu2',
                           deltabefore=-timedelta(hours=3))
    assert_df("""
insertion_date             value_date               
2015-01-01 00:00:00+00:00  2015-01-01 03:00:00+00:00    3.0
                           2015-01-01 04:00:00+00:00    4.0
                           2015-01-01 05:00:00+00:00    5.0
                           2015-01-01 06:00:00+00:00    6.0
2015-01-02 00:00:00+00:00  2015-01-02 03:00:00+00:00    3.0
                           2015-01-02 04:00:00+00:00    4.0
                           2015-01-02 05:00:00+00:00    5.0
                           2015-01-02 06:00:00+00:00    6.0
2015-01-03 00:00:00+00:00  2015-01-03 03:00:00+00:00    3.0
                           2015-01-03 04:00:00+00:00    4.0
                           2015-01-03 05:00:00+00:00    5.0
                           2015-01-03 06:00:00+00:00    6.0
2015-01-04 00:00:00+00:00  2015-01-04 03:00:00+00:00    3.0
                           2015-01-04 04:00:00+00:00    4.0
                           2015-01-04 05:00:00+00:00    5.0
                           2015-01-04 06:00:00+00:00    6.0
""", hist)