test_tsio.py 30 KB
Newer Older
1
from datetime import datetime, timedelta
2
3
from pathlib import Path
import pytz
4

5
from dateutil import parser
6
import pytest
7
8
import numpy as np
import pandas as pd
9

10
from tshistory.snapshot import Snapshot
11
12
13
14
15
16
from tshistory.testutil import (
    assert_df,
    assert_group_equals,
    genserie,
    tempattr
)
17

18
DATADIR = Path(__file__).parent / 'data'
19

Aurélien Campéas's avatar
Aurélien Campéas committed
20

21
22
23
24
def utcdt(*dt):
    """Return a UTC-localized ``pd.Timestamp`` built from ``datetime(*dt)``."""
    naive = datetime(*dt)
    return pd.Timestamp(naive, tz='UTC')


25
def test_tstamp_roundtrip(engine, tsh):
    """Insert a tz-aware series indexed in Europe/Paris and read it back.

    The series spans the 2017-10-29 DST change; what comes back is
    expected to carry a UTC index equal, instant for instant, to the
    one that was inserted.
    """
    ts = genserie(datetime(2017, 10, 28, 23),
                  'H', 4, tz='UTC')
    ts.index = ts.index.tz_convert('Europe/Paris')

    assert_df("""
2017-10-29 01:00:00+02:00    0
2017-10-29 02:00:00+02:00    1
2017-10-29 02:00:00+01:00    2
2017-10-29 03:00:00+01:00    3
Freq: H
    """, ts)

    tsh.insert(engine, ts, 'tztest', 'Babar')
    back = tsh.get(engine, 'tztest')

    # though un localized we understand it's been normalized to utc
    assert_df("""
2017-10-28 23:00:00+00:00    0.0
2017-10-29 00:00:00+00:00    1.0
2017-10-29 01:00:00+00:00    2.0
2017-10-29 02:00:00+00:00    3.0
""", back)

    # tz-aware comparison: same instants even though the zones differ
    assert (ts.index == back.index).all()
    assert str(back.index.dtype) == 'datetime64[ns, UTC]'
51
52


53
def test_differential(engine, tsh):
    """Exercise successive insertions on the same series names.

    Covers: identical re-insertion, point modifications, an overlapping
    longer series, appends with and without intersection, the registry
    content, and a read through an explicit connection with a
    ``revision_date``.
    """
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 10)
    tsh.insert(engine, ts_begin, 'ts_test', 'test')

    assert tsh.exists(engine, 'ts_test')
    assert not tsh.exists(engine, 'this_does_not_exist')

    assert_df("""
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    3.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
""", tsh.get(engine, 'ts_test'))

    # we should detect the emission of a message
    tsh.insert(engine, ts_begin, 'ts_test', 'babar')

    # re-inserting the same data leaves the series unchanged
    assert_df("""
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    3.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
""", tsh.get(engine, 'ts_test'))

    ts_slight_variation = ts_begin.copy()
    ts_slight_variation.iloc[3] = 0
    ts_slight_variation.iloc[6] = 0
    tsh.insert(engine, ts_slight_variation, 'ts_test', 'celeste')

    assert_df("""
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    0.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    0.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
""", tsh.get(engine, 'ts_test'))

    # a longer series starting on 2010-01-03, overlapping the stored one
    ts_longer = genserie(datetime(2010, 1, 3), 'D', 15)
    ts_longer.iloc[1] = 2.48
    ts_longer.iloc[3] = 3.14
    ts_longer.iloc[5] = ts_begin.iloc[7]

    tsh.insert(engine, ts_longer, 'ts_test', 'test')

    assert_df("""
2010-01-01     0.00
2010-01-02     1.00
2010-01-03     0.00
2010-01-04     2.48
2010-01-05     2.00
2010-01-06     3.14
2010-01-07     4.00
2010-01-08     7.00
2010-01-09     6.00
2010-01-10     7.00
2010-01-11     8.00
2010-01-12     9.00
2010-01-13    10.00
2010-01-14    11.00
2010-01-15    12.00
2010-01-16    13.00
2010-01-17    14.00
""", tsh.get(engine, 'ts_test'))

    # start testing manual overrides
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 5, initval=[2])
    ts_begin.loc['2010-01-04'] = -1
    tsh.insert(engine, ts_begin, 'ts_mixte', 'test')

    # -1 represents bogus upstream data
    assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
""", tsh.get(engine, 'ts_mixte'))

    # refresh all the period + 1 extra data point
    ts_more = genserie(datetime(2010, 1, 2), 'D', 5, [2])
    ts_more.loc['2010-01-04'] = -1
    tsh.insert(engine, ts_more, 'ts_mixte', 'test')

    assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
""", tsh.get(engine, 'ts_mixte'))

    # just append an extra data point
    # with no intersection with the previous ts
    ts_one_more = genserie(datetime(2010, 1, 7), 'D', 1, [3])
    tsh.insert(engine, ts_one_more, 'ts_mixte', 'test')

    assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
""", tsh.get(engine, 'ts_mixte'))

    with engine.connect() as cn:
        # make the registry table of the current namespace resolvable
        cn.execute('set search_path to "{0}.timeserie", {0}, public'.format(tsh.namespace))
        allts = pd.read_sql("select name, table_name from registry "
                            "where name in ('ts_test', 'ts_mixte')",
                            cn)

        assert_df("""
name              table_name
0   ts_test   {0}.timeserie.ts_test
1  ts_mixte  {0}.timeserie.ts_mixte
""".format(tsh.namespace), allts)

        # reading as of "now" yields the latest state
        assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
""", tsh.get(cn, 'ts_mixte',
             revision_date=datetime.now()))
199
200


201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
def test_serie_metadata(engine, tsh):
    """Check the initial series metadata and how it can be updated.

    User keys can be added freely; touching an internal key such as
    ``tzaware`` raises ``AssertionError`` unless ``internal=True``.
    """
    name = 'ts-metadata'
    tsh.insert(
        engine,
        genserie(datetime(2010, 1, 1), 'D', 1, initval=[1]),
        name,
        'babar'
    )

    initialmeta = tsh.metadata(engine, name)
    expected_initial = {
        'tzaware': False,
        'index_type': 'datetime64[ns]',
        'value_type': 'float64',
        'index_names': []
    }
    assert initialmeta == expected_initial

    # a user-level key is accepted as-is
    topic = {'topic': 'banana spot price'}
    tsh.update_metadata(engine, name, topic)
    assert tsh.metadata(engine, name)['topic'] == 'banana spot price'

    # an internal key is refused on the regular path ...
    tzpatch = {'tzaware': True}
    with pytest.raises(AssertionError):
        tsh.update_metadata(engine, name, tzpatch)

    # ... and accepted when explicitly flagged as internal
    tsh.update_metadata(engine, name, tzpatch, internal=True)
    expected_patched = {
        'tzaware': True,
        'index_type': 'datetime64[ns]',
        'value_type': 'float64',
        'index_names': [],
        'topic': 'banana spot price'
    }
    assert tsh.metadata(engine, name) == expected_patched

    # unbreak the serie for the second test pass :o
    tsh.update_metadata(engine, name, initialmeta, internal=True)


233
234
def test_changeset_metadata(engine, tsh):
    """Metadata passed at insertion time is retrievable from the changeset."""
    serie = genserie(datetime(2010, 1, 1), 'D', 1, initval=[1])
    tsh.insert(engine, serie, 'ts-cs-metadata', 'babar',
               {'foo': 'A', 'bar': 42})

    log = tsh.log(engine, names=['ts-cs-metadata'])
    # the first log entry carries the revision id of the insertion above
    meta = tsh.changeset_metadata(engine, log[0]['rev'])
    assert meta == {'foo': 'A', 'bar': 42}


243
def test_bad_import(engine, tsh):
    """Insert awkward inputs: csv-sourced data, empty series, NaN series.

    Also checks that asking for an unknown series name yields ``None``.
    """
    # the 'Gas Day' column is parsed into dates explicitly (day first)
    df_result = pd.read_csv(str(DATADIR / 'test_data.csv'))
    df_result['Gas Day'] = df_result['Gas Day'].apply(parser.parse, dayfirst=True, yearfirst=False)
    df_result.set_index('Gas Day', inplace=True)
    ts = df_result['SC']

    tsh.insert(engine, ts, 'SND_SC', 'test')
    result = tsh.get(engine, 'SND_SC')
    assert result.dtype == 'float64'

    # insertion of empty ts
    ts = pd.Series(name='truc', dtype='object')
    tsh.insert(engine, ts, 'empty_ts', 'test')
    assert tsh.get(engine, 'empty_ts') is None

    # nan in ts
    # all na
    ts = genserie(datetime(2010, 1, 10), 'D', 10, [np.nan], name='truc')
    tsh.insert(engine, ts, 'test_nan', 'test')
    assert len(tsh.get(engine, 'test_nan')) == 0
    assert len(tsh.get(engine, 'test_nan', _keep_nans=True)) == 10

    # mixed na
    ts = pd.Series([np.nan] * 5 + [3] * 5,
                   index=pd.date_range(start=datetime(2010, 1, 10),
                                       freq='D', periods=10), name='truc')
    tsh.insert(engine, ts, 'test_nan', 'test')
    result = tsh.get(engine, 'test_nan')

    # same insertion a second time: the stored values must not change
    tsh.insert(engine, ts, 'test_nan', 'test')
    result = tsh.get(engine, 'test_nan')
    assert_df("""
2010-01-15    3.0
2010-01-16    3.0
2010-01-17    3.0
2010-01-18    3.0
2010-01-19    3.0
""", result)

    # get_ts with name not in database
    # NOTE(review): 'test' is passed as the third positional argument of
    # tsh.get -- confirm this is intended and not an author-name leftover
    assert tsh.get(engine, 'inexisting_name', 'test') is None
285
286


287
def test_revision_date(engine, tsh):
    """Read a series as of several revision dates.

    A prologue builds the 'revdate' series with four forced insertion
    dates; the main part inserts four versions of 'ts_through_time' and
    reads them back at various ``revision_date`` values; the epilogue
    comes back to 'revdate'.
    """
    # we prepare a good joke for the end of the test
    # ival = Snapshot._interval
    # Snapshot._interval = 3

    for i in range(1, 5):
        with engine.connect() as cn:
            tsh.insert(cn, genserie(datetime(2017, 1, i), 'D', 3, [i]), 'revdate',
                       'test', _insertion_date=utcdt(2016, 1, i))

    # end of prologue, now some real meat
    # version number `value` (0..3) is stamped with the matching date
    insertion_dates = [
        pd.Timestamp('2015-1-1 00:00:00', tz='UTC'),
        pd.Timestamp('2015-1-1 15:45:23', tz='UTC'),
        pd.Timestamp('2015-1-2 15:43:23', tz='UTC'),
        pd.Timestamp('2015-1-3', tz='UTC'),
    ]
    for value, idate in enumerate(insertion_dates):
        ts = genserie(datetime(2010, 1, 4), 'D', 4, [value], name='truc')
        tsh.insert(engine, ts, 'ts_through_time',
                   'test', _insertion_date=idate)
        assert idate == tsh.latest_insertion_date(engine, 'ts_through_time')

    ts = tsh.get(engine, 'ts_through_time')

    assert_df("""
2010-01-04    3.0
2010-01-05    3.0
2010-01-06    3.0
2010-01-07    3.0
""", ts)

    ts = tsh.get(engine, 'ts_through_time',
                 revision_date=datetime(2015, 1, 2, 18, 43, 23))

    assert_df("""
2010-01-04    2.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    2.0
""", ts)

    ts = tsh.get(engine, 'ts_through_time',
                 revision_date=datetime(2015, 1, 1, 18, 43, 23))

    assert_df("""
2010-01-04    1.0
2010-01-05    1.0
2010-01-06    1.0
2010-01-07    1.0
""", ts)

    # before the very first insertion there is nothing to return
    ts = tsh.get(engine, 'ts_through_time',
                 revision_date=datetime(2014, 1, 1, 18, 43, 23))

    assert ts is None

    # epilogue: back to the revdate issue
    assert_df("""
2017-01-01    1.0
2017-01-02    2.0
2017-01-03    3.0
2017-01-04    4.0
2017-01-05    4.0
2017-01-06    4.0
""", tsh.get(engine, 'revdate'))

    oldstate = tsh.get(engine, 'revdate', revision_date=datetime(2016, 1, 2))
    assert_df("""
2017-01-01    1.0
2017-01-02    2.0
2017-01-03    2.0
2017-01-04    2.0
""", oldstate)
373

374

375
def test_deletion(engine, tsh):
    """Check how NaN/None values in an inserted series erase stored points.

    Covers numeric and string series, re-filling erased points, series
    made only of NaNs, ``tsh.diff`` semantics with NaNs, and complete
    erasure followed by a fresh insertion.
    """
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 11)
    ts_begin.iloc[-1] = np.nan
    tsh.insert(engine, ts_begin, 'ts_del', 'test')

    _, ts = Snapshot(engine, tsh, 'ts_del').find()
    assert ts.iloc[-2] == 9.0

    ts_begin.iloc[0] = np.nan
    ts_begin.iloc[3] = np.nan

    tsh.insert(engine, ts_begin, 'ts_del', 'test')

    assert_df("""
2010-01-02    1.0
2010-01-03    2.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
""", tsh.get(engine, 'ts_del'))

    ts2 = tsh.get(engine, 'ts_del',
                 # force snapshot reconstruction feature
                 revision_date=datetime(2038, 1, 1))
    assert (tsh.get(engine, 'ts_del') == ts2).all()

    # put values back on the erased points
    ts_begin.iloc[0] = 42
    ts_begin.iloc[3] = 23

    tsh.insert(engine, ts_begin, 'ts_del', 'test')

    assert_df("""
2010-01-01    42.0
2010-01-02     1.0
2010-01-03     2.0
2010-01-04    23.0
2010-01-05     4.0
2010-01-06     5.0
2010-01-07     6.0
2010-01-08     7.0
2010-01-09     8.0
2010-01-10     9.0
""", tsh.get(engine, 'ts_del'))

    # now with string!

    ts_string = genserie(datetime(2010, 1, 1), 'D', 10, ['machin'])
    tsh.insert(engine, ts_string, 'ts_string_del', 'test')

    ts_string[4] = None
    ts_string[5] = None

    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
    assert_df("""
2010-01-01    machin
2010-01-02    machin
2010-01-03    machin
2010-01-04    machin
2010-01-07    machin
2010-01-08    machin
2010-01-09    machin
2010-01-10    machin
""", tsh.get(engine, 'ts_string_del'))

    ts_string[4] = 'truc'
    ts_string[6] = 'truc'

    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
    assert_df("""
2010-01-01    machin
2010-01-02    machin
2010-01-03    machin
2010-01-04    machin
2010-01-05      truc
2010-01-07      truc
2010-01-08    machin
2010-01-09    machin
2010-01-10    machin
""", tsh.get(engine, 'ts_string_del'))

    ts_string[ts_string.index] = np.nan
    tsh.insert(engine, ts_string, 'ts_string_del', 'test')

    erased = tsh.get(engine, 'ts_string_del')
    assert len(erased) == 0

    # first insertion with only nan

    ts_begin = genserie(datetime(2010, 1, 1), 'D', 10, [np.nan])
    tsh.insert(engine, ts_begin, 'ts_null', 'test')

    assert len(tsh.get(engine, 'ts_null')) == 0

    # exhibit issue with nans handling
    ts_repushed = genserie(datetime(2010, 1, 1), 'D', 11)
    ts_repushed[0:3] = np.nan

    assert_df("""
2010-01-01     NaN
2010-01-02     NaN
2010-01-03     NaN
2010-01-04     3.0
2010-01-05     4.0
2010-01-06     5.0
2010-01-07     6.0
2010-01-08     7.0
2010-01-09     8.0
2010-01-10     9.0
2010-01-11    10.0
Freq: D
""", ts_repushed)

    tsh.insert(engine, ts_repushed, 'ts_repushed', 'test')
    diff = tsh.insert(engine, ts_repushed, 'ts_repushed', 'test')
    assert diff is None

    # there is no difference
    assert 0 == len(tsh.diff(ts_repushed, ts_repushed))

    ts_add = genserie(datetime(2010, 1, 1), 'D', 15)
    ts_add.iloc[0] = np.nan
    ts_add.iloc[13:] = np.nan
    ts_add.iloc[8] = np.nan
    diff = tsh.diff(ts_repushed, ts_add)

    assert_df("""
2010-01-02     1.0
2010-01-03     2.0
2010-01-09     NaN
2010-01-12    11.0
2010-01-13    12.0""", diff.sort_index())
    # value on nan => value
    # nan on value => nan
    # nan on nan => Nothing
    # nan on nothing=> Nothing

    # full erasing
    # numeric
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 4)
    tsh.insert(engine, ts_begin, 'ts_full_del', 'test')

    ts_begin.iloc[:] = np.nan
    tsh.insert(engine, ts_begin, 'ts_full_del', 'test')

    ts_end = genserie(datetime(2010, 1, 1), 'D', 4)
    tsh.insert(engine, ts_end, 'ts_full_del', 'test')

    # string

    ts_begin = genserie(datetime(2010, 1, 1), 'D', 4, ['text'])
    tsh.insert(engine, ts_begin, 'ts_full_del_str', 'test')

    ts_begin.iloc[:] = np.nan
    tsh.insert(engine, ts_begin, 'ts_full_del_str', 'test')

    ts_end = genserie(datetime(2010, 1, 1), 'D', 4, ['text'])
    tsh.insert(engine, ts_end, 'ts_full_del_str', 'test')
535

Aurélien Campéas's avatar
Aurélien Campéas committed
536

537
def test_get_history(engine, tsh):
    """Check ``tsh.log`` and ``tsh.get_history`` on a three-revision series.

    The history is also replayed into a second series to show it
    round-trips, then queried with insertion-date and value-date bounds.
    """
    author = 'aurelien.campeas@pythonian.fr'
    for numserie in (1, 2, 3):
        with engine.connect() as cn:
            tsh.insert(cn, genserie(datetime(2017, 1, 1), 'D', numserie), 'smallserie',
                       author,
                       _insertion_date=utcdt(2017, 2, numserie))

    ts = tsh.get(engine, 'smallserie')
    assert_df("""
2017-01-01    0.0
2017-01-02    1.0
2017-01-03    2.0
""", ts)

    logs = tsh.log(engine, names=['smallserie'])
    # one log entry per insertion, dated 2017-02-01 .. 2017-02-03 (UTC)
    expected_logs = [
        {'author': author,
         'meta': {},
         'date': utcdt(2017, 2, day),
         'names': ['smallserie']}
        for day in (1, 2, 3)
    ]
    # the revision id is not predictable: leave it out of the comparison
    assert expected_logs == [{k: v for k, v in log.items() if k != 'rev'}
                             for log in logs]

    histts = tsh.get_history(engine, 'smallserie')
    assert histts.name == 'smallserie'

    assert_df("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
                           2017-01-03    2.0
""", histts)

    # replay every historical version into another series
    for idate in histts.index.get_level_values('insertion_date').unique():
        with engine.connect() as cn:
            idate = idate.replace(tzinfo=pytz.timezone('UTC'))
            tsh.insert(cn, histts[idate], 'smallserie2',
                       'aurelien.campeas@pythonian.f', _insertion_date=idate)

    # this is perfectly round-tripable
    assert (tsh.get(engine, 'smallserie2') == ts).all()
    assert (tsh.get_history(engine, 'smallserie2') == histts).all()

    # get history ranges
    tsa = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2017, 2, 2))
    assert_df("""
insertion_date             value_date
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
                           2017-01-03    2.0
""", tsa)

    tsb = tsh.get_history(engine, 'smallserie',
                          to_insertion_date=datetime(2017, 2, 2))
    assert_df("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
""", tsb)

    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2017, 2, 2),
                          to_insertion_date=datetime(2017, 2, 2))
    assert_df("""
insertion_date             value_date
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
""", tsc)

    # a window after the last insertion yields nothing
    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2017, 2, 4),
                          to_insertion_date=datetime(2017, 2, 4))
    assert tsc is None

    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2016, 2, 1),
                          to_insertion_date=datetime(2017, 2, 2))
    assert_df("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
""", tsc)

    # a window before the first insertion yields nothing
    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2016, 2, 1),
                          to_insertion_date=datetime(2016, 12, 31))
    assert tsc is None

    # restrictions on value dates
    tsc = tsh.get_history(engine, 'smallserie',
                          from_value_date=datetime(2017, 1, 1),
                          to_value_date=datetime(2017, 1, 2))
    assert_df("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
""", tsc)

    tsc = tsh.get_history(engine, 'smallserie',
                          from_value_date=datetime(2017, 1, 2))
    assert_df("""
insertion_date             value_date
2017-02-02 00:00:00+00:00  2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-02    1.0
                           2017-01-03    2.0
""", tsc)

    tsc = tsh.get_history(engine, 'smallserie',
                          to_value_date=datetime(2017, 1, 2))
    assert_df("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
""", tsc)

676

677
678
679
680
681
682
683
684
685
def test_history_delta(engine, tsh):
    """Check ``get_history`` with ``deltabefore`` / ``deltaafter`` windows.

    Two hourly, tz-aware versions are inserted, each starting one hour
    before its own insertion date.
    """
    for d in range(1, 3):
        idate = utcdt(2018, 1, d)
        serie = genserie(idate - timedelta(hours=1), 'H', 6, initval=[d])
        tsh.insert(engine, serie, 'hd', 'aurelien.campeas@pythonian.fr',
                   _insertion_date=idate)

    assert_df("""
insertion_date             value_date               
2018-01-01 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
                           2018-01-01 03:00:00+00:00    1.0
                           2018-01-01 04:00:00+00:00    1.0
2018-01-02 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
                           2018-01-01 03:00:00+00:00    1.0
                           2018-01-01 04:00:00+00:00    1.0
                           2018-01-01 23:00:00+00:00    2.0
                           2018-01-02 00:00:00+00:00    2.0
                           2018-01-02 01:00:00+00:00    2.0
                           2018-01-02 02:00:00+00:00    2.0
                           2018-01-02 03:00:00+00:00    2.0
                           2018-01-02 04:00:00+00:00    2.0
    """, tsh.get_history(engine, 'hd'))

    assert_df("""
insertion_date             value_date               
2018-01-01 00:00:00+00:00  2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
2018-01-02 00:00:00+00:00  2018-01-02 00:00:00+00:00    2.0
                           2018-01-02 01:00:00+00:00    2.0
                           2018-01-02 02:00:00+00:00    2.0
""",  tsh.get_history(engine, 'hd', deltaafter=timedelta(hours=2)))

    # this test puts the feature in light: the value date of 23 h disappeared
    # auc: I consider this a feature, but I agree we can be more liberal

    assert_df("""
insertion_date             value_date               
2018-01-01 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
2018-01-02 00:00:00+00:00  2018-01-01 23:00:00+00:00    2.0
                           2018-01-02 00:00:00+00:00    2.0
                           2018-01-02 01:00:00+00:00    2.0
""",  tsh.get_history(engine, 'hd',
                      deltabefore=timedelta(hours=1),
                      deltaafter=timedelta(hours=1)))


732
733
734
735
736
737
738
739
740
741
742
def test_nr_gethistory(engine, tsh):
    """Query ``get_history`` with positional insertion/value date bounds.

    A first series is stored under 'foo', then five scaled versions of
    a second one are inserted with forced insertion dates one day apart.
    """
    # pd.date_range replaces the DatetimeIndex(start=, end=, freq=)
    # constructor form, which recent pandas versions no longer accept
    s0 = pd.Series([-1, 0, 0, -1],
                   index=pd.date_range(start=datetime(2016, 12, 29),
                                       end=datetime(2017, 1, 1),
                                       freq='D'))
    tsh.insert(engine, s0, 'foo', 'zogzog')

    s1 = pd.Series([1, 0, 0, 1],
                   index=pd.date_range(start=datetime(2017, 1, 1),
                                       end=datetime(2017, 1, 4),
                                       freq='D'))
    idate = utcdt(2016, 1, 1)
    for i in range(5):
        with engine.connect() as cn:
            tsh.insert(cn, s1 * i, 'foo',
                       'aurelien.campeas@pythonian.f',
                       _insertion_date=idate + timedelta(days=i))

    df = tsh.get_history(engine, 'foo',
                         datetime(2016, 1, 3),
                         datetime(2016, 1, 4),
                         datetime(2017, 1, 1),
                         datetime(2017, 1, 4))

    assert_df("""
insertion_date             value_date
2016-01-03 00:00:00+00:00  2017-01-01    2.0
                           2017-01-02    0.0
                           2017-01-03    0.0
                           2017-01-04    2.0
2016-01-04 00:00:00+00:00  2017-01-01    3.0
                           2017-01-02    0.0
                           2017-01-03    0.0
                           2017-01-04    3.0
""", df)


769
def test_add_na(engine, tsh):
    """Insert NaN-only data, first on a new series, then after real data."""
    # first insertion made only of NaNs: the returned diff has 5 points,
    # a plain get is empty and the NaNs show up with _keep_nans=True
    ts_nan = genserie(datetime(2010, 1, 1), 'D', 5)
    ts_nan.iloc[:] = np.nan

    diff = tsh.insert(engine, ts_nan, 'ts_add_na', 'test')
    assert len(diff) == 5
    result = tsh.get(engine, 'ts_add_na')
    assert len(result) == 0

    result = tsh.get(engine, 'ts_add_na', _keep_nans=True)
    assert_df("""
2010-01-01   NaN
2010-01-02   NaN
2010-01-03   NaN
2010-01-04   NaN
2010-01-05   NaN
""", result)

    # in case of insertion in existing data
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 5)
    tsh.insert(engine, ts_begin, 'ts_add_na', 'test')

    # same 5 values followed by 5 brand new NaN points: nothing to store
    ts_nan = genserie(datetime(2010, 1, 6), 'D', 5)
    ts_nan.iloc[:] = np.nan
    ts_nan = pd.concat([ts_begin, ts_nan])

    diff = tsh.insert(engine, ts_nan, 'ts_add_na', 'test')
    assert diff is None

    result = tsh.get(engine, 'ts_add_na')
    assert len(result) == 5
802
803


804
def test_dtype_mismatch(engine, tsh):
    """Inserting data of another dtype into an existing series must fail.

    Both directions are checked (float into string, string into float),
    along with the exact error message.
    """
    # series first created with string values ...
    tsh.insert(engine,
               genserie(datetime(2015, 1, 1), 'D', 11).astype('str'),
               'error1',
               'test')

    # ... then fed with floats
    with pytest.raises(Exception) as excinfo:
        tsh.insert(engine,
                   genserie(datetime(2015, 1, 1), 'D', 11),
                   'error1',
                   'test')

    assert 'Type error when inserting error1, new type is float64, type in base is object' == str(excinfo.value)

    # series first created with float values ...
    tsh.insert(engine,
               genserie(datetime(2015, 1, 1), 'D', 11),
               'error2',
               'test')

    # ... then fed with strings
    with pytest.raises(Exception) as excinfo:
        tsh.insert(engine,
                   genserie(datetime(2015, 1, 1), 'D', 11).astype('str'),
                   'error2',
                   'test')

    assert 'Type error when inserting error2, new type is object, type in base is float64' == str(excinfo.value)
830
831


832
833
834
835
836
837
838
839
840
def test_precision(engine, tsh):
    """Values read back are rounded, and the rounding creates no diff."""
    floaty = 0.123456789123456789
    ts = genserie(datetime(2015, 1, 1), 'D', 5, initval=[floaty])

    tsh.insert(engine, ts, 'precision', 'test')
    ts_round = tsh.get(engine, 'precision')
    assert 0.12345678912346 == ts_round.iloc[0]

    # the roundtripped series does not produce a diff when reinserted
    diff = tsh.insert(engine, ts_round, 'precision', 'test')
    assert diff is None

    # neither does the original series
    diff = tsh.insert(engine, ts, 'precision', 'test')
    assert diff is None


847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
def test_strip(engine, tsh):
    """Strip the revisions of a series from a given changeset onwards.

    Four revisions of 'xserie' are interleaved with four of 'yserie';
    after stripping from the changeset found at 2017-01-03, only the
    first two 'xserie' revisions remain and the log is updated.
    """
    for i in range(1, 5):
        pubdate = utcdt(2017, 1, i)
        ts = genserie(datetime(2017, 1, 10), 'H', 1 + i)
        tsh.insert(engine, ts, 'xserie', 'babar', _insertion_date=pubdate)
        # also insert something completely unrelated
        tsh.insert(engine, genserie(datetime(2018, 1, 1), 'D', 1 + i), 'yserie', 'celeste')

    csida = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3))
    assert csida is not None
    csidb = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3, 1), mode='before')
    csidc = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3, 1), mode='after')
    assert csidb < csida < csidc

    log = tsh.log(engine, names=['xserie', 'yserie'])
    assert [(idx, entry['author']) for idx, entry in enumerate(log, start=1)
    ] == [
        (1, 'babar'),
        (2, 'celeste'),
        (3, 'babar'),
        (4, 'celeste'),
        (5, 'babar'),
        (6, 'celeste'),
        (7, 'babar'),
        (8, 'celeste')
    ]

    h = tsh.get_history(engine, 'xserie')
    assert_df("""
insertion_date             value_date         
2017-01-01 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
2017-01-02 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
                           2017-01-10 02:00:00    2.0
2017-01-03 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
                           2017-01-10 02:00:00    2.0
                           2017-01-10 03:00:00    3.0
2017-01-04 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
                           2017-01-10 02:00:00    2.0
                           2017-01-10 03:00:00    3.0
                           2017-01-10 04:00:00    4.0
""", h)

    csid = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3))
    with engine.connect() as cn:
        tsh.strip(cn, 'xserie', csid)

    assert_df("""
insertion_date             value_date         
2017-01-01 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
2017-01-02 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
                           2017-01-10 02:00:00    2.0
""", tsh.get_history(engine, 'xserie'))

    assert_df("""
2017-01-10 00:00:00    0.0
2017-01-10 01:00:00    1.0
2017-01-10 02:00:00    2.0
""", tsh.get(engine, 'xserie'))

    log = tsh.log(engine, names=['xserie', 'yserie'])
    # 5 and 7 have disappeared
    assert [entry['author'] for entry in log
    ] == ['babar', 'celeste', 'babar', 'celeste', 'celeste', 'celeste']

    log = tsh.log(engine, stripped=True, names=['xserie', 'yserie'])
    for entry in log:
        meta = entry['meta']
        if not meta:
            continue
        # when present, the strip marker must say where it comes from
        stripinfo = meta.get('tshistory.info')
        if stripinfo:
            assert stripinfo.startswith('got stripped from')
924
925
926
927
928
929
930
931
932
933
934
935


def test_prepend(engine, tsh):
    """Insert the tail of a series first, then the whole series."""
    full = genserie(datetime(2010, 1, 1), 'D', 40)
    assert 40 == len(full)

    # start with everything but the first two points
    tail = full[2:]
    tsh.insert(engine, tail, 'prepend', 'test')
    stored = tsh.get(engine, 'prepend')
    assert 38 == len(stored)

    # the complete series brings the two missing leading points
    tsh.insert(engine, full, 'prepend', 'test')
    stored = tsh.get(engine, 'prepend')
    assert 40 == len(stored)
936
937
938
939
940


def test_long_name(engine, tsh):
    """A series with a 64-character name can be inserted and read back."""
    serie = genserie(datetime(2010, 1, 1), 'D', 40)

    name = 'a' * 64
    tsh.insert(engine, serie, name, 'babar')
    assert tsh.get(engine, name) is not None