test_tsio.py 39.5 KB
Newer Older
1
from datetime import datetime, timedelta
2
3
from pathlib import Path
import pytz
4

5
from dateutil import parser
6
import pytest
7
8
import numpy as np
import pandas as pd
9

10
from tshistory.snapshot import Snapshot
11
from tshistory.util import rename_series
12
13
from tshistory.testutil import (
    assert_df,
14
15
    assert_hist,
    assert_hist_equals,
16
17
18
19
    assert_group_equals,
    genserie,
    tempattr
)
20

21
DATADIR = Path(__file__).parent / 'data'
22

Aurélien Campéas's avatar
Aurélien Campéas committed
23

24
25
26
27
def utcdt(*dt):
    return pd.Timestamp(datetime(*dt), tz='UTC')


28
def test_tstamp_roundtrip(engine, tsh):
29
30
    ts = genserie(datetime(2017, 10, 28, 23),
                  'H', 4, tz='UTC')
31
32
33
34
35
36
37
38
39
40
    ts.index = ts.index.tz_convert('Europe/Paris')

    assert_df("""
2017-10-29 01:00:00+02:00    0
2017-10-29 02:00:00+02:00    1
2017-10-29 02:00:00+01:00    2
2017-10-29 03:00:00+01:00    3
Freq: H
    """, ts)

41
42
    tsh.insert(engine, ts, 'tztest', 'Babar')
    back = tsh.get(engine, 'tztest')
43
44
45

    # though un localized we understand it's been normalized to utc
    assert_df("""
46
47
48
49
2017-10-28 23:00:00+00:00    0.0
2017-10-29 00:00:00+00:00    1.0
2017-10-29 01:00:00+00:00    2.0
2017-10-29 02:00:00+00:00    3.0
50
51
52
""", back)

    assert (ts.index == back.index).all()
53
    assert str(back.index.dtype) == 'datetime64[ns, UTC]'
54
55


56
def test_differential(engine, tsh):
57
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 10)
58
    tsh.insert(engine, ts_begin, 'ts_test', 'test')
59

60
    id1 = tsh.last_id(engine, 'ts_test')
61
    assert tsh._previous_cset(engine, 'ts_test', id1) is None
62

63
64
    assert tsh.exists(engine, 'ts_test')
    assert not tsh.exists(engine, 'this_does_not_exist')
65

66
    assert_df("""
67
68
69
70
71
72
73
74
75
76
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    3.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
77
""", tsh.get(engine, 'ts_test'))
78
79

    # we should detect the emission of a message
80
    tsh.insert(engine, ts_begin, 'ts_test', 'babar')
81

82
    assert_df("""
83
84
85
86
87
88
89
90
91
92
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    3.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
93
""", tsh.get(engine, 'ts_test'))
94
95
96
97

    ts_slight_variation = ts_begin.copy()
    ts_slight_variation.iloc[3] = 0
    ts_slight_variation.iloc[6] = 0
98
    tsh.insert(engine, ts_slight_variation, 'ts_test', 'celeste')
99
    id2 = tsh.last_id(engine, 'ts_test')
100
    assert tsh._previous_cset(engine, 'ts_test', id2) == id1
101

102
    assert_df("""
103
104
105
106
107
108
109
110
111
112
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    0.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    0.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
113
""", tsh.get(engine, 'ts_test'))
114

115
    ts_longer = genserie(datetime(2010, 1, 3), 'D', 15)
116
117
118
119
    ts_longer.iloc[1] = 2.48
    ts_longer.iloc[3] = 3.14
    ts_longer.iloc[5] = ts_begin.iloc[7]

120
    tsh.insert(engine, ts_longer, 'ts_test', 'test')
121
122
123
    id3 = tsh.last_id(engine, 'ts_test')

    assert id1 < id2 < id3
124

125
    assert_df("""
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
2010-01-01     0.00
2010-01-02     1.00
2010-01-03     0.00
2010-01-04     2.48
2010-01-05     2.00
2010-01-06     3.14
2010-01-07     4.00
2010-01-08     7.00
2010-01-09     6.00
2010-01-10     7.00
2010-01-11     8.00
2010-01-12     9.00
2010-01-13    10.00
2010-01-14    11.00
2010-01-15    12.00
2010-01-16    13.00
2010-01-17    14.00
143
""", tsh.get(engine, 'ts_test'))
144
145

    # start testing manual overrides
146
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 5, initval=[2])
147
    ts_begin.loc['2010-01-04'] = -1
148
    tsh.insert(engine, ts_begin, 'ts_mixte', 'test')
149
150

    # -1 represents bogus upstream data
151
    assert_df("""
152
153
154
155
156
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
157
""", tsh.get(engine, 'ts_mixte'))
158
159

    # refresh all the period + 1 extra data point
160
    ts_more = genserie(datetime(2010, 1, 2), 'D', 5, [2])
161
    ts_more.loc['2010-01-04'] = -1
162
    tsh.insert(engine, ts_more, 'ts_mixte', 'test')
163

164
    assert_df("""
165
166
167
168
169
170
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
171
""", tsh.get(engine, 'ts_mixte'))
172
173

    # just append an extra data point
174
175
    # with no intersection with the previous ts
    ts_one_more = genserie(datetime(2010, 1, 7), 'D', 1, [3])
176
    tsh.insert(engine, ts_one_more, 'ts_mixte', 'test')
177

178
    assert_df("""
179
180
181
182
183
184
185
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
186
""", tsh.get(engine, 'ts_mixte'))
187

188
    with engine.connect() as cn:
189
        cn.execute('set search_path to "{0}.timeserie", {0}, public'.format(tsh.namespace))
190
191
        allts = pd.read_sql("select seriename, table_name from registry "
                            "where seriename in ('ts_test', 'ts_mixte')",
192
                            cn)
193

194
        assert_df("""
195
196
197
seriename table_name
0   ts_test    ts_test
1  ts_mixte   ts_mixte
198
""".format(tsh.namespace), allts)
199

200
        assert_df("""
201
202
203
204
205
206
207
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
208
""", tsh.get(cn, 'ts_mixte',
209
             revision_date=datetime.now()))
210
211


212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
def test_serie_metadata(engine, tsh):
    serie = genserie(datetime(2010, 1, 1), 'D', 1, initval=[1])
    tsh.insert(engine, serie, 'ts-metadata', 'babar')

    initialmeta = tsh.metadata(engine, 'ts-metadata')
    assert initialmeta == {
        'tzaware': False,
        'index_type': 'datetime64[ns]',
        'value_type': 'float64',
        'index_names': []
    }

    tsh.update_metadata(engine, 'ts-metadata',
                        {'topic': 'banana spot price'}
    )
    assert tsh.metadata(engine, 'ts-metadata')['topic'] == 'banana spot price'

    with pytest.raises(AssertionError):
        tsh.update_metadata(engine, 'ts-metadata', {'tzaware': True})

    tsh.update_metadata(engine, 'ts-metadata', {'tzaware': True}, internal=True)
    assert tsh.metadata(engine, 'ts-metadata') == {
        'tzaware': True,
        'index_type': 'datetime64[ns]',
        'value_type': 'float64',
        'index_names': [],
        'topic': 'banana spot price'
    }
    # unbreak the serie for the second test pass :o
    tsh.update_metadata(engine, 'ts-metadata', initialmeta, internal=True)


244
245
def test_changeset_metadata(engine, tsh):
    serie = genserie(datetime(2010, 1, 1), 'D', 1, initval=[1])
246
    tsh.insert(engine, serie, 'ts-cs-metadata', 'babar',
247
248
               {'foo': 'A', 'bar': 42})

249
    log = tsh.log(engine, names=['ts-cs-metadata'])
250
251
252
253
    meta = tsh.changeset_metadata(engine, log[0]['rev'])
    assert meta == {'foo': 'A', 'bar': 42}


254
def test_bad_import(engine, tsh):
255
    # the data were parsed as date by pd.read_json()
Aurélien Campéas's avatar
Aurélien Campéas committed
256
    df_result = pd.read_csv(str(DATADIR / 'test_data.csv'))
257
258
259
    df_result['Gas Day'] = df_result['Gas Day'].apply(parser.parse, dayfirst=True, yearfirst=False)
    df_result.set_index('Gas Day', inplace=True)
    ts = df_result['SC']
260

261
262
    tsh.insert(engine, ts, 'SND_SC', 'test')
    result = tsh.get(engine, 'SND_SC')
263
    assert result.dtype == 'float64'
264
265
266

    # insertion of empty ts
    ts = pd.Series(name='truc', dtype='object')
267
268
    tsh.insert(engine, ts, 'empty_ts', 'test')
    assert tsh.get(engine, 'empty_ts') is None
269
270
271

    # nan in ts
    # all na
272
    ts = genserie(datetime(2010, 1, 10), 'D', 10, [np.nan], name='truc')
273
    tsh.insert(engine, ts, 'test_nan', 'test')
274
275
    assert len(tsh.get(engine, 'test_nan')) == 0
    assert len(tsh.get(engine, 'test_nan', _keep_nans=True)) == 10
276
277
278
279
280

    # mixe na
    ts = pd.Series([np.nan] * 5 + [3] * 5,
                   index=pd.date_range(start=datetime(2010, 1, 10),
                                       freq='D', periods=10), name='truc')
281
282
    tsh.insert(engine, ts, 'test_nan', 'test')
    result = tsh.get(engine, 'test_nan')
283

284
285
    tsh.insert(engine, ts, 'test_nan', 'test')
    result = tsh.get(engine, 'test_nan')
286
    assert_df("""
287
288
289
290
291
2010-01-15    3.0
2010-01-16    3.0
2010-01-17    3.0
2010-01-18    3.0
2010-01-19    3.0
292
""", result)
293
294

    # get_ts with name not in database
295
    assert tsh.get(engine, 'inexisting_name', 'test') is None
296
297


298
def test_revision_date(engine, tsh):
299
300
    for i in range(1, 5):
        with engine.connect() as cn:
301
302
            tsh.insert(cn, genserie(datetime(2017, 1, i), 'D', 3, [i]), 'revdate',
                       'test', _insertion_date=utcdt(2016, 1, i))
303
304

    # end of prologue, now some real meat
305
    idate0 = pd.Timestamp('2015-1-1 00:00:00', tz='UTC')
306
307
308
309
    ts = genserie(datetime(2010, 1, 4), 'D', 4, [0], name='truc')
    tsh.insert(engine, ts, 'ts_through_time',
               'test', _insertion_date=idate0)
    assert idate0 == tsh.latest_insertion_date(engine, 'ts_through_time')
310

311
    idate1 = pd.Timestamp('2015-1-1 15:45:23', tz='UTC')
312
313
314
315
    ts = genserie(datetime(2010, 1, 4), 'D', 4, [1], name='truc')
    tsh.insert(engine, ts, 'ts_through_time',
               'test', _insertion_date=idate1)
    assert idate1 == tsh.latest_insertion_date(engine, 'ts_through_time')
316

317
    idate2 = pd.Timestamp('2015-1-2 15:43:23', tz='UTC')
318
319
320
321
    ts = genserie(datetime(2010, 1, 4), 'D', 4, [2], name='truc')
    tsh.insert(engine, ts, 'ts_through_time',
               'test', _insertion_date=idate2)
    assert idate2 == tsh.latest_insertion_date(engine, 'ts_through_time')
322

323
    idate3 = pd.Timestamp('2015-1-3', tz='UTC')
324
325
326
327
    ts = genserie(datetime(2010, 1, 4), 'D', 4, [3], name='truc')
    tsh.insert(engine, ts, 'ts_through_time',
               'test', _insertion_date=idate3)
    assert idate3 == tsh.latest_insertion_date(engine, 'ts_through_time')
328

329
    ts = tsh.get(engine, 'ts_through_time')
330

331
    assert_df("""
332
333
334
335
2010-01-04    3.0
2010-01-05    3.0
2010-01-06    3.0
2010-01-07    3.0
336
""", ts)
337

338
    ts = tsh.get(engine, 'ts_through_time',
Aurélien Campéas's avatar
Aurélien Campéas committed
339
                 revision_date=datetime(2015, 1, 2, 18, 43, 23))
340

341
    assert_df("""
342
343
344
345
2010-01-04    2.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    2.0
346
""", ts)
347

348
    ts = tsh.get(engine, 'ts_through_time',
349
                 revision_date=datetime(2015, 1, 1, 18, 43, 23))
350

351
    assert_df("""
352
353
354
355
2010-01-04    1.0
2010-01-05    1.0
2010-01-06    1.0
2010-01-07    1.0
356
""", ts)
357

358
    ts = tsh.get(engine, 'ts_through_time',
359
                 revision_date=datetime(2014, 1, 1, 18, 43, 23))
360
361
362

    assert ts is None

363
364
365
366
367
368
369
370
371
372
    # epilogue: back to the revdate issue
    assert_df("""
2017-01-01    1.0
2017-01-02    2.0
2017-01-03    3.0
2017-01-04    4.0
2017-01-05    4.0
2017-01-06    4.0
""", tsh.get(engine, 'revdate'))

373
    oldstate = tsh.get(engine, 'revdate', revision_date=datetime(2016, 1, 2))
374
375
376
    assert_df("""
2017-01-01    1.0
2017-01-02    2.0
377
378
379
2017-01-03    2.0
2017-01-04    2.0
""", oldstate)
380

381

382
def test_point_deletion(engine, tsh):
383
384
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 11)
    ts_begin.iloc[-1] = np.nan
385
    tsh.insert(engine, ts_begin, 'ts_del', 'test')
386

387
    _, ts = Snapshot(engine, tsh, 'ts_del').find()
388
    assert ts.iloc[-2] == 9.0
389

390
    ts_begin.iloc[0] = np.nan
391
    ts_begin.iloc[3] = np.nan
392

393
    tsh.insert(engine, ts_begin, 'ts_del', 'test')
394

395
    assert_df("""
396
397
398
399
400
401
402
2010-01-02    1.0
2010-01-03    2.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
403
2010-01-10    9.0
404
""", tsh.get(engine, 'ts_del'))
405

406
    ts2 = tsh.get(engine, 'ts_del',
407
408
                 # force snapshot reconstruction feature
                 revision_date=datetime(2038, 1, 1))
409
    assert (tsh.get(engine, 'ts_del') == ts2).all()
410

411
412
413
    ts_begin.iloc[0] = 42
    ts_begin.iloc[3] = 23

414
    tsh.insert(engine, ts_begin, 'ts_del', 'test')
415

416
    assert_df("""
417
418
419
420
421
422
423
424
425
2010-01-01    42.0
2010-01-02     1.0
2010-01-03     2.0
2010-01-04    23.0
2010-01-05     4.0
2010-01-06     5.0
2010-01-07     6.0
2010-01-08     7.0
2010-01-09     8.0
426
2010-01-10     9.0
427
""", tsh.get(engine, 'ts_del'))
428
429
430

    # now with string!

431
    ts_string = genserie(datetime(2010, 1, 1), 'D', 10, ['machin'])
432
    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
433
434
435
436

    ts_string[4] = None
    ts_string[5] = None

437
    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
438
    assert_df("""
439
440
441
442
443
444
445
446
2010-01-01    machin
2010-01-02    machin
2010-01-03    machin
2010-01-04    machin
2010-01-07    machin
2010-01-08    machin
2010-01-09    machin
2010-01-10    machin
447
""", tsh.get(engine, 'ts_string_del'))
448
449
450
451

    ts_string[4] = 'truc'
    ts_string[6] = 'truc'

452
    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
453
    assert_df("""
454
455
456
457
458
459
460
461
2010-01-01    machin
2010-01-02    machin
2010-01-03    machin
2010-01-04    machin
2010-01-05      truc
2010-01-07      truc
2010-01-08    machin
2010-01-09    machin
462
2010-01-10    machin
463
""", tsh.get(engine, 'ts_string_del'))
464

465
    ts_string[ts_string.index] = np.nan
466
    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
467

468
    erased = tsh.get(engine, 'ts_string_del')
469
470
    assert len(erased) == 0

471
472
    # first insertion with only nan

473
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 10, [np.nan])
474
    tsh.insert(engine, ts_begin, 'ts_null', 'test')
475

476
    assert len(tsh.get(engine, 'ts_null')) == 0
477

478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
    # exhibit issue with nans handling
    ts_repushed = genserie(datetime(2010, 1, 1), 'D', 11)
    ts_repushed[0:3] = np.nan

    assert_df("""
2010-01-01     NaN
2010-01-02     NaN
2010-01-03     NaN
2010-01-04     3.0
2010-01-05     4.0
2010-01-06     5.0
2010-01-07     6.0
2010-01-08     7.0
2010-01-09     8.0
2010-01-10     9.0
2010-01-11    10.0
Freq: D
""", ts_repushed)

497
498
    tsh.insert(engine, ts_repushed, 'ts_repushed', 'test')
    diff = tsh.insert(engine, ts_repushed, 'ts_repushed', 'test')
499
500
    assert diff is None

501
    # there is no difference
502
    assert 0 == len(tsh.diff(ts_repushed, ts_repushed))
503
504
505
506
507

    ts_add = genserie(datetime(2010, 1, 1), 'D', 15)
    ts_add.iloc[0] = np.nan
    ts_add.iloc[13:] = np.nan
    ts_add.iloc[8] = np.nan
508
    diff = tsh.diff(ts_repushed, ts_add)
509
510
511
512
513
514

    assert_df("""
2010-01-02     1.0
2010-01-03     2.0
2010-01-09     NaN
2010-01-12    11.0
515
2010-01-13    12.0""", diff.sort_index())
516
517
518
519
    # value on nan => value
    # nan on value => nan
    # nan on nan => Nothing
    # nan on nothing=> Nothing
520

Aurélien Campéas's avatar
Aurélien Campéas committed
521
    # full erasing
522
523
    # numeric
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 4)
524
    tsh.insert(engine, ts_begin, 'ts_full_del', 'test')
525

Aurélien Campéas's avatar
Aurélien Campéas committed
526
    ts_begin.iloc[:] = np.nan
527
    tsh.insert(engine, ts_begin, 'ts_full_del', 'test')
528
529

    ts_end = genserie(datetime(2010, 1, 1), 'D', 4)
530
    tsh.insert(engine, ts_end, 'ts_full_del', 'test')
531
532
533
534

    # string

    ts_begin = genserie(datetime(2010, 1, 1), 'D', 4, ['text'])
535
    tsh.insert(engine, ts_begin, 'ts_full_del_str', 'test')
536
537

    ts_begin.iloc[:] = np.nan
538
    tsh.insert(engine, ts_begin, 'ts_full_del_str', 'test')
539
540

    ts_end = genserie(datetime(2010, 1, 1), 'D', 4, ['text'])
541
    tsh.insert(engine, ts_end, 'ts_full_del_str', 'test')
542

Aurélien Campéas's avatar
Aurélien Campéas committed
543

544
def test_get_history(engine, tsh):
545
546
    for numserie in (1, 2, 3):
        with engine.connect() as cn:
547
548
549
            tsh.insert(cn, genserie(datetime(2017, 1, 1), 'D', numserie), 'smallserie',
                       'aurelien.campeas@pythonian.fr',
                       _insertion_date=utcdt(2017, 2, numserie))
550
551
552
553
554
555
556
557
558
559
560

    ts = tsh.get(engine, 'smallserie')
    assert_df("""
2017-01-01    0.0
2017-01-02    1.0
2017-01-03    2.0
""", ts)

    logs = tsh.log(engine, names=['smallserie'])
    assert [
        {'author': 'aurelien.campeas@pythonian.fr',
561
         'meta': {},
562
         'date': pd.Timestamp('2017-02-01 00:00:00+0000', tz='UTC'),
563
564
565
         'names': ['smallserie']
        },
        {'author': 'aurelien.campeas@pythonian.fr',
566
         'meta': {},
567
         'date': pd.Timestamp('2017-02-02 00:00:00+0000', tz='UTC'),
568
569
570
         'names': ['smallserie']
        },
        {'author': 'aurelien.campeas@pythonian.fr',
571
         'meta': {},
572
         'date': pd.Timestamp('2017-02-03 00:00:00+0000', tz='UTC'),
573
574
575
576
577
578
         'names': ['smallserie']
        }
    ] == [{k: v for k, v in log.items() if k != 'rev'}
          for log in logs]
    histts = tsh.get_history(engine, 'smallserie')

579
    assert_hist("""
580
581
582
583
584
585
586
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
                           2017-01-03    2.0
587
588
""", histts)

589
    diffs = tsh.get_history(engine, 'smallserie', diffmode=True)
590
    assert_hist("""
591
592
593
594
595
596
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-03    2.0
""", diffs)

597
    for idate in histts:
598
        with engine.connect() as cn:
599
            idate = idate.replace(tzinfo=pytz.timezone('UTC'))
600
601
            tsh.insert(cn, histts[idate], 'smallserie2',
                       'aurelien.campeas@pythonian.f', _insertion_date=idate)
602
603
604

    # this is perfectly round-tripable
    assert (tsh.get(engine, 'smallserie2') == ts).all()
605
    assert_hist_equals(tsh.get_history(engine, 'smallserie2'), histts)
606
607
608
609

    # get history ranges
    tsa = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2017, 2, 2))
610
    assert_hist("""
611
612
613
614
615
616
insertion_date             value_date
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
                           2017-01-03    2.0
617
618
619
620
""", tsa)

    tsb = tsh.get_history(engine, 'smallserie',
                          to_insertion_date=datetime(2017, 2, 2))
621
    assert_hist("""
622
623
624
625
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
626
627
628
629
630
""", tsb)

    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2017, 2, 2),
                          to_insertion_date=datetime(2017, 2, 2))
631
    assert_hist("""
632
633
634
insertion_date             value_date
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
635
636
""", tsc)

637
638
639
640
641
642
643
644
    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2017, 2, 4),
                          to_insertion_date=datetime(2017, 2, 4))
    assert tsc is None

    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2016, 2, 1),
                          to_insertion_date=datetime(2017, 2, 2))
645
    assert_hist("""
646
647
648
649
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
650
651
652
653
654
655
656
""", tsc)

    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2016, 2, 1),
                          to_insertion_date=datetime(2016, 12, 31))
    assert tsc is None

657
658
659
660
    # restrictions on value dates
    tsc = tsh.get_history(engine, 'smallserie',
                          from_value_date=datetime(2017, 1, 1),
                          to_value_date=datetime(2017, 1, 2))
661
    assert_hist("""
662
663
664
665
666
667
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
668
669
670
671
""", tsc)

    tsc = tsh.get_history(engine, 'smallserie',
                          from_value_date=datetime(2017, 1, 2))
672
    assert_hist("""
673
674
675
676
insertion_date             value_date
2017-02-02 00:00:00+00:00  2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-02    1.0
                           2017-01-03    2.0
677
678
679
680
""", tsc)

    tsc = tsh.get_history(engine, 'smallserie',
                          to_value_date=datetime(2017, 1, 2))
681
    assert_hist("""
682
683
684
685
686
687
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
688
689
""", tsc)

690

691
692
693
694
695
696
697
def test_history_delta(engine, tsh):
    for d in range(1, 3):
        idate = utcdt(2018, 1, d)
        serie = genserie(idate - timedelta(hours=1), 'H', 6, initval=[d])
        tsh.insert(engine, serie, 'hd', 'aurelien.campeas@pythonian.fr',
                   _insertion_date=idate)

698
    assert_hist("""
699
insertion_date             value_date               
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
2018-01-01 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
                           2018-01-01 03:00:00+00:00    1.0
                           2018-01-01 04:00:00+00:00    1.0
2018-01-02 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
                           2018-01-01 03:00:00+00:00    1.0
                           2018-01-01 04:00:00+00:00    1.0
                           2018-01-01 23:00:00+00:00    2.0
                           2018-01-02 00:00:00+00:00    2.0
                           2018-01-02 01:00:00+00:00    2.0
                           2018-01-02 02:00:00+00:00    2.0
                           2018-01-02 03:00:00+00:00    2.0
                           2018-01-02 04:00:00+00:00    2.0
    """, tsh.get_history(engine, 'hd'))

720
    assert_hist("""
721
insertion_date             value_date               
722
723
2018-01-01 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
724
725
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
726
727
728
729
730
731
732
733
2018-01-02 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
                           2018-01-01 03:00:00+00:00    1.0
                           2018-01-01 04:00:00+00:00    1.0
                           2018-01-01 23:00:00+00:00    2.0
                           2018-01-02 00:00:00+00:00    2.0
734
735
736
737
                           2018-01-02 01:00:00+00:00    2.0
                           2018-01-02 02:00:00+00:00    2.0
""",  tsh.get_history(engine, 'hd', deltaafter=timedelta(hours=2)))

738
    assert_hist("""
739
insertion_date             value_date               
740
2018-01-01 00:00:00+00:00  2018-01-01 00:00:00+00:00    1.0
741
                           2018-01-01 01:00:00+00:00    1.0
742
2018-01-02 00:00:00+00:00  2018-01-02 00:00:00+00:00    2.0
743
744
                           2018-01-02 01:00:00+00:00    2.0
""",  tsh.get_history(engine, 'hd',
745
                      deltabefore=timedelta(hours=0),
746
747
748
                      deltaafter=timedelta(hours=1)))


749
750
751
752
753
754
755
756
757
758
759
def test_nr_gethistory(engine, tsh):
    s0 = pd.Series([-1, 0, 0, -1],
                   index=pd.DatetimeIndex(start=datetime(2016, 12, 29),
                                          end=datetime(2017, 1, 1),
                                          freq='D'))
    tsh.insert(engine, s0, 'foo', 'zogzog')

    s1 = pd.Series([1, 0, 0, 1],
                   index=pd.DatetimeIndex(start=datetime(2017, 1, 1),
                                          end=datetime(2017, 1, 4),
                                          freq='D'))
760
    idate = utcdt(2016, 1, 1)
761
762
    for i in range(5):
        with engine.connect() as cn:
763
764
765
            tsh.insert(cn, s1 * i, 'foo',
                       'aurelien.campeas@pythonian.f',
                       _insertion_date=idate + timedelta(days=i))
766
767
768
769
770
771
772

    df = tsh.get_history(engine, 'foo',
                         datetime(2016, 1, 3),
                         datetime(2016, 1, 4),
                         datetime(2017, 1, 1),
                         datetime(2017, 1, 4))

773
    assert_hist("""
774
775
776
777
778
779
780
781
782
insertion_date             value_date
2016-01-03 00:00:00+00:00  2017-01-01    2.0
                           2017-01-02    0.0
                           2017-01-03    0.0
                           2017-01-04    2.0
2016-01-04 00:00:00+00:00  2017-01-01    3.0
                           2017-01-02    0.0
                           2017-01-03    0.0
                           2017-01-04    3.0
783
784
785
""", df)


786
def test_add_na(engine, tsh):
787
788
789
790
791
    # a serie of NaNs won't be insert in base
    # in case of first insertion
    ts_nan = genserie(datetime(2010, 1, 1), 'D', 5)
    ts_nan[[True] * len(ts_nan)] = np.nan

792
    diff = tsh.insert(engine, ts_nan, 'ts_add_na', 'test')
793
    assert len(diff) == 5
794
    result = tsh.get(engine, 'ts_add_na')
795
796
797
798
799
800
801
802
803
804
    assert len(result) == 0

    result = tsh.get(engine, 'ts_add_na', _keep_nans=True)
    assert_df("""
2010-01-01   NaN
2010-01-02   NaN
2010-01-03   NaN
2010-01-04   NaN
2010-01-05   NaN
""", result)
805
806
807

    # in case of insertion in existing data
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 5)
808
    tsh.insert(engine, ts_begin, 'ts_add_na', 'test')
809
810
811
812
813

    ts_nan = genserie(datetime(2010, 1, 6), 'D', 5)
    ts_nan[[True] * len(ts_nan)] = np.nan
    ts_nan = pd.concat([ts_begin, ts_nan])

814
    diff = tsh.insert(engine, ts_nan, 'ts_add_na', 'test')
815
816
    assert diff is None

817
    result = tsh.get(engine, 'ts_add_na')
818
    assert len(result) == 5
819
820


821
def test_dtype_mismatch(engine, tsh):
822
    tsh.insert(engine,
823
824
825
826
827
               genserie(datetime(2015, 1, 1), 'D', 11).astype('str'),
               'error1',
               'test')

    with pytest.raises(Exception) as excinfo:
828
        tsh.insert(engine,
829
830
831
832
833
834
                   genserie(datetime(2015, 1, 1), 'D', 11),
                   'error1',
                   'test')

    assert 'Type error when inserting error1, new type is float64, type in base is object' == str(excinfo.value)

835
    tsh.insert(engine,
836
837
838
839
840
               genserie(datetime(2015, 1, 1), 'D', 11),
               'error2',
               'test')

    with pytest.raises(Exception) as excinfo:
841
        tsh.insert(engine,
842
843
844
845
846
                   genserie(datetime(2015, 1, 1), 'D', 11).astype('str'),
                   'error2',
                   'test')

    assert 'Type error when inserting error2, new type is object, type in base is float64' == str(excinfo.value)
847
848


849
850
851
852
853
854
855
856
857
def test_precision(engine, tsh):
    floaty = 0.123456789123456789
    ts = genserie(datetime(2015, 1, 1), 'D', 5, initval=[floaty])

    tsh.insert(engine, ts, 'precision', 'test')
    ts_round = tsh.get(engine, 'precision')
    assert 0.12345678912346 == ts_round.iloc[0]

    diff = tsh.insert(engine, ts_round, 'precision', 'test')
Aurélien Campéas's avatar
Aurélien Campéas committed
858
    assert diff is None  # the roundtriped series does not produce a diff when reinserted
859

Aurélien Campéas's avatar
Aurélien Campéas committed
860
    diff = tsh.insert(engine, ts, 'precision', 'test')  # neither does the original series
861
862
863
    assert diff is None


864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
def test_serie_deletion(engine, tsh):
    ts = genserie(datetime(2018, 1, 10), 'H', 10)
    tsh.insert(engine, ts, 'keepme', 'Babar')
    tsh.insert(engine, ts, 'deleteme', 'Celeste')
    ts = genserie(datetime(2018, 1, 12), 'H', 10)
    tsh.insert(engine, ts, 'keepme', 'Babar')
    tsh.insert(engine, ts, 'deleteme', 'Celeste')

    seriecount = engine.execute(
        'select count(*) from {}.registry'.format(tsh.namespace)
    ).scalar()
    csetcount = engine.execute(
        'select count(*) from {}.changeset'.format(tsh.namespace)
    ).scalar()
    csetcount2 = engine.execute(
        'select count(*) from {}.changeset_series'.format(tsh.namespace)
    ).scalar()
    assert csetcount == csetcount2

    with engine.connect() as cn:
        tsh.delete(cn, 'deleteme')

    assert not tsh.exists(engine, 'deleteme')
    log = [entry['author']
           for entry in tsh.log(engine, names=('keepme', 'deleteme'))]
    assert log == ['Babar', 'Babar']

    csetcount3 = engine.execute(
        'select count(*) from {}.changeset'.format(tsh.namespace)
    ).scalar()
    csetcount4 = engine.execute(
        'select count(*) from {}.changeset_series'.format(tsh.namespace)
    ).scalar()
    seriecount2 = engine.execute(
        'select count (*) from {}.registry'.format(tsh.namespace)
    ).scalar()

    assert csetcount - csetcount3  == 2
    assert csetcount2 - csetcount4 == 2
    assert seriecount - seriecount2 == 1

    with pytest.raises(AssertionError) as werr:
        tsh.delete(engine, 'keepme')
    assert werr.value.args[0] == 'use a transaction object'

909
    tsh.insert(engine, ts, 'deleteme', 'Celeste')
910

911

912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
def test_strip(engine, tsh):
    for i in range(1, 5):
        pubdate = utcdt(2017, 1, i)
        ts = genserie(datetime(2017, 1, 10), 'H', 1 + i)
        tsh.insert(engine, ts, 'xserie', 'babar', _insertion_date=pubdate)
        # also insert something completely unrelated
        tsh.insert(engine, genserie(datetime(2018, 1, 1), 'D', 1 + i), 'yserie', 'celeste')

    csida = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3))
    assert csida is not None
    csidb = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3, 1), mode='before')
    csidc = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3, 1), mode='after')
    assert csidb < csida < csidc

    log = tsh.log(engine, names=['xserie', 'yserie'])
    assert [(idx, l['author']) for idx, l in enumerate(log, start=1)
    ] == [
        (1, 'babar'),
        (2, 'celeste'),
        (3, 'babar'),
        (4, 'celeste'),
        (5, 'babar'),
        (6, 'celeste'),
        (7, 'babar'),
        (8, 'celeste')
    ]

    h = tsh.get_history(engine, 'xserie')
940
    assert_hist("""
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
insertion_date             value_date         
2017-01-01 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
2017-01-02 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
                           2017-01-10 02:00:00    2.0
2017-01-03 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
                           2017-01-10 02:00:00    2.0
                           2017-01-10 03:00:00    3.0
2017-01-04 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
                           2017-01-10 02:00:00    2.0
                           2017-01-10 03:00:00    3.0
                           2017-01-10 04:00:00    4.0
""", h)

    csid = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3))
    with engine.connect() as cn:
        tsh.strip(cn, 'xserie', csid)

962
    assert_hist("""
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
insertion_date             value_date         
2017-01-01 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
2017-01-02 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
                           2017-01-10 02:00:00    2.0
""", tsh.get_history(engine, 'xserie'))

    assert_df("""
2017-01-10 00:00:00    0.0
2017-01-10 01:00:00    1.0
2017-01-10 02:00:00    2.0
""", tsh.get(engine, 'xserie'))

    log = tsh.log(engine, names=['xserie', 'yserie'])
    # 5 and 7 have disappeared
    assert [l['author'] for l in log
    ] == ['babar', 'celeste', 'babar', 'celeste', 'celeste', 'celeste']

    log = tsh.log(engine, stripped=True, names=['xserie', 'yserie'])
983
984
985
986
987
988
    for l in log:
        if l['meta']:
            meta = l['meta']
            stripinfo = meta.get('tshistory.info')
            if stripinfo:
                assert stripinfo.startswith('got stripped from')
989
990


991
992
993
def test_long_name(engine, tsh):
    serie = genserie(datetime(2010, 1, 1), 'D', 40)

994
995
996
    name = 'a' * 64
    tsh.insert(engine, serie, name, 'babar')
    assert tsh.get(engine, name) is not None
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007


def test_get_delta(engine, tsh):
    for idate in pd.DatetimeIndex(start=utcdt(2015, 1, 1),
                                  end=utcdt(2015, 1, 1, 3),
                                  freq='H'):
        ts = genserie(start=idate, freq='H', repeat=7)
        tsh.insert(engine, ts, 'republication', 'test',
                   _insertion_date=idate)

    hist = tsh.get_history(engine, 'republication')
1008
    assert_hist("""
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
insertion_date             value_date               
2015-01-01 00:00:00+00:00  2015-01-01 00:00:00+00:00    0.0
                           2015-01-01 01:00:00+00:00    1.0
                           2015-01-01 02:00:00+00:00    2.0
                           2015-01-01 03:00:00+00:00    3.0
                           2015-01-01 04:00:00+00:00    4.0
                           2015-01-01 05:00:00+00:00    5.0
                           2015-01-01 06:00:00+00:00    6.0
2015-01-01 01:00:00+00:00  2015-01-01 00:00:00+00:00    0.0
                           2015-01-01 01:00:00+00:00    0.0
                           2015-01-01 02:00:00+00:00    1.0
                           2015-01-01 03:00:00+00:00    2.0
                           2015-01-01 04:00:00+00:00    3.0
                           2015-01-01 05:00:00+00:00    4.0
                           2015-01-01 06:00:00+00:00    5.0
                           2015-01-01 07:00:00+00:00    6.0
2015-01-01 02:00:00+00:00  2015-01-01 00:00:00+00:00    0.0
                           2015-01-01 01:00:00+00:00    0.0
                           2015-01-01 02:00:00+00:00    0.0
                           2015-01-01 03:00:00+00:00    1.0
                           2015-01-01 04:00:00+00:00    2.0
                           2015-01-01 05:00:00+00:00    3.0
                           2015-01-01 06:00:00+00:00    4.0
                           2015-01-01 07:00:00+00:00    5.0
                           2015-01-01 08:00:00+00:00    6.0
2015-01-01 03:00:00+00:00  2015-01-01 00:00:00+00:00    0.0
                           2015-01-01 01:00:00+00:00    0.0
                           2015-01-01 02:00:00+00:00    0.0
                           2015-01-01 03:00:00+00:00    0.0
                           2015-01-01 04:00:00+00:00    1.0
                           2015-01-01 05:00:00+00:00    2.0
                           2015-01-01 06:00:00+00:00    3.0
                           2015-01-01 07:00:00+00:00    4.0
                           2015-01-01 08:00:00+00:00    5.0
                           2015-01-01 09:00:00+00:00    6.0
""", hist)

    deltas = tsh.get_delta(engine,  'republication', delta=timedelta(hours=3))

    assert_df("""
2015-01-01 03:00:00+00:00    3.0
2015-01-01 04:00:00+00:00    3.0
2015-01-01 05:00:00+00:00    3.0
2015-01-01 06:00:00+00:00    3.0
2015-01-01 07:00:00+00:00    4.0
2015-01-01 08:00:00+00:00    5.0
2015-01-01 09:00:00+00:00    6.0
""", deltas)

    deltas = tsh.get_delta(engine,  'republication', delta=timedelta(hours=5))
    assert_df("""
2015-01-01 05:00:00+00:00    5.0
2015-01-01 06:00:00+00:00    5.0
2015-01-01 07:00:00+00:00    5.0
2015-01-01 08:00:00+00:00    5.0
2015-01-01 09:00:00+00:00    6.0
""", deltas)

    hist = tsh.get_history(engine, 'republication',
                           deltabefore=-timedelta(hours=3),
                           deltaafter=timedelta(hours=3))
1070
    assert_hist("""
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
insertion_date             value_date               
2015-01-01 00:00:00+00:00  2015-01-01 03:00:00+00:00    3.0
2015-01-01 01:00:00+00:00  2015-01-01 04:00:00+00:00    3.0
2015-01-01 02:00:00+00:00  2015-01-01 05:00:00+00:00    3.0
2015-01-01 03:00:00+00:00  2015-01-01 06:00:00+00:00    3.0
""", hist)

    hist = tsh.get_history(engine, 'republication',
                           deltabefore=-timedelta(hours=5),
                           deltaafter=timedelta(hours=5))

1082
    assert_hist("""
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
insertion_date             value_date               
2015-01-01 00:00:00+00:00  2015-01-01 05:00:00+00:00    5.0
2015-01-01 01:00:00+00:00  2015-01-01 06:00:00+00:00    5.0
2015-01-01 02:00:00+00:00  2015-01-01 07:00:00+00:00    5.0
2015-01-01 03:00:00+00:00  2015-01-01 08:00:00+00:00    5.0
""", hist)


    # maybe a more interesting example, each days we insert 7 data points
    for idx, idate in enumerate(pd.DatetimeIndex(start=utcdt(2015, 1, 1),
                                                 end=utcdt(2015, 1, 4),
                                                 freq='D')):
        ts = genserie(start=idate, freq='H', repeat=7)
        tsh.insert(engine, ts, 'repu2', 'test', _insertion_date=idate)

    deltas = tsh.get_delta(engine, 'repu2', delta=timedelta(hours=3))
    assert_df("""
2015-01-01 03:00:00+00:00    3.0
2015-01-01 04:00:00+00:00    4.0
2015-01-01 05:00:00+00:00    5.0
2015-01-01 06:00:00+00:00    6.0
2015-01-02 03:00:00+00:00    3.0
2015-01-02 04:00:00+00:00    4.0
2015-01-02 05:00:00+00:00    5.0
2015-01-02 06:00:00+00:00    6.0
2015-01-03 03:00:00+00:00    3.0
2015-01-03 04:00:00+00:00    4.0
2015-01-03 05:00:00+00:00    5.0
2015-01-03 06:00:00+00:00    6.0
2015-01-04 03:00:00+00:00    3.0
2015-01-04 04:00:00+00:00    4.0
2015-01-04 05:00:00+00:00    5.0
2015-01-04 06:00:00+00:00    6.0
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
""", deltas)

    deltas = tsh.get_delta(engine, 'repu2', delta=timedelta(hours=3),
                           from_value_date=datetime(2015,1,2),
                           to_value_date=datetime(2015, 1,3))
    assert_df("""
2015-01-02 03:00:00+00:00    3.0
2015-01-02 04:00:00+00:00    4.0
2015-01-02 05:00:00+00:00    5.0
2015-01-02 06:00:00+00:00    6.0
1126
1127
1128
1129
1130
""", deltas)

    # which is basically the same as below
    hist = tsh.get_history(engine, 'repu2',
                           deltabefore=-timedelta(hours=3))
1131
    assert_hist("""
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
insertion_date             value_date               
2015-01-01 00:00:00+00:00  2015-01-01 03:00:00+00:00    3.0
                           2015-01-01 04:00:00+00:00    4.0
                           2015-01-01 05:00:00+00:00    5.0