test_tsio.py 41.7 KB
Newer Older
1
from datetime import datetime, timedelta
2
3
from pathlib import Path
import pytz
4

5
from dateutil import parser
6
import pytest
7
8
import numpy as np
import pandas as pd
9

10
from tshistory.snapshot import Snapshot
11
from tshistory.util import rename_series
12
13
from tshistory.testutil import (
    assert_df,
14
15
    assert_hist,
    assert_hist_equals,
16
    assert_group_equals,
17
    assert_structures,
18
19
20
    genserie,
    tempattr
)
21

22
DATADIR = Path(__file__).parent / 'data'
23

Aurélien Campéas's avatar
Aurélien Campéas committed
24

25
26
27
28
def utcdt(*dt):
    """Return a UTC-localized ``pd.Timestamp`` built from ``datetime(*dt)``.

    The positional arguments are forwarded unchanged to the ``datetime``
    constructor (year, month, day, ...).
    """
    naive = datetime(*dt)
    return pd.Timestamp(naive, tz='UTC')


29
def test_tstamp_roundtrip(engine, tsh):
    """Insert tz-aware series spanning the Paris DST change and read them back.

    The series are generated in UTC, converted to Europe/Paris before
    insertion, and the test checks that `get`, `get_history` and `interval`
    all report UTC timestamps.
    """
    assert_structures(engine, tsh)
    paris_ts = genserie(datetime(2017, 10, 28, 23),
                        'H', 4, tz='UTC')
    paris_ts.index = paris_ts.index.tz_convert('Europe/Paris')

    # 02:00 appears twice: once at +02:00 and once at +01:00
    assert_df("""
2017-10-29 01:00:00+02:00    0
2017-10-29 02:00:00+02:00    1
2017-10-29 02:00:00+01:00    2
2017-10-29 03:00:00+01:00    3
Freq: H
    """, paris_ts)

    tsh.insert(engine, paris_ts, 'tztest', 'Babar',
               _insertion_date=utcdt(2018, 1, 1))
    back = tsh.get(engine, 'tztest')

    # though un localized we understand it's been normalized to utc
    assert_df("""
2017-10-28 23:00:00+00:00    0.0
2017-10-29 00:00:00+00:00    1.0
2017-10-29 01:00:00+00:00    2.0
2017-10-29 02:00:00+00:00    3.0
""", back)

    assert (paris_ts.index == back.index).all()
    assert str(back.index.dtype) == 'datetime64[ns, UTC]'

    ival = tsh.interval(engine, 'tztest')
    assert ival.left == pd.Timestamp('2017-10-28 23:00:00+0000', tz='UTC')
    assert ival.right == pd.Timestamp('2017-10-29 02:00:00+0000', tz='UTC')

    # second revision: overlaps the last two points and extends by two hours
    update_ts = genserie(datetime(2017, 10, 29, 1),
                         'H', 4, tz='UTC')
    update_ts.index = update_ts.index.tz_convert('Europe/Paris')
    tsh.insert(engine, update_ts, 'tztest', 'Celeste',
               _insertion_date=utcdt(2018, 1, 3))

    merged = tsh.get(engine, 'tztest')
    assert_df("""
2017-10-28 23:00:00+00:00    0.0
2017-10-29 00:00:00+00:00    1.0
2017-10-29 01:00:00+00:00    0.0
2017-10-29 02:00:00+00:00    1.0
2017-10-29 03:00:00+00:00    2.0
2017-10-29 04:00:00+00:00    3.0
""", merged)

    hist = tsh.get_history(engine, 'tztest')
    assert_hist("""
insertion_date             value_date               
2018-01-01 00:00:00+00:00  2017-10-28 23:00:00+00:00    0.0
                           2017-10-29 00:00:00+00:00    1.0
                           2017-10-29 01:00:00+00:00    2.0
                           2017-10-29 02:00:00+00:00    3.0
2018-01-03 00:00:00+00:00  2017-10-28 23:00:00+00:00    0.0
                           2017-10-29 00:00:00+00:00    1.0
                           2017-10-29 01:00:00+00:00    0.0
                           2017-10-29 02:00:00+00:00    1.0
                           2017-10-29 03:00:00+00:00    2.0
                           2017-10-29 04:00:00+00:00    3.0
""", hist)

    hist = tsh.get_history(engine, 'tztest',
                           from_value_date=utcdt(2017, 10, 29, 1),
                           to_value_date=utcdt(2017, 10, 29, 3))
    assert_hist("""
insertion_date             value_date               
2018-01-01 00:00:00+00:00  2017-10-29 01:00:00+00:00    2.0
                           2017-10-29 02:00:00+00:00    3.0
2018-01-03 00:00:00+00:00  2017-10-29 01:00:00+00:00    0.0
                           2017-10-29 02:00:00+00:00    1.0
                           2017-10-29 03:00:00+00:00    2.0
""", hist)

    ival = tsh.interval(engine, 'tztest')
    assert ival.left == pd.Timestamp('2017-10-28 23:00:00+0000', tz='UTC')
    assert ival.right == pd.Timestamp('2017-10-29 04:00:00+0000', tz='UTC')
    assert_structures(engine, tsh)
109

110

111
def test_differential(engine, tsh):
    """Exercise successive insertions on 'ts_test' and 'ts_mixte'.

    Covers: first insertion, re-insertion of identical data, point updates,
    an overlapping longer series, the reported interval, and finally the
    registry content plus a read through an explicit connection.
    """
    assert_structures(engine, tsh)
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 10)
    tsh.insert(engine, ts_begin, 'ts_test', 'test')

    id1 = tsh.last_id(engine, 'ts_test')
    assert tsh._previous_cset(engine, 'ts_test', id1) is None

    assert tsh.exists(engine, 'ts_test')
    assert not tsh.exists(engine, 'this_does_not_exist')

    assert tsh.interval(engine, 'ts_test') == pd.Interval(
        datetime(2010, 1, 1, 0, 0), datetime(2010, 1, 10, 0, 0),
        closed='both'
    )

    # the call itself must raise; asserting on its result would be dead code
    with pytest.raises(ValueError):
        tsh.interval(engine, 'nosuchts')

    assert_df("""
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    3.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
""", tsh.get(engine, 'ts_test'))

    # we should detect the emission of a message
    tsh.insert(engine, ts_begin, 'ts_test', 'babar')

    assert_df("""
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    3.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
""", tsh.get(engine, 'ts_test'))

    ts_slight_variation = ts_begin.copy()
    ts_slight_variation.iloc[3] = 0
    ts_slight_variation.iloc[6] = 0
    tsh.insert(engine, ts_slight_variation, 'ts_test', 'celeste')
    id2 = tsh.last_id(engine, 'ts_test')
    assert tsh._previous_cset(engine, 'ts_test', id2) == id1

    assert_df("""
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    0.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    0.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
""", tsh.get(engine, 'ts_test'))

    ts_longer = genserie(datetime(2010, 1, 3), 'D', 15)
    ts_longer.iloc[1] = 2.48
    ts_longer.iloc[3] = 3.14
    ts_longer.iloc[5] = ts_begin.iloc[7]

    # this insertion goes through an explicit connection rather than the engine
    with engine.connect() as cn:
        tsh.insert(cn, ts_longer, 'ts_test', 'test')
    id3 = tsh.last_id(engine, 'ts_test')

    assert id1 < id2 < id3

    assert_df("""
2010-01-01     0.00
2010-01-02     1.00
2010-01-03     0.00
2010-01-04     2.48
2010-01-05     2.00
2010-01-06     3.14
2010-01-07     4.00
2010-01-08     7.00
2010-01-09     6.00
2010-01-10     7.00
2010-01-11     8.00
2010-01-12     9.00
2010-01-13    10.00
2010-01-14    11.00
2010-01-15    12.00
2010-01-16    13.00
2010-01-17    14.00
""", tsh.get(engine, 'ts_test'))

    assert tsh.interval(engine, 'ts_test') == pd.Interval(
        datetime(2010, 1, 1, 0, 0), datetime(2010, 1, 17, 0, 0),
        closed='both'
    )

    # start testing manual overrides
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 5, initval=[2])
    ts_begin.loc['2010-01-04'] = -1
    tsh.insert(engine, ts_begin, 'ts_mixte', 'test')

    # -1 represents bogus upstream data
    assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
""", tsh.get(engine, 'ts_mixte'))

    # refresh all the period + 1 extra data point
    ts_more = genserie(datetime(2010, 1, 2), 'D', 5, [2])
    ts_more.loc['2010-01-04'] = -1
    tsh.insert(engine, ts_more, 'ts_mixte', 'test')

    assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
""", tsh.get(engine, 'ts_mixte'))

    # just append an extra data point
    # with no intersection with the previous ts
    ts_one_more = genserie(datetime(2010, 1, 7), 'D', 1, [3])
    tsh.insert(engine, ts_one_more, 'ts_mixte', 'test')

    assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
""", tsh.get(engine, 'ts_mixte'))

    with engine.connect() as cn:
        # make the unqualified `registry` table resolve within this namespace
        cn.execute('set search_path to "{0}.timeserie", {0}, public'.format(tsh.namespace))
        allts = pd.read_sql("select seriename, table_name from registry "
                            "where seriename in ('ts_test', 'ts_mixte')",
                            cn)

        # the expected text has no placeholder, so it is compared verbatim
        assert_df("""
seriename table_name
0   ts_test    ts_test
1  ts_mixte   ts_mixte
""", allts)

        assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
""", tsh.get(cn, 'ts_mixte',
             revision_date=datetime.now()))

    assert_structures(engine, tsh)

283

284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
def test_serie_metadata(engine, tsh):
    """Check the metadata of a fresh series and the rules for updating it.

    User updates may add keys such as 'topic'; touching an internal key
    ('tzaware') raises AssertionError unless ``internal=True`` is passed.
    """
    name = 'ts-metadata'
    tsh.insert(
        engine,
        genserie(datetime(2010, 1, 1), 'D', 1, initval=[1]),
        name,
        'babar'
    )

    initialmeta = tsh.metadata(engine, name)
    expected_initial = {
        'tzaware': False,
        'index_type': 'datetime64[ns]',
        'value_type': 'float64',
        'index_names': []
    }
    assert initialmeta == expected_initial

    # plain user metadata goes through without special permission
    tsh.update_metadata(engine, name, {'topic': 'banana spot price'})
    topic = tsh.metadata(engine, name)['topic']
    assert topic == 'banana spot price'

    # internal keys are protected ...
    with pytest.raises(AssertionError):
        tsh.update_metadata(engine, name, {'tzaware': True})

    # ... unless the caller explicitly asks for an internal update
    tsh.update_metadata(engine, name, {'tzaware': True}, internal=True)
    expected_updated = {
        'tzaware': True,
        'index_type': 'datetime64[ns]',
        'value_type': 'float64',
        'index_names': [],
        'topic': 'banana spot price'
    }
    assert tsh.metadata(engine, name) == expected_updated

    # unbreak the serie for the second test pass :o
    tsh.update_metadata(engine, name, initialmeta, internal=True)


316
317
def test_changeset_metadata(engine, tsh):
    """Check that metadata given at insertion time is attached to the changeset."""
    serie = genserie(datetime(2010, 1, 1), 'D', 1, initval=[1])
    tsh.insert(engine, serie, 'ts-cs-metadata', 'babar',
               {'foo': 'A', 'bar': 42})

    log = tsh.log(engine, names=['ts-cs-metadata'])
    # the first log entry carries the revision id of the insertion above
    meta = tsh.changeset_metadata(engine, log[0]['rev'])
    assert meta == {'foo': 'A', 'bar': 42}


326
def test_bad_import(engine, tsh):
    """Insert awkward inputs: CSV-sourced data, an empty series, NaN-only and
    partially-NaN series, and finally read a series that does not exist.
    """
    # NOTE(review): the original comment mentioned pd.read_json(); the data
    # are actually loaded with pd.read_csv() and the dates parsed by dateutil
    df_result = pd.read_csv(str(DATADIR / 'test_data.csv'))
    df_result['Gas Day'] = df_result['Gas Day'].apply(parser.parse, dayfirst=True, yearfirst=False)
    df_result.set_index('Gas Day', inplace=True)
    ts = df_result['SC']

    tsh.insert(engine, ts, 'SND_SC', 'test')
    result = tsh.get(engine, 'SND_SC')
    assert result.dtype == 'float64'

    # insertion of empty ts
    ts = pd.Series(name='truc', dtype='object')
    tsh.insert(engine, ts, 'empty_ts', 'test')
    assert tsh.get(engine, 'empty_ts') is None

    # nan in ts
    # all na
    ts = genserie(datetime(2010, 1, 10), 'D', 10, [np.nan], name='truc')
    tsh.insert(engine, ts, 'test_nan', 'test')
    assert len(tsh.get(engine, 'test_nan')) == 0
    assert len(tsh.get(engine, 'test_nan', _keep_nans=True)) == 10

    # mixed na
    ts = pd.Series([np.nan] * 5 + [3] * 5,
                   index=pd.date_range(start=datetime(2010, 1, 10),
                                       freq='D', periods=10), name='truc')
    tsh.insert(engine, ts, 'test_nan', 'test')
    result = tsh.get(engine, 'test_nan')

    # the very same series is inserted a second time before checking the result
    tsh.insert(engine, ts, 'test_nan', 'test')
    result = tsh.get(engine, 'test_nan')
    assert_df("""
2010-01-15    3.0
2010-01-16    3.0
2010-01-17    3.0
2010-01-18    3.0
2010-01-19    3.0
""", result)

    # get with a name not in the database (no author argument: `get` is a
    # read, the stray 'test' was copied from the `insert` calls above)
    assert tsh.get(engine, 'inexisting_name') is None
368
369


370
def test_revision_date(engine, tsh):
    """Read a series as of several revision dates.

    'ts_through_time' receives four full rewrites (values 0, 1, 2, 3) at
    increasing insertion dates; `get(..., revision_date=...)` must return the
    state that was current at the requested date, or None before the first
    insertion. 'revdate' is filled in the prologue and checked at the end.
    """
    for i in range(1, 5):
        with engine.connect() as cn:
            tsh.insert(cn, genserie(datetime(2017, 1, i), 'D', 3, [i]), 'revdate',
                       'test', _insertion_date=utcdt(2016, 1, i))

    # end of prologue, now some real meat
    revisions = (
        (pd.Timestamp('2015-1-1 00:00:00', tz='UTC'), 0),
        (pd.Timestamp('2015-1-1 15:45:23', tz='UTC'), 1),
        (pd.Timestamp('2015-1-2 15:43:23', tz='UTC'), 2),
        (pd.Timestamp('2015-1-3', tz='UTC'), 3),
    )
    for idate, value in revisions:
        ts = genserie(datetime(2010, 1, 4), 'D', 4, [value], name='truc')
        tsh.insert(engine, ts, 'ts_through_time',
                   'test', _insertion_date=idate)
        assert idate == tsh.latest_insertion_date(engine, 'ts_through_time')

    ts = tsh.get(engine, 'ts_through_time')

    assert_df("""
2010-01-04    3.0
2010-01-05    3.0
2010-01-06    3.0
2010-01-07    3.0
""", ts)

    ts = tsh.get(engine, 'ts_through_time',
                 revision_date=datetime(2015, 1, 2, 18, 43, 23))

    assert_df("""
2010-01-04    2.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    2.0
""", ts)

    ts = tsh.get(engine, 'ts_through_time',
                 revision_date=datetime(2015, 1, 1, 18, 43, 23))

    assert_df("""
2010-01-04    1.0
2010-01-05    1.0
2010-01-06    1.0
2010-01-07    1.0
""", ts)

    # earlier than the first insertion: nothing to return
    ts = tsh.get(engine, 'ts_through_time',
                 revision_date=datetime(2014, 1, 1, 18, 43, 23))

    assert ts is None

    # epilogue: back to the revdate issue
    assert_df("""
2017-01-01    1.0
2017-01-02    2.0
2017-01-03    3.0
2017-01-04    4.0
2017-01-05    4.0
2017-01-06    4.0
""", tsh.get(engine, 'revdate'))

    oldstate = tsh.get(engine, 'revdate', revision_date=datetime(2016, 1, 2))
    assert_df("""
2017-01-01    1.0
2017-01-02    2.0
2017-01-03    2.0
2017-01-04    2.0
""", oldstate)
452

453

454
def test_point_deletion(engine, tsh):
    """Check that inserting NaN/None over existing points erases them.

    Covers numeric and string series, re-insertion of values over erased
    points, NaN-only first insertions, `tsh.diff` semantics with NaNs, and
    full erasure followed by a fresh insertion.
    """
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 11)
    ts_begin.iloc[-1] = np.nan
    tsh.insert(engine, ts_begin, 'ts_del', 'test')

    _, ts = Snapshot(engine, tsh, 'ts_del').find()
    assert ts.iloc[-2] == 9.0

    ts_begin.iloc[0] = np.nan
    ts_begin.iloc[3] = np.nan

    tsh.insert(engine, ts_begin, 'ts_del', 'test')

    assert_df("""
2010-01-02    1.0
2010-01-03    2.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
""", tsh.get(engine, 'ts_del'))

    ts2 = tsh.get(engine, 'ts_del',
                  # force snapshot reconstruction feature
                  revision_date=datetime(2038, 1, 1))
    assert (tsh.get(engine, 'ts_del') == ts2).all()

    ts_begin.iloc[0] = 42
    ts_begin.iloc[3] = 23

    tsh.insert(engine, ts_begin, 'ts_del', 'test')

    assert_df("""
2010-01-01    42.0
2010-01-02     1.0
2010-01-03     2.0
2010-01-04    23.0
2010-01-05     4.0
2010-01-06     5.0
2010-01-07     6.0
2010-01-08     7.0
2010-01-09     8.0
2010-01-10     9.0
""", tsh.get(engine, 'ts_del'))

    # now with string!

    ts_string = genserie(datetime(2010, 1, 1), 'D', 10, ['machin'])
    tsh.insert(engine, ts_string, 'ts_string_del', 'test')

    # positional writes go through .iloc, like the numeric case above:
    # integer keys on a datetime-indexed Series are otherwise ambiguous
    ts_string.iloc[4] = None
    ts_string.iloc[5] = None

    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
    assert_df("""
2010-01-01    machin
2010-01-02    machin
2010-01-03    machin
2010-01-04    machin
2010-01-07    machin
2010-01-08    machin
2010-01-09    machin
2010-01-10    machin
""", tsh.get(engine, 'ts_string_del'))

    ts_string.iloc[4] = 'truc'
    ts_string.iloc[6] = 'truc'

    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
    assert_df("""
2010-01-01    machin
2010-01-02    machin
2010-01-03    machin
2010-01-04    machin
2010-01-05      truc
2010-01-07      truc
2010-01-08    machin
2010-01-09    machin
2010-01-10    machin
""", tsh.get(engine, 'ts_string_del'))

    ts_string[ts_string.index] = np.nan
    tsh.insert(engine, ts_string, 'ts_string_del', 'test')

    erased = tsh.get(engine, 'ts_string_del')
    assert len(erased) == 0

    # first insertion with only nan

    ts_begin = genserie(datetime(2010, 1, 1), 'D', 10, [np.nan])
    tsh.insert(engine, ts_begin, 'ts_null', 'test')

    assert len(tsh.get(engine, 'ts_null')) == 0

    # exhibit issue with nans handling
    ts_repushed = genserie(datetime(2010, 1, 1), 'D', 11)
    ts_repushed.iloc[0:3] = np.nan

    assert_df("""
2010-01-01     NaN
2010-01-02     NaN
2010-01-03     NaN
2010-01-04     3.0
2010-01-05     4.0
2010-01-06     5.0
2010-01-07     6.0
2010-01-08     7.0
2010-01-09     8.0
2010-01-10     9.0
2010-01-11    10.0
Freq: D
""", ts_repushed)

    tsh.insert(engine, ts_repushed, 'ts_repushed', 'test')
    diff = tsh.insert(engine, ts_repushed, 'ts_repushed', 'test')
    assert diff is None

    # there is no difference
    assert 0 == len(tsh.diff(ts_repushed, ts_repushed))

    ts_add = genserie(datetime(2010, 1, 1), 'D', 15)
    ts_add.iloc[0] = np.nan
    ts_add.iloc[13:] = np.nan
    ts_add.iloc[8] = np.nan
    diff = tsh.diff(ts_repushed, ts_add)

    assert_df("""
2010-01-02     1.0
2010-01-03     2.0
2010-01-09     NaN
2010-01-12    11.0
2010-01-13    12.0""", diff.sort_index())
    # value on nan => value
    # nan on value => nan
    # nan on nan => Nothing
    # nan on nothing=> Nothing

    # full erasing
    # numeric
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 4)
    tsh.insert(engine, ts_begin, 'ts_full_del', 'test')

    ts_begin.iloc[:] = np.nan
    tsh.insert(engine, ts_begin, 'ts_full_del', 'test')

    ts_end = genserie(datetime(2010, 1, 1), 'D', 4)
    tsh.insert(engine, ts_end, 'ts_full_del', 'test')

    # string

    ts_begin = genserie(datetime(2010, 1, 1), 'D', 4, ['text'])
    tsh.insert(engine, ts_begin, 'ts_full_del_str', 'test')

    ts_begin.iloc[:] = np.nan
    tsh.insert(engine, ts_begin, 'ts_full_del_str', 'test')

    ts_end = genserie(datetime(2010, 1, 1), 'D', 4, ['text'])
    tsh.insert(engine, ts_end, 'ts_full_del_str', 'test')
614

Aurélien Campéas's avatar
Aurélien Campéas committed
615

616
def test_get_history(engine, tsh):
    """Check `get_history` and `log` on a series built from three insertions.

    'smallserie' grows by one point per insertion (insertion dates
    2017-02-01..03). The test checks the full history, the diff-mode
    history, a replay of the history into 'smallserie2', and the
    insertion-date / value-date range restrictions.
    """
    for numserie in (1, 2, 3):
        with engine.connect() as cn:
            tsh.insert(cn, genserie(datetime(2017, 1, 1), 'D', numserie), 'smallserie',
                       'aurelien.campeas@pythonian.fr',
                       _insertion_date=utcdt(2017, 2, numserie))

    ts = tsh.get(engine, 'smallserie')
    assert_df("""
2017-01-01    0.0
2017-01-02    1.0
2017-01-03    2.0
""", ts)

    logs = tsh.log(engine, names=['smallserie'])
    # one log entry per insertion; 'rev' is left out of the comparison
    expected_logs = [
        {'author': 'aurelien.campeas@pythonian.fr',
         'meta': {},
         'date': utcdt(2017, 2, day),
         'names': ['smallserie']}
        for day in (1, 2, 3)
    ]
    assert expected_logs == [
        {k: v for k, v in log.items() if k != 'rev'}
        for log in logs
    ]
    histts = tsh.get_history(engine, 'smallserie')

    assert_hist("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
                           2017-01-03    2.0
""", histts)

    diffs = tsh.get_history(engine, 'smallserie', diffmode=True)
    assert_hist("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-03    2.0
""", diffs)

    # replay each historical state into a second series, at the same dates
    for idate in histts:
        with engine.connect() as cn:
            utc_idate = idate.replace(tzinfo=pytz.utc)
            tsh.insert(cn, histts[idate], 'smallserie2',
                       'aurelien.campeas@pythonian.f', _insertion_date=utc_idate)

    # this is perfectly round-tripable
    assert (tsh.get(engine, 'smallserie2') == ts).all()
    assert_hist_equals(tsh.get_history(engine, 'smallserie2'), histts)

    # get history ranges
    tsa = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2017, 2, 2))
    assert_hist("""
insertion_date             value_date
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
                           2017-01-03    2.0
""", tsa)

    tsb = tsh.get_history(engine, 'smallserie',
                          to_insertion_date=datetime(2017, 2, 2))
    assert_hist("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
""", tsb)

    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2017, 2, 2),
                          to_insertion_date=datetime(2017, 2, 2))
    assert_hist("""
insertion_date             value_date
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
""", tsc)

    # a window after the last insertion yields nothing
    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2017, 2, 4),
                          to_insertion_date=datetime(2017, 2, 4))
    assert tsc is None

    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2016, 2, 1),
                          to_insertion_date=datetime(2017, 2, 2))
    assert_hist("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
""", tsc)

    # a window before the first insertion yields nothing either
    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2016, 2, 1),
                          to_insertion_date=datetime(2016, 12, 31))
    assert tsc is None

    # restrictions on value dates
    tsc = tsh.get_history(engine, 'smallserie',
                          from_value_date=datetime(2017, 1, 1),
                          to_value_date=datetime(2017, 1, 2))
    assert_hist("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
""", tsc)

    tsc = tsh.get_history(engine, 'smallserie',
                          from_value_date=datetime(2017, 1, 2))
    assert_hist("""
insertion_date             value_date
2017-02-02 00:00:00+00:00  2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-02    1.0
                           2017-01-03    2.0
""", tsc)

    tsc = tsh.get_history(engine, 'smallserie',
                          to_value_date=datetime(2017, 1, 2))
    assert_hist("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
""", tsc)

762

763
764
765
766
767
768
769
def test_history_delta(engine, tsh):
    """Check the `deltabefore` / `deltaafter` restrictions of `get_history`.

    Two hourly series of six points are inserted into 'hd', each starting one
    hour before its own insertion date; the history is then read without
    restriction, with `deltaafter` only, and with both deltas.
    """
    for d in range(1, 3):
        idate = utcdt(2018, 1, d)
        serie = genserie(idate - timedelta(hours=1), 'H', 6, initval=[d])
        tsh.insert(engine, serie, 'hd', 'aurelien.campeas@pythonian.fr',
                   _insertion_date=idate)

    assert_hist("""
insertion_date             value_date               
2018-01-01 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
                           2018-01-01 03:00:00+00:00    1.0
                           2018-01-01 04:00:00+00:00    1.0
2018-01-02 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
                           2018-01-01 03:00:00+00:00    1.0
                           2018-01-01 04:00:00+00:00    1.0
                           2018-01-01 23:00:00+00:00    2.0
                           2018-01-02 00:00:00+00:00    2.0
                           2018-01-02 01:00:00+00:00    2.0
                           2018-01-02 02:00:00+00:00    2.0
                           2018-01-02 03:00:00+00:00    2.0
                           2018-01-02 04:00:00+00:00    2.0
    """, tsh.get_history(engine, 'hd'))

    # only an upper bound: nothing later than insertion date + 2 hours
    assert_hist("""
insertion_date             value_date               
2018-01-01 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
2018-01-02 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
                           2018-01-01 03:00:00+00:00    1.0
                           2018-01-01 04:00:00+00:00    1.0
                           2018-01-01 23:00:00+00:00    2.0
                           2018-01-02 00:00:00+00:00    2.0
                           2018-01-02 01:00:00+00:00    2.0
                           2018-01-02 02:00:00+00:00    2.0
""", tsh.get_history(engine, 'hd', deltaafter=timedelta(hours=2)))

    # both bounds: only [insertion date, insertion date + 1 hour]
    assert_hist("""
insertion_date             value_date               
2018-01-01 00:00:00+00:00  2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
2018-01-02 00:00:00+00:00  2018-01-02 00:00:00+00:00    2.0
                           2018-01-02 01:00:00+00:00    2.0
""", tsh.get_history(engine, 'hd',
                     deltabefore=timedelta(hours=0),
                     deltaafter=timedelta(hours=1)))


821
822
823
824
825
826
827
828
829
830
831
def test_nr_gethistory(engine, tsh):
    """Check `get_history` restricted on both insertion dates and value dates.

    A first series is inserted into 'foo' without an explicit insertion
    date; then five revisions (``s1 * i``) are inserted at 2016-01-01 + i
    days. Only the revisions of 2016-01-03 and 2016-01-04 must come back.
    """
    # pd.date_range replaces the DatetimeIndex(start=, end=, freq=)
    # constructor form, which recent pandas versions no longer accept
    s0 = pd.Series([-1, 0, 0, -1],
                   index=pd.date_range(start=datetime(2016, 12, 29),
                                       end=datetime(2017, 1, 1),
                                       freq='D'))
    tsh.insert(engine, s0, 'foo', 'zogzog')

    s1 = pd.Series([1, 0, 0, 1],
                   index=pd.date_range(start=datetime(2017, 1, 1),
                                       end=datetime(2017, 1, 4),
                                       freq='D'))
    idate = utcdt(2016, 1, 1)
    for i in range(5):
        with engine.connect() as cn:
            tsh.insert(cn, s1 * i, 'foo',
                       'aurelien.campeas@pythonian.f',
                       _insertion_date=idate + timedelta(days=i))

    # bounds are passed by keyword so their roles are explicit
    df = tsh.get_history(engine, 'foo',
                         from_insertion_date=datetime(2016, 1, 3),
                         to_insertion_date=datetime(2016, 1, 4),
                         from_value_date=datetime(2017, 1, 1),
                         to_value_date=datetime(2017, 1, 4))

    assert_hist("""
insertion_date             value_date
2016-01-03 00:00:00+00:00  2017-01-01    2.0
                           2017-01-02    0.0
                           2017-01-03    0.0
                           2017-01-04    2.0
2016-01-04 00:00:00+00:00  2017-01-01    3.0
                           2017-01-02    0.0
                           2017-01-03    0.0
                           2017-01-04    3.0
""", df)


858
def test_add_na(engine, tsh):
    """Check how NaN-only data is handled on first insertion and on append."""
    # a series of NaNs is not visible through a plain `get`
    # in case of first insertion
    ts_nan = genserie(datetime(2010, 1, 1), 'D', 5)
    ts_nan.iloc[:] = np.nan

    diff = tsh.insert(engine, ts_nan, 'ts_add_na', 'test')
    assert len(diff) == 5
    result = tsh.get(engine, 'ts_add_na')
    assert len(result) == 0

    # the NaN points show up when explicitly asked for
    result = tsh.get(engine, 'ts_add_na', _keep_nans=True)
    assert_df("""
2010-01-01   NaN
2010-01-02   NaN
2010-01-03   NaN
2010-01-04   NaN
2010-01-05   NaN
""", result)

    # in case of insertion in existing data
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 5)
    tsh.insert(engine, ts_begin, 'ts_add_na', 'test')

    # same five values followed by five NaN points: nothing new to store
    ts_nan = genserie(datetime(2010, 1, 6), 'D', 5)
    ts_nan.iloc[:] = np.nan
    ts_nan = pd.concat([ts_begin, ts_nan])

    diff = tsh.insert(engine, ts_nan, 'ts_add_na', 'test')
    assert diff is None

    result = tsh.get(engine, 'ts_add_na')
    assert len(result) == 5
891
892


893
def test_dtype_mismatch(engine, tsh):
    """Inserting a series whose dtype differs from the stored one must fail.

    Both directions are checked: float values over a string series
    ('error1') and string values over a float series ('error2').
    """
    # 'error1' is first stored with string values ...
    tsh.insert(engine,
               genserie(datetime(2015, 1, 1), 'D', 11).astype('str'),
               'error1',
               'test')

    # ... so inserting floats under the same name is rejected.
    with pytest.raises(Exception) as excinfo:
        tsh.insert(engine,
                   genserie(datetime(2015, 1, 1), 'D', 11),
                   'error1',
                   'test')

    assert 'Type error when inserting error1, new type is float64, type in base is object' == str(excinfo.value)

    # 'error2' is first stored with float values ...
    tsh.insert(engine,
               genserie(datetime(2015, 1, 1), 'D', 11),
               'error2',
               'test')

    # ... so inserting strings under the same name is rejected.
    with pytest.raises(Exception) as excinfo:
        tsh.insert(engine,
                   genserie(datetime(2015, 1, 1), 'D', 11).astype('str'),
                   'error2',
                   'test')

    assert 'Type error when inserting error2, new type is object, type in base is float64' == str(excinfo.value)
919
920


921
922
923
924
925
926
927
928
929
def test_precision(engine, tsh):
    """Check the float precision kept by an insert/get round trip."""
    floaty = 0.123456789123456789
    ts = genserie(datetime(2015, 1, 1), 'D', 5, initval=[floaty])

    tsh.insert(engine, ts, 'precision', 'test')
    ts_round = tsh.get(engine, 'precision')
    # The value read back compares equal to the 14-decimal literal.
    assert 0.12345678912346 == ts_round.iloc[0]

    # The round-tripped series does not produce a diff when reinserted ...
    diff = tsh.insert(engine, ts_round, 'precision', 'test')
    assert diff is None

    # ... and neither does the original series.
    diff = tsh.insert(engine, ts, 'precision', 'test')
    assert diff is None


936
937
938
939
940
941
942
943
def test_serie_deletion(engine, tsh):
    """Delete one of two series and check what is left behind.

    The counters returned by `assert_structures` are compared before and
    after the deletion; `delete` is also checked to refuse a bare engine.
    """
    # Two insertions per series, each pair made by a distinct author.
    ts = genserie(datetime(2018, 1, 10), 'H', 10)
    tsh.insert(engine, ts, 'keepme', 'Babar')
    tsh.insert(engine, ts, 'deleteme', 'Celeste')
    ts = genserie(datetime(2018, 1, 12), 'H', 10)
    tsh.insert(engine, ts, 'keepme', 'Babar')
    tsh.insert(engine, ts, 'deleteme', 'Celeste')

    seriecount, csetcount, csetseriecount = assert_structures(engine, tsh)

    with engine.connect() as cn:
        tsh.delete(cn, 'deleteme')

    assert not tsh.exists(engine, 'deleteme')
    # Only the log entries of the surviving series remain.
    log = [entry['author']
           for entry in tsh.log(engine, names=('keepme', 'deleteme'))]
    assert log == ['Babar', 'Babar']

    seriecount2, csetcount2, csetseriecount2 = assert_structures(engine, tsh)

    assert csetcount - csetcount2 == 2
    assert csetseriecount - csetseriecount2 == 2
    assert seriecount - seriecount2 == 1

    # Passing the engine instead of a connection is rejected.
    with pytest.raises(AssertionError) as werr:
        tsh.delete(engine, 'keepme')
    assert werr.value.args[0] == 'use a transaction object'

    # Inserting again under the deleted name is exercised last.
    tsh.insert(engine, ts, 'deleteme', 'Celeste')
965

966

967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
def test_strip(engine, tsh):
    """Strip the history of a series from a given changeset onwards.

    Four revisions of 'xserie' are interleaved with insertions into an
    unrelated 'yserie'; stripping from the third 'xserie' changeset must
    leave only the first two revisions and must not touch 'yserie'.
    """
    for i in range(1, 5):
        pubdate = utcdt(2017, 1, i)
        ts = genserie(datetime(2017, 1, 10), 'H', 1 + i)
        tsh.insert(engine, ts, 'xserie', 'babar', _insertion_date=pubdate)
        # also insert something completely unrelated
        tsh.insert(engine, genserie(datetime(2018, 1, 1), 'D', 1 + i), 'yserie', 'celeste')

    # Changeset lookup: default mode at the date, then 'before' and 'after'
    # one hour later.
    csida = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3))
    assert csida is not None
    csidb = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3, 1), mode='before')
    csidc = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3, 1), mode='after')
    assert csidb < csida < csidc

    log = tsh.log(engine, names=['xserie', 'yserie'])
    assert [(idx, entry['author']) for idx, entry in enumerate(log, start=1)
    ] == [
        (1, 'babar'),
        (2, 'celeste'),
        (3, 'babar'),
        (4, 'celeste'),
        (5, 'babar'),
        (6, 'celeste'),
        (7, 'babar'),
        (8, 'celeste')
    ]

    h = tsh.get_history(engine, 'xserie')
    assert_hist("""
insertion_date             value_date         
2017-01-01 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
2017-01-02 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
                           2017-01-10 02:00:00    2.0
2017-01-03 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
                           2017-01-10 02:00:00    2.0
                           2017-01-10 03:00:00    3.0
2017-01-04 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
                           2017-01-10 02:00:00    2.0
                           2017-01-10 03:00:00    3.0
                           2017-01-10 04:00:00    4.0
""", h)

    csid = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3))
    with engine.connect() as cn:
        tsh.strip(cn, 'xserie', csid)

    # Only the revisions before the stripped changeset remain.
    assert_hist("""
insertion_date             value_date         
2017-01-01 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
2017-01-02 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
                           2017-01-10 02:00:00    2.0
""", tsh.get_history(engine, 'xserie'))

    assert_df("""
2017-01-10 00:00:00    0.0
2017-01-10 01:00:00    1.0
2017-01-10 02:00:00    2.0
""", tsh.get(engine, 'xserie'))

    log = tsh.log(engine, names=['xserie', 'yserie'])
    # 5 and 7 have disappeared
    assert [entry['author'] for entry in log
    ] == ['babar', 'celeste', 'babar', 'celeste', 'celeste', 'celeste']

    # With stripped=True, any 'tshistory.info' metadata found on a log
    # entry must carry the strip marker.
    log = tsh.log(engine, stripped=True, names=['xserie', 'yserie'])
    for entry in log:
        if entry['meta']:
            meta = entry['meta']
            stripinfo = meta.get('tshistory.info')
            if stripinfo:
                assert stripinfo.startswith('got stripped from')
1044
1045


1046
1047
1048
def test_long_name(engine, tsh):
    """A series with a 64-character name can be inserted and read back."""
    serie = genserie(datetime(2010, 1, 1), 'D', 40)

    name = 'a' * 64
    tsh.insert(engine, serie, name, 'babar')
    assert tsh.get(engine, name) is not None
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062


def test_get_delta(engine, tsh):
    for idate in pd.DatetimeIndex(start=utcdt(2015, 1, 1),
                                  end=utcdt(2015, 1, 1, 3),
                                  freq='H'):
        ts = genserie(start=idate, freq='H', repeat=7)
        tsh.insert(engine, ts, 'republication', 'test',
                   _insertion_date=idate)

    hist = tsh.get_history(engine, 'republication')
1063
    assert_hist("""
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092