test_tsio.py 41.9 KB
Newer Older
1
from datetime import datetime, timedelta
2
3
from pathlib import Path
import pytz
4

5
from dateutil import parser
6
import pytest
7
8
import numpy as np
import pandas as pd
9

10
from tshistory.snapshot import Snapshot
11
from tshistory.util import rename_series
12
13
from tshistory.testutil import (
    assert_df,
14
15
    assert_hist,
    assert_hist_equals,
16
    assert_group_equals,
17
    assert_structures,
18
19
20
    genserie,
    tempattr
)
21

22
DATADIR = Path(__file__).parent / 'data'
23

Aurélien Campéas's avatar
Aurélien Campéas committed
24

25
26
27
28
def utcdt(*dt):
    """Return a UTC-aware ``pd.Timestamp`` built from ``datetime(*dt)``.

    The positional arguments are forwarded untouched to the ``datetime``
    constructor (year, month, day, ...).
    """
    naive = datetime(*dt)
    return pd.Timestamp(naive, tz='UTC')


29
def test_tstamp_roundtrip(engine, tsh):
    """Insert Europe/Paris-indexed series and check they come back in UTC.

    The inserted series span the 2017-10-29 offset change (+02:00 to
    +01:00).  The test checks the stored values, the index dtype, the
    series interval and the history (complete and restricted by value
    dates) after a first and a second, overlapping, insertion.
    """
    assert_structures(engine, tsh)
    ts = genserie(datetime(2017, 10, 28, 23),
                  'H', 4, tz='UTC')
    ts.index = ts.index.tz_convert('Europe/Paris')

    assert_df("""
2017-10-29 01:00:00+02:00    0
2017-10-29 02:00:00+02:00    1
2017-10-29 02:00:00+01:00    2
2017-10-29 03:00:00+01:00    3
Freq: H
    """, ts)

    tsh.insert(engine, ts, 'tztest', 'Babar',
               _insertion_date=utcdt(2018, 1, 1))
    back = tsh.get(engine, 'tztest')

    # though un localized we understand it's been normalized to utc
    assert_df("""
2017-10-28 23:00:00+00:00    0.0
2017-10-29 00:00:00+00:00    1.0
2017-10-29 01:00:00+00:00    2.0
2017-10-29 02:00:00+00:00    3.0
""", back)

    # same instants on both sides, whatever the timezone they are shown in
    assert (ts.index == back.index).all()
    assert str(back.index.dtype) == 'datetime64[ns, UTC]'

    ival = tsh.interval(engine, 'tztest')
    assert ival.left == pd.Timestamp('2017-10-28 23:00:00+0000', tz='UTC')
    assert ival.right == pd.Timestamp('2017-10-29 02:00:00+0000', tz='UTC')

    # second insertion, starting two hours later and overlapping the first
    ts = genserie(datetime(2017, 10, 29, 1),
                  'H', 4, tz='UTC')
    ts.index = ts.index.tz_convert('Europe/Paris')
    tsh.insert(engine, ts, 'tztest', 'Celeste',
               _insertion_date=utcdt(2018, 1, 3))

    ts = tsh.get(engine, 'tztest')
    assert_df("""
2017-10-28 23:00:00+00:00    0.0
2017-10-29 00:00:00+00:00    1.0
2017-10-29 01:00:00+00:00    0.0
2017-10-29 02:00:00+00:00    1.0
2017-10-29 03:00:00+00:00    2.0
2017-10-29 04:00:00+00:00    3.0
""", ts)

    hist = tsh.get_history(engine, 'tztest')
    assert_hist("""
insertion_date             value_date               
2018-01-01 00:00:00+00:00  2017-10-28 23:00:00+00:00    0.0
                           2017-10-29 00:00:00+00:00    1.0
                           2017-10-29 01:00:00+00:00    2.0
                           2017-10-29 02:00:00+00:00    3.0
2018-01-03 00:00:00+00:00  2017-10-28 23:00:00+00:00    0.0
                           2017-10-29 00:00:00+00:00    1.0
                           2017-10-29 01:00:00+00:00    0.0
                           2017-10-29 02:00:00+00:00    1.0
                           2017-10-29 03:00:00+00:00    2.0
                           2017-10-29 04:00:00+00:00    3.0
""", hist)

    hist = tsh.get_history(engine, 'tztest',
                           from_value_date=utcdt(2017, 10, 29, 1),
                           to_value_date=utcdt(2017, 10, 29, 3))
    assert_hist("""
insertion_date             value_date               
2018-01-01 00:00:00+00:00  2017-10-29 01:00:00+00:00    2.0
                           2017-10-29 02:00:00+00:00    3.0
2018-01-03 00:00:00+00:00  2017-10-29 01:00:00+00:00    0.0
                           2017-10-29 02:00:00+00:00    1.0
                           2017-10-29 03:00:00+00:00    2.0
""", hist)

    ival = tsh.interval(engine, 'tztest')
    assert ival.left == pd.Timestamp('2017-10-28 23:00:00+0000', tz='UTC')
    assert ival.right == pd.Timestamp('2017-10-29 04:00:00+0000', tz='UTC')
    assert_structures(engine, tsh)
109

110

111
def test_differential(engine, tsh):
    """Exercise successive insertions on the same series.

    Covers: changeset id ordering, `exists`, `interval`, re-insertion of
    identical data, partial updates, extension of a series, and a second
    series ('ts_mixte') that is refreshed and then appended to.  It ends
    with a look at the registry table and a `get` with a revision date.
    """
    assert_structures(engine, tsh)
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 10)
    tsh.insert(engine, ts_begin, 'ts_test', 'test')

    id1 = tsh.last_id(engine, 'ts_test')
    assert tsh._previous_cset(engine, 'ts_test', id1) is None

    assert tsh.exists(engine, 'ts_test')
    assert not tsh.exists(engine, 'this_does_not_exist')

    assert tsh.interval(engine, 'ts_test') == pd.Interval(
        datetime(2010, 1, 1, 0, 0), datetime(2010, 1, 10, 0, 0),
        closed='both'
    )

    # only the raised exception matters here, not the returned value
    with pytest.raises(ValueError):
        tsh.interval(engine, 'nosuchts')

    assert_df("""
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    3.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
""", tsh.get(engine, 'ts_test'))

    # we should detect the emission of a message
    tsh.insert(engine, ts_begin, 'ts_test', 'babar')

    # inserting the very same data leaves the series unchanged
    assert_df("""
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    3.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
""", tsh.get(engine, 'ts_test'))

    ts_slight_variation = ts_begin.copy()
    ts_slight_variation.iloc[3] = 0
    ts_slight_variation.iloc[6] = 0
    tsh.insert(engine, ts_slight_variation, 'ts_test', 'celeste')
    id2 = tsh.last_id(engine, 'ts_test')
    assert tsh._previous_cset(engine, 'ts_test', id2) == id1

    assert_df("""
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    0.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    0.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
""", tsh.get(engine, 'ts_test'))

    ts_longer = genserie(datetime(2010, 1, 3), 'D', 15)
    ts_longer.iloc[1] = 2.48
    ts_longer.iloc[3] = 3.14
    ts_longer.iloc[5] = ts_begin.iloc[7]

    with engine.begin() as cn:
        tsh.insert(cn, ts_longer, 'ts_test', 'test')
    id3 = tsh.last_id(engine, 'ts_test')

    assert id1 < id2 < id3

    assert_df("""
2010-01-01     0.00
2010-01-02     1.00
2010-01-03     0.00
2010-01-04     2.48
2010-01-05     2.00
2010-01-06     3.14
2010-01-07     4.00
2010-01-08     7.00
2010-01-09     6.00
2010-01-10     7.00
2010-01-11     8.00
2010-01-12     9.00
2010-01-13    10.00
2010-01-14    11.00
2010-01-15    12.00
2010-01-16    13.00
2010-01-17    14.00
""", tsh.get(engine, 'ts_test'))

    assert tsh.interval(engine, 'ts_test') == pd.Interval(
        datetime(2010, 1, 1, 0, 0), datetime(2010, 1, 17, 0, 0),
        closed='both'
    )

    # start testing manual overrides
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 5, initval=[2])
    ts_begin.loc['2010-01-04'] = -1
    tsh.insert(engine, ts_begin, 'ts_mixte', 'test')

    # -1 represents bogus upstream data
    assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
""", tsh.get(engine, 'ts_mixte'))

    # refresh all the period + 1 extra data point
    ts_more = genserie(datetime(2010, 1, 2), 'D', 5, [2])
    ts_more.loc['2010-01-04'] = -1
    tsh.insert(engine, ts_more, 'ts_mixte', 'test')

    assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
""", tsh.get(engine, 'ts_mixte'))

    # just append an extra data point
    # with no intersection with the previous ts
    ts_one_more = genserie(datetime(2010, 1, 7), 'D', 1, [3])
    tsh.insert(engine, ts_one_more, 'ts_mixte', 'test')

    assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
""", tsh.get(engine, 'ts_mixte'))

    with engine.begin() as cn:
        cn.execute('set search_path to "{0}.timeserie", {0}, public'.format(tsh.namespace))
        allts = pd.read_sql("select seriename, table_name from registry "
                            "where seriename in ('ts_test', 'ts_mixte')",
                            cn)

        # the expected text has no placeholder, hence no formatting step
        assert_df("""
seriename table_name
0   ts_test    ts_test
1  ts_mixte   ts_mixte
""", allts)

        assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
""", tsh.get(cn, 'ts_mixte',
             revision_date=datetime.now()))

    assert_structures(engine, tsh)

283

284
285
286
287
288
289
def test_serie_metadata(engine, tsh):
    """Check the initial metadata of a series and the update rules.

    A user-level key ('topic') can be set freely; overriding 'tzaware'
    raises an AssertionError unless `internal=True` is given.
    """
    serie = genserie(datetime(2010, 1, 1), 'D', 1, initval=[1])
    tsh.insert(engine, serie, 'ts-metadata', 'babar')

    initialmeta = tsh.metadata(engine, 'ts-metadata')
    assert initialmeta == {
        'index_dtype': '<M8[ns]',
        'index_names': [],
        'index_type': 'datetime64[ns]',
        'tzaware': False,
        'value_dtype': '<f8',
        'value_type': 'float64'
    }

    try:
        tsh.update_metadata(engine, 'ts-metadata',
                            {'topic': 'banana spot price'})
        assert tsh.metadata(engine, 'ts-metadata')['topic'] == 'banana spot price'

        with pytest.raises(AssertionError):
            tsh.update_metadata(engine, 'ts-metadata', {'tzaware': True})

        tsh.update_metadata(engine, 'ts-metadata', {'tzaware': True}, internal=True)
        assert tsh.metadata(engine, 'ts-metadata') == {
            'index_dtype': '<M8[ns]',
            'index_names': [],
            'index_type': 'datetime64[ns]',
            'topic': 'banana spot price',
            'tzaware': True,
            'value_dtype': '<f8',
            'value_type': 'float64'
        }
    finally:
        # unbreak the serie for the second test pass :o
        # (done in `finally` so a failed assertion above cannot leave the
        # series with tampered metadata)
        tsh.update_metadata(engine, 'ts-metadata', initialmeta, internal=True)


321
322
def test_changeset_metadata(engine, tsh):
    """Check that metadata given at insertion time is attached to the changeset."""
    serie = genserie(datetime(2010, 1, 1), 'D', 1, initval=[1])
    tsh.insert(engine, serie, 'ts-cs-metadata', 'babar',
               {'foo': 'A', 'bar': 42})

    log = tsh.log(engine, names=['ts-cs-metadata'])
    # the first log entry carries the revision id of the insertion above
    meta = tsh.changeset_metadata(engine, log[0]['rev'])
    assert meta == {'foo': 'A', 'bar': 42}


331
def test_bad_import(engine, tsh):
    """Insert awkward inputs: csv-parsed dates, empty series, NaN series.

    Also checks that reading an unknown series name yields None.
    """
    # the data were parsed as date by pd.read_json()
    df_result = pd.read_csv(str(DATADIR / 'test_data.csv'))
    df_result['Gas Day'] = df_result['Gas Day'].apply(parser.parse, dayfirst=True, yearfirst=False)
    df_result.set_index('Gas Day', inplace=True)
    ts = df_result['SC']

    tsh.insert(engine, ts, 'SND_SC', 'test')
    result = tsh.get(engine, 'SND_SC')
    assert result.dtype == 'float64'

    # insertion of empty ts
    ts = pd.Series(name='truc', dtype='object')
    tsh.insert(engine, ts, 'empty_ts', 'test')
    assert tsh.get(engine, 'empty_ts') is None

    # nan in ts
    # all na
    ts = genserie(datetime(2010, 1, 10), 'D', 10, [np.nan], name='truc')
    tsh.insert(engine, ts, 'test_nan', 'test')
    assert len(tsh.get(engine, 'test_nan')) == 0
    assert len(tsh.get(engine, 'test_nan', _keep_nans=True)) == 10

    # mixed na
    ts = pd.Series([np.nan] * 5 + [3] * 5,
                   index=pd.date_range(start=datetime(2010, 1, 10),
                                       freq='D', periods=10), name='truc')
    tsh.insert(engine, ts, 'test_nan', 'test')
    result = tsh.get(engine, 'test_nan')

    # the same series is inserted a second time before checking the result
    tsh.insert(engine, ts, 'test_nan', 'test')
    result = tsh.get(engine, 'test_nan')
    assert_df("""
2010-01-15    3.0
2010-01-16    3.0
2010-01-17    3.0
2010-01-18    3.0
2010-01-19    3.0
""", result)

    # get_ts with name not in database
    assert tsh.get(engine, 'inexisting_name', 'test') is None
373
374


375
def test_revision_date(engine, tsh):
    """Read a series back as of several revision dates.

    'ts_through_time' receives four versions at explicit insertion dates
    and is then read at the latest state, at intermediate dates and
    before any insertion (expected: None).  'revdate' is built in the
    prologue and checked in the epilogue, both at its latest state and
    as of 2016-01-02.
    """
    for i in range(1, 5):
        with engine.begin() as cn:
            tsh.insert(cn, genserie(datetime(2017, 1, i), 'D', 3, [i]), 'revdate',
                       'test', _insertion_date=utcdt(2016, 1, i))

    # end of prologue, now some real meat
    idate0 = pd.Timestamp('2015-1-1 00:00:00', tz='UTC')
    ts = genserie(datetime(2010, 1, 4), 'D', 4, [0], name='truc')
    tsh.insert(engine, ts, 'ts_through_time',
               'test', _insertion_date=idate0)
    assert idate0 == tsh.latest_insertion_date(engine, 'ts_through_time')

    idate1 = pd.Timestamp('2015-1-1 15:45:23', tz='UTC')
    ts = genserie(datetime(2010, 1, 4), 'D', 4, [1], name='truc')
    tsh.insert(engine, ts, 'ts_through_time',
               'test', _insertion_date=idate1)
    assert idate1 == tsh.latest_insertion_date(engine, 'ts_through_time')

    idate2 = pd.Timestamp('2015-1-2 15:43:23', tz='UTC')
    ts = genserie(datetime(2010, 1, 4), 'D', 4, [2], name='truc')
    tsh.insert(engine, ts, 'ts_through_time',
               'test', _insertion_date=idate2)
    assert idate2 == tsh.latest_insertion_date(engine, 'ts_through_time')

    idate3 = pd.Timestamp('2015-1-3', tz='UTC')
    ts = genserie(datetime(2010, 1, 4), 'D', 4, [3], name='truc')
    tsh.insert(engine, ts, 'ts_through_time',
               'test', _insertion_date=idate3)
    assert idate3 == tsh.latest_insertion_date(engine, 'ts_through_time')

    ts = tsh.get(engine, 'ts_through_time')

    assert_df("""
2010-01-04    3.0
2010-01-05    3.0
2010-01-06    3.0
2010-01-07    3.0
""", ts)

    ts = tsh.get(engine, 'ts_through_time',
                 revision_date=datetime(2015, 1, 2, 18, 43, 23))

    assert_df("""
2010-01-04    2.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    2.0
""", ts)

    ts = tsh.get(engine, 'ts_through_time',
                 revision_date=datetime(2015, 1, 1, 18, 43, 23))

    assert_df("""
2010-01-04    1.0
2010-01-05    1.0
2010-01-06    1.0
2010-01-07    1.0
""", ts)

    # earlier than the first insertion: nothing to return
    ts = tsh.get(engine, 'ts_through_time',
                 revision_date=datetime(2014, 1, 1, 18, 43, 23))

    assert ts is None

    # epilogue: back to the revdate issue
    assert_df("""
2017-01-01    1.0
2017-01-02    2.0
2017-01-03    3.0
2017-01-04    4.0
2017-01-05    4.0
2017-01-06    4.0
""", tsh.get(engine, 'revdate'))

    oldstate = tsh.get(engine, 'revdate', revision_date=datetime(2016, 1, 2))
    assert_df("""
2017-01-01    1.0
2017-01-02    2.0
2017-01-03    2.0
2017-01-04    2.0
""", oldstate)
457

458

459
def test_point_deletion(engine, tsh):
    """Check that inserting NaN/None over existing points erases them.

    Covers numeric and string series, re-creation of erased points,
    series made only of NaNs, the behaviour of `tsh.diff`, and complete
    erasure followed by re-insertion.
    """
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 11)
    ts_begin.iloc[-1] = np.nan
    tsh.insert(engine, ts_begin, 'ts_del', 'test')

    _, ts = Snapshot(engine, tsh, 'ts_del').find()
    assert ts.iloc[-2] == 9.0

    ts_begin.iloc[0] = np.nan
    ts_begin.iloc[3] = np.nan

    tsh.insert(engine, ts_begin, 'ts_del', 'test')

    assert_df("""
2010-01-02    1.0
2010-01-03    2.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
""", tsh.get(engine, 'ts_del'))

    ts2 = tsh.get(engine, 'ts_del',
                 # force snapshot reconstruction feature
                 revision_date=datetime(2038, 1, 1))
    assert (tsh.get(engine, 'ts_del') == ts2).all()

    ts_begin.iloc[0] = 42
    ts_begin.iloc[3] = 23

    tsh.insert(engine, ts_begin, 'ts_del', 'test')

    assert_df("""
2010-01-01    42.0
2010-01-02     1.0
2010-01-03     2.0
2010-01-04    23.0
2010-01-05     4.0
2010-01-06     5.0
2010-01-07     6.0
2010-01-08     7.0
2010-01-09     8.0
2010-01-10     9.0
""", tsh.get(engine, 'ts_del'))

    # now with string!

    ts_string = genserie(datetime(2010, 1, 1), 'D', 10, ['machin'])
    tsh.insert(engine, ts_string, 'ts_string_del', 'test')

    # positions are meant here (the index holds dates), hence `.iloc`
    ts_string.iloc[4] = None
    ts_string.iloc[5] = None

    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
    assert_df("""
2010-01-01    machin
2010-01-02    machin
2010-01-03    machin
2010-01-04    machin
2010-01-07    machin
2010-01-08    machin
2010-01-09    machin
2010-01-10    machin
""", tsh.get(engine, 'ts_string_del'))

    ts_string.iloc[4] = 'truc'
    ts_string.iloc[6] = 'truc'

    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
    assert_df("""
2010-01-01    machin
2010-01-02    machin
2010-01-03    machin
2010-01-04    machin
2010-01-05      truc
2010-01-07      truc
2010-01-08    machin
2010-01-09    machin
2010-01-10    machin
""", tsh.get(engine, 'ts_string_del'))

    ts_string[ts_string.index] = np.nan
    tsh.insert(engine, ts_string, 'ts_string_del', 'test')

    erased = tsh.get(engine, 'ts_string_del')
    assert len(erased) == 0

    # first insertion with only nan

    ts_begin = genserie(datetime(2010, 1, 1), 'D', 10, [np.nan])
    tsh.insert(engine, ts_begin, 'ts_null', 'test')

    assert len(tsh.get(engine, 'ts_null')) == 0

    ts_repushed = genserie(datetime(2010, 1, 1), 'D', 11)
    ts_repushed.iloc[0:3] = np.nan

    assert_df("""
2010-01-01     NaN
2010-01-02     NaN
2010-01-03     NaN
2010-01-04     3.0
2010-01-05     4.0
2010-01-06     5.0
2010-01-07     6.0
2010-01-08     7.0
2010-01-09     8.0
2010-01-10     9.0
2010-01-11    10.0
Freq: D
""", ts_repushed)

    tsh.insert(engine, ts_repushed, 'ts_repushed', 'test')
    diff = tsh.insert(engine, ts_repushed, 'ts_repushed', 'test')
    assert diff is None

    # there is no difference
    assert 0 == len(tsh.diff(ts_repushed, ts_repushed))

    ts_add = genserie(datetime(2010, 1, 1), 'D', 15)
    ts_add.iloc[0] = np.nan
    ts_add.iloc[13:] = np.nan
    ts_add.iloc[8] = np.nan
    diff = tsh.diff(ts_repushed, ts_add)

    assert_df("""
2010-01-02     1.0
2010-01-03     2.0
2010-01-09     NaN
2010-01-12    11.0
2010-01-13    12.0""", diff.sort_index())
    # value on nan => value
    # nan on value => nan
    # nan on nan => Nothing
    # nan on nothing=> Nothing

    # full erasing
    # numeric
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 4)
    tsh.insert(engine, ts_begin, 'ts_full_del', 'test')

    ts_begin.iloc[:] = np.nan
    tsh.insert(engine, ts_begin, 'ts_full_del', 'test')

    ts_end = genserie(datetime(2010, 1, 1), 'D', 4)
    tsh.insert(engine, ts_end, 'ts_full_del', 'test')

    # string

    ts_begin = genserie(datetime(2010, 1, 1), 'D', 4, ['text'])
    tsh.insert(engine, ts_begin, 'ts_full_del_str', 'test')

    ts_begin = pd.Series([np.nan] * 4, name='ts_full_del_str',
                         index=ts_begin.index)
    tsh.insert(engine, ts_begin, 'ts_full_del_str', 'test')

    ts_end = genserie(datetime(2010, 1, 1), 'D', 4, ['text'])
    tsh.insert(engine, ts_end, 'ts_full_del_str', 'test')
619

Aurélien Campéas's avatar
Aurélien Campéas committed
620

621
def test_get_history(engine, tsh):
    """Check `get_history` output, its diff mode and its range filters.

    Three growing versions of 'smallserie' are inserted at explicit
    insertion dates.  The history is then compared in full and in diff
    mode, replayed into 'smallserie2', and filtered by insertion-date
    and value-date bounds.
    """
    for numserie in (1, 2, 3):
        with engine.begin() as cn:
            tsh.insert(cn, genserie(datetime(2017, 1, 1), 'D', numserie), 'smallserie',
                       'aurelien.campeas@pythonian.fr',
                       _insertion_date=utcdt(2017, 2, numserie))

    ts = tsh.get(engine, 'smallserie')
    assert_df("""
2017-01-01    0.0
2017-01-02    1.0
2017-01-03    2.0
""", ts)

    logs = tsh.log(engine, names=['smallserie'])
    # the 'rev' key is left out of the comparison
    assert [
        {'author': 'aurelien.campeas@pythonian.fr',
         'meta': {},
         'date': pd.Timestamp('2017-02-01 00:00:00+0000', tz='UTC'),
         'names': ['smallserie']
        },
        {'author': 'aurelien.campeas@pythonian.fr',
         'meta': {},
         'date': pd.Timestamp('2017-02-02 00:00:00+0000', tz='UTC'),
         'names': ['smallserie']
        },
        {'author': 'aurelien.campeas@pythonian.fr',
         'meta': {},
         'date': pd.Timestamp('2017-02-03 00:00:00+0000', tz='UTC'),
         'names': ['smallserie']
        }
    ] == [{k: v for k, v in log.items() if k != 'rev'}
          for log in logs]

    histts = tsh.get_history(engine, 'smallserie')

    assert_hist("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
                           2017-01-03    2.0
""", histts)

    diffs = tsh.get_history(engine, 'smallserie', diffmode=True)
    assert_hist("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-03    2.0
""", diffs)

    # replay every version of the history into a second series
    for idate in histts:
        with engine.begin() as cn:
            idate = idate.replace(tzinfo=pytz.utc)
            tsh.insert(cn, histts[idate], 'smallserie2',
                       'aurelien.campeas@pythonian.f', _insertion_date=idate)

    # this is perfectly round-tripable
    assert (tsh.get(engine, 'smallserie2') == ts).all()
    assert_hist_equals(tsh.get_history(engine, 'smallserie2'), histts)

    # get history ranges
    tsa = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2017, 2, 2))
    assert_hist("""
insertion_date             value_date
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
                           2017-01-03    2.0
""", tsa)

    tsb = tsh.get_history(engine, 'smallserie',
                          to_insertion_date=datetime(2017, 2, 2))
    assert_hist("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
""", tsb)

    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2017, 2, 2),
                          to_insertion_date=datetime(2017, 2, 2))
    assert_hist("""
insertion_date             value_date
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
""", tsc)

    # a window after the last insertion holds nothing
    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2017, 2, 4),
                          to_insertion_date=datetime(2017, 2, 4))
    assert tsc is None

    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2016, 2, 1),
                          to_insertion_date=datetime(2017, 2, 2))
    assert_hist("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
""", tsc)

    # a window before the first insertion holds nothing either
    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2016, 2, 1),
                          to_insertion_date=datetime(2016, 12, 31))
    assert tsc is None

    # restrictions on value dates
    tsc = tsh.get_history(engine, 'smallserie',
                          from_value_date=datetime(2017, 1, 1),
                          to_value_date=datetime(2017, 1, 2))
    assert_hist("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
""", tsc)

    tsc = tsh.get_history(engine, 'smallserie',
                          from_value_date=datetime(2017, 1, 2))
    assert_hist("""
insertion_date             value_date
2017-02-02 00:00:00+00:00  2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-02    1.0
                           2017-01-03    2.0
""", tsc)

    tsc = tsh.get_history(engine, 'smallserie',
                          to_value_date=datetime(2017, 1, 2))
    assert_hist("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
""", tsc)

767

768
769
770
771
772
773
774
def test_history_delta(engine, tsh):
    """Check the `deltabefore` / `deltaafter` options of `get_history`.

    Two hourly series of six points are inserted on two consecutive
    days, each starting one hour before its own insertion date.
    """
    for d in range(1, 3):
        idate = utcdt(2018, 1, d)
        serie = genserie(idate - timedelta(hours=1), 'H', 6, initval=[d])
        tsh.insert(engine, serie, 'hd', 'aurelien.campeas@pythonian.fr',
                   _insertion_date=idate)

    assert_hist("""
insertion_date             value_date               
2018-01-01 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
                           2018-01-01 03:00:00+00:00    1.0
                           2018-01-01 04:00:00+00:00    1.0
2018-01-02 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
                           2018-01-01 03:00:00+00:00    1.0
                           2018-01-01 04:00:00+00:00    1.0
                           2018-01-01 23:00:00+00:00    2.0
                           2018-01-02 00:00:00+00:00    2.0
                           2018-01-02 01:00:00+00:00    2.0
                           2018-01-02 02:00:00+00:00    2.0
                           2018-01-02 03:00:00+00:00    2.0
                           2018-01-02 04:00:00+00:00    2.0
    """, tsh.get_history(engine, 'hd'))

    # only an upper bound: points up to two hours after each insertion date
    assert_hist("""
insertion_date             value_date               
2018-01-01 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
2018-01-02 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
                           2018-01-01 03:00:00+00:00    1.0
                           2018-01-01 04:00:00+00:00    1.0
                           2018-01-01 23:00:00+00:00    2.0
                           2018-01-02 00:00:00+00:00    2.0
                           2018-01-02 01:00:00+00:00    2.0
                           2018-01-02 02:00:00+00:00    2.0
""",  tsh.get_history(engine, 'hd', deltaafter=timedelta(hours=2)))

    # both bounds: from the insertion date up to one hour after it
    assert_hist("""
insertion_date             value_date               
2018-01-01 00:00:00+00:00  2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
2018-01-02 00:00:00+00:00  2018-01-02 00:00:00+00:00    2.0
                           2018-01-02 01:00:00+00:00    2.0
""",  tsh.get_history(engine, 'hd',
                      deltabefore=timedelta(hours=0),
                      deltaafter=timedelta(hours=1)))


826
827
828
829
830
831
832
833
834
835
836
def test_nr_gethistory(engine, tsh):
    """Query `get_history` with both insertion-date and value-date bounds.

    'foo' first gets a series with no explicit insertion date, then five
    scaled versions of a second series at consecutive insertion dates.
    Only the versions inserted on 2016-01-03 and 2016-01-04 are expected
    back.
    """
    # `pd.date_range` replaces the `pd.DatetimeIndex(start=..., end=...,
    # freq=...)` constructor form, which pandas removed in 1.0
    s0 = pd.Series([-1, 0, 0, -1],
                   index=pd.date_range(start=datetime(2016, 12, 29),
                                       end=datetime(2017, 1, 1),
                                       freq='D'))
    tsh.insert(engine, s0, 'foo', 'zogzog')

    s1 = pd.Series([1, 0, 0, 1],
                   index=pd.date_range(start=datetime(2017, 1, 1),
                                       end=datetime(2017, 1, 4),
                                       freq='D'))
    idate = utcdt(2016, 1, 1)
    for i in range(5):
        with engine.begin() as cn:
            tsh.insert(cn, s1 * i, 'foo',
                       'aurelien.campeas@pythonian.f',
                       _insertion_date=idate + timedelta(days=i))

    # NOTE(review): the four positional dates look like the insertion-date
    # bounds followed by the value-date bounds -- confirm against the
    # `get_history` signature
    df = tsh.get_history(engine, 'foo',
                         datetime(2016, 1, 3),
                         datetime(2016, 1, 4),
                         datetime(2017, 1, 1),
                         datetime(2017, 1, 4))

    assert_hist("""
insertion_date             value_date
2016-01-03 00:00:00+00:00  2017-01-01    2.0
                           2017-01-02    0.0
                           2017-01-03    0.0
                           2017-01-04    2.0
2016-01-04 00:00:00+00:00  2017-01-01    3.0
                           2017-01-02    0.0
                           2017-01-03    0.0
                           2017-01-04    3.0
""", df)


863
def test_add_na(engine, tsh):
    """Check how all-NaN data is handled at first and later insertions."""
    # a serie of NaNs won't be insert in base
    # in case of first insertion
    ts_nan = genserie(datetime(2010, 1, 1), 'D', 5)
    ts_nan.iloc[:] = np.nan

    diff = tsh.insert(engine, ts_nan, 'ts_add_na', 'test')
    assert len(diff) == 5
    result = tsh.get(engine, 'ts_add_na')
    assert len(result) == 0

    result = tsh.get(engine, 'ts_add_na', _keep_nans=True)
    assert_df("""
2010-01-01   NaN
2010-01-02   NaN
2010-01-03   NaN
2010-01-04   NaN
2010-01-05   NaN
""", result)

    # in case of insertion in existing data
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 5)
    tsh.insert(engine, ts_begin, 'ts_add_na', 'test')

    # same five points, followed by five new NaN points
    ts_nan = genserie(datetime(2010, 1, 6), 'D', 5)
    ts_nan.iloc[:] = np.nan
    ts_nan = pd.concat([ts_begin, ts_nan])

    diff = tsh.insert(engine, ts_nan, 'ts_add_na', 'test')
    assert diff is None

    result = tsh.get(engine, 'ts_add_na')
    assert len(result) == 5
896
897


898
def test_dtype_mismatch(engine, tsh):
    """Check that inserting values of another dtype into a series fails.

    Both directions are covered: numeric values sent to a series first
    stored as strings ('error1'), and string values sent to a series first
    stored as floats ('error2'). The exact error message is asserted in
    each case.
    """
    # 'error1' is created with string values ...
    tsh.insert(engine,
               genserie(datetime(2015, 1, 1), 'D', 11).astype('str'),
               'error1',
               'test')

    # ... so a later insertion of float values must be rejected.
    with pytest.raises(Exception) as excinfo:
        tsh.insert(engine,
                   genserie(datetime(2015, 1, 1), 'D', 11),
                   'error1',
                   'test')

    assert 'Type error when inserting error1, new type is float64, type in base is object' == str(excinfo.value)

    # 'error2' is created with float values ...
    tsh.insert(engine,
               genserie(datetime(2015, 1, 1), 'D', 11),
               'error2',
               'test')

    # ... so a later insertion of string values must be rejected.
    with pytest.raises(Exception) as excinfo:
        tsh.insert(engine,
                   genserie(datetime(2015, 1, 1), 'D', 11).astype('str'),
                   'error2',
                   'test')

    assert 'Type error when inserting error2, new type is object, type in base is float64' == str(excinfo.value)


926
927
928
929
930
931
def test_precision(engine, tsh):
    """Check float precision through an insert/get round trip.

    The literal below carries more digits than the value read back; the
    test pins the value returned by `get` and verifies that neither the
    round-tripped series nor the original one yields a diff when inserted
    again.
    """
    floaty = 0.123456789123456789
    ts = genserie(datetime(2015, 1, 1), 'D', 5, initval=[floaty])

    tsh.insert(engine, ts, 'precision', 'test')
    ts_round = tsh.get(engine, 'precision')
    assert 0.12345678912345678 == ts_round.iloc[0]

    # the round-tripped series does not produce a diff when reinserted
    diff = tsh.insert(engine, ts_round, 'precision', 'test')
    assert diff is None

    # neither does the original series
    diff = tsh.insert(engine, ts, 'precision', 'test')
    assert diff is None


941
942
943
944
945
946
947
948
def test_serie_deletion(engine, tsh):
    """Check that deleting one series leaves the other one untouched.

    Two series ('keepme' and 'deleteme') each receive two insertions.
    After 'deleteme' is deleted, the test verifies that it no longer
    exists, that the log only lists the 'keepme' author, and that the
    counters returned by `assert_structures` dropped by the expected
    amounts. It also checks that `delete` refuses a bare engine.
    """
    ts = genserie(datetime(2018, 1, 10), 'H', 10)
    tsh.insert(engine, ts, 'keepme', 'Babar')
    tsh.insert(engine, ts, 'deleteme', 'Celeste')
    ts = genserie(datetime(2018, 1, 12), 'H', 10)
    tsh.insert(engine, ts, 'keepme', 'Babar')
    tsh.insert(engine, ts, 'deleteme', 'Celeste')

    # counters before the deletion
    seriecount, csetcount, csetseriecount = assert_structures(engine, tsh)

    # deletion is done on a connection obtained from a transaction
    with engine.begin() as cn:
        tsh.delete(cn, 'deleteme')

    assert not tsh.exists(engine, 'deleteme')
    log = [entry['author']
           for entry in tsh.log(engine, names=('keepme', 'deleteme'))]
    assert log == ['Babar', 'Babar']

    # counters after the deletion: two changesets and one series are gone
    seriecount2, csetcount2, csetseriecount2 = assert_structures(engine, tsh)

    assert csetcount - csetcount2 == 2
    assert csetseriecount - csetseriecount2 == 2
    assert seriecount - seriecount2 == 1

    # passing the engine itself instead of a connection is rejected
    with pytest.raises(AssertionError) as werr:
        tsh.delete(engine, 'keepme')
    assert werr.value.args[0] == 'use a transaction object'

    # inserting again under the deleted name is expected not to raise
    tsh.insert(engine, ts, 'deleteme', 'Celeste')

971

972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
def test_strip(engine, tsh):
    for i in range(1, 5):
        pubdate = utcdt(2017, 1, i)
        ts = genserie(datetime(2017, 1, 10), 'H', 1 + i)
        tsh.insert(engine, ts, 'xserie', 'babar', _insertion_date=pubdate)
        # also insert something completely unrelated
        tsh.insert(engine, genserie(datetime(2018, 1, 1), 'D', 1 + i), 'yserie', 'celeste')

    csida = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3))
    assert csida is not None
    csidb = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3, 1), mode='before')
    csidc = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3, 1), mode='after')
    assert csidb < csida < csidc

    log = tsh.log(engine, names=['xserie', 'yserie'])
    assert [(idx, l['author']) for idx, l in enumerate(log, start=1)
    ] == [
        (1, 'babar'),
        (2, 'celeste'),
        (3, 'babar'),
        (4, 'celeste'),
        (5, 'babar'),
        (6, 'celeste'),
        (7, 'babar'),
        (8, 'celeste')
    ]

    h = tsh.get_history(engine, 'xserie')
1000
    assert_hist("""
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
insertion_date             value_date         
2017-01-01 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
2017-01-02 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
                           2017-01-10 02:00:00    2.0
2017-01-03 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
                           2017-01-10 02:00:00    2.0
                           2017-01-10 03:00:00    3.0
2017-01-04 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
                           2017-01-10 02:00:00    2.0
                           2017-01-10 03:00:00    3.0
                           2017-01-10 04:00:00    4.0
""", h)

    csid = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3))
1019
    with engine.begin() as cn:
1020
1021
        tsh.strip(cn, 'xserie', csid)

1022
    assert_hist("""