# test_tsio.py 28.3 KB
# Newer Older
1
from datetime import datetime, timedelta
2
3
from pathlib import Path
import pytz
4

5
from dateutil import parser
6
import pytest
7
8
import numpy as np
import pandas as pd
9

10
from tshistory.snapshot import Snapshot
11
12
13
14
15
16
from tshistory.testutil import (
    assert_df,
    assert_group_equals,
    genserie,
    tempattr
)
17

18
# 'data' directory located next to this test module; holds the fixture files
DATADIR = Path(__file__).parent / 'data'
19

# Aurélien Campéas's avatar
# Aurélien Campéas committed
20

21
22
23
24
def utcdt(*dt):
    """Build a UTC-aware pandas Timestamp from ``datetime(...)`` positional fields."""
    naive = datetime(*dt)
    return pd.Timestamp(naive, tz='UTC')


25
def test_tstamp_roundtrip(engine, tsh):
    """Insert a tz-aware series and check it is read back with a UTC index.

    The series is built in UTC then converted to Europe/Paris; its four
    hourly points straddle the +02:00 -> +01:00 offset change, as the
    first expected frame shows.
    """
    ts = genserie(datetime(2017, 10, 28, 23),
                  'H', 4, tz='UTC')
    ts.index = ts.index.tz_convert('Europe/Paris')

    assert_df("""
2017-10-29 01:00:00+02:00    0
2017-10-29 02:00:00+02:00    1
2017-10-29 02:00:00+01:00    2
2017-10-29 03:00:00+01:00    3
Freq: H
    """, ts)

    tsh.insert(engine, ts, 'tztest', 'Babar')
    back = tsh.get(engine, 'tztest')

    # though un localized we understand it's been normalized to utc
    assert_df("""
2017-10-28 23:00:00+00:00    0.0
2017-10-29 00:00:00+00:00    1.0
2017-10-29 01:00:00+00:00    2.0
2017-10-29 02:00:00+00:00    3.0
""", back)

    # same instants on both sides, only the timezone representation differs
    assert (ts.index == back.index).all()
    assert str(back.index.dtype) == 'datetime64[ns, UTC]'
51
52


53
def test_differential(engine, tsh):
    """Exercise successive insertions on the same series names.

    Covers: first insertion, identical re-insertion, changing a couple of
    points, inserting a longer overlapping series, appending points, and
    finally checks the registry rows and a read with ``revision_date``.
    """
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 10)
    tsh.insert(engine, ts_begin, 'ts_test', 'test')

    assert tsh.exists(engine, 'ts_test')
    assert not tsh.exists(engine, 'this_does_not_exist')

    assert_df("""
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    3.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
""", tsh.get(engine, 'ts_test'))

    # we should detect the emission of a message
    tsh.insert(engine, ts_begin, 'ts_test', 'babar')

    assert_df("""
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    3.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
""", tsh.get(engine, 'ts_test'))

    ts_slight_variation = ts_begin.copy()
    ts_slight_variation.iloc[3] = 0
    ts_slight_variation.iloc[6] = 0
    tsh.insert(engine, ts_slight_variation, 'ts_test', 'celeste')

    assert_df("""
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    0.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    0.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
""", tsh.get(engine, 'ts_test'))

    # longer series starting on 2010-01-03, overlapping the stored one
    ts_longer = genserie(datetime(2010, 1, 3), 'D', 15)
    ts_longer.iloc[1] = 2.48
    ts_longer.iloc[3] = 3.14
    ts_longer.iloc[5] = ts_begin.iloc[7]

    tsh.insert(engine, ts_longer, 'ts_test', 'test')

    assert_df("""
2010-01-01     0.00
2010-01-02     1.00
2010-01-03     0.00
2010-01-04     2.48
2010-01-05     2.00
2010-01-06     3.14
2010-01-07     4.00
2010-01-08     7.00
2010-01-09     6.00
2010-01-10     7.00
2010-01-11     8.00
2010-01-12     9.00
2010-01-13    10.00
2010-01-14    11.00
2010-01-15    12.00
2010-01-16    13.00
2010-01-17    14.00
""", tsh.get(engine, 'ts_test'))

    # start testing manual overrides
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 5, initval=[2])
    ts_begin.loc['2010-01-04'] = -1
    tsh.insert(engine, ts_begin, 'ts_mixte', 'test')

    # -1 represents bogus upstream data
    assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
""", tsh.get(engine, 'ts_mixte'))

    # refresh all the period + 1 extra data point
    ts_more = genserie(datetime(2010, 1, 2), 'D', 5, [2])
    ts_more.loc['2010-01-04'] = -1
    tsh.insert(engine, ts_more, 'ts_mixte', 'test')

    assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
""", tsh.get(engine, 'ts_mixte'))

    # just append an extra data point
    # with no intersection with the previous ts
    ts_one_more = genserie(datetime(2010, 1, 7), 'D', 1, [3])
    tsh.insert(engine, ts_one_more, 'ts_mixte', 'test')

    assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
""", tsh.get(engine, 'ts_mixte'))

    with engine.connect() as cn:
        # make the unqualified "registry" table name below resolvable
        cn.execute('set search_path to "{0}.timeserie", {0}, public'.format(tsh.namespace))
        allts = pd.read_sql("select name, table_name from registry "
                            "where name in ('ts_test', 'ts_mixte')",
                            cn)

        assert_df("""
name              table_name
0   ts_test   {0}.timeserie.ts_test
1  ts_mixte  {0}.timeserie.ts_mixte
""".format(tsh.namespace), allts)

        assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
""", tsh.get(cn, 'ts_mixte',
             revision_date=datetime.now()))
199
200


201
202
203
204
205
206
207
208
209
210
def test_changeset_metadata(engine, tsh):
    """Metadata given at insertion time is returned by ``changeset_metadata``."""
    one_point = genserie(datetime(2010, 1, 1), 'D', 1, initval=[1])
    tsh.insert(
        engine, one_point, 'ts-metadata', 'babar',
        {'foo': 'A', 'bar': 42}
    )

    # look the changeset up through the first log entry of the series
    first_entry = tsh.log(engine, names=['ts-metadata'])[0]
    stored = tsh.changeset_metadata(engine, first_entry['rev'])
    assert stored == {'foo': 'A', 'bar': 42}


211
def test_bad_import(engine, tsh):
    """Insert awkward inputs: CSV-loaded data, an empty series, NaN-laden series.

    Also checks that reading an unknown series name yields None.
    """
    # the data were parsed as date by pd.read_json()
    df_result = pd.read_csv(str(DATADIR / 'test_data.csv'))
    df_result['Gas Day'] = df_result['Gas Day'].apply(parser.parse, dayfirst=True, yearfirst=False)
    df_result.set_index('Gas Day', inplace=True)
    ts = df_result['SC']

    tsh.insert(engine, ts, 'SND_SC', 'test')
    result = tsh.get(engine, 'SND_SC')
    assert result.dtype == 'float64'

    # insertion of empty ts
    ts = pd.Series(name='truc', dtype='object')
    tsh.insert(engine, ts, 'empty_ts', 'test')
    assert tsh.get(engine, 'empty_ts') is None

    # nan in ts
    # all na
    ts = genserie(datetime(2010, 1, 10), 'D', 10, [np.nan], name='truc')
    tsh.insert(engine, ts, 'test_nan', 'test')
    assert len(tsh.get(engine, 'test_nan')) == 0
    assert len(tsh.get(engine, 'test_nan', _keep_nans=True)) == 10

    # mixed na: five NaN followed by five values
    ts = pd.Series([np.nan] * 5 + [3] * 5,
                   index=pd.date_range(start=datetime(2010, 1, 10),
                                       freq='D', periods=10), name='truc')
    tsh.insert(engine, ts, 'test_nan', 'test')
    result = tsh.get(engine, 'test_nan')

    # the same series is inserted a second time before checking the content
    tsh.insert(engine, ts, 'test_nan', 'test')
    result = tsh.get(engine, 'test_nan')
    assert_df("""
2010-01-15    3.0
2010-01-16    3.0
2010-01-17    3.0
2010-01-18    3.0
2010-01-19    3.0
""", result)

    # get_ts with name not in database
    assert tsh.get(engine, 'inexisting_name', 'test') is None
253
254


255
def test_revision_date(engine, tsh):
    """Read a series as of several ``revision_date`` values.

    Four versions of 'ts_through_time' are inserted with explicit insertion
    dates; each read must return the version current at the requested date,
    and None before the first insertion. The 'revdate' series inserted in
    the prologue is checked again at the end.
    """
    # we prepare a good joke for the end of the test
    # ival = Snapshot._interval
    # Snapshot._interval = 3

    for i in range(1, 5):
        with engine.connect() as cn:
            tsh.insert(cn, genserie(datetime(2017, 1, i), 'D', 3, [i]), 'revdate',
                       'test', _insertion_date=utcdt(2016, 1, i))

    # end of prologue, now some real meat
    idate0 = pd.Timestamp('2015-1-1 00:00:00', tz='UTC')
    ts = genserie(datetime(2010, 1, 4), 'D', 4, [0], name='truc')
    tsh.insert(engine, ts, 'ts_through_time',
               'test', _insertion_date=idate0)
    assert idate0 == tsh.latest_insertion_date(engine, 'ts_through_time')

    idate1 = pd.Timestamp('2015-1-1 15:45:23', tz='UTC')
    ts = genserie(datetime(2010, 1, 4), 'D', 4, [1], name='truc')
    tsh.insert(engine, ts, 'ts_through_time',
               'test', _insertion_date=idate1)
    assert idate1 == tsh.latest_insertion_date(engine, 'ts_through_time')

    idate2 = pd.Timestamp('2015-1-2 15:43:23', tz='UTC')
    ts = genserie(datetime(2010, 1, 4), 'D', 4, [2], name='truc')
    tsh.insert(engine, ts, 'ts_through_time',
               'test', _insertion_date=idate2)
    assert idate2 == tsh.latest_insertion_date(engine, 'ts_through_time')

    idate3 = pd.Timestamp('2015-1-3', tz='UTC')
    ts = genserie(datetime(2010, 1, 4), 'D', 4, [3], name='truc')
    tsh.insert(engine, ts, 'ts_through_time',
               'test', _insertion_date=idate3)
    assert idate3 == tsh.latest_insertion_date(engine, 'ts_through_time')

    # no revision date: latest version
    ts = tsh.get(engine, 'ts_through_time')

    assert_df("""
2010-01-04    3.0
2010-01-05    3.0
2010-01-06    3.0
2010-01-07    3.0
""", ts)

    ts = tsh.get(engine, 'ts_through_time',
                 revision_date=datetime(2015, 1, 2, 18, 43, 23))

    assert_df("""
2010-01-04    2.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    2.0
""", ts)

    ts = tsh.get(engine, 'ts_through_time',
                 revision_date=datetime(2015, 1, 1, 18, 43, 23))

    assert_df("""
2010-01-04    1.0
2010-01-05    1.0
2010-01-06    1.0
2010-01-07    1.0
""", ts)

    # earlier than the first insertion date: nothing to return
    ts = tsh.get(engine, 'ts_through_time',
                 revision_date=datetime(2014, 1, 1, 18, 43, 23))

    assert ts is None

    # epilogue: back to the revdate issue
    assert_df("""
2017-01-01    1.0
2017-01-02    2.0
2017-01-03    3.0
2017-01-04    4.0
2017-01-05    4.0
2017-01-06    4.0
""", tsh.get(engine, 'revdate'))

    oldstate = tsh.get(engine, 'revdate', revision_date=datetime(2016, 1, 2))
    assert_df("""
2017-01-01    1.0
2017-01-02    2.0
2017-01-03    2.0
2017-01-04    2.0
""", oldstate)
341

342

343
def test_deletion(engine, tsh):
    """Check how NaN / None values inserted over stored points are handled.

    As the expected frames show, points overwritten with NaN (or None for
    string series) no longer appear in ``get``, and can be given a value
    again later. Also covers all-NaN first insertions, ``tsh.diff`` with
    NaNs, and erasing then refilling a whole series.
    """
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 11)
    ts_begin.iloc[-1] = np.nan
    tsh.insert(engine, ts_begin, 'ts_del', 'test')

    _, ts = Snapshot(engine, tsh, 'ts_del').find()
    assert ts.iloc[-2] == 9.0

    ts_begin.iloc[0] = np.nan
    ts_begin.iloc[3] = np.nan

    tsh.insert(engine, ts_begin, 'ts_del', 'test')

    assert_df("""
2010-01-02    1.0
2010-01-03    2.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
""", tsh.get(engine, 'ts_del'))

    ts2 = tsh.get(engine, 'ts_del',
                 # force snapshot reconstruction feature
                 revision_date=datetime(2038, 1, 1))
    assert (tsh.get(engine, 'ts_del') == ts2).all()

    ts_begin.iloc[0] = 42
    ts_begin.iloc[3] = 23

    tsh.insert(engine, ts_begin, 'ts_del', 'test')

    assert_df("""
2010-01-01    42.0
2010-01-02     1.0
2010-01-03     2.0
2010-01-04    23.0
2010-01-05     4.0
2010-01-06     5.0
2010-01-07     6.0
2010-01-08     7.0
2010-01-09     8.0
2010-01-10     9.0
""", tsh.get(engine, 'ts_del'))

    # now with string!

    ts_string = genserie(datetime(2010, 1, 1), 'D', 10, ['machin'])
    tsh.insert(engine, ts_string, 'ts_string_del', 'test')

    ts_string[4] = None
    ts_string[5] = None

    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
    assert_df("""
2010-01-01    machin
2010-01-02    machin
2010-01-03    machin
2010-01-04    machin
2010-01-07    machin
2010-01-08    machin
2010-01-09    machin
2010-01-10    machin
""", tsh.get(engine, 'ts_string_del'))

    ts_string[4] = 'truc'
    ts_string[6] = 'truc'

    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
    assert_df("""
2010-01-01    machin
2010-01-02    machin
2010-01-03    machin
2010-01-04    machin
2010-01-05      truc
2010-01-07      truc
2010-01-08    machin
2010-01-09    machin
2010-01-10    machin
""", tsh.get(engine, 'ts_string_del'))

    ts_string[ts_string.index] = np.nan
    tsh.insert(engine, ts_string, 'ts_string_del', 'test')

    erased = tsh.get(engine, 'ts_string_del')
    assert len(erased) == 0

    # first insertion with only nan

    ts_begin = genserie(datetime(2010, 1, 1), 'D', 10, [np.nan])
    tsh.insert(engine, ts_begin, 'ts_null', 'test')

    assert len(tsh.get(engine, 'ts_null')) == 0

    # exhibit issue with nans handling
    ts_repushed = genserie(datetime(2010, 1, 1), 'D', 11)
    ts_repushed[0:3] = np.nan

    assert_df("""
2010-01-01     NaN
2010-01-02     NaN
2010-01-03     NaN
2010-01-04     3.0
2010-01-05     4.0
2010-01-06     5.0
2010-01-07     6.0
2010-01-08     7.0
2010-01-09     8.0
2010-01-10     9.0
2010-01-11    10.0
Freq: D
""", ts_repushed)

    tsh.insert(engine, ts_repushed, 'ts_repushed', 'test')
    diff = tsh.insert(engine, ts_repushed, 'ts_repushed', 'test')
    assert diff is None

    # there is no difference
    assert 0 == len(tsh.diff(ts_repushed, ts_repushed))

    ts_add = genserie(datetime(2010, 1, 1), 'D', 15)
    ts_add.iloc[0] = np.nan
    ts_add.iloc[13:] = np.nan
    ts_add.iloc[8] = np.nan
    diff = tsh.diff(ts_repushed, ts_add)

    assert_df("""
2010-01-02     1.0
2010-01-03     2.0
2010-01-09     NaN
2010-01-12    11.0
2010-01-13    12.0""", diff.sort_index())
    # value on nan => value
    # nan on value => nan
    # nan on nan => Nothing
    # nan on nothing=> Nothing

    # full erasing
    # numeric
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 4)
    tsh.insert(engine, ts_begin, 'ts_full_del', 'test')

    ts_begin.iloc[:] = np.nan
    tsh.insert(engine, ts_begin, 'ts_full_del', 'test')

    ts_end = genserie(datetime(2010, 1, 1), 'D', 4)
    tsh.insert(engine, ts_end, 'ts_full_del', 'test')

    # string

    ts_begin = genserie(datetime(2010, 1, 1), 'D', 4, ['text'])
    tsh.insert(engine, ts_begin, 'ts_full_del_str', 'test')

    ts_begin.iloc[:] = np.nan
    tsh.insert(engine, ts_begin, 'ts_full_del_str', 'test')

    ts_end = genserie(datetime(2010, 1, 1), 'D', 4, ['text'])
    tsh.insert(engine, ts_end, 'ts_full_del_str', 'test')
503

# Aurélien Campéas's avatar
# Aurélien Campéas committed
504

505
def test_get_history(engine, tsh):
    """Check ``log`` and ``get_history`` on a series inserted three times.

    'smallserie' grows by one point per insertion (insertion dates
    2017-02-01..03). The test checks the full history, the diff mode, a
    replay of the history into 'smallserie2', and the insertion-date and
    value-date range restrictions.
    """
    for numserie in (1, 2, 3):
        with engine.connect() as cn:
            tsh.insert(cn, genserie(datetime(2017, 1, 1), 'D', numserie), 'smallserie',
                       'aurelien.campeas@pythonian.fr',
                       _insertion_date=utcdt(2017, 2, numserie))

    ts = tsh.get(engine, 'smallserie')
    assert_df("""
2017-01-01    0.0
2017-01-02    1.0
2017-01-03    2.0
""", ts)

    logs = tsh.log(engine, names=['smallserie'])
    # the 'rev' key is left out of the comparison
    assert [
        {'author': 'aurelien.campeas@pythonian.fr',
         'meta': {},
         'date': pd.Timestamp('2017-02-01 00:00:00+0000', tz='UTC'),
         'names': ['smallserie']
        },
        {'author': 'aurelien.campeas@pythonian.fr',
         'meta': {},
         'date': pd.Timestamp('2017-02-02 00:00:00+0000', tz='UTC'),
         'names': ['smallserie']
        },
        {'author': 'aurelien.campeas@pythonian.fr',
         'meta': {},
         'date': pd.Timestamp('2017-02-03 00:00:00+0000', tz='UTC'),
         'names': ['smallserie']
        }
    ] == [{k: v for k, v in log.items() if k != 'rev'}
          for log in logs]
    histts = tsh.get_history(engine, 'smallserie')
    assert histts.name == 'smallserie'

    assert_df("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
                           2017-01-03    2.0
""", histts)

    diffs = tsh.get_history(engine, 'smallserie', diffmode=True)
    assert_df("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-03    2.0
""", diffs)

    # replay each historical version into a second series, keeping its date
    for idate in histts.index.get_level_values('insertion_date').unique():
        with engine.connect() as cn:
            idate = idate.replace(tzinfo=pytz.timezone('UTC'))
            tsh.insert(cn, histts[idate], 'smallserie2',
                       'aurelien.campeas@pythonian.f', _insertion_date=idate)

    # this is perfectly round-tripable
    assert (tsh.get(engine, 'smallserie2') == ts).all()
    assert (tsh.get_history(engine, 'smallserie2') == histts).all()

    # get history ranges
    tsa = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2017, 2, 2))
    assert_df("""
insertion_date             value_date
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
                           2017-01-03    2.0
""", tsa)

    tsb = tsh.get_history(engine, 'smallserie',
                          to_insertion_date=datetime(2017, 2, 2))
    assert_df("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
""", tsb)

    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2017, 2, 2),
                          to_insertion_date=datetime(2017, 2, 2))
    assert_df("""
insertion_date             value_date
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
""", tsc)

    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2017, 2, 4),
                          to_insertion_date=datetime(2017, 2, 4))
    assert tsc is None

    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2016, 2, 1),
                          to_insertion_date=datetime(2017, 2, 2))
    assert_df("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
""", tsc)

    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2016, 2, 1),
                          to_insertion_date=datetime(2016, 12, 31))
    assert tsc is None

    # restrictions on value dates
    tsc = tsh.get_history(engine, 'smallserie',
                          from_value_date=datetime(2017, 1, 1),
                          to_value_date=datetime(2017, 1, 2))
    assert_df("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
""", tsc)

    diffs = tsh.get_history(engine, 'smallserie',
                            diffmode=True,
                            from_value_date=datetime(2017, 1, 1),
                            to_value_date=datetime(2017, 1, 2))
    assert_df("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-02    1.0
""", diffs)

    tsc = tsh.get_history(engine, 'smallserie',
                          from_value_date=datetime(2017, 1, 2))
    assert_df("""
insertion_date             value_date
2017-02-02 00:00:00+00:00  2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-02    1.0
                           2017-01-03    2.0
""", tsc)

    tsc = tsh.get_history(engine, 'smallserie',
                          to_value_date=datetime(2017, 1, 2))
    assert_df("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
""", tsc)

662

663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
def test_history_delta(engine, tsh):
    """Check ``get_history`` with the ``deltabefore`` / ``deltaafter`` arguments.

    Two revisions of 'hd' are inserted, each made of six hourly points that
    start one hour before the revision's insertion date. The expected
    frames show which value dates are kept around each insertion date.
    """
    for day in (1, 2):
        insertion = utcdt(2018, 1, day)
        hourly = genserie(insertion - timedelta(hours=1), 'H', 6, initval=[day])
        tsh.insert(
            engine, hourly, 'hd', 'aurelien.campeas@pythonian.fr',
            _insertion_date=insertion
        )

    two_hours_after = tsh.get_history(engine, 'hd', deltaafter=timedelta(hours=2))
    assert_df("""
insertion_date             value_date               
2018-01-01 00:00:00+00:00  2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
                           2018-01-01 02:00:00+00:00    1.0
2018-01-02 00:00:00+00:00  2018-01-02 00:00:00+00:00    2.0
                           2018-01-02 01:00:00+00:00    2.0
                           2018-01-02 02:00:00+00:00    2.0
""", two_hours_after)

    one_hour_around = tsh.get_history(
        engine, 'hd',
        deltabefore=timedelta(hours=1),
        deltaafter=timedelta(hours=1)
    )
    assert_df("""
insertion_date             value_date               
2018-01-01 00:00:00+00:00  2017-12-31 23:00:00+00:00    1.0
                           2018-01-01 00:00:00+00:00    1.0
                           2018-01-01 01:00:00+00:00    1.0
2018-01-02 00:00:00+00:00  2018-01-01 23:00:00+00:00    2.0
                           2018-01-02 00:00:00+00:00    2.0
                           2018-01-02 01:00:00+00:00    2.0
""", one_hour_around)


694
695
696
697
698
699
700
701
702
703
704
def test_nr_gethistory(engine, tsh):
    """Check ``get_history`` restricted on both insertion and value dates.

    'foo' first receives ``s0`` (no explicit insertion date), then five
    scaled versions of ``s1`` with insertion dates 2016-01-01..05. Only the
    revisions of 2016-01-03 and 2016-01-04 are expected back.
    """
    # pd.date_range replaces the DatetimeIndex(start=, end=, freq=)
    # constructor form, which recent pandas releases no longer accept
    s0 = pd.Series([-1, 0, 0, -1],
                   index=pd.date_range(start=datetime(2016, 12, 29),
                                       end=datetime(2017, 1, 1),
                                       freq='D'))
    tsh.insert(engine, s0, 'foo', 'zogzog')

    s1 = pd.Series([1, 0, 0, 1],
                   index=pd.date_range(start=datetime(2017, 1, 1),
                                       end=datetime(2017, 1, 4),
                                       freq='D'))
    idate = utcdt(2016, 1, 1)
    for i in range(5):
        with engine.connect() as cn:
            tsh.insert(cn, s1 * i, 'foo',
                       'aurelien.campeas@pythonian.f',
                       _insertion_date=idate + timedelta(days=i))

    # positional arguments as in the original call; presumably the
    # insertion-date bounds followed by the value-date bounds (the expected
    # frame is consistent with that reading)
    df = tsh.get_history(engine, 'foo',
                         datetime(2016, 1, 3),
                         datetime(2016, 1, 4),
                         datetime(2017, 1, 1),
                         datetime(2017, 1, 4))

    assert_df("""
insertion_date             value_date
2016-01-03 00:00:00+00:00  2017-01-01    2.0
                           2017-01-02    0.0
                           2017-01-03    0.0
                           2017-01-04    2.0
2016-01-04 00:00:00+00:00  2017-01-01    3.0
                           2017-01-02    0.0
                           2017-01-03    0.0
                           2017-01-04    3.0
""", df)


731
def test_add_na(engine, tsh):
    """Check the insertion of all-NaN data, alone and after real values."""
    # first insertion made only of NaNs: a diff of 5 entries is returned,
    # a plain get() yields no point, and the NaNs show up with _keep_nans
    ts_nan = genserie(datetime(2010, 1, 1), 'D', 5)
    ts_nan[[True] * len(ts_nan)] = np.nan

    diff = tsh.insert(engine, ts_nan, 'ts_add_na', 'test')
    assert len(diff) == 5
    result = tsh.get(engine, 'ts_add_na')
    assert len(result) == 0

    result = tsh.get(engine, 'ts_add_na', _keep_nans=True)
    assert_df("""
2010-01-01   NaN
2010-01-02   NaN
2010-01-03   NaN
2010-01-04   NaN
2010-01-05   NaN
""", result)

    # in case of insertion in existing data
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 5)
    tsh.insert(engine, ts_begin, 'ts_add_na', 'test')

    # same five values followed by five NaNs on new dates: no diff expected
    ts_nan = genserie(datetime(2010, 1, 6), 'D', 5)
    ts_nan[[True] * len(ts_nan)] = np.nan
    ts_nan = pd.concat([ts_begin, ts_nan])

    diff = tsh.insert(engine, ts_nan, 'ts_add_na', 'test')
    assert diff is None

    result = tsh.get(engine, 'ts_add_na')
    assert len(result) == 5
764
765


766
def test_dtype_mismatch(engine, tsh):
    """Inserting data whose dtype differs from the stored series must raise.

    Both directions are checked (string then float, float then string),
    along with the exact error message.
    """
    tsh.insert(engine,
               genserie(datetime(2015, 1, 1), 'D', 11).astype('str'),
               'error1',
               'test')

    with pytest.raises(Exception) as excinfo:
        tsh.insert(engine,
                   genserie(datetime(2015, 1, 1), 'D', 11),
                   'error1',
                   'test')

    assert 'Type error when inserting error1, new type is float64, type in base is object' == str(excinfo.value)

    tsh.insert(engine,
               genserie(datetime(2015, 1, 1), 'D', 11),
               'error2',
               'test')

    with pytest.raises(Exception) as excinfo:
        tsh.insert(engine,
                   genserie(datetime(2015, 1, 1), 'D', 11).astype('str'),
                   'error2',
                   'test')

    assert 'Type error when inserting error2, new type is object, type in base is float64' == str(excinfo.value)
792
793


794
795
796
797
798
799
800
801
802
def test_precision(engine, tsh):
    """Check the float value read back after storage and that re-inserts are no-ops.

    A value with 18 decimal digits is inserted; the value read back is
    compared with its expected 14-decimal form, and neither the read-back
    series nor the original one produces a diff when inserted again.
    """
    floaty = 0.123456789123456789
    ts = genserie(datetime(2015, 1, 1), 'D', 5, initval=[floaty])

    tsh.insert(engine, ts, 'precision', 'test')
    ts_round = tsh.get(engine, 'precision')
    assert 0.12345678912346 == ts_round.iloc[0]

    diff = tsh.insert(engine, ts_round, 'precision', 'test')
    assert diff is None  # the roundtriped series does not produce a diff when reinserted

    diff = tsh.insert(engine, ts, 'precision', 'test')  # neither does the original series
    assert diff is None


809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
def test_strip(engine, tsh):
    """Check ``changeset_at`` and ``strip`` on a series with four revisions.

    'xserie' and the unrelated 'yserie' are inserted alternately. Stripping
    'xserie' from the changeset found at 2017-01-03 must leave only its
    first two revisions, and leave the 'yserie' log entries in place.
    """
    for i in range(1, 5):
        pubdate = utcdt(2017, 1, i)
        ts = genserie(datetime(2017, 1, 10), 'H', 1 + i)
        tsh.insert(engine, ts, 'xserie', 'babar', _insertion_date=pubdate)
        # also insert something completely unrelated
        tsh.insert(engine, genserie(datetime(2018, 1, 1), 'D', 1 + i), 'yserie', 'celeste')

    csida = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3))
    assert csida is not None
    csidb = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3, 1), mode='before')
    csidc = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3, 1), mode='after')
    assert csidb < csida < csidc

    log = tsh.log(engine, names=['xserie', 'yserie'])
    assert [(idx, entry['author']) for idx, entry in enumerate(log, start=1)
    ] == [
        (1, 'babar'),
        (2, 'celeste'),
        (3, 'babar'),
        (4, 'celeste'),
        (5, 'babar'),
        (6, 'celeste'),
        (7, 'babar'),
        (8, 'celeste')
    ]

    h = tsh.get_history(engine, 'xserie')
    assert_df("""
insertion_date             value_date         
2017-01-01 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
2017-01-02 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
                           2017-01-10 02:00:00    2.0
2017-01-03 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
                           2017-01-10 02:00:00    2.0
                           2017-01-10 03:00:00    3.0
2017-01-04 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
                           2017-01-10 02:00:00    2.0
                           2017-01-10 03:00:00    3.0
                           2017-01-10 04:00:00    4.0
""", h)

    csid = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3))
    with engine.connect() as cn:
        tsh.strip(cn, 'xserie', csid)

    assert_df("""
insertion_date             value_date         
2017-01-01 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
2017-01-02 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
                           2017-01-10 02:00:00    2.0
""", tsh.get_history(engine, 'xserie'))

    assert_df("""
2017-01-10 00:00:00    0.0
2017-01-10 01:00:00    1.0
2017-01-10 02:00:00    2.0
""", tsh.get(engine, 'xserie'))

    # internal structure is ok
    with engine.connect() as cn:
        cn.execute('set search_path to "{}.timeserie"'.format(tsh.namespace))
        df = pd.read_sql("select id, diff from xserie order by id", cn)
        # only keep whether a diff is stored, not its content
        df['diff'] = df['diff'].apply(lambda x: False if x is None else True)

    assert_df("""
id   diff
0   1  False
1   2   True
""", df)

    log = tsh.log(engine, names=['xserie', 'yserie'])
    # 5 and 7 have disappeared
    assert [entry['author'] for entry in log
    ] == ['babar', 'celeste', 'babar', 'celeste', 'celeste', 'celeste']

    log = tsh.log(engine, stripped=True, names=['xserie', 'yserie'])
    for entry in log:
        if entry['meta']:
            meta = entry['meta']
            stripinfo = meta.get('tshistory.info')
            if stripinfo:
                assert stripinfo.startswith('got stripped from')
898
899
900
901
902
903
904
905
906
907
908
909


def test_prepend(engine, tsh):
    """Inserting a series that starts earlier than the stored one adds the older points."""
    full = genserie(datetime(2010, 1, 1), 'D', 40)
    assert 40 == len(full)

    # store everything except the two oldest points first
    without_head = full[2:]
    tsh.insert(engine, without_head, 'prepend', 'test')
    stored = tsh.get(engine, 'prepend')
    assert 38 == len(stored)

    # then push the complete series
    tsh.insert(engine, full, 'prepend', 'test')
    stored = tsh.get(engine, 'prepend')
    assert 40 == len(stored)