test_tsio.py 26.5 KB
Newer Older
1
from datetime import datetime, timedelta
2
3
from pathlib import Path
import pytz
4

5
from dateutil import parser
6
import pytest
7
8
import numpy as np
import pandas as pd
9

10
from tshistory.snapshot import Snapshot
11
12
13
14
15
16
from tshistory.testutil import (
    assert_df,
    assert_group_equals,
    genserie,
    tempattr
)
17

18
DATADIR = Path(__file__).parent / 'data'
19

Aurélien Campéas's avatar
Aurélien Campéas committed
20

21
22
23
24
def utcdt(*dt):
    """Return a UTC-aware ``pd.Timestamp`` built from ``datetime`` fields.

    The positional arguments are forwarded unchanged to ``datetime(...)``
    (year, month, day, then the optional time fields).
    """
    naive = datetime(*dt)
    return pd.Timestamp(naive, tz='UTC')


25
def test_tstamp_roundtrip(engine, tsh):
    """Insert a Europe/Paris-indexed series and read it back.

    The generated series spans the 2017-10-29 DST change; the series read
    back is expected to carry a UTC index equal to the original one.
    """
    ts = genserie(datetime(2017, 10, 28, 23),
                  'H', 4, tz='UTC')
    ts.index = ts.index.tz_convert('Europe/Paris')

    assert_df("""
2017-10-29 01:00:00+02:00    0
2017-10-29 02:00:00+02:00    1
2017-10-29 02:00:00+01:00    2
2017-10-29 03:00:00+01:00    3
Freq: H
    """, ts)

    tsh.insert(engine, ts, 'tztest', 'Babar')
    back = tsh.get(engine, 'tztest')

    # though un localized we understand it's been normalized to utc
    assert_df("""
2017-10-28 23:00:00+00:00    0.0
2017-10-29 00:00:00+00:00    1.0
2017-10-29 01:00:00+00:00    2.0
2017-10-29 02:00:00+00:00    3.0
""", back)

    # same instants on both sides, whatever the display timezone
    assert (ts.index == back.index).all()
    assert str(back.index.dtype) == 'datetime64[ns, UTC]'
51
52


53
def test_differential(engine, tsh):
    """Exercise successive insertions on the 'ts_test' and 'ts_mixte' series.

    Covers: identical re-insertion, point modifications, an overlapping
    longer series, appending points, the registry content and a read
    through an explicit ``revision_date``.
    """
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 10)
    tsh.insert(engine, ts_begin, 'ts_test', 'test')

    assert tsh.exists(engine, 'ts_test')
    assert not tsh.exists(engine, 'this_does_not_exist')

    assert_df("""
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    3.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
""", tsh.get(engine, 'ts_test'))

    # we should detect the emission of a message
    tsh.insert(engine, ts_begin, 'ts_test', 'babar')

    assert_df("""
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    3.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
""", tsh.get(engine, 'ts_test'))

    ts_slight_variation = ts_begin.copy()
    ts_slight_variation.iloc[3] = 0
    ts_slight_variation.iloc[6] = 0
    tsh.insert(engine, ts_slight_variation, 'ts_test', 'celeste')

    assert_df("""
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    0.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    0.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
""", tsh.get(engine, 'ts_test'))

    ts_longer = genserie(datetime(2010, 1, 3), 'D', 15)
    ts_longer.iloc[1] = 2.48
    ts_longer.iloc[3] = 3.14
    ts_longer.iloc[5] = ts_begin.iloc[7]

    tsh.insert(engine, ts_longer, 'ts_test', 'test')

    assert_df("""
2010-01-01     0.00
2010-01-02     1.00
2010-01-03     0.00
2010-01-04     2.48
2010-01-05     2.00
2010-01-06     3.14
2010-01-07     4.00
2010-01-08     7.00
2010-01-09     6.00
2010-01-10     7.00
2010-01-11     8.00
2010-01-12     9.00
2010-01-13    10.00
2010-01-14    11.00
2010-01-15    12.00
2010-01-16    13.00
2010-01-17    14.00
""", tsh.get(engine, 'ts_test'))

    # start testing manual overrides
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 5, initval=[2])
    ts_begin.loc['2010-01-04'] = -1
    tsh.insert(engine, ts_begin, 'ts_mixte', 'test')

    # -1 represents bogus upstream data
    assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
""", tsh.get(engine, 'ts_mixte'))

    # refresh all the period + 1 extra data point
    ts_more = genserie(datetime(2010, 1, 2), 'D', 5, [2])
    ts_more.loc['2010-01-04'] = -1
    tsh.insert(engine, ts_more, 'ts_mixte', 'test')

    assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
""", tsh.get(engine, 'ts_mixte'))

    # just append an extra data point
    # with no intersection with the previous ts
    ts_one_more = genserie(datetime(2010, 1, 7), 'D', 1, [3])
    tsh.insert(engine, ts_one_more, 'ts_mixte', 'test')

    assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
""", tsh.get(engine, 'ts_mixte'))

    with engine.connect() as cn:
        # make the unqualified `registry` table name resolvable
        cn.execute('set search_path to "{0}.timeserie", {0}, public'.format(tsh.namespace))
        allts = pd.read_sql("select name, table_name from registry "
                            "where name in ('ts_test', 'ts_mixte')",
                            cn)

        assert_df("""
name              table_name
0   ts_test   {0}.timeserie.ts_test
1  ts_mixte  {0}.timeserie.ts_mixte
""".format(tsh.namespace), allts)

        assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
""", tsh.get(cn, 'ts_mixte',
             revision_date=datetime.now()))
199
200


201
def test_bad_import(engine, tsh):
    """Insert awkward inputs: CSV-loaded data, an empty series, NaN values.

    Also checks that reading a series name that was never inserted
    returns ``None``.
    """
    # the 'Gas Day' column is loaded as text by pd.read_csv() and parsed
    # explicitly below (day first)
    df_result = pd.read_csv(str(DATADIR / 'test_data.csv'))
    df_result['Gas Day'] = df_result['Gas Day'].apply(parser.parse, dayfirst=True, yearfirst=False)
    df_result.set_index('Gas Day', inplace=True)
    ts = df_result['SC']

    tsh.insert(engine, ts, 'SND_SC', 'test')
    result = tsh.get(engine, 'SND_SC')
    assert result.dtype == 'float64'

    # insertion of empty ts
    ts = pd.Series(name='truc', dtype='object')
    tsh.insert(engine, ts, 'empty_ts', 'test')
    assert tsh.get(engine, 'empty_ts') is None

    # nan in ts
    # all na
    ts = genserie(datetime(2010, 1, 10), 'D', 10, [np.nan], name='truc')
    tsh.insert(engine, ts, 'test_nan', 'test')
    assert len(tsh.get(engine, 'test_nan')) == 0
    assert len(tsh.get(engine, 'test_nan', _keep_nans=True)) == 10

    # mixed na
    ts = pd.Series([np.nan] * 5 + [3] * 5,
                   index=pd.date_range(start=datetime(2010, 1, 10),
                                       freq='D', periods=10), name='truc')
    tsh.insert(engine, ts, 'test_nan', 'test')
    result = tsh.get(engine, 'test_nan')

    # the very same series is inserted a second time before checking
    tsh.insert(engine, ts, 'test_nan', 'test')
    result = tsh.get(engine, 'test_nan')
    assert_df("""
2010-01-15    3.0
2010-01-16    3.0
2010-01-17    3.0
2010-01-18    3.0
2010-01-19    3.0
""", result)

    # get_ts with name not in database
    assert tsh.get(engine, 'inexisting_name', 'test') is None
243
244


245
def test_revision_date(engine, tsh):
    """Read series as of various ``revision_date`` values.

    Two series are used: 'revdate' (built in the prologue, checked in the
    epilogue) and 'ts_through_time' (four revisions with explicit
    insertion dates).
    """
    # prologue: build 'revdate' with explicit insertion dates; it is
    # checked again at the very end of the test
    for i in range(1, 5):
        with engine.connect() as cn:
            tsh.insert(cn, genserie(datetime(2017, 1, i), 'D', 3, [i]), 'revdate',
                       'test', _insertion_date=utcdt(2016, 1, i))

    # end of prologue, now some real meat
    idate0 = pd.Timestamp('2015-1-1 00:00:00', tz='UTC')
    ts = genserie(datetime(2010, 1, 4), 'D', 4, [0], name='truc')
    tsh.insert(engine, ts, 'ts_through_time',
               'test', _insertion_date=idate0)
    assert idate0 == tsh.latest_insertion_date(engine, 'ts_through_time')

    idate1 = pd.Timestamp('2015-1-1 15:45:23', tz='UTC')
    ts = genserie(datetime(2010, 1, 4), 'D', 4, [1], name='truc')
    tsh.insert(engine, ts, 'ts_through_time',
               'test', _insertion_date=idate1)
    assert idate1 == tsh.latest_insertion_date(engine, 'ts_through_time')

    idate2 = pd.Timestamp('2015-1-2 15:43:23', tz='UTC')
    ts = genserie(datetime(2010, 1, 4), 'D', 4, [2], name='truc')
    tsh.insert(engine, ts, 'ts_through_time',
               'test', _insertion_date=idate2)
    assert idate2 == tsh.latest_insertion_date(engine, 'ts_through_time')

    idate3 = pd.Timestamp('2015-1-3', tz='UTC')
    ts = genserie(datetime(2010, 1, 4), 'D', 4, [3], name='truc')
    tsh.insert(engine, ts, 'ts_through_time',
               'test', _insertion_date=idate3)
    assert idate3 == tsh.latest_insertion_date(engine, 'ts_through_time')

    # no revision date: latest state
    ts = tsh.get(engine, 'ts_through_time')

    assert_df("""
2010-01-04    3.0
2010-01-05    3.0
2010-01-06    3.0
2010-01-07    3.0
""", ts)

    ts = tsh.get(engine, 'ts_through_time',
                 revision_date=datetime(2015, 1, 2, 18, 43, 23))

    assert_df("""
2010-01-04    2.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    2.0
""", ts)

    ts = tsh.get(engine, 'ts_through_time',
                 revision_date=datetime(2015, 1, 1, 18, 43, 23))

    assert_df("""
2010-01-04    1.0
2010-01-05    1.0
2010-01-06    1.0
2010-01-07    1.0
""", ts)

    # earlier than the first insertion: nothing to return
    ts = tsh.get(engine, 'ts_through_time',
                 revision_date=datetime(2014, 1, 1, 18, 43, 23))

    assert ts is None

    # epilogue: back to the revdate issue
    assert_df("""
2017-01-01    1.0
2017-01-02    2.0
2017-01-03    3.0
2017-01-04    4.0
2017-01-05    4.0
2017-01-06    4.0
""", tsh.get(engine, 'revdate'))

    oldstate = tsh.get(engine, 'revdate', revision_date=datetime(2016, 1, 2))
    assert_df("""
2017-01-01    1.0
2017-01-02    2.0
2017-01-03    2.0
2017-01-04    2.0
""", oldstate)
331

332

333
def test_deletion(engine, tsh):
    """Check how NaN / None values in an inserted series erase stored points.

    Covers numeric and string series, a first insertion made only of NaNs,
    re-pushing an unchanged series, ``tsh.diff`` and full erasure followed
    by re-insertion.
    """
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 11)
    ts_begin.iloc[-1] = np.nan
    tsh.insert(engine, ts_begin, 'ts_del', 'test')

    _, ts = Snapshot(engine, tsh, 'ts_del').find()
    assert ts.iloc[-2] == 9.0

    ts_begin.iloc[0] = np.nan
    ts_begin.iloc[3] = np.nan

    tsh.insert(engine, ts_begin, 'ts_del', 'test')

    assert_df("""
2010-01-02    1.0
2010-01-03    2.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
""", tsh.get(engine, 'ts_del'))

    ts2 = tsh.get(engine, 'ts_del',
                 # force snapshot reconstruction feature
                 revision_date=datetime(2038, 1, 1))
    assert (tsh.get(engine, 'ts_del') == ts2).all()

    ts_begin.iloc[0] = 42
    ts_begin.iloc[3] = 23

    tsh.insert(engine, ts_begin, 'ts_del', 'test')

    assert_df("""
2010-01-01    42.0
2010-01-02     1.0
2010-01-03     2.0
2010-01-04    23.0
2010-01-05     4.0
2010-01-06     5.0
2010-01-07     6.0
2010-01-08     7.0
2010-01-09     8.0
2010-01-10     9.0
""", tsh.get(engine, 'ts_del'))

    # now with string!

    ts_string = genserie(datetime(2010, 1, 1), 'D', 10, ['machin'])
    tsh.insert(engine, ts_string, 'ts_string_del', 'test')

    ts_string[4] = None
    ts_string[5] = None

    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
    assert_df("""
2010-01-01    machin
2010-01-02    machin
2010-01-03    machin
2010-01-04    machin
2010-01-07    machin
2010-01-08    machin
2010-01-09    machin
2010-01-10    machin
""", tsh.get(engine, 'ts_string_del'))

    ts_string[4] = 'truc'
    ts_string[6] = 'truc'

    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
    assert_df("""
2010-01-01    machin
2010-01-02    machin
2010-01-03    machin
2010-01-04    machin
2010-01-05      truc
2010-01-07      truc
2010-01-08    machin
2010-01-09    machin
2010-01-10    machin
""", tsh.get(engine, 'ts_string_del'))

    ts_string[ts_string.index] = np.nan
    tsh.insert(engine, ts_string, 'ts_string_del', 'test')

    erased = tsh.get(engine, 'ts_string_del')
    assert len(erased) == 0

    # first insertion with only nan

    ts_begin = genserie(datetime(2010, 1, 1), 'D', 10, [np.nan])
    tsh.insert(engine, ts_begin, 'ts_null', 'test')

    assert len(tsh.get(engine, 'ts_null')) == 0

    # exhibit issue with nans handling
    ts_repushed = genserie(datetime(2010, 1, 1), 'D', 11)
    ts_repushed[0:3] = np.nan

    assert_df("""
2010-01-01     NaN
2010-01-02     NaN
2010-01-03     NaN
2010-01-04     3.0
2010-01-05     4.0
2010-01-06     5.0
2010-01-07     6.0
2010-01-08     7.0
2010-01-09     8.0
2010-01-10     9.0
2010-01-11    10.0
Freq: D
""", ts_repushed)

    tsh.insert(engine, ts_repushed, 'ts_repushed', 'test')
    diff = tsh.insert(engine, ts_repushed, 'ts_repushed', 'test')
    assert diff is None

    # there is no difference
    assert 0 == len(tsh.diff(ts_repushed, ts_repushed))

    ts_add = genserie(datetime(2010, 1, 1), 'D', 15)
    ts_add.iloc[0] = np.nan
    ts_add.iloc[13:] = np.nan
    ts_add.iloc[8] = np.nan
    diff = tsh.diff(ts_repushed, ts_add)

    assert_df("""
2010-01-02     1.0
2010-01-03     2.0
2010-01-09     NaN
2010-01-12    11.0
2010-01-13    12.0""", diff.sort_index())
    # value on nan => value
    # nan on value => nan
    # nan on nan => Nothing
    # nan on nothing=> Nothing

    # full erasing
    # numeric
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 4)
    tsh.insert(engine, ts_begin, 'ts_full_del', 'test')

    ts_begin.iloc[:] = np.nan
    tsh.insert(engine, ts_begin, 'ts_full_del', 'test')

    ts_end = genserie(datetime(2010, 1, 1), 'D', 4)
    tsh.insert(engine, ts_end, 'ts_full_del', 'test')

    # string

    ts_begin = genserie(datetime(2010, 1, 1), 'D', 4, ['text'])
    tsh.insert(engine, ts_begin, 'ts_full_del_str', 'test')

    ts_begin.iloc[:] = np.nan
    tsh.insert(engine, ts_begin, 'ts_full_del_str', 'test')

    ts_end = genserie(datetime(2010, 1, 1), 'D', 4, ['text'])
    tsh.insert(engine, ts_end, 'ts_full_del_str', 'test')
493

Aurélien Campéas's avatar
Aurélien Campéas committed
494

495
def test_get_history(engine, tsh):
    """Check ``tsh.log`` and ``tsh.get_history`` on a three-revision series.

    Covers the full history, ``diffmode=True``, replaying the history into
    a second series, and filtering by insertion-date and value-date bounds.
    """
    for numserie in (1, 2, 3):
        with engine.connect() as cn:
            tsh.insert(cn, genserie(datetime(2017, 1, 1), 'D', numserie), 'smallserie',
                       'aurelien.campeas@pythonian.fr',
                       _insertion_date=utcdt(2017, 2, numserie))

    ts = tsh.get(engine, 'smallserie')
    assert_df("""
2017-01-01    0.0
2017-01-02    1.0
2017-01-03    2.0
""", ts)

    logs = tsh.log(engine, names=['smallserie'])
    # the 'rev' key is left out of the comparison
    assert [
        {'author': 'aurelien.campeas@pythonian.fr',
         'meta': {},
         'date': pd.Timestamp('2017-02-01 00:00:00+0000', tz='UTC'),
         'names': ['smallserie']
        },
        {'author': 'aurelien.campeas@pythonian.fr',
         'meta': {},
         'date': pd.Timestamp('2017-02-02 00:00:00+0000', tz='UTC'),
         'names': ['smallserie']
        },
        {'author': 'aurelien.campeas@pythonian.fr',
         'meta': {},
         'date': pd.Timestamp('2017-02-03 00:00:00+0000', tz='UTC'),
         'names': ['smallserie']
        }
    ] == [{k: v for k, v in log.items() if k != 'rev'}
          for log in logs]
    histts = tsh.get_history(engine, 'smallserie')
    assert histts.name == 'smallserie'

    assert_df("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
                           2017-01-03    2.0
""", histts)

    diffs = tsh.get_history(engine, 'smallserie', diffmode=True)
    assert_df("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-03    2.0
""", diffs)

    # replay each historical state into a second series
    for idate in histts.index.get_level_values('insertion_date').unique():
        with engine.connect() as cn:
            idate = idate.replace(tzinfo=pytz.timezone('UTC'))
            tsh.insert(cn, histts[idate], 'smallserie2',
                       'aurelien.campeas@pythonian.f', _insertion_date=idate)

    # this is perfectly round-tripable
    assert (tsh.get(engine, 'smallserie2') == ts).all()
    assert (tsh.get_history(engine, 'smallserie2') == histts).all()

    # get history ranges
    tsa = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2017, 2, 2))
    assert_df("""
insertion_date             value_date
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
                           2017-01-03    2.0
""", tsa)

    tsb = tsh.get_history(engine, 'smallserie',
                          to_insertion_date=datetime(2017, 2, 2))
    assert_df("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
""", tsb)

    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2017, 2, 2),
                          to_insertion_date=datetime(2017, 2, 2))
    assert_df("""
insertion_date             value_date
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
""", tsc)

    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2017, 2, 4),
                          to_insertion_date=datetime(2017, 2, 4))
    assert tsc is None

    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2016, 2, 1),
                          to_insertion_date=datetime(2017, 2, 2))
    assert_df("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
""", tsc)

    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2016, 2, 1),
                          to_insertion_date=datetime(2016, 12, 31))
    assert tsc is None

    # restrictions on value dates
    tsc = tsh.get_history(engine, 'smallserie',
                          from_value_date=datetime(2017, 1, 1),
                          to_value_date=datetime(2017, 1, 2))
    assert_df("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
""", tsc)

    diffs = tsh.get_history(engine, 'smallserie',
                            diffmode=True,
                            from_value_date=datetime(2017, 1, 1),
                            to_value_date=datetime(2017, 1, 2))
    assert_df("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-02    1.0
""", diffs)

    tsc = tsh.get_history(engine, 'smallserie',
                          from_value_date=datetime(2017, 1, 2))
    assert_df("""
insertion_date             value_date
2017-02-02 00:00:00+00:00  2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-02    1.0
                           2017-01-03    2.0
""", tsc)

    tsc = tsh.get_history(engine, 'smallserie',
                          to_value_date=datetime(2017, 1, 2))
    assert_df("""
insertion_date             value_date
2017-02-01 00:00:00+00:00  2017-01-01    0.0
2017-02-02 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
2017-02-03 00:00:00+00:00  2017-01-01    0.0
                           2017-01-02    1.0
""", tsc)

652

653
654
655
656
657
658
659
660
661
662
663
def test_nr_gethistory(engine, tsh):
    """Query ``get_history`` with both insertion-date and value-date bounds.

    The 'foo' series first receives ``s0`` and then five scaled versions
    of ``s1`` with explicit, consecutive insertion dates; the history is
    then requested for a sub-window of insertion and value dates.
    """
    # pd.date_range replaces the DatetimeIndex(start=, end=, freq=)
    # constructor form, which recent pandas releases no longer accept
    s0 = pd.Series([-1, 0, 0, -1],
                   index=pd.date_range(start=datetime(2016, 12, 29),
                                       end=datetime(2017, 1, 1),
                                       freq='D'))
    tsh.insert(engine, s0, 'foo', 'zogzog')

    s1 = pd.Series([1, 0, 0, 1],
                   index=pd.date_range(start=datetime(2017, 1, 1),
                                       end=datetime(2017, 1, 4),
                                       freq='D'))
    idate = utcdt(2016, 1, 1)
    for i in range(5):
        with engine.connect() as cn:
            tsh.insert(cn, s1 * i, 'foo',
                       'aurelien.campeas@pythonian.f',
                       _insertion_date=idate + timedelta(days=i))

    df = tsh.get_history(engine, 'foo',
                         datetime(2016, 1, 3),
                         datetime(2016, 1, 4),
                         datetime(2017, 1, 1),
                         datetime(2017, 1, 4))

    assert_df("""
insertion_date             value_date
2016-01-03 00:00:00+00:00  2017-01-01    2.0
                           2017-01-02    0.0
                           2017-01-03    0.0
                           2017-01-04    2.0
2016-01-04 00:00:00+00:00  2017-01-01    3.0
                           2017-01-02    0.0
                           2017-01-03    0.0
                           2017-01-04    3.0
""", df)


690
def test_add_na(engine, tsh):
    """Insert all-NaN data, first into a new series, then after real data."""
    # first insertion made only of NaNs: a diff is returned, a plain get
    # yields nothing, and the NaNs show up with _keep_nans=True
    ts_nan = genserie(datetime(2010, 1, 1), 'D', 5)
    ts_nan[[True] * len(ts_nan)] = np.nan

    diff = tsh.insert(engine, ts_nan, 'ts_add_na', 'test')
    assert len(diff) == 5
    result = tsh.get(engine, 'ts_add_na')
    assert len(result) == 0

    result = tsh.get(engine, 'ts_add_na', _keep_nans=True)
    assert_df("""
2010-01-01   NaN
2010-01-02   NaN
2010-01-03   NaN
2010-01-04   NaN
2010-01-05   NaN
""", result)

    # in case of insertion in existing data
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 5)
    tsh.insert(engine, ts_begin, 'ts_add_na', 'test')

    # same five values followed by five new NaN points
    ts_nan = genserie(datetime(2010, 1, 6), 'D', 5)
    ts_nan[[True] * len(ts_nan)] = np.nan
    ts_nan = pd.concat([ts_begin, ts_nan])

    diff = tsh.insert(engine, ts_nan, 'ts_add_na', 'test')
    assert diff is None

    result = tsh.get(engine, 'ts_add_na')
    assert len(result) == 5
723
724


725
def test_dtype_mismatch(engine, tsh):
    """Inserting a series whose dtype differs from the stored one must raise.

    Both directions are checked (float onto str, str onto float), along
    with the exact error message.
    """
    # stored as strings, then float data is pushed
    tsh.insert(engine,
               genserie(datetime(2015, 1, 1), 'D', 11).astype('str'),
               'error1',
               'test')

    with pytest.raises(Exception) as excinfo:
        tsh.insert(engine,
                   genserie(datetime(2015, 1, 1), 'D', 11),
                   'error1',
                   'test')

    assert 'Type error when inserting error1, new type is float64, type in base is object' == str(excinfo.value)

    # stored as floats, then string data is pushed
    tsh.insert(engine,
               genserie(datetime(2015, 1, 1), 'D', 11),
               'error2',
               'test')

    with pytest.raises(Exception) as excinfo:
        tsh.insert(engine,
                   genserie(datetime(2015, 1, 1), 'D', 11).astype('str'),
                   'error2',
                   'test')

    assert 'Type error when inserting error2, new type is object, type in base is float64' == str(excinfo.value)
751
752


753
754
755
756
757
758
759
760
761
def test_precision(engine, tsh):
    """Check float round-tripping and that re-insertions yield no diff.

    The value read back is expected to equal 0.12345678912346, and
    inserting either the read-back or the original series again must
    return ``None``.
    """
    floaty = 0.123456789123456789
    ts = genserie(datetime(2015, 1, 1), 'D', 5, initval=[floaty])

    tsh.insert(engine, ts, 'precision', 'test')
    ts_round = tsh.get(engine, 'precision')
    assert 0.12345678912346 == ts_round.iloc[0]

    diff = tsh.insert(engine, ts_round, 'precision', 'test')
    assert diff is None  # the roundtriped series does not produce a diff when reinserted

    diff = tsh.insert(engine, ts, 'precision', 'test')  # neither does the original series
    assert diff is None


768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
def test_strip(engine, tsh):
    """Strip the latest revisions of 'xserie' and check what remains.

    Four revisions of 'xserie' are interleaved with insertions into the
    unrelated 'yserie'. After stripping from the changeset found at
    2017-01-03, the history, the current value, the underlying table and
    the logs are inspected.
    """
    for day in range(1, 5):
        xts = genserie(datetime(2017, 1, 10), 'H', 1 + day)
        tsh.insert(engine, xts, 'xserie', 'babar',
                   _insertion_date=utcdt(2017, 1, day))
        # also insert something completely unrelated
        yts = genserie(datetime(2018, 1, 1), 'D', 1 + day)
        tsh.insert(engine, yts, 'yserie', 'celeste')

    # changeset lookup: default mode, then 'before' and 'after'
    csida = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3))
    assert csida is not None
    csidb = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3, 1), mode='before')
    csidc = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3, 1), mode='after')
    assert csidb < csida < csidc

    entries = tsh.log(engine, names=['xserie', 'yserie'])
    numbered_authors = [
        (position, entry['author'])
        for position, entry in enumerate(entries, start=1)
    ]
    assert numbered_authors == [
        (1, 'babar'),
        (2, 'celeste'),
        (3, 'babar'),
        (4, 'celeste'),
        (5, 'babar'),
        (6, 'celeste'),
        (7, 'babar'),
        (8, 'celeste')
    ]

    full_history = tsh.get_history(engine, 'xserie')
    assert_df("""
insertion_date             value_date         
2017-01-01 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
2017-01-02 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
                           2017-01-10 02:00:00    2.0
2017-01-03 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
                           2017-01-10 02:00:00    2.0
                           2017-01-10 03:00:00    3.0
2017-01-04 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
                           2017-01-10 02:00:00    2.0
                           2017-01-10 03:00:00    3.0
                           2017-01-10 04:00:00    4.0
""", full_history)

    strip_from = tsh.changeset_at(engine, 'xserie', datetime(2017, 1, 3))
    with engine.connect() as cn:
        tsh.strip(cn, 'xserie', strip_from)

    remaining_history = tsh.get_history(engine, 'xserie')
    assert_df("""
insertion_date             value_date         
2017-01-01 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
2017-01-02 00:00:00+00:00  2017-01-10 00:00:00    0.0
                           2017-01-10 01:00:00    1.0
                           2017-01-10 02:00:00    2.0
""", remaining_history)

    current = tsh.get(engine, 'xserie')
    assert_df("""
2017-01-10 00:00:00    0.0
2017-01-10 01:00:00    1.0
2017-01-10 02:00:00    2.0
""", current)

    # internal structure is ok
    with engine.connect() as cn:
        cn.execute('set search_path to "{}.timeserie"'.format(tsh.namespace))
        rows = pd.read_sql("select id, diff from xserie order by id", cn)
        # only record whether a diff is stored, not its content
        rows['diff'] = rows['diff'].apply(lambda stored: stored is not None)

    assert_df("""
id   diff
0   1  False
1   2   True
""", rows)

    entries = tsh.log(engine, names=['xserie', 'yserie'])
    # 5 and 7 have disappeared
    authors = [entry['author'] for entry in entries]
    assert authors == ['babar', 'celeste', 'babar', 'celeste', 'celeste', 'celeste']

    entries = tsh.log(engine, stripped=True, names=['xserie', 'yserie'])
    # keep the first 18 characters of the first meta value of each entry
    # that carries metadata
    strip_notes = [
        list(entry['meta'].values())[0][:18] + 'X'
        for entry in entries
        if entry['meta']
    ]
    assert strip_notes == [
        'got stripped from X',
        'got stripped from X'
    ]
856
857
858
859
860
861
862
863
864
865
866
867


def test_prepend(engine, tsh):
    """Insert a series missing its first two points, then the full series.

    The stored series is expected to grow from 38 to 40 points.
    """
    full = genserie(datetime(2010, 1, 1), 'D', 40)
    assert 40 == len(full)

    # everything but the two oldest points goes in first
    tail = full[2:]
    tsh.insert(engine, tail, 'prepend', 'test')
    stored = tsh.get(engine, 'prepend')
    assert 38 == len(stored)

    # pushing the complete series adds the two missing leading points
    tsh.insert(engine, full, 'prepend', 'test')
    stored = tsh.get(engine, 'prepend')
    assert 40 == len(stored)