test_tsio.py 29.4 KB
Newer Older
1
2
3
# coding: utf-8
from pathlib import Path
from datetime import datetime
4
from time import time
5
from dateutil import parser
6
import calendar
7
8
9

import pandas as pd
import numpy as np
10
import pytest
11
from mock import patch
12

13
from tshistory.testutil import assert_group_equals, genserie, assert_df
14

15
DATADIR = Path(__file__).parent / 'data'
16

Aurélien Campéas's avatar
Aurélien Campéas committed
17

18
def test_changeset(engine, tsh):
19
    index = pd.date_range(start=datetime(2017, 1, 1), freq='D', periods=3)
20
    data = [1., 2., 3.]
21

22
23
    with patch('tshistory.tsio.datetime') as mock_date:
        mock_date.now.return_value = datetime(2020, 1, 1)
24
        with engine.connect() as cn:
25
26
27
            with tsh.newchangeset(cn, 'babar'):
                tsh.insert(cn, pd.Series(data, index=index), 'ts_values')
                tsh.insert(cn, pd.Series(['a', 'b', 'c'], index=index), 'ts_othervalues')
28

29
30
        g = tsh.get_group(engine, 'ts_values')
        g2 = tsh.get_group(engine, 'ts_othervalues')
31
        assert_group_equals(g, g2)
32

33
        with pytest.raises(AssertionError):
34
            tsh.insert(engine, pd.Series([2, 3, 4], index=index), 'ts_values')
35

36
        with engine.connect() as cn:
37
            data.append(data.pop(0))
38
39
            with tsh.newchangeset(cn, 'celeste'):
                tsh.insert(cn, pd.Series(data, index=index), 'ts_values')
40
                # below should be a noop
41
                tsh.insert(cn, pd.Series(['a', 'b', 'c'], index=index), 'ts_othervalues')
42

43
    g = tsh.get_group(engine, 'ts_values')
44
45
    assert ['ts_values'] == list(g.keys())

46
    assert_df("""
47
48
49
2017-01-01    2.0
2017-01-02    3.0
2017-01-03    1.0
50
""", tsh.get(engine, 'ts_values'))
51

52
    assert_df("""
53
54
55
2017-01-01    a
2017-01-02    b
2017-01-03    c
56
""", tsh.get(engine, 'ts_othervalues'))
57

58
    log = tsh.log(engine, names=['ts_values', 'ts_othervalues'])
59
60
61
62
63
64
65
66
67
68
69
    assert [
        {'author': 'babar',
         'rev': 1,
         'date': datetime(2020, 1, 1, 0, 0),
         'names': ['ts_values', 'ts_othervalues']},
        {'author': 'celeste',
         'rev': 2,
         'date': datetime(2020, 1, 1, 0, 0),
         'names': ['ts_values']}
    ] == log

70
    log = tsh.log(engine, names=['ts_othervalues'])
71
72
    assert len(log) == 1
    assert log[0]['rev'] == 1
73
    assert log[0]['names'] == ['ts_values', 'ts_othervalues']
74

75
    log = tsh.log(engine, fromrev=2)
76
77
    assert len(log) == 1

78
    log = tsh.log(engine, torev=1)
79
80
    assert len(log) == 1

81
    info = tsh.info(engine)
82
83
84
85
86
87
    assert {
        'changeset count': 2,
        'serie names': ['ts_othervalues', 'ts_values'],
        'series count': 2
    } == info

88

89
def test_tstamp_roundtrip(engine, tsh):
90
91
    ts = genserie(datetime(2017, 10, 28, 23),
                  'H', 4, tz='UTC')
92
93
94
95
96
97
98
99
100
101
    ts.index = ts.index.tz_convert('Europe/Paris')

    assert_df("""
2017-10-29 01:00:00+02:00    0
2017-10-29 02:00:00+02:00    1
2017-10-29 02:00:00+01:00    2
2017-10-29 03:00:00+01:00    3
Freq: H
    """, ts)

102
103
    tsh.insert(engine, ts, 'tztest', 'Babar')
    back = tsh.get(engine, 'tztest')
104
105
106
107
108
109
110
111
112
113
114
115
116

    # though un localized we understand it's been normalized to utc
    assert_df("""
2017-10-28 23:00:00    0.0
2017-10-29 00:00:00    1.0
2017-10-29 01:00:00    2.0
2017-10-29 02:00:00    3.0
""", back)

    back.index = back.index.tz_localize('UTC')
    assert (ts.index == back.index).all()


117
def test_differential(engine, tsh):
118
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 10)
119
    tsh.insert(engine, ts_begin, 'ts_test', 'test')
120

121
122
    assert tsh.exists(engine, 'ts_test')
    assert not tsh.exists(engine, 'this_does_not_exist')
123

124
    assert_df("""
125
126
127
128
129
130
131
132
133
134
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    3.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
135
""", tsh.get(engine, 'ts_test'))
136
137

    # we should detect the emission of a message
138
    tsh.insert(engine, ts_begin, 'ts_test', 'babar')
139

140
    assert_df("""
141
142
143
144
145
146
147
148
149
150
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    3.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
151
""", tsh.get(engine, 'ts_test'))
152
153
154
155

    ts_slight_variation = ts_begin.copy()
    ts_slight_variation.iloc[3] = 0
    ts_slight_variation.iloc[6] = 0
156
    tsh.insert(engine, ts_slight_variation, 'ts_test', 'celeste')
157

158
    assert_df("""
159
160
161
162
163
164
165
166
167
168
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    0.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    0.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
169
""", tsh.get(engine, 'ts_test'))
170

171
    ts_longer = genserie(datetime(2010, 1, 3), 'D', 15)
172
173
174
175
    ts_longer.iloc[1] = 2.48
    ts_longer.iloc[3] = 3.14
    ts_longer.iloc[5] = ts_begin.iloc[7]

176
    tsh.insert(engine, ts_longer, 'ts_test', 'test')
177

178
    assert_df("""
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
2010-01-01     0.00
2010-01-02     1.00
2010-01-03     0.00
2010-01-04     2.48
2010-01-05     2.00
2010-01-06     3.14
2010-01-07     4.00
2010-01-08     7.00
2010-01-09     6.00
2010-01-10     7.00
2010-01-11     8.00
2010-01-12     9.00
2010-01-13    10.00
2010-01-14    11.00
2010-01-15    12.00
2010-01-16    13.00
2010-01-17    14.00
196
""", tsh.get(engine, 'ts_test'))
197
198

    # start testing manual overrides
199
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 5, initval=[2])
200
    ts_begin.loc['2010-01-04'] = -1
201
    tsh.insert(engine, ts_begin, 'ts_mixte', 'test')
202
203

    # -1 represents bogus upstream data
204
    assert_df("""
205
206
207
208
209
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
210
""", tsh.get(engine, 'ts_mixte'))
211
212

    # refresh all the period + 1 extra data point
213
    ts_more = genserie(datetime(2010, 1, 2), 'D', 5, [2])
214
    ts_more.loc['2010-01-04'] = -1
215
    tsh.insert(engine, ts_more, 'ts_mixte', 'test')
216

217
    assert_df("""
218
219
220
221
222
223
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
224
""", tsh.get(engine, 'ts_mixte'))
225
226

    # just append an extra data point
227
228
    # with no intersection with the previous ts
    ts_one_more = genserie(datetime(2010, 1, 7), 'D', 1, [3])
229
    tsh.insert(engine, ts_one_more, 'ts_mixte', 'test')
230

231
    assert_df("""
232
233
234
235
236
237
238
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
239
""", tsh.get(engine, 'ts_mixte'))
240

241
    with engine.connect() as cn:
242
        cn.execute('set search_path to "{0}.timeserie", {0}, public'.format(tsh.namespace))
243
244
245
        hist = pd.read_sql('select id, parent from ts_test order by id',
                           cn)
        assert_df("""
246
247
248
249
   id  parent
0   1     NaN
1   2     1.0
2   3     2.0
250
""", hist)
251

252
253
254
        hist = pd.read_sql('select id, parent from ts_mixte order by id',
                           cn)
        assert_df("""
255
256
257
258
   id  parent
0   1     NaN
1   2     1.0
2   3     2.0
259
""", hist)
260

261
262
263
        allts = pd.read_sql("select name, table_name from registry "
                            "where name in ('ts_test', 'ts_mixte')",
                            cn)
264

265
266
        assert_df("""
name              table_name
267
268
269
0   ts_test   {0}.timeserie.ts_test
1  ts_mixte  {0}.timeserie.ts_mixte
""".format(tsh.namespace), allts)
270

271
        assert_df("""
272
273
274
275
276
277
278
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
279
""", tsh.get(cn, 'ts_mixte',
280
             revision_date=datetime.now()))
281
282


283
def test_bad_import(engine, tsh):
284
285
286
287
288
    # the data were parsed as date by pd.read_json()
    df_result = pd.read_csv(DATADIR / 'test_data.csv')
    df_result['Gas Day'] = df_result['Gas Day'].apply(parser.parse, dayfirst=True, yearfirst=False)
    df_result.set_index('Gas Day', inplace=True)
    ts = df_result['SC']
289

290
291
    tsh.insert(engine, ts, 'SND_SC', 'test')
    result = tsh.get(engine, 'SND_SC')
292
    assert result.dtype == 'float64'
293
294
295

    # insertion of empty ts
    ts = pd.Series(name='truc', dtype='object')
296
297
    tsh.insert(engine, ts, 'empty_ts', 'test')
    assert tsh.get(engine, 'empty_ts') is None
298
299
300

    # nan in ts
    # all na
301
    ts = genserie(datetime(2010, 1, 10), 'D', 10, [np.nan], name='truc')
302
303
    tsh.insert(engine, ts, 'test_nan', 'test')
    assert tsh.get(engine, 'test_nan') is None
304
305
306
307
308

    # mixe na
    ts = pd.Series([np.nan] * 5 + [3] * 5,
                   index=pd.date_range(start=datetime(2010, 1, 10),
                                       freq='D', periods=10), name='truc')
309
310
    tsh.insert(engine, ts, 'test_nan', 'test')
    result = tsh.get(engine, 'test_nan')
311

312
313
    tsh.insert(engine, ts, 'test_nan', 'test')
    result = tsh.get(engine, 'test_nan')
314
    assert_df("""
315
316
317
318
319
2010-01-15    3.0
2010-01-16    3.0
2010-01-17    3.0
2010-01-18    3.0
2010-01-19    3.0
320
""", result)
321
322
323

    # get_ts with name not in database

324
    tsh.get(engine, 'inexisting_name', 'test')
325
326


327
def test_revision_date(engine, tsh):
328
    idate1 = datetime(2015, 1, 1, 15, 43, 23)
329
    with tsh.newchangeset(engine, 'test', _insertion_date=idate1):
330

331
        ts = genserie(datetime(2010, 1, 4), 'D', 4, [1], name='truc')
332
333
        tsh.insert(engine, ts, 'ts_through_time')
        assert idate1 == tsh.latest_insertion_date(engine, 'ts_through_time')
334

335
    idate2 = datetime(2015, 1, 2, 15, 43, 23)
336
    with tsh.newchangeset(engine, 'test', _insertion_date=idate2):
337

338
        ts = genserie(datetime(2010, 1, 4), 'D', 4, [2], name='truc')
339
340
        tsh.insert(engine, ts, 'ts_through_time')
        assert idate2 == tsh.latest_insertion_date(engine, 'ts_through_time')
341

342
    idate3 = datetime(2015, 1, 3, 15, 43, 23)
343
    with tsh.newchangeset(engine, 'test', _insertion_date=idate3):
344

345
        ts = genserie(datetime(2010, 1, 4), 'D', 4, [3], name='truc')
346
347
        tsh.insert(engine, ts, 'ts_through_time')
        assert idate3 == tsh.latest_insertion_date(engine, 'ts_through_time')
348

349
    ts = tsh.get(engine, 'ts_through_time')
350

351
    assert_df("""
352
353
354
355
2010-01-04    3.0
2010-01-05    3.0
2010-01-06    3.0
2010-01-07    3.0
356
""", ts)
357

358
    ts = tsh.get(engine, 'ts_through_time',
Aurélien Campéas's avatar
Aurélien Campéas committed
359
                 revision_date=datetime(2015, 1, 2, 18, 43, 23))
360

361
    assert_df("""
362
363
364
365
2010-01-04    2.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    2.0
366
""", ts)
367

368
    ts = tsh.get(engine, 'ts_through_time',
369
                 revision_date=datetime(2015, 1, 1, 18, 43, 23))
370

371
    assert_df("""
372
373
374
375
2010-01-04    1.0
2010-01-05    1.0
2010-01-06    1.0
2010-01-07    1.0
376
""", ts)
377

378
    ts = tsh.get(engine, 'ts_through_time',
379
                 revision_date=datetime(2014, 1, 1, 18, 43, 23))
380
381
382

    assert ts is None

383

384
def test_snapshots(engine, tsh):
385
    baseinterval = tsh._snapshot_interval
386
    tsh._snapshot_interval = 4
387

388
    with engine.connect() as cn:
389
        for tscount in range(1, 11):
390
            ts = genserie(datetime(2015, 1, 1), 'D', tscount, [1])
391
            diff = tsh.insert(cn, ts, 'growing', 'babar')
392
393
            assert diff.index[0] == diff.index[-1] == ts.index[-1]

394
    diff = tsh.insert(engine, ts, 'growing', 'babar')
395
    assert diff is None
396

397
    with engine.connect() as cn:
398
        cn.execute('set search_path to "{}.timeserie"'.format(tsh.namespace))
399
400
401
        df = pd.read_sql("select id from growing where snapshot is not null",
                         cn)
        assert_df("""
402
403
   id
0   1
404
405
406
1   4
2   8
3  10
407
""", df)
408

409
410
        ts = tsh.get(cn, 'growing')
        assert_df("""
411
412
413
414
415
416
417
418
419
420
2015-01-01    1.0
2015-01-02    1.0
2015-01-03    1.0
2015-01-04    1.0
2015-01-05    1.0
2015-01-06    1.0
2015-01-07    1.0
2015-01-08    1.0
2015-01-09    1.0
2015-01-10    1.0
421
""", ts)
422

423
424
425
        df = pd.read_sql("select id, diff, snapshot from growing order by id", cn)
        for attr in ('diff', 'snapshot'):
            df[attr] = df[attr].apply(lambda x: 0 if x is None else len(x))
426

427
        assert_df("""
428
429
430
431
432
433
434
435
436
437
438
id  diff  snapshot
0   1     0        35
1   2    36         0
2   3    36         0
3   4    36        47
4   5    36         0
5   6    36         0
6   7    36         0
7   8    36        59
8   9    36         0
9  10    36        67
439
""", df)
440

441
442
    table = tsh._get_ts_table(engine, 'growing')
    snapid, snap = tsh._find_snapshot(engine, table, ())
443
444
    assert snapid == 10
    assert (ts == snap).all()
445
    tsh._snapshot_interval = baseinterval
446
447


448
def test_deletion(engine, tsh):
449
450
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 11)
    ts_begin.iloc[-1] = np.nan
451
    tsh.insert(engine, ts_begin, 'ts_del', 'test')
452

453
    ts = tsh._build_snapshot_upto(engine, tsh._get_ts_table(engine, 'ts_del'))
454
    assert ts.iloc[-1] == 9.0
455

456
    ts_begin.iloc[0] = np.nan
457
    ts_begin.iloc[3] = np.nan
458

459
    tsh.insert(engine, ts_begin, 'ts_del', 'test')
460

461
    assert_df("""
462
463
464
465
466
467
468
2010-01-02    1.0
2010-01-03    2.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
469
2010-01-10    9.0
470
""", tsh.get(engine, 'ts_del'))
471

472
    ts2 = tsh.get(engine, 'ts_del',
473
474
                 # force snapshot reconstruction feature
                 revision_date=datetime(2038, 1, 1))
475
    assert (tsh.get(engine, 'ts_del') == ts2).all()
476

477
478
479
    ts_begin.iloc[0] = 42
    ts_begin.iloc[3] = 23

480
    tsh.insert(engine, ts_begin, 'ts_del', 'test')
481

482
    assert_df("""
483
484
485
486
487
488
489
490
491
2010-01-01    42.0
2010-01-02     1.0
2010-01-03     2.0
2010-01-04    23.0
2010-01-05     4.0
2010-01-06     5.0
2010-01-07     6.0
2010-01-08     7.0
2010-01-09     8.0
492
2010-01-10     9.0
493
""", tsh.get(engine, 'ts_del'))
494
495
496

    # now with string!

497
    ts_string = genserie(datetime(2010, 1, 1), 'D', 10, ['machin'])
498
    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
499
500
501
502

    ts_string[4] = None
    ts_string[5] = None

503
    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
504
    assert_df("""
505
506
507
508
509
510
511
512
2010-01-01    machin
2010-01-02    machin
2010-01-03    machin
2010-01-04    machin
2010-01-07    machin
2010-01-08    machin
2010-01-09    machin
2010-01-10    machin
513
""", tsh.get(engine, 'ts_string_del'))
514
515
516
517

    ts_string[4] = 'truc'
    ts_string[6] = 'truc'

518
    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
519
    assert_df("""
520
521
522
523
524
525
526
527
2010-01-01    machin
2010-01-02    machin
2010-01-03    machin
2010-01-04    machin
2010-01-05      truc
2010-01-07      truc
2010-01-08    machin
2010-01-09    machin
528
2010-01-10    machin
529
""", tsh.get(engine, 'ts_string_del'))
530

531
    ts_string[ts_string.index] = np.nan
532
    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
533

534
    erased = tsh.get(engine, 'ts_string_del')
535
536
    assert len(erased) == 0

537
538
    # first insertion with only nan

539
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 10, [np.nan])
540
    tsh.insert(engine, ts_begin, 'ts_null', 'test')
541

542
    assert tsh.get(engine, 'ts_null') is None
543

544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
    # exhibit issue with nans handling
    ts_repushed = genserie(datetime(2010, 1, 1), 'D', 11)
    ts_repushed[0:3] = np.nan

    assert_df("""
2010-01-01     NaN
2010-01-02     NaN
2010-01-03     NaN
2010-01-04     3.0
2010-01-05     4.0
2010-01-06     5.0
2010-01-07     6.0
2010-01-08     7.0
2010-01-09     8.0
2010-01-10     9.0
2010-01-11    10.0
Freq: D
""", ts_repushed)

563
564
    tsh.insert(engine, ts_repushed, 'ts_repushed', 'test')
    diff = tsh.insert(engine, ts_repushed, 'ts_repushed', 'test')
565
566
    assert diff is None

567
    # there is no difference
568
    assert 0 == len(tsh._compute_diff(ts_repushed, ts_repushed))
569
570
571
572
573

    ts_add = genserie(datetime(2010, 1, 1), 'D', 15)
    ts_add.iloc[0] = np.nan
    ts_add.iloc[13:] = np.nan
    ts_add.iloc[8] = np.nan
574
    diff = tsh._compute_diff(ts_repushed, ts_add)
575
576
577
578
579
580

    assert_df("""
2010-01-02     1.0
2010-01-03     2.0
2010-01-09     NaN
2010-01-12    11.0
581
2010-01-13    12.0""", diff.sort_index())
582
583
584
585
    # value on nan => value
    # nan on value => nan
    # nan on nan => Nothing
    # nan on nothing=> Nothing
586

Aurélien Campéas's avatar
Aurélien Campéas committed
587
    # full erasing
588
589
    # numeric
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 4)
590
    tsh.insert(engine, ts_begin, 'ts_full_del', 'test')
591

Aurélien Campéas's avatar
Aurélien Campéas committed
592
    ts_begin.iloc[:] = np.nan
593
    tsh.insert(engine, ts_begin, 'ts_full_del', 'test')
594
595

    ts_end = genserie(datetime(2010, 1, 1), 'D', 4)
596
    tsh.insert(engine, ts_end, 'ts_full_del', 'test')
597
598
599
600

    # string

    ts_begin = genserie(datetime(2010, 1, 1), 'D', 4, ['text'])
601
    tsh.insert(engine, ts_begin, 'ts_full_del_str', 'test')
602
603

    ts_begin.iloc[:] = np.nan
604
    tsh.insert(engine, ts_begin, 'ts_full_del_str', 'test')
605
606

    ts_end = genserie(datetime(2010, 1, 1), 'D', 4, ['text'])
607
    tsh.insert(engine, ts_end, 'ts_full_del_str', 'test')
608

Aurélien Campéas's avatar
Aurélien Campéas committed
609

610
def test_multi_index(engine, tsh):
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
    appdate_0 = pd.DatetimeIndex(start=datetime(2015, 1, 1),
                                 end=datetime(2015, 1, 2),
                                 freq='D').values
    pubdate_0 = [pd.datetime(2015, 1, 11, 12, 0, 0)] * 2
    insertion_date_0 = [pd.datetime(2015, 1, 11, 12, 30, 0)] * 2

    multi = [
        appdate_0,
        np.array(pubdate_0),
        np.array(insertion_date_0)
    ]

    ts_multi = pd.Series(range(2), index=multi)
    ts_multi.index.rename(['b', 'c', 'a'], inplace=True)

626
    tsh.insert(engine, ts_multi, 'ts_multi_simple', 'test')
627

628
    ts = tsh.get(engine, 'ts_multi_simple')
629
630
631
    assert_df("""
                                                    ts_multi_simple
a                   b          c                                   
632
633
2015-01-11 12:30:00 2015-01-01 2015-01-11 12:00:00              0.0
                    2015-01-02 2015-01-11 12:00:00              1.0
634
635
""", pd.DataFrame(ts))

636
    diff = tsh.insert(engine, ts_multi, 'ts_multi_simple', 'test')
637
638
639
640
641
    assert diff is None

    ts_multi_2 = pd.Series([0, 2], index=multi)
    ts_multi_2.index.rename(['b', 'c', 'a'], inplace=True)

642
643
    tsh.insert(engine, ts_multi_2, 'ts_multi_simple', 'test')
    ts = tsh.get(engine, 'ts_multi_simple')
644
645
646
647

    assert_df("""
                                                    ts_multi_simple
a                   b          c                                   
648
649
2015-01-11 12:30:00 2015-01-01 2015-01-11 12:00:00              0.0
                    2015-01-02 2015-01-11 12:00:00              2.0
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
""", pd.DataFrame(ts))

    # bigger ts
    appdate_0 = pd.DatetimeIndex(start=datetime(2015, 1, 1),
                                 end=datetime(2015, 1, 4),
                                 freq='D').values
    pubdate_0 = [pd.datetime(2015, 1, 11, 12, 0, 0)] * 4
    insertion_date_0 = [pd.datetime(2015, 1, 11, 12, 30, 0)] * 4

    appdate_1 = pd.DatetimeIndex(start=datetime(2015, 1, 1),
                                 end=datetime(2015, 1, 4),
                                 freq='D').values

    pubdate_1 = [pd.datetime(2015, 1, 21, 12, 0, 0)] * 4
    insertion_date_1 = [pd.datetime(2015, 1, 21, 12, 30, 0)] * 4

    multi = [
        np.concatenate([appdate_0, appdate_1]),
        np.array(pubdate_0 + pubdate_1),
        np.array(insertion_date_0 + insertion_date_1)
    ]

    ts_multi = pd.Series(range(8), index=multi)
    ts_multi.index.rename(['a', 'c', 'b'], inplace=True)

675
676
    tsh.insert(engine, ts_multi, 'ts_multi', 'test')
    ts = tsh.get(engine, 'ts_multi')
677
678
679
680

    assert_df("""
                                                    ts_multi
a          b                   c                            
681
682
683
684
685
686
687
688
2015-01-01 2015-01-11 12:30:00 2015-01-11 12:00:00       0.0
           2015-01-21 12:30:00 2015-01-21 12:00:00       4.0
2015-01-02 2015-01-11 12:30:00 2015-01-11 12:00:00       1.0
           2015-01-21 12:30:00 2015-01-21 12:00:00       5.0
2015-01-03 2015-01-11 12:30:00 2015-01-11 12:00:00       2.0
           2015-01-21 12:30:00 2015-01-21 12:00:00       6.0
2015-01-04 2015-01-11 12:30:00 2015-01-11 12:00:00       3.0
           2015-01-21 12:30:00 2015-01-21 12:00:00       7.0
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
    """, pd.DataFrame(ts.sort_index()))
    # Note: the columnns are returned according to the alphabetic order

    appdate_2 = pd.DatetimeIndex(start=datetime(2015, 1, 1),
                                 end=datetime(2015, 1, 4),
                                 freq='D').values
    pubdate_2 = [pd.datetime(2015, 1, 31, 12, 0, 0)] * 4
    insertion_date_2 = [pd.datetime(2015, 1, 31, 12, 30, 0)] * 4

    multi_2 = [
        np.concatenate([appdate_1, appdate_2]),
        np.array(pubdate_1 + pubdate_2),
        np.array(insertion_date_1 + insertion_date_2)
    ]

    ts_multi_2 = pd.Series([4] * 8, index=multi_2)
    ts_multi_2.index.rename(['a', 'c', 'b'], inplace=True)

    # A second ts is inserted with some index in common with the first
    # one: appdate_1, pubdate_1,and insertion_date_1. The value is set
    # at 4, which matches the previous value of the "2015-01-01" point.

711
    diff = tsh.insert(engine, ts_multi_2, 'ts_multi', 'test')
712
713
714
715
716
717
718
719
720
721
722
723
724
725
    assert_df("""
                                                    ts_multi
a          b                   c                            
2015-01-01 2015-01-31 12:30:00 2015-01-31 12:00:00       4.0
2015-01-02 2015-01-21 12:30:00 2015-01-21 12:00:00       4.0
           2015-01-31 12:30:00 2015-01-31 12:00:00       4.0
2015-01-03 2015-01-21 12:30:00 2015-01-21 12:00:00       4.0
           2015-01-31 12:30:00 2015-01-31 12:00:00       4.0
2015-01-04 2015-01-21 12:30:00 2015-01-21 12:00:00       4.0
           2015-01-31 12:30:00 2015-01-31 12:00:00       4.0
        """, pd.DataFrame(diff.sort_index()))
    # the differential skips a value for "2015-01-01"
    # which does not change from the previous ts

726
    ts = tsh.get(engine, 'ts_multi')
727
728
729
    assert_df("""
                                                    ts_multi
a          b                   c                            
730
731
732
733
734
735
736
737
738
739
740
741
2015-01-01 2015-01-11 12:30:00 2015-01-11 12:00:00       0.0
           2015-01-21 12:30:00 2015-01-21 12:00:00       4.0
           2015-01-31 12:30:00 2015-01-31 12:00:00       4.0
2015-01-02 2015-01-11 12:30:00 2015-01-11 12:00:00       1.0
           2015-01-21 12:30:00 2015-01-21 12:00:00       4.0
           2015-01-31 12:30:00 2015-01-31 12:00:00       4.0
2015-01-03 2015-01-11 12:30:00 2015-01-11 12:00:00       2.0
           2015-01-21 12:30:00 2015-01-21 12:00:00       4.0
           2015-01-31 12:30:00 2015-01-31 12:00:00       4.0
2015-01-04 2015-01-11 12:30:00 2015-01-11 12:00:00       3.0
           2015-01-21 12:30:00 2015-01-21 12:00:00       4.0
           2015-01-31 12:30:00 2015-01-31 12:00:00       4.0
742
743
744
        """, pd.DataFrame(ts.sort_index()))

    # the result ts have now 3 values for each point in 'a'
745
746


747
def test_get_history(engine, tsh):
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
    for numserie in (1, 2, 3):
        with engine.connect() as cn:
            with tsh.newchangeset(cn, 'aurelien.campeas@pythonian.fr',
                                  _insertion_date=datetime(2017, 2, numserie)):
                tsh.insert(cn, genserie(datetime(2017, 1, 1), 'D', numserie), 'smallserie')

    ts = tsh.get(engine, 'smallserie')
    assert_df("""
2017-01-01    0.0
2017-01-02    1.0
2017-01-03    2.0
""", ts)

    logs = tsh.log(engine, names=['smallserie'])
    assert [
        {'author': 'aurelien.campeas@pythonian.fr',
         'date': datetime(2017, 2, 1, 0, 0),
         'names': ['smallserie']
        },
        {'author': 'aurelien.campeas@pythonian.fr',
         'date': datetime(2017, 2, 2, 0, 0),
         'names': ['smallserie']
        },
        {'author': 'aurelien.campeas@pythonian.fr',
         'date': datetime(2017, 2, 3, 0, 0),
         'names': ['smallserie']
        }
    ] == [{k: v for k, v in log.items() if k != 'rev'}
          for log in logs]
    histts = tsh.get_history(engine, 'smallserie')
778
    assert histts.name == 'smallserie'
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830

    assert_df("""
insertion_date  value_date
2017-02-01      2017-01-01    0.0
2017-02-02      2017-01-01    0.0
                2017-01-02    1.0
2017-02-03      2017-01-01    0.0
                2017-01-02    1.0
                2017-01-03    2.0
""", histts)

    for idx, idate in enumerate(histts.groupby('insertion_date').groups):
        with engine.connect() as cn:
            with tsh.newchangeset(cn, 'aurelien.campeas@pythonian.f',
                                  _insertion_date=idate):
                tsh.insert(cn, histts[idate], 'smallserie2')

    # this is perfectly round-tripable
    assert (tsh.get(engine, 'smallserie2') == ts).all()
    assert (tsh.get_history(engine, 'smallserie2') == histts).all()

    # get history ranges
    tsa = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2017, 2, 2))
    assert_df("""
insertion_date  value_date
2017-02-02      2017-01-01    0.0
                2017-01-02    1.0
2017-02-03      2017-01-01    0.0
                2017-01-02    1.0
                2017-01-03    2.0
""", tsa)

    tsb = tsh.get_history(engine, 'smallserie',
                          to_insertion_date=datetime(2017, 2, 2))
    assert_df("""
insertion_date  value_date
2017-02-01      2017-01-01    0.0
2017-02-02      2017-01-01    0.0
                2017-01-02    1.0
""", tsb)

    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2017, 2, 2),
                          to_insertion_date=datetime(2017, 2, 2))
    assert_df("""
insertion_date  value_date
2017-02-02      2017-01-01    0.0
                2017-01-02    1.0
""", tsc)


831
def test_add_na(engine, tsh):
832
833
834
835
836
    # a serie of NaNs won't be insert in base
    # in case of first insertion
    ts_nan = genserie(datetime(2010, 1, 1), 'D', 5)
    ts_nan[[True] * len(ts_nan)] = np.nan

837
    diff = tsh.insert(engine, ts_nan, 'ts_add_na', 'test')
838
    assert diff is None
839
    result = tsh.get(engine, 'ts_add_na')
840
841
842
843
    assert result is None

    # in case of insertion in existing data
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 5)
844
    tsh.insert(engine, ts_begin, 'ts_add_na', 'test')
845
846
847
848
849

    ts_nan = genserie(datetime(2010, 1, 6), 'D', 5)
    ts_nan[[True] * len(ts_nan)] = np.nan
    ts_nan = pd.concat([ts_begin, ts_nan])

850
    diff = tsh.insert(engine, ts_nan, 'ts_add_na', 'test')
851
852
    assert diff is None

853
    result = tsh.get(engine, 'ts_add_na')
854
    assert len(result) == 5
855
856


857
def test_dtype_mismatch(engine, tsh):
858
    tsh.insert(engine,
859
860
861
862
863
               genserie(datetime(2015, 1, 1), 'D', 11).astype('str'),
               'error1',
               'test')

    with pytest.raises(Exception) as excinfo:
864
        tsh.insert(engine,
865
866
867
868
869
870
                   genserie(datetime(2015, 1, 1), 'D', 11),
                   'error1',
                   'test')

    assert 'Type error when inserting error1, new type is float64, type in base is object' == str(excinfo.value)

871
    tsh.insert(engine,
872
873
874
875
876
               genserie(datetime(2015, 1, 1), 'D', 11),
               'error2',
               'test')

    with pytest.raises(Exception) as excinfo:
877
        tsh.insert(engine,
878
879
880
881
882
                   genserie(datetime(2015, 1, 1), 'D', 11).astype('str'),
                   'error2',
                   'test')

    assert 'Type error when inserting error2, new type is object, type in base is float64' == str(excinfo.value)
883
884


885
@pytest.mark.perf
886
def test_bigdata(engine, tracker, tsh):
887
888
889
890
891
892
893
    def create_data():
        for year in range(2015, 2020):
            serie = genserie(datetime(year, 1, 1), '10Min', 6 * 24 * 365)
            tsh.insert(engine, serie, 'big', 'aurelien.campeas@pythonian.fr')

    t0 = time()
    create_data()
894
    t1 = time() - t0
895
    tshclass = tsh.__class__.__name__
896

897
    with engine.connect() as cn:
898
        cn.execute('set search_path to "{}.timeserie"'.format(tsh.namespace))
899
900
        df = pd.read_sql('select id, diff, snapshot from big order by id', cn)

901
902
903
    for attr in ('diff', 'snapshot'):
        df[attr] = df[attr].apply(lambda x: 0 if x is None else len(x))

904
    size = df[['diff', 'snapshot']].sum().to_dict()
905
    tracker.append({'test': 'bigdata_insert',
906
907
908
909
                    'class': tshclass,
                    'time': t1,
                    'diffsize': size['diff'],
                    'snapsize': size['snapshot']})
910

911
912
913
914
915
916
917
918
919
    t0 = time()
    tsh.get_history(engine, 'big')
    t1 = time() - t0
    tracker.append({'test': 'bigdata_gethistory',
                    'class': tshclass,
                    'time': t1,
                    'diffsize': None,
                    'snapsize': None})

920

921
@pytest.mark.perf
922
def test_lots_of_diffs(engine, tracker, tsh):
923
924
925
926
927
928
929
930
931
932
    def create_data():
        for month in range(1, 4):
            days = calendar.monthrange(2017, month)[1]
            for day in range(1, days+1):
                serie = genserie(datetime(2017, month, day), '10Min', 6*24)
                with engine.connect() as cn:
                    tsh.insert(cn, serie, 'manydiffs', 'aurelien.campeas@pythonian.fr')

    t0 = time()
    create_data()
933
    t1 = time() - t0
934
    tshclass = tsh.__class__.__name__
935

936
    with engine.connect() as cn:
937
        cn.execute('set search_path to "{}.timeserie"'.format(tsh.namespace))
938
939
        df = pd.read_sql("select id, diff, snapshot from manydiffs order by id ",
                         cn)
940
941
942
    for attr in ('diff', 'snapshot'):
        df[attr] = df[attr].apply(lambda x: 0 if x is None else len(x))

943
    size = df[['diff', 'snapshot']].sum().to_dict()
944
    tracker.append({'test': 'lots_of_diffs_insert',
945
946
947
948
                    'class': tshclass,
                    'time': t1,
                    'diffsize': size['diff'],
                    'snapsize': size['snapshot']})
949
950
951
952
953
954
955
956
957

    t0 = time()
    tsh.get_history(engine, 'manydiffs')
    t1 = time() - t0
    tracker.append({'test': 'lots_of_diffs_gethistory',
                    'class': tshclass,
                    'time': t1,
                    'diffsize': None,
                    'snapsize': None})