test_tsio.py 28.3 KB
Newer Older
1
2
3
# coding: utf-8
from pathlib import Path
from datetime import datetime
4
from time import time
5
from dateutil import parser
6
import calendar
7
8
9

import pandas as pd
import numpy as np
10
import pytest
11
from mock import patch
12

13
from tshistory.testutil import assert_group_equals, genserie, assert_df
14

15
DATADIR = Path(__file__).parent / 'data'
16

Aurélien Campéas's avatar
Aurélien Campéas committed
17

18
def test_changeset(engine, tsh):
19
    index = pd.date_range(start=datetime(2017, 1, 1), freq='D', periods=3)
20
    data = [1., 2., 3.]
21

22
23
    with patch('tshistory.tsio.datetime') as mock_date:
        mock_date.now.return_value = datetime(2020, 1, 1)
24
        with engine.connect() as cn:
25
26
27
            with tsh.newchangeset(cn, 'babar'):
                tsh.insert(cn, pd.Series(data, index=index), 'ts_values')
                tsh.insert(cn, pd.Series(['a', 'b', 'c'], index=index), 'ts_othervalues')
28

29
30
        g = tsh.get_group(engine, 'ts_values')
        g2 = tsh.get_group(engine, 'ts_othervalues')
31
        assert_group_equals(g, g2)
32

33
        with pytest.raises(AssertionError):
34
            tsh.insert(engine, pd.Series([2, 3, 4], index=index), 'ts_values')
35

36
        with engine.connect() as cn:
37
            data.append(data.pop(0))
38
39
            with tsh.newchangeset(cn, 'celeste'):
                tsh.insert(cn, pd.Series(data, index=index), 'ts_values')
40
                # below should be a noop
41
                tsh.insert(cn, pd.Series(['a', 'b', 'c'], index=index), 'ts_othervalues')
42

43
    g = tsh.get_group(engine, 'ts_values')
44
45
    assert ['ts_values'] == list(g.keys())

46
    assert_df("""
47
48
49
2017-01-01    2.0
2017-01-02    3.0
2017-01-03    1.0
50
""", tsh.get(engine, 'ts_values'))
51

52
    assert_df("""
53
54
55
2017-01-01    a
2017-01-02    b
2017-01-03    c
56
""", tsh.get(engine, 'ts_othervalues'))
57

58
    log = tsh.log(engine, names=['ts_values', 'ts_othervalues'])
59
60
61
62
63
64
65
66
67
68
69
    assert [
        {'author': 'babar',
         'rev': 1,
         'date': datetime(2020, 1, 1, 0, 0),
         'names': ['ts_values', 'ts_othervalues']},
        {'author': 'celeste',
         'rev': 2,
         'date': datetime(2020, 1, 1, 0, 0),
         'names': ['ts_values']}
    ] == log

70
    log = tsh.log(engine, names=['ts_othervalues'])
71
72
    assert len(log) == 1
    assert log[0]['rev'] == 1
73
    assert log[0]['names'] == ['ts_values', 'ts_othervalues']
74

75
    log = tsh.log(engine, fromrev=2)
76
77
    assert len(log) == 1

78
    log = tsh.log(engine, torev=1)
79
80
    assert len(log) == 1

81
    info = tsh.info(engine)
82
83
84
85
86
87
    assert {
        'changeset count': 2,
        'serie names': ['ts_othervalues', 'ts_values'],
        'series count': 2
    } == info

88

89
def test_tstamp_roundtrip(engine, tsh):
90
91
    ts = genserie(datetime(2017, 10, 28, 23),
                  'H', 4, tz='UTC')
92
93
94
95
96
97
98
99
100
101
    ts.index = ts.index.tz_convert('Europe/Paris')

    assert_df("""
2017-10-29 01:00:00+02:00    0
2017-10-29 02:00:00+02:00    1
2017-10-29 02:00:00+01:00    2
2017-10-29 03:00:00+01:00    3
Freq: H
    """, ts)

102
103
    tsh.insert(engine, ts, 'tztest', 'Babar')
    back = tsh.get(engine, 'tztest')
104
105
106
107
108
109
110
111
112
113
114
115
116

    # though un localized we understand it's been normalized to utc
    assert_df("""
2017-10-28 23:00:00    0.0
2017-10-29 00:00:00    1.0
2017-10-29 01:00:00    2.0
2017-10-29 02:00:00    3.0
""", back)

    back.index = back.index.tz_localize('UTC')
    assert (ts.index == back.index).all()


117
def test_differential(engine, tsh):
118
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 10)
119
    tsh.insert(engine, ts_begin, 'ts_test', 'test')
120

121
122
    assert tsh.exists(engine, 'ts_test')
    assert not tsh.exists(engine, 'this_does_not_exist')
123

124
    assert_df("""
125
126
127
128
129
130
131
132
133
134
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    3.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
135
""", tsh.get(engine, 'ts_test'))
136
137

    # we should detect the emission of a message
138
    tsh.insert(engine, ts_begin, 'ts_test', 'babar')
139

140
    assert_df("""
141
142
143
144
145
146
147
148
149
150
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    3.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
151
""", tsh.get(engine, 'ts_test'))
152
153
154
155

    ts_slight_variation = ts_begin.copy()
    ts_slight_variation.iloc[3] = 0
    ts_slight_variation.iloc[6] = 0
156
    tsh.insert(engine, ts_slight_variation, 'ts_test', 'celeste')
157

158
    assert_df("""
159
160
161
162
163
164
165
166
167
168
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    0.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    0.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
169
""", tsh.get(engine, 'ts_test'))
170

171
    ts_longer = genserie(datetime(2010, 1, 3), 'D', 15)
172
173
174
175
    ts_longer.iloc[1] = 2.48
    ts_longer.iloc[3] = 3.14
    ts_longer.iloc[5] = ts_begin.iloc[7]

176
    tsh.insert(engine, ts_longer, 'ts_test', 'test')
177

178
    assert_df("""
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
2010-01-01     0.00
2010-01-02     1.00
2010-01-03     0.00
2010-01-04     2.48
2010-01-05     2.00
2010-01-06     3.14
2010-01-07     4.00
2010-01-08     7.00
2010-01-09     6.00
2010-01-10     7.00
2010-01-11     8.00
2010-01-12     9.00
2010-01-13    10.00
2010-01-14    11.00
2010-01-15    12.00
2010-01-16    13.00
2010-01-17    14.00
196
""", tsh.get(engine, 'ts_test'))
197
198

    # start testing manual overrides
199
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 5, initval=[2])
200
    ts_begin.loc['2010-01-04'] = -1
201
    tsh.insert(engine, ts_begin, 'ts_mixte', 'test')
202
203

    # -1 represents bogus upstream data
204
    assert_df("""
205
206
207
208
209
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
210
""", tsh.get(engine, 'ts_mixte'))
211
212

    # refresh all the period + 1 extra data point
213
    ts_more = genserie(datetime(2010, 1, 2), 'D', 5, [2])
214
    ts_more.loc['2010-01-04'] = -1
215
    tsh.insert(engine, ts_more, 'ts_mixte', 'test')
216

217
    assert_df("""
218
219
220
221
222
223
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
224
""", tsh.get(engine, 'ts_mixte'))
225
226

    # just append an extra data point
227
228
    # with no intersection with the previous ts
    ts_one_more = genserie(datetime(2010, 1, 7), 'D', 1, [3])
229
    tsh.insert(engine, ts_one_more, 'ts_mixte', 'test')
230

231
    assert_df("""
232
233
234
235
236
237
238
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
239
""", tsh.get(engine, 'ts_mixte'))
240

241
    hist = pd.read_sql('select id, parent from timeserie.ts_test order by id',
Aurélien Campéas's avatar
Aurélien Campéas committed
242
                       engine)
243
    assert_df("""
244
245
246
247
   id  parent
0   1     NaN
1   2     1.0
2   3     2.0
248
""", hist)
249

250
    hist = pd.read_sql('select id, parent from timeserie.ts_mixte order by id',
Aurélien Campéas's avatar
Aurélien Campéas committed
251
                       engine)
252
    assert_df("""
253
254
255
256
   id  parent
0   1     NaN
1   2     1.0
2   3     2.0
257
""", hist)
258

259
    allts = pd.read_sql("select name, table_name from registry "
260
                        "where name in ('ts_test', 'ts_mixte')",
261
262
                        engine)

263
    assert_df("""
264
265
266
       name          table_name
0   ts_test   timeserie.ts_test
1  ts_mixte  timeserie.ts_mixte
267
""", allts)
268

269
    assert_df("""
270
271
272
273
274
275
276
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
277
""", tsh.get(engine, 'ts_mixte',
278
             revision_date=datetime.now()))
279
280


281
def test_bad_import(engine, tsh):
282
283
284
285
286
    # the data were parsed as date by pd.read_json()
    df_result = pd.read_csv(DATADIR / 'test_data.csv')
    df_result['Gas Day'] = df_result['Gas Day'].apply(parser.parse, dayfirst=True, yearfirst=False)
    df_result.set_index('Gas Day', inplace=True)
    ts = df_result['SC']
287

288
289
    tsh.insert(engine, ts, 'SND_SC', 'test')
    result = tsh.get(engine, 'SND_SC')
290
    assert result.dtype == 'float64'
291
292
293

    # insertion of empty ts
    ts = pd.Series(name='truc', dtype='object')
294
295
    tsh.insert(engine, ts, 'empty_ts', 'test')
    assert tsh.get(engine, 'empty_ts') is None
296
297
298

    # nan in ts
    # all na
299
    ts = genserie(datetime(2010, 1, 10), 'D', 10, [np.nan], name='truc')
300
301
    tsh.insert(engine, ts, 'test_nan', 'test')
    assert tsh.get(engine, 'test_nan') is None
302
303
304
305
306

    # mixe na
    ts = pd.Series([np.nan] * 5 + [3] * 5,
                   index=pd.date_range(start=datetime(2010, 1, 10),
                                       freq='D', periods=10), name='truc')
307
308
    tsh.insert(engine, ts, 'test_nan', 'test')
    result = tsh.get(engine, 'test_nan')
309

310
311
    tsh.insert(engine, ts, 'test_nan', 'test')
    result = tsh.get(engine, 'test_nan')
312
    assert_df("""
313
314
315
316
317
2010-01-15    3.0
2010-01-16    3.0
2010-01-17    3.0
2010-01-18    3.0
2010-01-19    3.0
318
""", result)
319
320
321

    # get_ts with name not in database

322
    tsh.get(engine, 'inexisting_name', 'test')
323
324


325
def test_revision_date(engine, tsh):
326
    idate1 = datetime(2015, 1, 1, 15, 43, 23)
327
    with tsh.newchangeset(engine, 'test', _insertion_date=idate1):
328

329
        ts = genserie(datetime(2010, 1, 4), 'D', 4, [1], name='truc')
330
331
        tsh.insert(engine, ts, 'ts_through_time')
        assert idate1 == tsh.latest_insertion_date(engine, 'ts_through_time')
332

333
    idate2 = datetime(2015, 1, 2, 15, 43, 23)
334
    with tsh.newchangeset(engine, 'test', _insertion_date=idate2):
335

336
        ts = genserie(datetime(2010, 1, 4), 'D', 4, [2], name='truc')
337
338
        tsh.insert(engine, ts, 'ts_through_time')
        assert idate2 == tsh.latest_insertion_date(engine, 'ts_through_time')
339

340
    idate3 = datetime(2015, 1, 3, 15, 43, 23)
341
    with tsh.newchangeset(engine, 'test', _insertion_date=idate3):
342

343
        ts = genserie(datetime(2010, 1, 4), 'D', 4, [3], name='truc')
344
345
        tsh.insert(engine, ts, 'ts_through_time')
        assert idate3 == tsh.latest_insertion_date(engine, 'ts_through_time')
346

347
    ts = tsh.get(engine, 'ts_through_time')
348

349
    assert_df("""
350
351
352
353
2010-01-04    3.0
2010-01-05    3.0
2010-01-06    3.0
2010-01-07    3.0
354
""", ts)
355

356
    ts = tsh.get(engine, 'ts_through_time',
Aurélien Campéas's avatar
Aurélien Campéas committed
357
                 revision_date=datetime(2015, 1, 2, 18, 43, 23))
358

359
    assert_df("""
360
361
362
363
2010-01-04    2.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    2.0
364
""", ts)
365

366
    ts = tsh.get(engine, 'ts_through_time',
367
                 revision_date=datetime(2015, 1, 1, 18, 43, 23))
368

369
    assert_df("""
370
371
372
373
2010-01-04    1.0
2010-01-05    1.0
2010-01-06    1.0
2010-01-07    1.0
374
""", ts)
375

376
    ts = tsh.get(engine, 'ts_through_time',
377
                 revision_date=datetime(2014, 1, 1, 18, 43, 23))
378
379
380

    assert ts is None

381

382
def test_snapshots(engine, tsh):
383
    baseinterval = tsh._snapshot_interval
384
    tsh._snapshot_interval = 4
385

386
    with engine.connect() as cn:
387
        for tscount in range(1, 11):
388
            ts = genserie(datetime(2015, 1, 1), 'D', tscount, [1])
389
            diff = tsh.insert(cn, ts, 'growing', 'babar')
390
391
            assert diff.index[0] == diff.index[-1] == ts.index[-1]

392
    diff = tsh.insert(engine, ts, 'growing', 'babar')
393
    assert diff is None
394

395
396
    df = pd.read_sql("select id from timeserie.growing where snapshot is not null",
                     engine)
397
    assert_df("""
398
399
   id
0   1
400
401
402
1   4
2   8
3  10
403
""", df)
404

405
    ts = tsh.get(engine, 'growing')
406
    assert_df("""
407
408
409
410
411
412
413
414
415
416
2015-01-01    1.0
2015-01-02    1.0
2015-01-03    1.0
2015-01-04    1.0
2015-01-05    1.0
2015-01-06    1.0
2015-01-07    1.0
2015-01-08    1.0
2015-01-09    1.0
2015-01-10    1.0
417
""", ts)
418

419
    df = pd.read_sql("select id, diff, snapshot from timeserie.growing order by id", engine)
420
421
422
    for attr in ('diff', 'snapshot'):
        df[attr] = df[attr].apply(lambda x: 0 if x is None else len(x))

423
424
425
426
427
428
429
430
431
432
433
434
    assert_df("""
id  diff  snapshot
0   1     0        35
1   2    36         0
2   3    36         0
3   4    36        47
4   5    36         0
5   6    36         0
6   7    36         0
7   8    36        59
8   9    36         0
9  10    36        67
435
""", df)
436

437
438
    table = tsh._get_ts_table(engine, 'growing')
    snapid, snap = tsh._find_snapshot(engine, table, ())
439
440
    assert snapid == 10
    assert (ts == snap).all()
441
    tsh._snapshot_interval = baseinterval
442
443


444
def test_deletion(engine, tsh):
445
446
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 11)
    ts_begin.iloc[-1] = np.nan
447
    tsh.insert(engine, ts_begin, 'ts_del', 'test')
448

449
    ts = tsh._build_snapshot_upto(engine, tsh._get_ts_table(engine, 'ts_del'))
450
    assert ts.iloc[-1] == 9.0
451

452
    ts_begin.iloc[0] = np.nan
453
    ts_begin.iloc[3] = np.nan
454

455
    tsh.insert(engine, ts_begin, 'ts_del', 'test')
456

457
    assert_df("""
458
459
460
461
462
463
464
2010-01-02    1.0
2010-01-03    2.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
465
2010-01-10    9.0
466
""", tsh.get(engine, 'ts_del'))
467

468
    ts2 = tsh.get(engine, 'ts_del',
469
470
                 # force snapshot reconstruction feature
                 revision_date=datetime(2038, 1, 1))
471
    assert (tsh.get(engine, 'ts_del') == ts2).all()
472

473
474
475
    ts_begin.iloc[0] = 42
    ts_begin.iloc[3] = 23

476
    tsh.insert(engine, ts_begin, 'ts_del', 'test')
477

478
    assert_df("""
479
480
481
482
483
484
485
486
487
2010-01-01    42.0
2010-01-02     1.0
2010-01-03     2.0
2010-01-04    23.0
2010-01-05     4.0
2010-01-06     5.0
2010-01-07     6.0
2010-01-08     7.0
2010-01-09     8.0
488
2010-01-10     9.0
489
""", tsh.get(engine, 'ts_del'))
490
491
492

    # now with string!

493
    ts_string = genserie(datetime(2010, 1, 1), 'D', 10, ['machin'])
494
    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
495
496
497
498

    ts_string[4] = None
    ts_string[5] = None

499
    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
500
    assert_df("""
501
502
503
504
505
506
507
508
2010-01-01    machin
2010-01-02    machin
2010-01-03    machin
2010-01-04    machin
2010-01-07    machin
2010-01-08    machin
2010-01-09    machin
2010-01-10    machin
509
""", tsh.get(engine, 'ts_string_del'))
510
511
512
513

    ts_string[4] = 'truc'
    ts_string[6] = 'truc'

514
    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
515
    assert_df("""
516
517
518
519
520
521
522
523
2010-01-01    machin
2010-01-02    machin
2010-01-03    machin
2010-01-04    machin
2010-01-05      truc
2010-01-07      truc
2010-01-08    machin
2010-01-09    machin
524
2010-01-10    machin
525
""", tsh.get(engine, 'ts_string_del'))
526

527
    ts_string[ts_string.index] = np.nan
528
    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
529

530
    erased = tsh.get(engine, 'ts_string_del')
531
532
    assert len(erased) == 0

533
534
    # first insertion with only nan

535
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 10, [np.nan])
536
    tsh.insert(engine, ts_begin, 'ts_null', 'test')
537

538
    assert tsh.get(engine, 'ts_null') is None
539

540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
    # exhibit issue with nans handling
    ts_repushed = genserie(datetime(2010, 1, 1), 'D', 11)
    ts_repushed[0:3] = np.nan

    assert_df("""
2010-01-01     NaN
2010-01-02     NaN
2010-01-03     NaN
2010-01-04     3.0
2010-01-05     4.0
2010-01-06     5.0
2010-01-07     6.0
2010-01-08     7.0
2010-01-09     8.0
2010-01-10     9.0
2010-01-11    10.0
Freq: D
""", ts_repushed)

559
560
    tsh.insert(engine, ts_repushed, 'ts_repushed', 'test')
    diff = tsh.insert(engine, ts_repushed, 'ts_repushed', 'test')
561
562
    assert diff is None

563
    # there is no difference
564
    assert 0 == len(tsh._compute_diff(ts_repushed, ts_repushed))
565
566
567
568
569

    ts_add = genserie(datetime(2010, 1, 1), 'D', 15)
    ts_add.iloc[0] = np.nan
    ts_add.iloc[13:] = np.nan
    ts_add.iloc[8] = np.nan
570
    diff = tsh._compute_diff(ts_repushed, ts_add)
571
572
573
574
575
576

    assert_df("""
2010-01-02     1.0
2010-01-03     2.0
2010-01-09     NaN
2010-01-12    11.0
577
2010-01-13    12.0""", diff.sort_index())
578
579
580
581
    # value on nan => value
    # nan on value => nan
    # nan on nan => Nothing
    # nan on nothing=> Nothing
582

Aurélien Campéas's avatar
Aurélien Campéas committed
583
    # full erasing
584
585
    # numeric
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 4)
586
    tsh.insert(engine, ts_begin, 'ts_full_del', 'test')
587

Aurélien Campéas's avatar
Aurélien Campéas committed
588
    ts_begin.iloc[:] = np.nan
589
    tsh.insert(engine, ts_begin, 'ts_full_del', 'test')
590
591

    ts_end = genserie(datetime(2010, 1, 1), 'D', 4)
592
    tsh.insert(engine, ts_end, 'ts_full_del', 'test')
593
594
595
596

    # string

    ts_begin = genserie(datetime(2010, 1, 1), 'D', 4, ['text'])
597
    tsh.insert(engine, ts_begin, 'ts_full_del_str', 'test')
598
599

    ts_begin.iloc[:] = np.nan
600
    tsh.insert(engine, ts_begin, 'ts_full_del_str', 'test')
601
602

    ts_end = genserie(datetime(2010, 1, 1), 'D', 4, ['text'])
603
    tsh.insert(engine, ts_end, 'ts_full_del_str', 'test')
604

Aurélien Campéas's avatar
Aurélien Campéas committed
605

606
def test_multi_index(engine, tsh):
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
    appdate_0 = pd.DatetimeIndex(start=datetime(2015, 1, 1),
                                 end=datetime(2015, 1, 2),
                                 freq='D').values
    pubdate_0 = [pd.datetime(2015, 1, 11, 12, 0, 0)] * 2
    insertion_date_0 = [pd.datetime(2015, 1, 11, 12, 30, 0)] * 2

    multi = [
        appdate_0,
        np.array(pubdate_0),
        np.array(insertion_date_0)
    ]

    ts_multi = pd.Series(range(2), index=multi)
    ts_multi.index.rename(['b', 'c', 'a'], inplace=True)

622
    tsh.insert(engine, ts_multi, 'ts_multi_simple', 'test')
623

624
    ts = tsh.get(engine, 'ts_multi_simple')
625
626
627
    assert_df("""
                                                    ts_multi_simple
a                   b          c                                   
628
629
2015-01-11 12:30:00 2015-01-01 2015-01-11 12:00:00              0.0
                    2015-01-02 2015-01-11 12:00:00              1.0
630
631
""", pd.DataFrame(ts))

632
    diff = tsh.insert(engine, ts_multi, 'ts_multi_simple', 'test')
633
634
635
636
637
    assert diff is None

    ts_multi_2 = pd.Series([0, 2], index=multi)
    ts_multi_2.index.rename(['b', 'c', 'a'], inplace=True)

638
639
    tsh.insert(engine, ts_multi_2, 'ts_multi_simple', 'test')
    ts = tsh.get(engine, 'ts_multi_simple')
640
641
642
643

    assert_df("""
                                                    ts_multi_simple
a                   b          c                                   
644
645
2015-01-11 12:30:00 2015-01-01 2015-01-11 12:00:00              0.0
                    2015-01-02 2015-01-11 12:00:00              2.0
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
""", pd.DataFrame(ts))

    # bigger ts
    appdate_0 = pd.DatetimeIndex(start=datetime(2015, 1, 1),
                                 end=datetime(2015, 1, 4),
                                 freq='D').values
    pubdate_0 = [pd.datetime(2015, 1, 11, 12, 0, 0)] * 4
    insertion_date_0 = [pd.datetime(2015, 1, 11, 12, 30, 0)] * 4

    appdate_1 = pd.DatetimeIndex(start=datetime(2015, 1, 1),
                                 end=datetime(2015, 1, 4),
                                 freq='D').values

    pubdate_1 = [pd.datetime(2015, 1, 21, 12, 0, 0)] * 4
    insertion_date_1 = [pd.datetime(2015, 1, 21, 12, 30, 0)] * 4

    multi = [
        np.concatenate([appdate_0, appdate_1]),
        np.array(pubdate_0 + pubdate_1),
        np.array(insertion_date_0 + insertion_date_1)
    ]

    ts_multi = pd.Series(range(8), index=multi)
    ts_multi.index.rename(['a', 'c', 'b'], inplace=True)

671
672
    tsh.insert(engine, ts_multi, 'ts_multi', 'test')
    ts = tsh.get(engine, 'ts_multi')
673
674
675
676

    assert_df("""
                                                    ts_multi
a          b                   c                            
677
678
679
680
681
682
683
684
2015-01-01 2015-01-11 12:30:00 2015-01-11 12:00:00       0.0
           2015-01-21 12:30:00 2015-01-21 12:00:00       4.0
2015-01-02 2015-01-11 12:30:00 2015-01-11 12:00:00       1.0
           2015-01-21 12:30:00 2015-01-21 12:00:00       5.0
2015-01-03 2015-01-11 12:30:00 2015-01-11 12:00:00       2.0
           2015-01-21 12:30:00 2015-01-21 12:00:00       6.0
2015-01-04 2015-01-11 12:30:00 2015-01-11 12:00:00       3.0
           2015-01-21 12:30:00 2015-01-21 12:00:00       7.0
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
    """, pd.DataFrame(ts.sort_index()))
    # Note: the columnns are returned according to the alphabetic order

    appdate_2 = pd.DatetimeIndex(start=datetime(2015, 1, 1),
                                 end=datetime(2015, 1, 4),
                                 freq='D').values
    pubdate_2 = [pd.datetime(2015, 1, 31, 12, 0, 0)] * 4
    insertion_date_2 = [pd.datetime(2015, 1, 31, 12, 30, 0)] * 4

    multi_2 = [
        np.concatenate([appdate_1, appdate_2]),
        np.array(pubdate_1 + pubdate_2),
        np.array(insertion_date_1 + insertion_date_2)
    ]

    ts_multi_2 = pd.Series([4] * 8, index=multi_2)
    ts_multi_2.index.rename(['a', 'c', 'b'], inplace=True)

    # A second ts is inserted with some index in common with the first
    # one: appdate_1, pubdate_1,and insertion_date_1. The value is set
    # at 4, which matches the previous value of the "2015-01-01" point.

707
    diff = tsh.insert(engine, ts_multi_2, 'ts_multi', 'test')
708
709
710
711
712
713
714
715
716
717
718
719
720
721
    assert_df("""
                                                    ts_multi
a          b                   c                            
2015-01-01 2015-01-31 12:30:00 2015-01-31 12:00:00       4.0
2015-01-02 2015-01-21 12:30:00 2015-01-21 12:00:00       4.0
           2015-01-31 12:30:00 2015-01-31 12:00:00       4.0
2015-01-03 2015-01-21 12:30:00 2015-01-21 12:00:00       4.0
           2015-01-31 12:30:00 2015-01-31 12:00:00       4.0
2015-01-04 2015-01-21 12:30:00 2015-01-21 12:00:00       4.0
           2015-01-31 12:30:00 2015-01-31 12:00:00       4.0
        """, pd.DataFrame(diff.sort_index()))
    # the differential skips a value for "2015-01-01"
    # which does not change from the previous ts

722
    ts = tsh.get(engine, 'ts_multi')
723
724
725
    assert_df("""
                                                    ts_multi
a          b                   c                            
726
727
728
729
730
731
732
733
734
735
736
737
2015-01-01 2015-01-11 12:30:00 2015-01-11 12:00:00       0.0
           2015-01-21 12:30:00 2015-01-21 12:00:00       4.0
           2015-01-31 12:30:00 2015-01-31 12:00:00       4.0
2015-01-02 2015-01-11 12:30:00 2015-01-11 12:00:00       1.0
           2015-01-21 12:30:00 2015-01-21 12:00:00       4.0
           2015-01-31 12:30:00 2015-01-31 12:00:00       4.0
2015-01-03 2015-01-11 12:30:00 2015-01-11 12:00:00       2.0
           2015-01-21 12:30:00 2015-01-21 12:00:00       4.0
           2015-01-31 12:30:00 2015-01-31 12:00:00       4.0
2015-01-04 2015-01-11 12:30:00 2015-01-11 12:00:00       3.0
           2015-01-21 12:30:00 2015-01-21 12:00:00       4.0
           2015-01-31 12:30:00 2015-01-31 12:00:00       4.0
738
739
740
        """, pd.DataFrame(ts.sort_index()))

    # the result ts have now 3 values for each point in 'a'
741
742


743
def test_get_history(engine, tsh):
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
    for numserie in (1, 2, 3):
        with engine.connect() as cn:
            with tsh.newchangeset(cn, 'aurelien.campeas@pythonian.fr',
                                  _insertion_date=datetime(2017, 2, numserie)):
                tsh.insert(cn, genserie(datetime(2017, 1, 1), 'D', numserie), 'smallserie')

    ts = tsh.get(engine, 'smallserie')
    assert_df("""
2017-01-01    0.0
2017-01-02    1.0
2017-01-03    2.0
""", ts)

    logs = tsh.log(engine, names=['smallserie'])
    assert [
        {'author': 'aurelien.campeas@pythonian.fr',
         'date': datetime(2017, 2, 1, 0, 0),
         'names': ['smallserie']
        },
        {'author': 'aurelien.campeas@pythonian.fr',
         'date': datetime(2017, 2, 2, 0, 0),
         'names': ['smallserie']
        },
        {'author': 'aurelien.campeas@pythonian.fr',
         'date': datetime(2017, 2, 3, 0, 0),
         'names': ['smallserie']
        }
    ] == [{k: v for k, v in log.items() if k != 'rev'}
          for log in logs]
    histts = tsh.get_history(engine, 'smallserie')

    assert_df("""
insertion_date  value_date
2017-02-01      2017-01-01    0.0
2017-02-02      2017-01-01    0.0
                2017-01-02    1.0
2017-02-03      2017-01-01    0.0
                2017-01-02    1.0
                2017-01-03    2.0
""", histts)

    for idx, idate in enumerate(histts.groupby('insertion_date').groups):
        with engine.connect() as cn:
            with tsh.newchangeset(cn, 'aurelien.campeas@pythonian.f',
                                  _insertion_date=idate):
                tsh.insert(cn, histts[idate], 'smallserie2')

    # this is perfectly round-tripable
    assert (tsh.get(engine, 'smallserie2') == ts).all()
    assert (tsh.get_history(engine, 'smallserie2') == histts).all()

    # get history ranges
    tsa = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2017, 2, 2))
    assert_df("""
insertion_date  value_date
2017-02-02      2017-01-01    0.0
                2017-01-02    1.0
2017-02-03      2017-01-01    0.0
                2017-01-02    1.0
                2017-01-03    2.0
""", tsa)

    tsb = tsh.get_history(engine, 'smallserie',
                          to_insertion_date=datetime(2017, 2, 2))
    assert_df("""
insertion_date  value_date
2017-02-01      2017-01-01    0.0
2017-02-02      2017-01-01    0.0
                2017-01-02    1.0
""", tsb)

    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2017, 2, 2),
                          to_insertion_date=datetime(2017, 2, 2))
    assert_df("""
insertion_date  value_date
2017-02-02      2017-01-01    0.0
                2017-01-02    1.0
""", tsc)


826
def test_add_na(engine, tsh):
827
828
829
830
831
    # a serie of NaNs won't be insert in base
    # in case of first insertion
    ts_nan = genserie(datetime(2010, 1, 1), 'D', 5)
    ts_nan[[True] * len(ts_nan)] = np.nan

832
    diff = tsh.insert(engine, ts_nan, 'ts_add_na', 'test')
833
    assert diff is None
834
    result = tsh.get(engine, 'ts_add_na')
835
836
837
838
    assert result is None

    # in case of insertion in existing data
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 5)
839
    tsh.insert(engine, ts_begin, 'ts_add_na', 'test')
840
841
842
843
844

    ts_nan = genserie(datetime(2010, 1, 6), 'D', 5)
    ts_nan[[True] * len(ts_nan)] = np.nan
    ts_nan = pd.concat([ts_begin, ts_nan])

845
    diff = tsh.insert(engine, ts_nan, 'ts_add_na', 'test')
846
847
    assert diff is None

848
    result = tsh.get(engine, 'ts_add_na')
849
    assert len(result) == 5
850
851


852
def test_dtype_mismatch(engine, tsh):
853
    tsh.insert(engine,
854
855
856
857
858
               genserie(datetime(2015, 1, 1), 'D', 11).astype('str'),
               'error1',
               'test')

    with pytest.raises(Exception) as excinfo:
859
        tsh.insert(engine,
860
861
862
863
864
865
                   genserie(datetime(2015, 1, 1), 'D', 11),
                   'error1',
                   'test')

    assert 'Type error when inserting error1, new type is float64, type in base is object' == str(excinfo.value)

866
    tsh.insert(engine,
867
868
869
870
871
               genserie(datetime(2015, 1, 1), 'D', 11),
               'error2',
               'test')

    with pytest.raises(Exception) as excinfo:
872
        tsh.insert(engine,
873
874
875
876
877
                   genserie(datetime(2015, 1, 1), 'D', 11).astype('str'),
                   'error2',
                   'test')

    assert 'Type error when inserting error2, new type is object, type in base is float64' == str(excinfo.value)
878
879


880
@pytest.mark.perf
881
def test_bigdata(engine, tracker, tsh):
882
883
884
885
886
887
888
    def create_data():
        for year in range(2015, 2020):
            serie = genserie(datetime(year, 1, 1), '10Min', 6 * 24 * 365)
            tsh.insert(engine, serie, 'big', 'aurelien.campeas@pythonian.fr')

    t0 = time()
    create_data()
889
    t1 = time() - t0
890
    tshclass = tsh.__class__.__name__
891
892
893
894
895

    df = pd.read_sql("select id, diff, snapshot from timeserie.big order by id", engine)
    for attr in ('diff', 'snapshot'):
        df[attr] = df[attr].apply(lambda x: 0 if x is None else len(x))

896
897
898
899
900
901
    size = df[['diff', 'snapshot']].sum().to_dict()
    tracker.append({'test': 'bigdata',
                    'class': tshclass,
                    'time': t1,
                    'diffsize': size['diff'],
                    'snapsize': size['snapshot']})
902
903


904
@pytest.mark.perf
905
def test_lots_of_diffs(engine, tracker, tsh):
906
907
908
909
910
911
912
913
914
915
    def create_data():
        for month in range(1, 4):
            days = calendar.monthrange(2017, month)[1]
            for day in range(1, days+1):
                serie = genserie(datetime(2017, month, day), '10Min', 6*24)
                with engine.connect() as cn:
                    tsh.insert(cn, serie, 'manydiffs', 'aurelien.campeas@pythonian.fr')

    t0 = time()
    create_data()
916
    t1 = time() - t0
917
    tshclass = tsh.__class__.__name__
918
919
920
921
922
923

    df = pd.read_sql("select id, diff, snapshot from timeserie.manydiffs order by id ",
                     engine)
    for attr in ('diff', 'snapshot'):
        df[attr] = df[attr].apply(lambda x: 0 if x is None else len(x))

924
925
926
927
928
929
    size = df[['diff', 'snapshot']].sum().to_dict()
    tracker.append({'test': 'lots_of_diffs',
                    'class': tshclass,
                    'time': t1,
                    'diffsize': size['diff'],
                    'snapsize': size['snapshot']})