test_tsio.py 24.6 KB
Newer Older
1
2
3
4
5
6
7
# coding: utf-8
from pathlib import Path
from datetime import datetime
from dateutil import parser

import pandas as pd
import numpy as np
8
import pytest
9
from mock import patch
10

11
from tshistory.tsio import TimeSerie
12
13
14
15

DATADIR = Path(__file__).parent / 'data'


16
17
18
19
20
21
22
def assert_group_equals(g1, g2):
    """Assert that two groups hold the same names and equal series.

    Both arguments are mappings exposing ``.items()`` that yield
    ``(name, series)`` pairs; series are compared with ``.equals``.
    """
    by_name = lambda item: item[0]
    items1 = sorted(g1.items(), key=by_name)
    items2 = sorted(g2.items(), key=by_name)
    # zip() stops at the shortest input: compare the name lists first so
    # that a missing or extra series cannot go unnoticed.
    assert [n1 for n1, _ in items1] == [n2 for n2, _ in items2]
    for (_, s1), (_, s2) in zip(items1, items2):
        assert s1.equals(s2)


23
24
25
26
def assert_df(expected, df):
    """Assert that `df.to_string()` matches `expected`.

    Leading and trailing whitespace of both texts is ignored; inner
    whitespace is significant.
    """
    wanted = expected.strip()
    rendered = df.to_string().strip()
    assert wanted == rendered


27
28
29
30
31
32
33
34
35
36
37
38
def genserie(start, freq, repeat, initval=None, tz=None, name=None):
    """Build a series of `repeat` points on a regular date index.

    The index starts at `start` and steps by `freq` (optionally in
    timezone `tz`).  Without `initval` the values are 0..repeat-1;
    otherwise they are `initval * repeat` (so a one-element list is
    repeated for every point).
    """
    payload = range(repeat) if initval is None else initval * repeat
    stamps = pd.date_range(start=start, freq=freq, periods=repeat, tz=tz)
    return pd.Series(payload, name=name, index=stamps)

Aurélien Campéas's avatar
Aurélien Campéas committed
39

40
def test_changeset(engine):
    """Insert two series through named changesets, then check the
    resulting groups, values, log entries and info summary.
    """
    # instantiate one time serie handler object
    tsh = TimeSerie()

    index = pd.date_range(start=datetime(2017, 1, 1), freq='D', periods=3)
    data = [1., 2., 3.]

    # freeze the clock seen by tshistory.tsio so that the logged
    # changeset dates are predictable
    with patch('tshistory.tsio.datetime') as mock_date:
        mock_date.now.return_value = datetime(2020, 1, 1)
        with engine.connect() as cn:
            with tsh.newchangeset(cn, 'babar'):
                tsh.insert(cn, pd.Series(data, index=index), 'ts_values')
                tsh.insert(cn, pd.Series(['a', 'b', 'c'], index=index), 'ts_othervalues')

        g = tsh.get_group(engine, 'ts_values')
        g2 = tsh.get_group(engine, 'ts_othervalues')
        assert_group_equals(g, g2)

        # inserting outside of a changeset without an author must fail
        with pytest.raises(AssertionError):
            tsh.insert(engine, pd.Series([2, 3, 4], index=index), 'ts_values')

        with engine.connect() as cn:
            # rotate the values: [2., 3., 1.]
            data.append(data.pop(0))
            with tsh.newchangeset(cn, 'celeste'):
                tsh.insert(cn, pd.Series(data, index=index), 'ts_values')
                # below should be a noop
                tsh.insert(cn, pd.Series(['a', 'b', 'c'], index=index), 'ts_othervalues')

    g = tsh.get_group(engine, 'ts_values')
    assert ['ts_values'] == list(g.keys())

    assert_df("""
2017-01-01    2.0
2017-01-02    3.0
2017-01-03    1.0
""", tsh.get(engine, 'ts_values'))

    assert_df("""
2017-01-01    a
2017-01-02    b
2017-01-03    c
""", tsh.get(engine, 'ts_othervalues'))

    log = tsh.log(engine)
    assert [
        {'author': 'babar',
         'rev': 1,
         'date': datetime(2020, 1, 1, 0, 0),
         'names': ['ts_values', 'ts_othervalues']},
        {'author': 'celeste',
         'rev': 2,
         'date': datetime(2020, 1, 1, 0, 0),
         'names': ['ts_values']}
    ] == log

    log = tsh.log(engine, names=['ts_othervalues'])
    assert len(log) == 1
    assert log[0]['rev'] == 1
    assert log[0]['names'] == ['ts_values', 'ts_othervalues']

    log = tsh.log(engine, fromrev=2)
    assert len(log) == 1

    log = tsh.log(engine, torev=1)
    assert len(log) == 1

    info = tsh.info(engine)
    assert {
        'changeset count': 2,
        'serie names': ['ts_othervalues', 'ts_values'],
        'series count': 2
    } == info

113

114
def test_tstamp_roundtrip(engine):
    """Store a Europe/Paris-localized series spanning the autumn DST
    change and check that the index read back matches once localized
    to UTC.
    """
    tsh = TimeSerie()
    ts = genserie(datetime(2017, 10, 28, 23),
                  'H', 4, tz='UTC')
    ts.index = ts.index.tz_convert('Europe/Paris')

    assert_df("""
2017-10-29 01:00:00+02:00    0
2017-10-29 02:00:00+02:00    1
2017-10-29 02:00:00+01:00    2
2017-10-29 03:00:00+01:00    3
Freq: H
    """, ts)

    tsh.insert(engine, ts, 'tztest', 'Babar')
    back = tsh.get(engine, 'tztest')

    # though un localized we understand it's been normalized to utc
    assert_df("""
2017-10-28 23:00:00    0.0
2017-10-29 00:00:00    1.0
2017-10-29 01:00:00    2.0
2017-10-29 02:00:00    3.0
""", back)

    back.index = back.index.tz_localize('UTC')
    assert (ts.index == back.index).all()


143
def test_differential(engine):
    """Insert successive versions of two series ('ts_test', 'ts_mixte')
    and check the merged values, the id/parent chain of the stored
    revisions and the registry entries.
    """
    # instantiate one time serie handler object
    tsh = TimeSerie()

    ts_begin = genserie(datetime(2010, 1, 1), 'D', 10)
    tsh.insert(engine, ts_begin, 'ts_test', 'test')

    assert tsh.exists(engine, 'ts_test')
    assert not tsh.exists(engine, 'this_does_not_exist')

    assert_df("""
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    3.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
""", tsh.get(engine, 'ts_test'))

    # we should detect the emission of a message
    tsh.insert(engine, ts_begin, 'ts_test', 'babar')

    assert_df("""
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    3.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
""", tsh.get(engine, 'ts_test'))

    ts_slight_variation = ts_begin.copy()
    ts_slight_variation.iloc[3] = 0
    ts_slight_variation.iloc[6] = 0
    tsh.insert(engine, ts_slight_variation, 'ts_test', 'celeste')

    assert_df("""
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    0.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    0.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
""", tsh.get(engine, 'ts_test'))

    ts_longer = genserie(datetime(2010, 1, 3), 'D', 15)
    ts_longer.iloc[1] = 2.48
    ts_longer.iloc[3] = 3.14
    ts_longer.iloc[5] = ts_begin.iloc[7]

    tsh.insert(engine, ts_longer, 'ts_test', 'test')

    assert_df("""
2010-01-01     0.00
2010-01-02     1.00
2010-01-03     0.00
2010-01-04     2.48
2010-01-05     2.00
2010-01-06     3.14
2010-01-07     4.00
2010-01-08     7.00
2010-01-09     6.00
2010-01-10     7.00
2010-01-11     8.00
2010-01-12     9.00
2010-01-13    10.00
2010-01-14    11.00
2010-01-15    12.00
2010-01-16    13.00
2010-01-17    14.00
""", tsh.get(engine, 'ts_test'))

    # start testing manual overrides
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 5, initval=[2])
    ts_begin.loc['2010-01-04'] = -1
    tsh.insert(engine, ts_begin, 'ts_mixte', 'test')

    # -1 represents bogus upstream data
    assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
""", tsh.get(engine, 'ts_mixte'))

    # refresh all the period + 1 extra data point
    ts_more = genserie(datetime(2010, 1, 2), 'D', 5, [2])
    ts_more.loc['2010-01-04'] = -1
    tsh.insert(engine, ts_more, 'ts_mixte', 'test')

    assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
""", tsh.get(engine, 'ts_mixte'))

    # just append an extra data point
    # with no intersection with the previous ts
    ts_one_more = genserie(datetime(2010, 1, 7), 'D', 1, [3])
    tsh.insert(engine, ts_one_more, 'ts_mixte', 'test')

    assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
""", tsh.get(engine, 'ts_mixte'))

    hist = pd.read_sql('select id, parent from timeserie.ts_test order by id',
                       engine)
    assert_df("""
   id  parent
0   1     NaN
1   2     1.0
2   3     2.0
""", hist)

    hist = pd.read_sql('select id, parent from timeserie.ts_mixte order by id',
                       engine)
    assert_df("""
   id  parent
0   1     NaN
1   2     1.0
2   3     2.0
""", hist)

    # NOTE(review): this query has no ORDER BY, so the expected row
    # order below relies on the order in which the database returns rows
    allts = pd.read_sql("select name, table_name from registry "
                        "where name in ('ts_test', 'ts_mixte')",
                        engine)

    assert_df("""
       name          table_name
0   ts_test   timeserie.ts_test
1  ts_mixte  timeserie.ts_mixte
""", allts)

    assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
""", tsh.get(engine, 'ts_mixte',
             revision_date=datetime.now()))
308
309
310


def test_bad_import(engine):
    """Exercise insertion of awkward inputs: a csv-sourced series, an
    empty series, an all-NaN series and a partially-NaN series.
    """
    # instantiate one time serie handler object
    tsh = TimeSerie()

    # the csv is loaded with pd.read_csv(); the 'Gas Day' column is then
    # parsed explicitly as day-first dates before becoming the index
    df_result = pd.read_csv(DATADIR / 'test_data.csv')
    df_result['Gas Day'] = df_result['Gas Day'].apply(parser.parse, dayfirst=True, yearfirst=False)
    df_result.set_index('Gas Day', inplace=True)
    ts = df_result['SC']

    tsh.insert(engine, ts, 'SND_SC', 'test')
    result = tsh.get(engine, 'SND_SC')
    assert result.dtype == 'float64'

    # insertion of empty ts
    ts = pd.Series(name='truc', dtype='object')
    tsh.insert(engine, ts, 'empty_ts', 'test')
    assert tsh.get(engine, 'empty_ts') is None

    # nan in ts
    # all na
    ts = genserie(datetime(2010, 1, 10), 'D', 10, [np.nan], name='truc')
    tsh.insert(engine, ts, 'test_nan', 'test')
    assert tsh.get(engine, 'test_nan') is None

    # mixed na
    ts = pd.Series([np.nan] * 5 + [3] * 5,
                   index=pd.date_range(start=datetime(2010, 1, 10),
                                       freq='D', periods=10), name='truc')
    tsh.insert(engine, ts, 'test_nan', 'test')
    result = tsh.get(engine, 'test_nan')

    # same series pushed a second time; only the result of this second
    # round is checked
    # NOTE(review): the first `result` above is never inspected -- confirm
    # whether an assertion is missing there
    tsh.insert(engine, ts, 'test_nan', 'test')
    result = tsh.get(engine, 'test_nan')
    assert_df("""
2010-01-15    3.0
2010-01-16    3.0
2010-01-17    3.0
2010-01-18    3.0
2010-01-19    3.0
""", result)

    # get_ts with name not in database

    # NOTE(review): nothing is asserted on the returned value, and 'test'
    # is passed as a third positional argument (it looks copied from the
    # insert() calls) -- confirm against the signature of TimeSerie.get
    tsh.get(engine, 'inexisting_name', 'test')
355
356
357


def test_revision_date(engine):
    """Insert three versions of one series at explicit insertion dates
    and read the series back as of several revision dates.
    """
    # instantiate one time serie handler object
    tsh = TimeSerie()

    # (insertion date, constant value of the series at that version)
    versions = [
        (datetime(2015, 1, 1, 15, 43, 23), 1),
        (datetime(2015, 1, 2, 15, 43, 23), 2),
        (datetime(2015, 1, 3, 15, 43, 23), 3),
    ]
    for idate, value in versions:
        with tsh.newchangeset(engine, 'test', _insertion_date=idate):
            ts = genserie(datetime(2010, 1, 4), 'D', 4, [value], name='truc')
            tsh.insert(engine, ts, 'ts_through_time')
            assert idate == tsh.latest_insertion_date(engine, 'ts_through_time')

    ts = tsh.get(engine, 'ts_through_time')

    assert_df("""
2010-01-04    3.0
2010-01-05    3.0
2010-01-06    3.0
2010-01-07    3.0
""", ts)

    ts = tsh.get(engine, 'ts_through_time',
                 revision_date=datetime(2015, 1, 2, 18, 43, 23))

    assert_df("""
2010-01-04    2.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    2.0
""", ts)

    ts = tsh.get(engine, 'ts_through_time',
                 revision_date=datetime(2015, 1, 1, 18, 43, 23))

    assert_df("""
2010-01-04    1.0
2010-01-05    1.0
2010-01-06    1.0
2010-01-07    1.0
""", ts)

    # a revision date earlier than the first insertion yields nothing
    ts = tsh.get(engine, 'ts_through_time',
                 revision_date=datetime(2014, 1, 1, 18, 43, 23))

    assert ts is None

416
417

def test_snapshots(engine):
    """Grow a series one point at a time with `_snapshot_interval` set
    to 4 and check which stored rows carry a snapshot.
    """
    tsh = TimeSerie()
    tsh._snapshot_interval = 4

    with engine.connect() as cn:
        for tscount in range(1, 11):
            ts = genserie(datetime(2015, 1, 1), 'D', tscount, [1])
            diff = tsh.insert(cn, ts, 'growing', 'babar')
            # each insertion adds exactly one point: the newest one
            assert diff.index[0] == diff.index[-1] == ts.index[-1]

    # pushing the last series again changes nothing
    diff = tsh.insert(engine, ts, 'growing', 'babar')
    assert diff is None

    # explicit ordering: the expected frame below depends on row order
    df = pd.read_sql("select id from timeserie.growing where snapshot is not null "
                     "order by id",
                     engine)
    assert_df("""
   id
0   1
1   4
2   8
3  10
""", df)

    ts = tsh.get(engine, 'growing')
    assert_df("""
2015-01-01    1.0
2015-01-02    1.0
2015-01-03    1.0
2015-01-04    1.0
2015-01-05    1.0
2015-01-06    1.0
2015-01-07    1.0
2015-01-08    1.0
2015-01-09    1.0
2015-01-10    1.0
""", ts)

    df = pd.read_sql("select id, diff, snapshot from timeserie.growing order by id", engine)
    # compare payload sizes rather than the raw stored payloads
    for attr in ('diff', 'snapshot'):
        df[attr] = df[attr].apply(lambda x: 0 if x is None else len(x))

    assert_df("""
   id  diff  snapshot
0   1     0        32
1   2    32         0
2   3    32         0
3   4    32       125
4   5    32         0
5   6    32         0
6   7    32         0
7   8    32       249
8   9    32         0
9  10    32       311
""", df)

    table = tsh._get_ts_table(engine, 'growing')
    snapid, snap = tsh._find_snapshot(engine, table, ())
    assert snapid == 10
    assert (ts == snap).all()
476
477
478


def test_deletion(engine):
    """Check how NaN / None values pushed over existing points erase
    them, for numeric and string series, including full erasure
    followed by re-insertion.
    """
    tsh = TimeSerie()

    ts_begin = genserie(datetime(2010, 1, 1), 'D', 11)
    ts_begin.iloc[-1] = np.nan
    tsh.insert(engine, ts_begin, 'ts_del', 'test')

    ts = tsh._build_snapshot_upto(engine, tsh._get_ts_table(engine, 'ts_del'))
    assert ts.iloc[-1] == 9.0

    ts_begin.iloc[0] = np.nan
    ts_begin.iloc[3] = np.nan

    tsh.insert(engine, ts_begin, 'ts_del', 'test')

    assert_df("""
2010-01-02    1.0
2010-01-03    2.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
""", tsh.get(engine, 'ts_del'))

    ts2 = tsh.get(engine, 'ts_del',
                 # force snapshot reconstruction feature
                 revision_date=datetime(2038, 1, 1))
    assert (tsh.get(engine, 'ts_del') == ts2).all()

    ts_begin.iloc[0] = 42
    ts_begin.iloc[3] = 23

    tsh.insert(engine, ts_begin, 'ts_del', 'test')

    assert_df("""
2010-01-01    42.0
2010-01-02     1.0
2010-01-03     2.0
2010-01-04    23.0
2010-01-05     4.0
2010-01-06     5.0
2010-01-07     6.0
2010-01-08     7.0
2010-01-09     8.0
2010-01-10     9.0
""", tsh.get(engine, 'ts_del'))

    # now with string!

    ts_string = genserie(datetime(2010, 1, 1), 'D', 10, ['machin'])
    tsh.insert(engine, ts_string, 'ts_string_del', 'test')

    ts_string[4] = None
    ts_string[5] = None

    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
    assert_df("""
2010-01-01    machin
2010-01-02    machin
2010-01-03    machin
2010-01-04    machin
2010-01-07    machin
2010-01-08    machin
2010-01-09    machin
2010-01-10    machin
""", tsh.get(engine, 'ts_string_del'))

    ts_string[4] = 'truc'
    ts_string[6] = 'truc'

    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
    assert_df("""
2010-01-01    machin
2010-01-02    machin
2010-01-03    machin
2010-01-04    machin
2010-01-05      truc
2010-01-07      truc
2010-01-08    machin
2010-01-09    machin
2010-01-10    machin
""", tsh.get(engine, 'ts_string_del'))

    ts_string[ts_string.index] = np.nan
    tsh.insert(engine, ts_string, 'ts_string_del', 'test')

    erased = tsh.get(engine, 'ts_string_del')
    assert len(erased) == 0

    # first insertion with only nan

    ts_begin = genserie(datetime(2010, 1, 1), 'D', 10, [np.nan])
    tsh.insert(engine, ts_begin, 'ts_null', 'test')

    assert tsh.get(engine, 'ts_null') is None

    # exhibit issue with nans handling
    ts_repushed = genserie(datetime(2010, 1, 1), 'D', 11)
    ts_repushed[0:3] = np.nan

    assert_df("""
2010-01-01     NaN
2010-01-02     NaN
2010-01-03     NaN
2010-01-04     3.0
2010-01-05     4.0
2010-01-06     5.0
2010-01-07     6.0
2010-01-08     7.0
2010-01-09     8.0
2010-01-10     9.0
2010-01-11    10.0
Freq: D
""", ts_repushed)

    tsh.insert(engine, ts_repushed, 'ts_repushed', 'test')
    diff = tsh.insert(engine, ts_repushed, 'ts_repushed', 'test')
    assert diff is None

    # there is no difference
    assert 0 == len(tsh._compute_diff(ts_repushed, ts_repushed))

    ts_add = genserie(datetime(2010, 1, 1), 'D', 15)
    ts_add.iloc[0] = np.nan
    ts_add.iloc[13:] = np.nan
    ts_add.iloc[8] = np.nan
    diff = tsh._compute_diff(ts_repushed, ts_add)

    assert_df("""
2010-01-02     1.0
2010-01-03     2.0
2010-01-09     NaN
2010-01-12    11.0
2010-01-13    12.0""", diff.sort_index())
    # value on nan => value
    # nan on value => nan
    # nan on nan => Nothing
    # nan on nothing=> Nothing

    # full erasing
    # numeric
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 4)
    tsh.insert(engine, ts_begin, 'ts_full_del', 'test')

    ts_begin.iloc[:] = np.nan
    tsh.insert(engine, ts_begin, 'ts_full_del', 'test')

    ts_end = genserie(datetime(2010, 1, 1), 'D', 4)
    tsh.insert(engine, ts_end, 'ts_full_del', 'test')

    # string

    ts_begin = genserie(datetime(2010, 1, 1), 'D', 4, ['text'])
    tsh.insert(engine, ts_begin, 'ts_full_del_str', 'test')

    ts_begin.iloc[:] = np.nan
    tsh.insert(engine, ts_begin, 'ts_full_del_str', 'test')

    ts_end = genserie(datetime(2010, 1, 1), 'D', 4, ['text'])
    tsh.insert(engine, ts_end, 'ts_full_del_str', 'test')
640

Aurélien Campéas's avatar
Aurélien Campéas committed
641

642
def test_multi_index(engine):
    """Insert and read back series indexed by a three-level index of
    dates, then check the differential and the merged result of a
    second, partially overlapping insertion.
    """
    tsh = TimeSerie()

    # pd.date_range / datetime replace the pd.DatetimeIndex(start=, end=)
    # constructor form and the pd.datetime alias, both gone from recent
    # pandas releases
    appdate_0 = pd.date_range(start=datetime(2015, 1, 1),
                              end=datetime(2015, 1, 2),
                              freq='D').values
    pubdate_0 = [datetime(2015, 1, 11, 12, 0, 0)] * 2
    insertion_date_0 = [datetime(2015, 1, 11, 12, 30, 0)] * 2

    multi = [
        appdate_0,
        np.array(pubdate_0),
        np.array(insertion_date_0)
    ]

    ts_multi = pd.Series(range(2), index=multi)
    ts_multi.index.rename(['b', 'c', 'a'], inplace=True)

    tsh.insert(engine, ts_multi, 'ts_multi_simple', 'test')

    ts = tsh.get(engine, 'ts_multi_simple')
    assert_df("""
                                                    ts_multi_simple
a                   b          c                                   
2015-01-11 12:30:00 2015-01-01 2015-01-11 12:00:00              0.0
                    2015-01-02 2015-01-11 12:00:00              1.0
""", pd.DataFrame(ts))

    diff = tsh.insert(engine, ts_multi, 'ts_multi_simple', 'test')
    assert diff is None

    ts_multi_2 = pd.Series([0, 2], index=multi)
    ts_multi_2.index.rename(['b', 'c', 'a'], inplace=True)

    tsh.insert(engine, ts_multi_2, 'ts_multi_simple', 'test')
    ts = tsh.get(engine, 'ts_multi_simple')

    assert_df("""
                                                    ts_multi_simple
a                   b          c                                   
2015-01-11 12:30:00 2015-01-01 2015-01-11 12:00:00              0.0
                    2015-01-02 2015-01-11 12:00:00              2.0
""", pd.DataFrame(ts))

    # bigger ts
    appdate_0 = pd.date_range(start=datetime(2015, 1, 1),
                              end=datetime(2015, 1, 4),
                              freq='D').values
    pubdate_0 = [datetime(2015, 1, 11, 12, 0, 0)] * 4
    insertion_date_0 = [datetime(2015, 1, 11, 12, 30, 0)] * 4

    appdate_1 = pd.date_range(start=datetime(2015, 1, 1),
                              end=datetime(2015, 1, 4),
                              freq='D').values

    pubdate_1 = [datetime(2015, 1, 21, 12, 0, 0)] * 4
    insertion_date_1 = [datetime(2015, 1, 21, 12, 30, 0)] * 4

    multi = [
        np.concatenate([appdate_0, appdate_1]),
        np.array(pubdate_0 + pubdate_1),
        np.array(insertion_date_0 + insertion_date_1)
    ]

    ts_multi = pd.Series(range(8), index=multi)
    ts_multi.index.rename(['a', 'c', 'b'], inplace=True)

    tsh.insert(engine, ts_multi, 'ts_multi', 'test')
    ts = tsh.get(engine, 'ts_multi')

    assert_df("""
                                                    ts_multi
a          b                   c                            
2015-01-01 2015-01-11 12:30:00 2015-01-11 12:00:00       0.0
           2015-01-21 12:30:00 2015-01-21 12:00:00       4.0
2015-01-02 2015-01-11 12:30:00 2015-01-11 12:00:00       1.0
           2015-01-21 12:30:00 2015-01-21 12:00:00       5.0
2015-01-03 2015-01-11 12:30:00 2015-01-11 12:00:00       2.0
           2015-01-21 12:30:00 2015-01-21 12:00:00       6.0
2015-01-04 2015-01-11 12:30:00 2015-01-11 12:00:00       3.0
           2015-01-21 12:30:00 2015-01-21 12:00:00       7.0
    """, pd.DataFrame(ts.sort_index()))
    # Note: the columns are returned according to the alphabetic order

    appdate_2 = pd.date_range(start=datetime(2015, 1, 1),
                              end=datetime(2015, 1, 4),
                              freq='D').values
    pubdate_2 = [datetime(2015, 1, 31, 12, 0, 0)] * 4
    insertion_date_2 = [datetime(2015, 1, 31, 12, 30, 0)] * 4

    multi_2 = [
        np.concatenate([appdate_1, appdate_2]),
        np.array(pubdate_1 + pubdate_2),
        np.array(insertion_date_1 + insertion_date_2)
    ]

    ts_multi_2 = pd.Series([4] * 8, index=multi_2)
    ts_multi_2.index.rename(['a', 'c', 'b'], inplace=True)

    # A second ts is inserted with some index in common with the first
    # one: appdate_1, pubdate_1,and insertion_date_1. The value is set
    # at 4, which matches the previous value of the "2015-01-01" point.

    diff = tsh.insert(engine, ts_multi_2, 'ts_multi', 'test')
    assert_df("""
                                                    ts_multi
a          b                   c                            
2015-01-01 2015-01-31 12:30:00 2015-01-31 12:00:00       4.0
2015-01-02 2015-01-21 12:30:00 2015-01-21 12:00:00       4.0
           2015-01-31 12:30:00 2015-01-31 12:00:00       4.0
2015-01-03 2015-01-21 12:30:00 2015-01-21 12:00:00       4.0
           2015-01-31 12:30:00 2015-01-31 12:00:00       4.0
2015-01-04 2015-01-21 12:30:00 2015-01-21 12:00:00       4.0
           2015-01-31 12:30:00 2015-01-31 12:00:00       4.0
        """, pd.DataFrame(diff.sort_index()))
    # the differential skips a value for "2015-01-01"
    # which does not change from the previous ts

    ts = tsh.get(engine, 'ts_multi')
    assert_df("""
                                                    ts_multi
a          b                   c                            
2015-01-01 2015-01-11 12:30:00 2015-01-11 12:00:00       0.0
           2015-01-21 12:30:00 2015-01-21 12:00:00       4.0
           2015-01-31 12:30:00 2015-01-31 12:00:00       4.0
2015-01-02 2015-01-11 12:30:00 2015-01-11 12:00:00       1.0
           2015-01-21 12:30:00 2015-01-21 12:00:00       4.0
           2015-01-31 12:30:00 2015-01-31 12:00:00       4.0
2015-01-03 2015-01-11 12:30:00 2015-01-11 12:00:00       2.0
           2015-01-21 12:30:00 2015-01-21 12:00:00       4.0
           2015-01-31 12:30:00 2015-01-31 12:00:00       4.0
2015-01-04 2015-01-11 12:30:00 2015-01-11 12:00:00       3.0
           2015-01-21 12:30:00 2015-01-21 12:00:00       4.0
           2015-01-31 12:30:00 2015-01-31 12:00:00       4.0
        """, pd.DataFrame(ts.sort_index()))

    # the result ts have now 3 values for each point in 'a'
779
780
781


def test_add_na(engine):
    """Check that pushing only NaN values creates nothing, both on a
    first insertion and when appended after existing points.
    """
    tsh = TimeSerie()

    # a series made only of NaNs is not inserted
    # in case of first insertion
    ts_nan = genserie(datetime(2010, 1, 1), 'D', 5)
    ts_nan[[True] * len(ts_nan)] = np.nan

    diff = tsh.insert(engine, ts_nan, 'ts_add_na', 'test')
    assert diff is None
    result = tsh.get(engine, 'ts_add_na')
    assert result is None

    # in case of insertion in existing data
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 5)
    tsh.insert(engine, ts_begin, 'ts_add_na', 'test')

    ts_nan = genserie(datetime(2010, 1, 6), 'D', 5)
    ts_nan[[True] * len(ts_nan)] = np.nan
    ts_nan = pd.concat([ts_begin, ts_nan])

    diff = tsh.insert(engine, ts_nan, 'ts_add_na', 'test')
    assert diff is None

    result = tsh.get(engine, 'ts_add_na')
    assert len(result) == 5
807
808
809


def test_dtype_mismatch(engine):
    """Check that inserting values of another dtype into an existing
    series raises, with the expected message, in both directions.
    """
    tsh = TimeSerie()

    # string series first, numeric values pushed afterwards
    tsh.insert(engine,
               genserie(datetime(2015, 1, 1), 'D', 11).astype('str'),
               'error1',
               'test')

    with pytest.raises(Exception) as excinfo:
        tsh.insert(engine,
                   genserie(datetime(2015, 1, 1), 'D', 11),
                   'error1',
                   'test')

    assert 'Type error when inserting error1, new type is float64, type in base is object' == str(excinfo.value)

    # numeric series first, string values pushed afterwards
    tsh.insert(engine,
               genserie(datetime(2015, 1, 1), 'D', 11),
               'error2',
               'test')

    with pytest.raises(Exception) as excinfo:
        tsh.insert(engine,
                   genserie(datetime(2015, 1, 1), 'D', 11).astype('str'),
                   'error2',
                   'test')

    assert 'Type error when inserting error2, new type is object, type in base is float64' == str(excinfo.value)