test_tsio.py 27.6 KB
Newer Older
1
2
3
4
5
6
7
# coding: utf-8
from pathlib import Path
from datetime import datetime
from dateutil import parser

import pandas as pd
import numpy as np
8
import pytest
9
from mock import patch
10

11
from tshistory.tsio import BigdataTimeSerie
12
13
14
15

# 'data' directory located next to this test module; the tests read
# their input files from it
DATADIR = Path(__file__).parent / 'data'


16
17
18
19
20
21
22
def assert_group_equals(g1, g2):
    """Assert that two groups hold equal series under the same names.

    `g1` and `g2` are mappings of name -> series: they must provide
    `.items()` and their values must provide `.equals(other)`.

    Raises AssertionError when the groups differ in size, in names or
    in the content of any series.
    """
    items1 = sorted(g1.items())
    items2 = sorted(g2.items())
    # zip() stops at the shorter sequence: without this check a group
    # holding extra series would wrongly compare as equal
    assert len(items1) == len(items2)
    for (n1, s1), (n2, s2) in zip(items1, items2):
        assert n1 == n2
        assert s1.equals(s2)


23
24
25
26
27
28
def remove_metadata(tsrepr):
    """Drop the last line of `tsrepr` when the text mentions 'Freq' or 'Name'.

    The markers are looked up anywhere in the text, not only in its
    last line. A text without any marker is returned unchanged.
    """
    has_marker = any(marker in tsrepr for marker in ('Freq', 'Name'))
    if not has_marker:
        return tsrepr
    # cut right before the last newline (rindex raises ValueError
    # if the text is a single line)
    last_newline = tsrepr.rindex('\n')
    return tsrepr[:last_newline]


29
def assert_df(expected, df):
    """Assert that `df.to_string()` matches the `expected` text.

    Both texts are stripped of their surrounding whitespace and passed
    through `remove_metadata` before being compared for equality.

    `df` is any object providing `to_string()` (the tests pass pandas
    series and dataframes).
    """
    exp = remove_metadata(expected.strip())
    got = remove_metadata(df.to_string().strip())
    assert exp == got
33
34


35
36
37
38
39
40
41
42
43
44
45
46
def genserie(start, freq, repeat, initval=None, tz=None, name=None):
    """Build a series of `repeat` points indexed by a regular date range.

    The index starts at `start`, steps by `freq` and is localized to
    `tz` when given. Without `initval` the values are 0 .. repeat-1;
    otherwise they are `initval * repeat` (the tests pass a
    one-element list, yielding a constant series).
    """
    data = range(repeat) if initval is None else initval * repeat
    stamps = pd.date_range(start=start, freq=freq, periods=repeat, tz=tz)
    return pd.Series(data, name=name, index=stamps)

Aurélien Campéas's avatar
Aurélien Campéas committed
47

48
def test_changeset(engine, tsh):
    """Insert series through named changesets and check the bookkeeping.

    Covers: grouping of series inserted in the same changeset, the
    refusal of an insertion done without author outside a changeset,
    the changeset log (full and filtered) and the `info` summary.
    """
    index = pd.date_range(start=datetime(2017, 1, 1), freq='D', periods=3)
    data = [1., 2., 3.]

    # freeze the insertion date as seen from tshistory.tsio
    with patch('tshistory.tsio.datetime') as mock_date:
        mock_date.now.return_value = datetime(2020, 1, 1)
        with engine.connect() as cn:
            with tsh.newchangeset(cn, 'babar'):
                tsh.insert(cn, pd.Series(data, index=index), 'ts_values')
                tsh.insert(cn, pd.Series(['a', 'b', 'c'], index=index), 'ts_othervalues')

        g = tsh.get_group(engine, 'ts_values')
        g2 = tsh.get_group(engine, 'ts_othervalues')
        assert_group_equals(g, g2)

        with pytest.raises(AssertionError):
            tsh.insert(engine, pd.Series([2, 3, 4], index=index), 'ts_values')

        with engine.connect() as cn:
            # rotate the values: [2., 3., 1.]
            data.append(data.pop(0))
            with tsh.newchangeset(cn, 'celeste'):
                tsh.insert(cn, pd.Series(data, index=index), 'ts_values')
                # below should be a noop
                tsh.insert(cn, pd.Series(['a', 'b', 'c'], index=index), 'ts_othervalues')

    g = tsh.get_group(engine, 'ts_values')
    assert ['ts_values'] == list(g.keys())

    assert_df("""
2017-01-01    2.0
2017-01-02    3.0
2017-01-03    1.0
""", tsh.get(engine, 'ts_values'))

    assert_df("""
2017-01-01    a
2017-01-02    b
2017-01-03    c
""", tsh.get(engine, 'ts_othervalues'))

    log = tsh.log(engine)
    assert [
        {'author': 'babar',
         'rev': 1,
         'date': datetime(2020, 1, 1, 0, 0),
         'names': ['ts_values', 'ts_othervalues']},
        {'author': 'celeste',
         'rev': 2,
         'date': datetime(2020, 1, 1, 0, 0),
         'names': ['ts_values']}
    ] == log

    log = tsh.log(engine, names=['ts_othervalues'])
    assert len(log) == 1
    assert log[0]['rev'] == 1
    assert log[0]['names'] == ['ts_values', 'ts_othervalues']

    log = tsh.log(engine, fromrev=2)
    assert len(log) == 1

    log = tsh.log(engine, torev=1)
    assert len(log) == 1

    info = tsh.info(engine)
    assert {
        'changeset count': 2,
        'serie names': ['ts_othervalues', 'ts_values'],
        'series count': 2
    } == info

118

119
def test_tstamp_roundtrip(engine, tsh):
    """Check that a tz-aware series comes back normalized to UTC.

    The series is built across the Europe/Paris DST change of
    2017-10-29, inserted, read back, and its index compared with the
    original one after re-localizing it to UTC.
    """
    if isinstance(tsh, BigdataTimeSerie):
        # report the test as skipped rather than letting it pass silently
        pytest.skip('timestamp round-trip is not exercised with BigdataTimeSerie')

    ts = genserie(datetime(2017, 10, 28, 23),
                  'H', 4, tz='UTC')
    ts.index = ts.index.tz_convert('Europe/Paris')

    assert_df("""
2017-10-29 01:00:00+02:00    0
2017-10-29 02:00:00+02:00    1
2017-10-29 02:00:00+01:00    2
2017-10-29 03:00:00+01:00    3
Freq: H
    """, ts)

    tsh.insert(engine, ts, 'tztest', 'Babar')
    back = tsh.get(engine, 'tztest')

    # though un localized we understand it's been normalized to utc
    assert_df("""
2017-10-28 23:00:00    0.0
2017-10-29 00:00:00    1.0
2017-10-29 01:00:00    2.0
2017-10-29 02:00:00    3.0
""", back)

    back.index = back.index.tz_localize('UTC')
    assert (ts.index == back.index).all()


149
def test_differential(engine, tsh):
    """Insert successive versions of a series and check the merged result.

    Covers: re-insertion of identical data, point updates, an
    overlapping longer series, appended points, and the resulting
    revision chain / registry rows read straight from the database.
    """
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 10)
    tsh.insert(engine, ts_begin, 'ts_test', 'test')

    assert tsh.exists(engine, 'ts_test')
    assert not tsh.exists(engine, 'this_does_not_exist')

    assert_df("""
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    3.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
""", tsh.get(engine, 'ts_test'))

    # we should detect the emission of a message
    tsh.insert(engine, ts_begin, 'ts_test', 'babar')

    assert_df("""
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    3.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
""", tsh.get(engine, 'ts_test'))

    ts_slight_variation = ts_begin.copy()
    ts_slight_variation.iloc[3] = 0
    ts_slight_variation.iloc[6] = 0
    tsh.insert(engine, ts_slight_variation, 'ts_test', 'celeste')

    assert_df("""
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    0.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    0.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
""", tsh.get(engine, 'ts_test'))

    ts_longer = genserie(datetime(2010, 1, 3), 'D', 15)
    ts_longer.iloc[1] = 2.48
    ts_longer.iloc[3] = 3.14
    ts_longer.iloc[5] = ts_begin.iloc[7]

    tsh.insert(engine, ts_longer, 'ts_test', 'test')

    assert_df("""
2010-01-01     0.00
2010-01-02     1.00
2010-01-03     0.00
2010-01-04     2.48
2010-01-05     2.00
2010-01-06     3.14
2010-01-07     4.00
2010-01-08     7.00
2010-01-09     6.00
2010-01-10     7.00
2010-01-11     8.00
2010-01-12     9.00
2010-01-13    10.00
2010-01-14    11.00
2010-01-15    12.00
2010-01-16    13.00
2010-01-17    14.00
""", tsh.get(engine, 'ts_test'))

    # start testing manual overrides
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 5, initval=[2])
    ts_begin.loc['2010-01-04'] = -1
    tsh.insert(engine, ts_begin, 'ts_mixte', 'test')

    # -1 represents bogus upstream data
    assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
""", tsh.get(engine, 'ts_mixte'))

    # refresh all the period + 1 extra data point
    ts_more = genserie(datetime(2010, 1, 2), 'D', 5, [2])
    ts_more.loc['2010-01-04'] = -1
    tsh.insert(engine, ts_more, 'ts_mixte', 'test')

    assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
""", tsh.get(engine, 'ts_mixte'))

    # just append an extra data point
    # with no intersection with the previous ts
    ts_one_more = genserie(datetime(2010, 1, 7), 'D', 1, [3])
    tsh.insert(engine, ts_one_more, 'ts_mixte', 'test')

    assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
""", tsh.get(engine, 'ts_mixte'))

    # each series is expected to hold three chained revisions
    hist = pd.read_sql('select id, parent from timeserie.ts_test order by id',
                       engine)
    assert_df("""
   id  parent
0   1     NaN
1   2     1.0
2   3     2.0
""", hist)

    hist = pd.read_sql('select id, parent from timeserie.ts_mixte order by id',
                       engine)
    assert_df("""
   id  parent
0   1     NaN
1   2     1.0
2   3     2.0
""", hist)

    allts = pd.read_sql("select name, table_name from registry "
                        "where name in ('ts_test', 'ts_mixte')",
                        engine)

    assert_df("""
       name          table_name
0   ts_test   timeserie.ts_test
1  ts_mixte  timeserie.ts_mixte
""", allts)

    assert_df("""
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
""", tsh.get(engine, 'ts_mixte',
             revision_date=datetime.now()))
311
312


313
def test_bad_import(engine, tsh):
    """Insert awkward inputs: csv-loaded data, empty and NaN-laden series."""
    # the data is loaded from csv; the 'Gas Day' column is parsed
    # explicitly as day-first dates and used as the index
    df_result = pd.read_csv(DATADIR / 'test_data.csv')
    df_result['Gas Day'] = df_result['Gas Day'].apply(parser.parse, dayfirst=True, yearfirst=False)
    df_result.set_index('Gas Day', inplace=True)
    ts = df_result['SC']

    tsh.insert(engine, ts, 'SND_SC', 'test')
    result = tsh.get(engine, 'SND_SC')
    assert result.dtype == 'float64'

    # insertion of empty ts
    ts = pd.Series(name='truc', dtype='object')
    tsh.insert(engine, ts, 'empty_ts', 'test')
    assert tsh.get(engine, 'empty_ts') is None

    # nan in ts
    # all na
    ts = genserie(datetime(2010, 1, 10), 'D', 10, [np.nan], name='truc')
    tsh.insert(engine, ts, 'test_nan', 'test')
    assert tsh.get(engine, 'test_nan') is None

    # mixed na
    ts = pd.Series([np.nan] * 5 + [3] * 5,
                   index=pd.date_range(start=datetime(2010, 1, 10),
                                       freq='D', periods=10), name='truc')
    tsh.insert(engine, ts, 'test_nan', 'test')
    result = tsh.get(engine, 'test_nan')

    # insert the very same series a second time before checking
    # the stored values
    tsh.insert(engine, ts, 'test_nan', 'test')
    result = tsh.get(engine, 'test_nan')
    assert_df("""
2010-01-15    3.0
2010-01-16    3.0
2010-01-17    3.0
2010-01-18    3.0
2010-01-19    3.0
""", result)

    # get_ts with name not in database

    tsh.get(engine, 'inexisting_name', 'test')
355
356


357
def test_revision_date(engine, tsh):
    """Insert three dated versions and read the series back through time.

    Each version is inserted within a changeset carrying an explicit
    `_insertion_date`; `get` is then queried with several
    `revision_date` values, including one preceding every insertion.
    """
    idate1 = datetime(2015, 1, 1, 15, 43, 23)
    with tsh.newchangeset(engine, 'test', _insertion_date=idate1):

        ts = genserie(datetime(2010, 1, 4), 'D', 4, [1], name='truc')
        tsh.insert(engine, ts, 'ts_through_time')
        assert idate1 == tsh.latest_insertion_date(engine, 'ts_through_time')

    idate2 = datetime(2015, 1, 2, 15, 43, 23)
    with tsh.newchangeset(engine, 'test', _insertion_date=idate2):

        ts = genserie(datetime(2010, 1, 4), 'D', 4, [2], name='truc')
        tsh.insert(engine, ts, 'ts_through_time')
        assert idate2 == tsh.latest_insertion_date(engine, 'ts_through_time')

    idate3 = datetime(2015, 1, 3, 15, 43, 23)
    with tsh.newchangeset(engine, 'test', _insertion_date=idate3):

        ts = genserie(datetime(2010, 1, 4), 'D', 4, [3], name='truc')
        tsh.insert(engine, ts, 'ts_through_time')
        assert idate3 == tsh.latest_insertion_date(engine, 'ts_through_time')

    # without revision date: latest version
    ts = tsh.get(engine, 'ts_through_time')

    assert_df("""
2010-01-04    3.0
2010-01-05    3.0
2010-01-06    3.0
2010-01-07    3.0
""", ts)

    # between the second and the third insertion
    ts = tsh.get(engine, 'ts_through_time',
                 revision_date=datetime(2015, 1, 2, 18, 43, 23))

    assert_df("""
2010-01-04    2.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    2.0
""", ts)

    # between the first and the second insertion
    ts = tsh.get(engine, 'ts_through_time',
                 revision_date=datetime(2015, 1, 1, 18, 43, 23))

    assert_df("""
2010-01-04    1.0
2010-01-05    1.0
2010-01-06    1.0
2010-01-07    1.0
""", ts)

    # before the first insertion
    ts = tsh.get(engine, 'ts_through_time',
                 revision_date=datetime(2014, 1, 1, 18, 43, 23))

    assert ts is None

413

414
def test_snapshots(engine, tsh):
    """Grow a series one point at a time and check where snapshots land.

    `_snapshot_interval` is set to 4 and ten versions are inserted; the
    rows carrying a snapshot, the diff/snapshot payload sizes and the
    latest snapshot are then checked against the database content.
    """
    tsh._snapshot_interval = 4

    with engine.connect() as cn:
        for tscount in range(1, 11):
            ts = genserie(datetime(2015, 1, 1), 'D', tscount, [1])
            diff = tsh.insert(cn, ts, 'growing', 'babar')
            # each insertion only brings the last point as a diff
            assert diff.index[0] == diff.index[-1] == ts.index[-1]

    # `ts` is the last (10 points) series built by the loop above
    diff = tsh.insert(engine, ts, 'growing', 'babar')
    assert diff is None

    df = pd.read_sql("select id from timeserie.growing where snapshot is not null",
                     engine)
    assert_df("""
   id
0   1
1   4
2   8
3  10
""", df)

    ts = tsh.get(engine, 'growing')
    assert_df("""
2015-01-01    1.0
2015-01-02    1.0
2015-01-03    1.0
2015-01-04    1.0
2015-01-05    1.0
2015-01-06    1.0
2015-01-07    1.0
2015-01-08    1.0
2015-01-09    1.0
2015-01-10    1.0
""", ts)

    df = pd.read_sql("select id, diff, snapshot from timeserie.growing order by id", engine)
    # replace the payloads by their length (0 when absent)
    for attr in ('diff', 'snapshot'):
        df[attr] = df[attr].apply(lambda x: 0 if x is None else len(x))

    if isinstance(tsh, BigdataTimeSerie):
        assert_df("""
   id  diff  snapshot
0   1     0        35
1   2    36         0
2   3    36         0
3   4    36        47
4   5    36         0
5   6    36         0
6   7    36         0
7   8    36        59
8   9    36         0
9  10    36        67
""", df)
    else:
        assert_df("""
   id  diff  snapshot
0   1     0        32
1   2    32         0
2   3    32         0
3   4    32       125
4   5    32         0
5   6    32         0
6   7    32         0
7   8    32       249
8   9    32         0
9  10    32       311
""", df)

    table = tsh._get_ts_table(engine, 'growing')
    snapid, snap = tsh._find_snapshot(engine, table, ())
    assert snapid == 10
    assert (ts == snap).all()
487
488


489
def test_deletion(engine, tsh):
    """Check how NaN / None values erase points of a stored series.

    Covers numeric and string series: erasing some points, restoring
    them, erasing everything, first insertion made only of NaNs, the
    diff computed between NaN-laden series, and full erasure followed
    by a re-insertion.
    """
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 11)
    ts_begin.iloc[-1] = np.nan
    tsh.insert(engine, ts_begin, 'ts_del', 'test')

    ts = tsh._build_snapshot_upto(engine, tsh._get_ts_table(engine, 'ts_del'))
    assert ts.iloc[-1] == 9.0

    ts_begin.iloc[0] = np.nan
    ts_begin.iloc[3] = np.nan

    tsh.insert(engine, ts_begin, 'ts_del', 'test')

    assert_df("""
2010-01-02    1.0
2010-01-03    2.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
""", tsh.get(engine, 'ts_del'))

    ts2 = tsh.get(engine, 'ts_del',
                 # force snapshot reconstruction feature
                 revision_date=datetime(2038, 1, 1))
    assert (tsh.get(engine, 'ts_del') == ts2).all()

    ts_begin.iloc[0] = 42
    ts_begin.iloc[3] = 23

    tsh.insert(engine, ts_begin, 'ts_del', 'test')

    assert_df("""
2010-01-01    42.0
2010-01-02     1.0
2010-01-03     2.0
2010-01-04    23.0
2010-01-05     4.0
2010-01-06     5.0
2010-01-07     6.0
2010-01-08     7.0
2010-01-09     8.0
2010-01-10     9.0
""", tsh.get(engine, 'ts_del'))

    # now with string!

    ts_string = genserie(datetime(2010, 1, 1), 'D', 10, ['machin'])
    tsh.insert(engine, ts_string, 'ts_string_del', 'test')

    # positional access is spelled with .iloc: integer keys on a
    # datetime-indexed series are not reliably treated as positions
    ts_string.iloc[4] = None
    ts_string.iloc[5] = None

    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
    assert_df("""
2010-01-01    machin
2010-01-02    machin
2010-01-03    machin
2010-01-04    machin
2010-01-07    machin
2010-01-08    machin
2010-01-09    machin
2010-01-10    machin
""", tsh.get(engine, 'ts_string_del'))

    ts_string.iloc[4] = 'truc'
    ts_string.iloc[6] = 'truc'

    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
    assert_df("""
2010-01-01    machin
2010-01-02    machin
2010-01-03    machin
2010-01-04    machin
2010-01-05      truc
2010-01-07      truc
2010-01-08    machin
2010-01-09    machin
2010-01-10    machin
""", tsh.get(engine, 'ts_string_del'))

    ts_string[ts_string.index] = np.nan
    tsh.insert(engine, ts_string, 'ts_string_del', 'test')

    erased = tsh.get(engine, 'ts_string_del')
    assert len(erased) == 0

    # first insertion with only nan

    ts_begin = genserie(datetime(2010, 1, 1), 'D', 10, [np.nan])
    tsh.insert(engine, ts_begin, 'ts_null', 'test')

    assert tsh.get(engine, 'ts_null') is None

    # exhibit issue with nans handling
    ts_repushed = genserie(datetime(2010, 1, 1), 'D', 11)
    ts_repushed.iloc[0:3] = np.nan

    assert_df("""
2010-01-01     NaN
2010-01-02     NaN
2010-01-03     NaN
2010-01-04     3.0
2010-01-05     4.0
2010-01-06     5.0
2010-01-07     6.0
2010-01-08     7.0
2010-01-09     8.0
2010-01-10     9.0
2010-01-11    10.0
Freq: D
""", ts_repushed)

    tsh.insert(engine, ts_repushed, 'ts_repushed', 'test')
    diff = tsh.insert(engine, ts_repushed, 'ts_repushed', 'test')
    assert diff is None

    # there is no difference
    assert 0 == len(tsh._compute_diff(ts_repushed, ts_repushed))

    ts_add = genserie(datetime(2010, 1, 1), 'D', 15)
    ts_add.iloc[0] = np.nan
    ts_add.iloc[13:] = np.nan
    ts_add.iloc[8] = np.nan
    diff = tsh._compute_diff(ts_repushed, ts_add)

    assert_df("""
2010-01-02     1.0
2010-01-03     2.0
2010-01-09     NaN
2010-01-12    11.0
2010-01-13    12.0""", diff.sort_index())
    # value on nan => value
    # nan on value => nan
    # nan on nan => Nothing
    # nan on nothing=> Nothing

    # full erasing
    # numeric
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 4)
    tsh.insert(engine, ts_begin, 'ts_full_del', 'test')

    ts_begin.iloc[:] = np.nan
    tsh.insert(engine, ts_begin, 'ts_full_del', 'test')

    ts_end = genserie(datetime(2010, 1, 1), 'D', 4)
    tsh.insert(engine, ts_end, 'ts_full_del', 'test')

    # string

    ts_begin = genserie(datetime(2010, 1, 1), 'D', 4, ['text'])
    tsh.insert(engine, ts_begin, 'ts_full_del_str', 'test')

    ts_begin.iloc[:] = np.nan
    tsh.insert(engine, ts_begin, 'ts_full_del_str', 'test')

    ts_end = genserie(datetime(2010, 1, 1), 'D', 4, ['text'])
    tsh.insert(engine, ts_end, 'ts_full_del_str', 'test')
649

Aurélien Campéas's avatar
Aurélien Campéas committed
650

651
def test_multi_index(engine, tsh):
    """Insert and read back series indexed by three datetime levels.

    A small two-point series is inserted, re-inserted unchanged, then
    updated; a bigger one is inserted and later overlapped by a second
    series so that the returned diff and the merged result can be
    checked.
    """
    # pd.date_range / datetime replace the DatetimeIndex(start=, end=)
    # constructor and the pd.datetime alias, both gone from pandas
    appdate_0 = pd.date_range(start=datetime(2015, 1, 1),
                              end=datetime(2015, 1, 2),
                              freq='D').values
    pubdate_0 = [datetime(2015, 1, 11, 12, 0, 0)] * 2
    insertion_date_0 = [datetime(2015, 1, 11, 12, 30, 0)] * 2

    multi = [
        appdate_0,
        np.array(pubdate_0),
        np.array(insertion_date_0)
    ]

    ts_multi = pd.Series(range(2), index=multi)
    ts_multi.index.rename(['b', 'c', 'a'], inplace=True)

    tsh.insert(engine, ts_multi, 'ts_multi_simple', 'test')

    ts = tsh.get(engine, 'ts_multi_simple')
    assert_df("""
                                                    ts_multi_simple
a                   b          c                                   
2015-01-11 12:30:00 2015-01-01 2015-01-11 12:00:00              0.0
                    2015-01-02 2015-01-11 12:00:00              1.0
""", pd.DataFrame(ts))

    diff = tsh.insert(engine, ts_multi, 'ts_multi_simple', 'test')
    assert diff is None

    ts_multi_2 = pd.Series([0, 2], index=multi)
    ts_multi_2.index.rename(['b', 'c', 'a'], inplace=True)

    tsh.insert(engine, ts_multi_2, 'ts_multi_simple', 'test')
    ts = tsh.get(engine, 'ts_multi_simple')

    assert_df("""
                                                    ts_multi_simple
a                   b          c                                   
2015-01-11 12:30:00 2015-01-01 2015-01-11 12:00:00              0.0
                    2015-01-02 2015-01-11 12:00:00              2.0
""", pd.DataFrame(ts))

    # bigger ts
    appdate_0 = pd.date_range(start=datetime(2015, 1, 1),
                              end=datetime(2015, 1, 4),
                              freq='D').values
    pubdate_0 = [datetime(2015, 1, 11, 12, 0, 0)] * 4
    insertion_date_0 = [datetime(2015, 1, 11, 12, 30, 0)] * 4

    appdate_1 = pd.date_range(start=datetime(2015, 1, 1),
                              end=datetime(2015, 1, 4),
                              freq='D').values

    pubdate_1 = [datetime(2015, 1, 21, 12, 0, 0)] * 4
    insertion_date_1 = [datetime(2015, 1, 21, 12, 30, 0)] * 4

    multi = [
        np.concatenate([appdate_0, appdate_1]),
        np.array(pubdate_0 + pubdate_1),
        np.array(insertion_date_0 + insertion_date_1)
    ]

    ts_multi = pd.Series(range(8), index=multi)
    ts_multi.index.rename(['a', 'c', 'b'], inplace=True)

    tsh.insert(engine, ts_multi, 'ts_multi', 'test')
    ts = tsh.get(engine, 'ts_multi')

    assert_df("""
                                                    ts_multi
a          b                   c                            
2015-01-01 2015-01-11 12:30:00 2015-01-11 12:00:00       0.0
           2015-01-21 12:30:00 2015-01-21 12:00:00       4.0
2015-01-02 2015-01-11 12:30:00 2015-01-11 12:00:00       1.0
           2015-01-21 12:30:00 2015-01-21 12:00:00       5.0
2015-01-03 2015-01-11 12:30:00 2015-01-11 12:00:00       2.0
           2015-01-21 12:30:00 2015-01-21 12:00:00       6.0
2015-01-04 2015-01-11 12:30:00 2015-01-11 12:00:00       3.0
           2015-01-21 12:30:00 2015-01-21 12:00:00       7.0
    """, pd.DataFrame(ts.sort_index()))
    # Note: the columns are returned according to the alphabetic order

    appdate_2 = pd.date_range(start=datetime(2015, 1, 1),
                              end=datetime(2015, 1, 4),
                              freq='D').values
    pubdate_2 = [datetime(2015, 1, 31, 12, 0, 0)] * 4
    insertion_date_2 = [datetime(2015, 1, 31, 12, 30, 0)] * 4

    multi_2 = [
        np.concatenate([appdate_1, appdate_2]),
        np.array(pubdate_1 + pubdate_2),
        np.array(insertion_date_1 + insertion_date_2)
    ]

    ts_multi_2 = pd.Series([4] * 8, index=multi_2)
    ts_multi_2.index.rename(['a', 'c', 'b'], inplace=True)

    # A second ts is inserted with some index in common with the first
    # one: appdate_1, pubdate_1,and insertion_date_1. The value is set
    # at 4, which matches the previous value of the "2015-01-01" point.

    diff = tsh.insert(engine, ts_multi_2, 'ts_multi', 'test')
    assert_df("""
                                                    ts_multi
a          b                   c                            
2015-01-01 2015-01-31 12:30:00 2015-01-31 12:00:00       4.0
2015-01-02 2015-01-21 12:30:00 2015-01-21 12:00:00       4.0
           2015-01-31 12:30:00 2015-01-31 12:00:00       4.0
2015-01-03 2015-01-21 12:30:00 2015-01-21 12:00:00       4.0
           2015-01-31 12:30:00 2015-01-31 12:00:00       4.0
2015-01-04 2015-01-21 12:30:00 2015-01-21 12:00:00       4.0
           2015-01-31 12:30:00 2015-01-31 12:00:00       4.0
        """, pd.DataFrame(diff.sort_index()))
    # the differential skips a value for "2015-01-01"
    # which does not change from the previous ts

    ts = tsh.get(engine, 'ts_multi')
    assert_df("""
                                                    ts_multi
a          b                   c                            
2015-01-01 2015-01-11 12:30:00 2015-01-11 12:00:00       0.0
           2015-01-21 12:30:00 2015-01-21 12:00:00       4.0
           2015-01-31 12:30:00 2015-01-31 12:00:00       4.0
2015-01-02 2015-01-11 12:30:00 2015-01-11 12:00:00       1.0
           2015-01-21 12:30:00 2015-01-21 12:00:00       4.0
           2015-01-31 12:30:00 2015-01-31 12:00:00       4.0
2015-01-03 2015-01-11 12:30:00 2015-01-11 12:00:00       2.0
           2015-01-21 12:30:00 2015-01-21 12:00:00       4.0
           2015-01-31 12:30:00 2015-01-31 12:00:00       4.0
2015-01-04 2015-01-11 12:30:00 2015-01-11 12:00:00       3.0
           2015-01-21 12:30:00 2015-01-21 12:00:00       4.0
           2015-01-31 12:30:00 2015-01-31 12:00:00       4.0
        """, pd.DataFrame(ts.sort_index()))

    # the result ts have now 3 values for each point in 'a'
786
787


788
def test_get_history(engine, tsh):
    """Check `get_history` output, its round-trip and its date filters.

    Three growing versions of 'smallserie' are inserted at explicit
    insertion dates; the history is read back, replayed into
    'smallserie2', then queried with from/to insertion date bounds.
    """
    for numserie in (1, 2, 3):
        with engine.connect() as cn:
            with tsh.newchangeset(cn, 'aurelien.campeas@pythonian.fr',
                                  _insertion_date=datetime(2017, 2, numserie)):
                tsh.insert(cn, genserie(datetime(2017, 1, 1), 'D', numserie), 'smallserie')

    ts = tsh.get(engine, 'smallserie')
    assert_df("""
2017-01-01    0.0
2017-01-02    1.0
2017-01-03    2.0
""", ts)

    logs = tsh.log(engine, names=['smallserie'])
    # the 'rev' entries are left out of the comparison
    assert [
        {'author': 'aurelien.campeas@pythonian.fr',
         'date': datetime(2017, 2, 1, 0, 0),
         'names': ['smallserie']
        },
        {'author': 'aurelien.campeas@pythonian.fr',
         'date': datetime(2017, 2, 2, 0, 0),
         'names': ['smallserie']
        },
        {'author': 'aurelien.campeas@pythonian.fr',
         'date': datetime(2017, 2, 3, 0, 0),
         'names': ['smallserie']
        }
    ] == [{k: v for k, v in log.items() if k != 'rev'}
          for log in logs]
    histts = tsh.get_history(engine, 'smallserie')

    assert_df("""
insertion_date  value_date
2017-02-01      2017-01-01    0.0
2017-02-02      2017-01-01    0.0
                2017-01-02    1.0
2017-02-03      2017-01-01    0.0
                2017-01-02    1.0
                2017-01-03    2.0
""", histts)

    # replay each historical version into a second series
    for idate in histts.groupby('insertion_date').groups:
        with engine.connect() as cn:
            with tsh.newchangeset(cn, 'aurelien.campeas@pythonian.f',
                                  _insertion_date=idate):
                tsh.insert(cn, histts[idate], 'smallserie2')

    # this is perfectly round-tripable
    assert (tsh.get(engine, 'smallserie2') == ts).all()
    assert (tsh.get_history(engine, 'smallserie2') == histts).all()

    # get history ranges
    tsa = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2017, 2, 2))
    assert_df("""
insertion_date  value_date
2017-02-02      2017-01-01    0.0
                2017-01-02    1.0
2017-02-03      2017-01-01    0.0
                2017-01-02    1.0
                2017-01-03    2.0
""", tsa)

    tsb = tsh.get_history(engine, 'smallserie',
                          to_insertion_date=datetime(2017, 2, 2))
    assert_df("""
insertion_date  value_date
2017-02-01      2017-01-01    0.0
2017-02-02      2017-01-01    0.0
                2017-01-02    1.0
""", tsb)

    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2017, 2, 2),
                          to_insertion_date=datetime(2017, 2, 2))
    assert_df("""
insertion_date  value_date
2017-02-02      2017-01-01    0.0
                2017-01-02    1.0
""", tsc)


871
def test_add_na(engine, tsh):
    """Check that inserting only NaN points yields no diff.

    Tried both as a first insertion (nothing gets stored) and as NaN
    points appended after already stored data (stored length unchanged).
    """
    # a series of NaNs won't be inserted in base
    # in case of first insertion
    ts_nan = genserie(datetime(2010, 1, 1), 'D', 5)
    ts_nan[[True] * len(ts_nan)] = np.nan

    diff = tsh.insert(engine, ts_nan, 'ts_add_na', 'test')
    assert diff is None
    result = tsh.get(engine, 'ts_add_na')
    assert result is None

    # in case of insertion in existing data
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 5)
    tsh.insert(engine, ts_begin, 'ts_add_na', 'test')

    ts_nan = genserie(datetime(2010, 1, 6), 'D', 5)
    ts_nan[[True] * len(ts_nan)] = np.nan
    ts_nan = pd.concat([ts_begin, ts_nan])

    diff = tsh.insert(engine, ts_nan, 'ts_add_na', 'test')
    assert diff is None

    result = tsh.get(engine, 'ts_add_na')
    assert len(result) == 5
895
896


897
def test_dtype_mismatch(engine, tsh):
    """Check the error raised when re-inserting with another dtype.

    Both directions are tried: numeric data over a stored string
    series ('error1') and string data over a stored numeric series
    ('error2'); the exact error message is asserted each time.
    """
    tsh.insert(engine,
               genserie(datetime(2015, 1, 1), 'D', 11).astype('str'),
               'error1',
               'test')

    with pytest.raises(Exception) as excinfo:
        tsh.insert(engine,
                   genserie(datetime(2015, 1, 1), 'D', 11),
                   'error1',
                   'test')

    assert 'Type error when inserting error1, new type is float64, type in base is object' == str(excinfo.value)

    tsh.insert(engine,
               genserie(datetime(2015, 1, 1), 'D', 11),
               'error2',
               'test')

    with pytest.raises(Exception) as excinfo:
        tsh.insert(engine,
                   genserie(datetime(2015, 1, 1), 'D', 11).astype('str'),
                   'error2',
                   'test')

    assert 'Type error when inserting error2, new type is object, type in base is float64' == str(excinfo.value)