test_tsio.py 34.9 KB
Newer Older
1
2
# coding: utf-8
from pathlib import Path
3
from datetime import datetime, timedelta
4
from time import time
5
from dateutil import parser
6
import calendar
7
8
9

import pandas as pd
import numpy as np
10
import pytest
11
from mock import patch
12

13
from tshistory.testutil import assert_group_equals, genserie, assert_df
14

15
DATADIR = Path(__file__).parent / 'data'
16

Aurélien Campéas's avatar
Aurélien Campéas committed
17

18
def test_changeset(engine, tsh):
19
    index = pd.date_range(start=datetime(2017, 1, 1), freq='D', periods=3)
20
    data = [1., 2., 3.]
21

22
23
    with patch('tshistory.tsio.datetime') as mock_date:
        mock_date.now.return_value = datetime(2020, 1, 1)
24
        with engine.connect() as cn:
25
            with tsh.newchangeset(cn, 'babar'):
26
                tsh.insert(cn, pd.Series(data, index=index), 'ts_values', author='WONTBEUSED')
27
                tsh.insert(cn, pd.Series(['a', 'b', 'c'], index=index), 'ts_othervalues')
28

29
30
31
        # bogus author won't show up
        assert tsh.log(engine)[0]['author'] == 'babar'

32
33
        g = tsh.get_group(engine, 'ts_values')
        g2 = tsh.get_group(engine, 'ts_othervalues')
34
        assert_group_equals(g, g2)
35

36
        with pytest.raises(AssertionError):
37
            tsh.insert(engine, pd.Series([2, 3, 4], index=index), 'ts_values')
38

39
        with engine.connect() as cn:
40
            data.append(data.pop(0))
41
42
            with tsh.newchangeset(cn, 'celeste'):
                tsh.insert(cn, pd.Series(data, index=index), 'ts_values')
43
                # below should be a noop
44
                tsh.insert(cn, pd.Series(['a', 'b', 'c'], index=index), 'ts_othervalues')
45

46
    g = tsh.get_group(engine, 'ts_values')
47
48
    assert ['ts_values'] == list(g.keys())

49
    assert_df("""
50
51
52
2017-01-01    2.0
2017-01-02    3.0
2017-01-03    1.0
53
""", tsh.get(engine, 'ts_values'))
54

55
    assert_df("""
56
57
58
2017-01-01    a
2017-01-02    b
2017-01-03    c
59
""", tsh.get(engine, 'ts_othervalues'))
60

61
    log = tsh.log(engine, names=['ts_values', 'ts_othervalues'])
62
63
64
65
66
67
68
69
70
71
72
    assert [
        {'author': 'babar',
         'rev': 1,
         'date': datetime(2020, 1, 1, 0, 0),
         'names': ['ts_values', 'ts_othervalues']},
        {'author': 'celeste',
         'rev': 2,
         'date': datetime(2020, 1, 1, 0, 0),
         'names': ['ts_values']}
    ] == log

73
    log = tsh.log(engine, names=['ts_othervalues'])
74
75
    assert len(log) == 1
    assert log[0]['rev'] == 1
76
    assert log[0]['names'] == ['ts_values', 'ts_othervalues']
77

78
    log = tsh.log(engine, fromrev=2)
79
80
    assert len(log) == 1

81
    log = tsh.log(engine, torev=1)
82
83
    assert len(log) == 1

84
    info = tsh.info(engine)
85
86
87
88
89
90
    assert {
        'changeset count': 2,
        'serie names': ['ts_othervalues', 'ts_values'],
        'series count': 2
    } == info

91

92
def test_tstamp_roundtrip(engine, tsh):
93
94
    ts = genserie(datetime(2017, 10, 28, 23),
                  'H', 4, tz='UTC')
95
96
97
98
99
100
101
102
103
104
    ts.index = ts.index.tz_convert('Europe/Paris')

    assert_df("""
2017-10-29 01:00:00+02:00    0
2017-10-29 02:00:00+02:00    1
2017-10-29 02:00:00+01:00    2
2017-10-29 03:00:00+01:00    3
Freq: H
    """, ts)

105
106
    tsh.insert(engine, ts, 'tztest', 'Babar')
    back = tsh.get(engine, 'tztest')
107
108
109
110
111
112
113
114
115
116
117
118
119

    # though un localized we understand it's been normalized to utc
    assert_df("""
2017-10-28 23:00:00    0.0
2017-10-29 00:00:00    1.0
2017-10-29 01:00:00    2.0
2017-10-29 02:00:00    3.0
""", back)

    back.index = back.index.tz_localize('UTC')
    assert (ts.index == back.index).all()


120
def test_differential(engine, tsh):
121
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 10)
122
    tsh.insert(engine, ts_begin, 'ts_test', 'test')
123

124
125
    assert tsh.exists(engine, 'ts_test')
    assert not tsh.exists(engine, 'this_does_not_exist')
126

127
    assert_df("""
128
129
130
131
132
133
134
135
136
137
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    3.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
138
""", tsh.get(engine, 'ts_test'))
139
140

    # we should detect the emission of a message
141
    tsh.insert(engine, ts_begin, 'ts_test', 'babar')
142

143
    assert_df("""
144
145
146
147
148
149
150
151
152
153
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    3.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
154
""", tsh.get(engine, 'ts_test'))
155
156
157
158

    ts_slight_variation = ts_begin.copy()
    ts_slight_variation.iloc[3] = 0
    ts_slight_variation.iloc[6] = 0
159
    tsh.insert(engine, ts_slight_variation, 'ts_test', 'celeste')
160

161
    assert_df("""
162
163
164
165
166
167
168
169
170
171
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    0.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    0.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
172
""", tsh.get(engine, 'ts_test'))
173

174
    ts_longer = genserie(datetime(2010, 1, 3), 'D', 15)
175
176
177
178
    ts_longer.iloc[1] = 2.48
    ts_longer.iloc[3] = 3.14
    ts_longer.iloc[5] = ts_begin.iloc[7]

179
    tsh.insert(engine, ts_longer, 'ts_test', 'test')
180

181
    assert_df("""
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
2010-01-01     0.00
2010-01-02     1.00
2010-01-03     0.00
2010-01-04     2.48
2010-01-05     2.00
2010-01-06     3.14
2010-01-07     4.00
2010-01-08     7.00
2010-01-09     6.00
2010-01-10     7.00
2010-01-11     8.00
2010-01-12     9.00
2010-01-13    10.00
2010-01-14    11.00
2010-01-15    12.00
2010-01-16    13.00
2010-01-17    14.00
199
""", tsh.get(engine, 'ts_test'))
200
201

    # start testing manual overrides
202
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 5, initval=[2])
203
    ts_begin.loc['2010-01-04'] = -1
204
    tsh.insert(engine, ts_begin, 'ts_mixte', 'test')
205
206

    # -1 represents bogus upstream data
207
    assert_df("""
208
209
210
211
212
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
213
""", tsh.get(engine, 'ts_mixte'))
214
215

    # refresh all the period + 1 extra data point
216
    ts_more = genserie(datetime(2010, 1, 2), 'D', 5, [2])
217
    ts_more.loc['2010-01-04'] = -1
218
    tsh.insert(engine, ts_more, 'ts_mixte', 'test')
219

220
    assert_df("""
221
222
223
224
225
226
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
227
""", tsh.get(engine, 'ts_mixte'))
228
229

    # just append an extra data point
230
231
    # with no intersection with the previous ts
    ts_one_more = genserie(datetime(2010, 1, 7), 'D', 1, [3])
232
    tsh.insert(engine, ts_one_more, 'ts_mixte', 'test')
233

234
    assert_df("""
235
236
237
238
239
240
241
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
242
""", tsh.get(engine, 'ts_mixte'))
243

244
    with engine.connect() as cn:
245
        cn.execute('set search_path to "{0}.timeserie", {0}, public'.format(tsh.namespace))
246
247
248
        hist = pd.read_sql('select id, parent from ts_test order by id',
                           cn)
        assert_df("""
249
250
251
252
   id  parent
0   1     NaN
1   2     1.0
2   3     2.0
253
""", hist)
254

255
256
257
        hist = pd.read_sql('select id, parent from ts_mixte order by id',
                           cn)
        assert_df("""
258
259
260
261
   id  parent
0   1     NaN
1   2     1.0
2   3     2.0
262
""", hist)
263

264
265
266
        allts = pd.read_sql("select name, table_name from registry "
                            "where name in ('ts_test', 'ts_mixte')",
                            cn)
267

268
269
        assert_df("""
name              table_name
270
271
272
0   ts_test   {0}.timeserie.ts_test
1  ts_mixte  {0}.timeserie.ts_mixte
""".format(tsh.namespace), allts)
273

274
        assert_df("""
275
276
277
278
279
280
281
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
282
""", tsh.get(cn, 'ts_mixte',
283
             revision_date=datetime.now()))
284
285


286
def test_bad_import(engine, tsh):
287
288
289
290
291
    # the data were parsed as date by pd.read_json()
    df_result = pd.read_csv(DATADIR / 'test_data.csv')
    df_result['Gas Day'] = df_result['Gas Day'].apply(parser.parse, dayfirst=True, yearfirst=False)
    df_result.set_index('Gas Day', inplace=True)
    ts = df_result['SC']
292

293
294
    tsh.insert(engine, ts, 'SND_SC', 'test')
    result = tsh.get(engine, 'SND_SC')
295
    assert result.dtype == 'float64'
296
297
298

    # insertion of empty ts
    ts = pd.Series(name='truc', dtype='object')
299
300
    tsh.insert(engine, ts, 'empty_ts', 'test')
    assert tsh.get(engine, 'empty_ts') is None
301
302
303

    # nan in ts
    # all na
304
    ts = genserie(datetime(2010, 1, 10), 'D', 10, [np.nan], name='truc')
305
306
    tsh.insert(engine, ts, 'test_nan', 'test')
    assert tsh.get(engine, 'test_nan') is None
307
308
309
310
311

    # mixe na
    ts = pd.Series([np.nan] * 5 + [3] * 5,
                   index=pd.date_range(start=datetime(2010, 1, 10),
                                       freq='D', periods=10), name='truc')
312
313
    tsh.insert(engine, ts, 'test_nan', 'test')
    result = tsh.get(engine, 'test_nan')
314

315
316
    tsh.insert(engine, ts, 'test_nan', 'test')
    result = tsh.get(engine, 'test_nan')
317
    assert_df("""
318
319
320
321
322
2010-01-15    3.0
2010-01-16    3.0
2010-01-17    3.0
2010-01-18    3.0
2010-01-19    3.0
323
""", result)
324
325
326

    # get_ts with name not in database

327
    tsh.get(engine, 'inexisting_name', 'test')
328
329


330
def test_revision_date(engine, tsh):
331
332
333
334
335
336
337
    idate0 = datetime(2015, 1, 1, 0, 0, 0)
    with tsh.newchangeset(engine, 'test', _insertion_date=idate0):

        ts = genserie(datetime(2010, 1, 4), 'D', 4, [0], name='truc')
        tsh.insert(engine, ts, 'ts_through_time')
        assert idate0 == tsh.latest_insertion_date(engine, 'ts_through_time')

338
    idate1 = datetime(2015, 1, 1, 15, 43, 23)
339
    with tsh.newchangeset(engine, 'test', _insertion_date=idate1):
340

341
        ts = genserie(datetime(2010, 1, 4), 'D', 4, [1], name='truc')
342
343
        tsh.insert(engine, ts, 'ts_through_time')
        assert idate1 == tsh.latest_insertion_date(engine, 'ts_through_time')
344

345
    idate2 = datetime(2015, 1, 2, 15, 43, 23)
346
    with tsh.newchangeset(engine, 'test', _insertion_date=idate2):
347

348
        ts = genserie(datetime(2010, 1, 4), 'D', 4, [2], name='truc')
349
350
        tsh.insert(engine, ts, 'ts_through_time')
        assert idate2 == tsh.latest_insertion_date(engine, 'ts_through_time')
351

352
    idate3 = datetime(2015, 1, 3, 15, 43, 23)
353
    with tsh.newchangeset(engine, 'test', _insertion_date=idate3):
354

355
        ts = genserie(datetime(2010, 1, 4), 'D', 4, [3], name='truc')
356
357
        tsh.insert(engine, ts, 'ts_through_time')
        assert idate3 == tsh.latest_insertion_date(engine, 'ts_through_time')
358

359
    ts = tsh.get(engine, 'ts_through_time')
360

361
    assert_df("""
362
363
364
365
2010-01-04    3.0
2010-01-05    3.0
2010-01-06    3.0
2010-01-07    3.0
366
""", ts)
367

368
    ts = tsh.get(engine, 'ts_through_time',
Aurélien Campéas's avatar
Aurélien Campéas committed
369
                 revision_date=datetime(2015, 1, 2, 18, 43, 23))
370

371
    assert_df("""
372
373
374
375
2010-01-04    2.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    2.0
376
""", ts)
377

378
    ts = tsh.get(engine, 'ts_through_time',
379
                 revision_date=datetime(2015, 1, 1, 18, 43, 23))
380

381
    assert_df("""
382
383
384
385
2010-01-04    1.0
2010-01-05    1.0
2010-01-06    1.0
2010-01-07    1.0
386
""", ts)
387

388
    ts = tsh.get(engine, 'ts_through_time',
389
                 revision_date=datetime(2014, 1, 1, 18, 43, 23))
390
391
392

    assert ts is None

393

394
def test_snapshots(engine, tsh):
395
    baseinterval = tsh._snapshot_interval
396
    tsh._snapshot_interval = 4
397

398
    with engine.connect() as cn:
399
        for tscount in range(1, 11):
400
            ts = genserie(datetime(2015, 1, 1), 'D', tscount, [1])
401
            diff = tsh.insert(cn, ts, 'growing', 'babar')
402
403
            assert diff.index[0] == diff.index[-1] == ts.index[-1]

404
    diff = tsh.insert(engine, ts, 'growing', 'babar')
405
    assert diff is None
406

407
    with engine.connect() as cn:
408
        cn.execute('set search_path to "{}.timeserie"'.format(tsh.namespace))
409
410
411
        df = pd.read_sql("select id from growing where snapshot is not null",
                         cn)
        assert_df("""
412
413
   id
0   1
414
415
416
1   4
2   8
3  10
417
""", df)
418

419
420
        ts = tsh.get(cn, 'growing')
        assert_df("""
421
422
423
424
425
426
427
428
429
430
2015-01-01    1.0
2015-01-02    1.0
2015-01-03    1.0
2015-01-04    1.0
2015-01-05    1.0
2015-01-06    1.0
2015-01-07    1.0
2015-01-08    1.0
2015-01-09    1.0
2015-01-10    1.0
431
""", ts)
432

433
434
435
        df = pd.read_sql("select id, diff, snapshot from growing order by id", cn)
        for attr in ('diff', 'snapshot'):
            df[attr] = df[attr].apply(lambda x: 0 if x is None else len(x))
436

437
        assert_df("""
438
439
440
441
442
443
444
445
446
447
448
id  diff  snapshot
0   1     0        35
1   2    36         0
2   3    36         0
3   4    36        47
4   5    36         0
5   6    36         0
6   7    36         0
7   8    36        59
8   9    36         0
9  10    36        67
449
""", df)
450

451
452
    table = tsh._get_ts_table(engine, 'growing')
    snapid, snap = tsh._find_snapshot(engine, table, ())
453
454
    assert snapid == 10
    assert (ts == snap).all()
455
    tsh._snapshot_interval = baseinterval
456
457


458
def test_deletion(engine, tsh):
459
460
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 11)
    ts_begin.iloc[-1] = np.nan
461
    tsh.insert(engine, ts_begin, 'ts_del', 'test')
462

463
    ts = tsh._build_snapshot_upto(engine, tsh._get_ts_table(engine, 'ts_del'))
464
    assert ts.iloc[-1] == 9.0
465

466
    ts_begin.iloc[0] = np.nan
467
    ts_begin.iloc[3] = np.nan
468

469
    tsh.insert(engine, ts_begin, 'ts_del', 'test')
470

471
    assert_df("""
472
473
474
475
476
477
478
2010-01-02    1.0
2010-01-03    2.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
479
2010-01-10    9.0
480
""", tsh.get(engine, 'ts_del'))
481

482
    ts2 = tsh.get(engine, 'ts_del',
483
484
                 # force snapshot reconstruction feature
                 revision_date=datetime(2038, 1, 1))
485
    assert (tsh.get(engine, 'ts_del') == ts2).all()
486

487
488
489
    ts_begin.iloc[0] = 42
    ts_begin.iloc[3] = 23

490
    tsh.insert(engine, ts_begin, 'ts_del', 'test')
491

492
    assert_df("""
493
494
495
496
497
498
499
500
501
2010-01-01    42.0
2010-01-02     1.0
2010-01-03     2.0
2010-01-04    23.0
2010-01-05     4.0
2010-01-06     5.0
2010-01-07     6.0
2010-01-08     7.0
2010-01-09     8.0
502
2010-01-10     9.0
503
""", tsh.get(engine, 'ts_del'))
504
505
506

    # now with string!

507
    ts_string = genserie(datetime(2010, 1, 1), 'D', 10, ['machin'])
508
    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
509
510
511
512

    ts_string[4] = None
    ts_string[5] = None

513
    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
514
    assert_df("""
515
516
517
518
519
520
521
522
2010-01-01    machin
2010-01-02    machin
2010-01-03    machin
2010-01-04    machin
2010-01-07    machin
2010-01-08    machin
2010-01-09    machin
2010-01-10    machin
523
""", tsh.get(engine, 'ts_string_del'))
524
525
526
527

    ts_string[4] = 'truc'
    ts_string[6] = 'truc'

528
    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
529
    assert_df("""
530
531
532
533
534
535
536
537
2010-01-01    machin
2010-01-02    machin
2010-01-03    machin
2010-01-04    machin
2010-01-05      truc
2010-01-07      truc
2010-01-08    machin
2010-01-09    machin
538
2010-01-10    machin
539
""", tsh.get(engine, 'ts_string_del'))
540

541
    ts_string[ts_string.index] = np.nan
542
    tsh.insert(engine, ts_string, 'ts_string_del', 'test')
543

544
    erased = tsh.get(engine, 'ts_string_del')
545
546
    assert len(erased) == 0

547
548
    # first insertion with only nan

549
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 10, [np.nan])
550
    tsh.insert(engine, ts_begin, 'ts_null', 'test')
551

552
    assert tsh.get(engine, 'ts_null') is None
553

554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
    # exhibit issue with nans handling
    ts_repushed = genserie(datetime(2010, 1, 1), 'D', 11)
    ts_repushed[0:3] = np.nan

    assert_df("""
2010-01-01     NaN
2010-01-02     NaN
2010-01-03     NaN
2010-01-04     3.0
2010-01-05     4.0
2010-01-06     5.0
2010-01-07     6.0
2010-01-08     7.0
2010-01-09     8.0
2010-01-10     9.0
2010-01-11    10.0
Freq: D
""", ts_repushed)

573
574
    tsh.insert(engine, ts_repushed, 'ts_repushed', 'test')
    diff = tsh.insert(engine, ts_repushed, 'ts_repushed', 'test')
575
576
    assert diff is None

577
    # there is no difference
578
    assert 0 == len(tsh._compute_diff(ts_repushed, ts_repushed))
579
580
581
582
583

    ts_add = genserie(datetime(2010, 1, 1), 'D', 15)
    ts_add.iloc[0] = np.nan
    ts_add.iloc[13:] = np.nan
    ts_add.iloc[8] = np.nan
584
    diff = tsh._compute_diff(ts_repushed, ts_add)
585
586
587
588
589
590

    assert_df("""
2010-01-02     1.0
2010-01-03     2.0
2010-01-09     NaN
2010-01-12    11.0
591
2010-01-13    12.0""", diff.sort_index())
592
593
594
595
    # value on nan => value
    # nan on value => nan
    # nan on nan => Nothing
    # nan on nothing=> Nothing
596

Aurélien Campéas's avatar
Aurélien Campéas committed
597
    # full erasing
598
599
    # numeric
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 4)
600
    tsh.insert(engine, ts_begin, 'ts_full_del', 'test')
601

Aurélien Campéas's avatar
Aurélien Campéas committed
602
    ts_begin.iloc[:] = np.nan
603
    tsh.insert(engine, ts_begin, 'ts_full_del', 'test')
604
605

    ts_end = genserie(datetime(2010, 1, 1), 'D', 4)
606
    tsh.insert(engine, ts_end, 'ts_full_del', 'test')
607
608
609
610

    # string

    ts_begin = genserie(datetime(2010, 1, 1), 'D', 4, ['text'])
611
    tsh.insert(engine, ts_begin, 'ts_full_del_str', 'test')
612
613

    ts_begin.iloc[:] = np.nan
614
    tsh.insert(engine, ts_begin, 'ts_full_del_str', 'test')
615
616

    ts_end = genserie(datetime(2010, 1, 1), 'D', 4, ['text'])
617
    tsh.insert(engine, ts_end, 'ts_full_del_str', 'test')
618

Aurélien Campéas's avatar
Aurélien Campéas committed
619

620
def test_multi_index(engine, tsh):
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
    appdate_0 = pd.DatetimeIndex(start=datetime(2015, 1, 1),
                                 end=datetime(2015, 1, 2),
                                 freq='D').values
    pubdate_0 = [pd.datetime(2015, 1, 11, 12, 0, 0)] * 2
    insertion_date_0 = [pd.datetime(2015, 1, 11, 12, 30, 0)] * 2

    multi = [
        appdate_0,
        np.array(pubdate_0),
        np.array(insertion_date_0)
    ]

    ts_multi = pd.Series(range(2), index=multi)
    ts_multi.index.rename(['b', 'c', 'a'], inplace=True)

636
    tsh.insert(engine, ts_multi, 'ts_multi_simple', 'test')
637

638
    ts = tsh.get(engine, 'ts_multi_simple')
639
640
641
    assert_df("""
                                                    ts_multi_simple
a                   b          c                                   
642
643
2015-01-11 12:30:00 2015-01-01 2015-01-11 12:00:00              0.0
                    2015-01-02 2015-01-11 12:00:00              1.0
644
645
""", pd.DataFrame(ts))

646
    diff = tsh.insert(engine, ts_multi, 'ts_multi_simple', 'test')
647
648
649
650
651
    assert diff is None

    ts_multi_2 = pd.Series([0, 2], index=multi)
    ts_multi_2.index.rename(['b', 'c', 'a'], inplace=True)

652
653
    tsh.insert(engine, ts_multi_2, 'ts_multi_simple', 'test')
    ts = tsh.get(engine, 'ts_multi_simple')
654
655
656
657

    assert_df("""
                                                    ts_multi_simple
a                   b          c                                   
658
659
2015-01-11 12:30:00 2015-01-01 2015-01-11 12:00:00              0.0
                    2015-01-02 2015-01-11 12:00:00              2.0
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
""", pd.DataFrame(ts))

    # bigger ts
    appdate_0 = pd.DatetimeIndex(start=datetime(2015, 1, 1),
                                 end=datetime(2015, 1, 4),
                                 freq='D').values
    pubdate_0 = [pd.datetime(2015, 1, 11, 12, 0, 0)] * 4
    insertion_date_0 = [pd.datetime(2015, 1, 11, 12, 30, 0)] * 4

    appdate_1 = pd.DatetimeIndex(start=datetime(2015, 1, 1),
                                 end=datetime(2015, 1, 4),
                                 freq='D').values

    pubdate_1 = [pd.datetime(2015, 1, 21, 12, 0, 0)] * 4
    insertion_date_1 = [pd.datetime(2015, 1, 21, 12, 30, 0)] * 4

    multi = [
        np.concatenate([appdate_0, appdate_1]),
        np.array(pubdate_0 + pubdate_1),
        np.array(insertion_date_0 + insertion_date_1)
    ]

    ts_multi = pd.Series(range(8), index=multi)
    ts_multi.index.rename(['a', 'c', 'b'], inplace=True)

685
686
    tsh.insert(engine, ts_multi, 'ts_multi', 'test')
    ts = tsh.get(engine, 'ts_multi')
687
688
689
690

    assert_df("""
                                                    ts_multi
a          b                   c                            
691
692
693
694
695
696
697
698
2015-01-01 2015-01-11 12:30:00 2015-01-11 12:00:00       0.0
           2015-01-21 12:30:00 2015-01-21 12:00:00       4.0
2015-01-02 2015-01-11 12:30:00 2015-01-11 12:00:00       1.0
           2015-01-21 12:30:00 2015-01-21 12:00:00       5.0
2015-01-03 2015-01-11 12:30:00 2015-01-11 12:00:00       2.0
           2015-01-21 12:30:00 2015-01-21 12:00:00       6.0
2015-01-04 2015-01-11 12:30:00 2015-01-11 12:00:00       3.0
           2015-01-21 12:30:00 2015-01-21 12:00:00       7.0
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
    """, pd.DataFrame(ts.sort_index()))
    # Note: the columnns are returned according to the alphabetic order

    appdate_2 = pd.DatetimeIndex(start=datetime(2015, 1, 1),
                                 end=datetime(2015, 1, 4),
                                 freq='D').values
    pubdate_2 = [pd.datetime(2015, 1, 31, 12, 0, 0)] * 4
    insertion_date_2 = [pd.datetime(2015, 1, 31, 12, 30, 0)] * 4

    multi_2 = [
        np.concatenate([appdate_1, appdate_2]),
        np.array(pubdate_1 + pubdate_2),
        np.array(insertion_date_1 + insertion_date_2)
    ]

    ts_multi_2 = pd.Series([4] * 8, index=multi_2)
    ts_multi_2.index.rename(['a', 'c', 'b'], inplace=True)

    # A second ts is inserted with some index in common with the first
    # one: appdate_1, pubdate_1,and insertion_date_1. The value is set
    # at 4, which matches the previous value of the "2015-01-01" point.

721
    diff = tsh.insert(engine, ts_multi_2, 'ts_multi', 'test')
722
723
724
725
726
727
728
729
730
731
732
733
734
735
    assert_df("""
                                                    ts_multi
a          b                   c                            
2015-01-01 2015-01-31 12:30:00 2015-01-31 12:00:00       4.0
2015-01-02 2015-01-21 12:30:00 2015-01-21 12:00:00       4.0
           2015-01-31 12:30:00 2015-01-31 12:00:00       4.0
2015-01-03 2015-01-21 12:30:00 2015-01-21 12:00:00       4.0
           2015-01-31 12:30:00 2015-01-31 12:00:00       4.0
2015-01-04 2015-01-21 12:30:00 2015-01-21 12:00:00       4.0
           2015-01-31 12:30:00 2015-01-31 12:00:00       4.0
        """, pd.DataFrame(diff.sort_index()))
    # the differential skips a value for "2015-01-01"
    # which does not change from the previous ts

736
    ts = tsh.get(engine, 'ts_multi')
737
738
739
    assert_df("""
                                                    ts_multi
a          b                   c                            
740
741
742
743
744
745
746
747
748
749
750
751
2015-01-01 2015-01-11 12:30:00 2015-01-11 12:00:00       0.0
           2015-01-21 12:30:00 2015-01-21 12:00:00       4.0
           2015-01-31 12:30:00 2015-01-31 12:00:00       4.0
2015-01-02 2015-01-11 12:30:00 2015-01-11 12:00:00       1.0
           2015-01-21 12:30:00 2015-01-21 12:00:00       4.0
           2015-01-31 12:30:00 2015-01-31 12:00:00       4.0
2015-01-03 2015-01-11 12:30:00 2015-01-11 12:00:00       2.0
           2015-01-21 12:30:00 2015-01-21 12:00:00       4.0
           2015-01-31 12:30:00 2015-01-31 12:00:00       4.0
2015-01-04 2015-01-11 12:30:00 2015-01-11 12:00:00       3.0
           2015-01-21 12:30:00 2015-01-21 12:00:00       4.0
           2015-01-31 12:30:00 2015-01-31 12:00:00       4.0
752
753
754
        """, pd.DataFrame(ts.sort_index()))

    # the result ts have now 3 values for each point in 'a'
755
756


757
def test_get_history(engine, tsh):
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
    for numserie in (1, 2, 3):
        with engine.connect() as cn:
            with tsh.newchangeset(cn, 'aurelien.campeas@pythonian.fr',
                                  _insertion_date=datetime(2017, 2, numserie)):
                tsh.insert(cn, genserie(datetime(2017, 1, 1), 'D', numserie), 'smallserie')

    ts = tsh.get(engine, 'smallserie')
    assert_df("""
2017-01-01    0.0
2017-01-02    1.0
2017-01-03    2.0
""", ts)

    logs = tsh.log(engine, names=['smallserie'])
    assert [
        {'author': 'aurelien.campeas@pythonian.fr',
         'date': datetime(2017, 2, 1, 0, 0),
         'names': ['smallserie']
        },
        {'author': 'aurelien.campeas@pythonian.fr',
         'date': datetime(2017, 2, 2, 0, 0),
         'names': ['smallserie']
        },
        {'author': 'aurelien.campeas@pythonian.fr',
         'date': datetime(2017, 2, 3, 0, 0),
         'names': ['smallserie']
        }
    ] == [{k: v for k, v in log.items() if k != 'rev'}
          for log in logs]
    histts = tsh.get_history(engine, 'smallserie')
788
    assert histts.name == 'smallserie'
789
790
791
792
793
794
795
796
797
798
799

    assert_df("""
insertion_date  value_date
2017-02-01      2017-01-01    0.0
2017-02-02      2017-01-01    0.0
                2017-01-02    1.0
2017-02-03      2017-01-01    0.0
                2017-01-02    1.0
                2017-01-03    2.0
""", histts)

800
801
802
803
804
805
806
807
    diffs = tsh.get_history(engine, 'smallserie', diffmode=True)
    assert_df("""
insertion_date  value_date
2017-02-01      2017-01-01    0.0
2017-02-02      2017-01-02    1.0
2017-02-03      2017-01-03    2.0
""", diffs)

808
    for idate in histts.index.get_level_values('insertion_date').unique():
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
        with engine.connect() as cn:
            with tsh.newchangeset(cn, 'aurelien.campeas@pythonian.f',
                                  _insertion_date=idate):
                tsh.insert(cn, histts[idate], 'smallserie2')

    # this is perfectly round-tripable
    assert (tsh.get(engine, 'smallserie2') == ts).all()
    assert (tsh.get_history(engine, 'smallserie2') == histts).all()

    # get history ranges
    tsa = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2017, 2, 2))
    assert_df("""
insertion_date  value_date
2017-02-02      2017-01-01    0.0
                2017-01-02    1.0
2017-02-03      2017-01-01    0.0
                2017-01-02    1.0
                2017-01-03    2.0
""", tsa)

    tsb = tsh.get_history(engine, 'smallserie',
                          to_insertion_date=datetime(2017, 2, 2))
    assert_df("""
insertion_date  value_date
2017-02-01      2017-01-01    0.0
2017-02-02      2017-01-01    0.0
                2017-01-02    1.0
""", tsb)

    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2017, 2, 2),
                          to_insertion_date=datetime(2017, 2, 2))
    assert_df("""
insertion_date  value_date
2017-02-02      2017-01-01    0.0
                2017-01-02    1.0
""", tsc)

848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2017, 2, 4),
                          to_insertion_date=datetime(2017, 2, 4))
    assert tsc is None

    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2016, 2, 1),
                          to_insertion_date=datetime(2017, 2, 2))
    assert_df("""
insertion_date  value_date
2017-02-01      2017-01-01    0.0
2017-02-02      2017-01-01    0.0
                2017-01-02    1.0
""", tsc)

    tsc = tsh.get_history(engine, 'smallserie',
                          from_insertion_date=datetime(2016, 2, 1),
                          to_insertion_date=datetime(2016, 12, 31))
    assert tsc is None

868
869
870
871
872
873
874
875
876
877
878
879
880
    # restrictions on value dates
    tsc = tsh.get_history(engine, 'smallserie',
                          from_value_date=datetime(2017, 1, 1),
                          to_value_date=datetime(2017, 1, 2))
    assert_df("""
insertion_date  value_date
2017-02-01      2017-01-01    0.0
2017-02-02      2017-01-01    0.0
                2017-01-02    1.0
2017-02-03      2017-01-01    0.0
                2017-01-02    1.0
""", tsc)

881
882
883
884
885
886
887
888
889
890
    diffs = tsh.get_history(engine, 'smallserie',
                            diffmode=True,
                            from_value_date=datetime(2017, 1, 1),
                            to_value_date=datetime(2017, 1, 2))
    assert_df("""
insertion_date  value_date
2017-02-01      2017-01-01    0.0
2017-02-02      2017-01-02    1.0
""", diffs)

891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
    tsc = tsh.get_history(engine, 'smallserie',
                          from_value_date=datetime(2017, 1, 2))
    assert_df("""
insertion_date  value_date
2017-02-02      2017-01-02    1.0
2017-02-03      2017-01-02    1.0
                2017-01-03    2.0
""", tsc)

    tsc = tsh.get_history(engine, 'smallserie',
                          to_value_date=datetime(2017, 1, 2))
    assert_df("""
insertion_date  value_date
2017-02-01      2017-01-01    0.0
2017-02-02      2017-01-01    0.0
                2017-01-02    1.0
2017-02-03      2017-01-01    0.0
                2017-01-02    1.0
""", tsc)

911

912
def test_add_na(engine, tsh):
913
914
915
916
917
    # a serie of NaNs won't be insert in base
    # in case of first insertion
    ts_nan = genserie(datetime(2010, 1, 1), 'D', 5)
    ts_nan[[True] * len(ts_nan)] = np.nan

918
    diff = tsh.insert(engine, ts_nan, 'ts_add_na', 'test')
919
    assert diff is None
920
    result = tsh.get(engine, 'ts_add_na')
921
922
923
924
    assert result is None

    # in case of insertion in existing data
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 5)
925
    tsh.insert(engine, ts_begin, 'ts_add_na', 'test')
926
927
928
929
930

    ts_nan = genserie(datetime(2010, 1, 6), 'D', 5)
    ts_nan[[True] * len(ts_nan)] = np.nan
    ts_nan = pd.concat([ts_begin, ts_nan])

931
    diff = tsh.insert(engine, ts_nan, 'ts_add_na', 'test')
932
933
    assert diff is None

934
    result = tsh.get(engine, 'ts_add_na')
935
    assert len(result) == 5
936
937


938
def test_dtype_mismatch(engine, tsh):
939
    tsh.insert(engine,
940
941
942
943
944
               genserie(datetime(2015, 1, 1), 'D', 11).astype('str'),
               'error1',
               'test')

    with pytest.raises(Exception) as excinfo:
945
        tsh.insert(engine,
946
947
948
949
950
951
                   genserie(datetime(2015, 1, 1), 'D', 11),
                   'error1',
                   'test')

    assert 'Type error when inserting error1, new type is float64, type in base is object' == str(excinfo.value)

952
    tsh.insert(engine,
953
954
955
956
957
               genserie(datetime(2015, 1, 1), 'D', 11),
               'error2',
               'test')

    with pytest.raises(Exception) as excinfo:
958
        tsh.insert(engine,
959
960
961
962
963
                   genserie(datetime(2015, 1, 1), 'D', 11).astype('str'),
                   'error2',
                   'test')

    assert 'Type error when inserting error2, new type is object, type in base is float64' == str(excinfo.value)
964
965


966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
def test_precision(engine, tsh):

    floaty = 0.123456789123456789
    ts = genserie(datetime(2015, 1, 1), 'D', 5, initval=[floaty])

    tsh.insert(engine, ts, 'precision', 'test')
    ts_round = tsh.get(engine, 'precision')
    assert 0.12345678912346 == ts_round.iloc[0]

    diff = tsh.insert(engine, ts_round, 'precision', 'test')
    assert diff is None # the roundtriped series does not produce a diff when reinserted

    diff = tsh.insert(engine, ts, 'precision', 'test') # neither does the original series
    assert diff is None


982
@pytest.mark.perf
983
984
def test_bigdata(engine, tracker, ptsh):
    tsh = ptsh
985
986
    def create_data():
        for year in range(2015, 2020):
987
988
989
990
991
            date = datetime(year, 1, 1)
            serie = genserie(date, '10Min', 6 * 24 * 365)
            with tsh.newchangeset(engine, 'aurelien.campeas@pythonian.fr',
                                  _insertion_date=date):
                tsh.insert(engine, serie, 'big')
992
993
994

    t0 = time()
    create_data()
995
    t1 = time() - t0
996
    tshclass = tsh.__class__.__name__
997

998
    with engine.connect() as cn:
999
        cn.execute('set search_path to "{}.timeserie"'.format(tsh.namespace))
1000
1001
        df = pd.read_sql('select id, diff, snapshot from big order by id', cn)

1002
1003
1004
    for attr in ('diff', 'snapshot'):
        df[attr] = df[attr].apply(lambda x: 0 if x is None else len(x))

1005
    size = df[['diff', 'snapshot']].sum().to_dict()
1006
    tracker.append({'test': 'bigdata_insert',
1007
1008
1009
1010
                    'class': tshclass,
                    'time': t1,
                    'diffsize': size['diff'],
                    'snapsize': size['snapshot']})
1011

1012
1013
1014
    t0 = time()
    tsh.get_history(engine, 'big')
    t1 = time() - t0
1015
    tracker.append({'test': 'bigdata_history_all',
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
                    'class': tshclass,
                    'time': t1,
                    'diffsize': None,
                    'snapsize': None})

    t0 = time()
    for year in (2015, 2017, 2019):
        for month in (1, 5, 9, 12):
            date = datetime(year, month, 1)
            tsh.get_history(engine, 'big',
                            from_insertion_date=date,
                            to_insertion_date=date+timedelta(days=31))
    t1 = time() - t0
1029
    tracker.append({'test': 'bigdata_history_chunks',
1030
1031
1032
1033
1034
                    'class': tshclass,
                    'time': t1,
                    'diffsize': None,
                    'snapsize': None})

1035

1036
@pytest.mark.perf
1037
1038
def test_lots_of_diffs(engine, tracker, ptsh):
    tsh = ptsh
1039
    def create_data():
1040
        # one insert per day for 4 months
1041
1042
1043
        for month in range(1, 4):
            days = calendar.monthrange(2017, month)[1]
            for day in range(