test_tsio.py 13.9 KB
Newer Older
1
2
3
4
5
6
7
# coding: utf-8
from pathlib import Path
from datetime import datetime
from dateutil import parser

import pandas as pd
import numpy as np
8
import pytest
9

10
from tshistory.tsio import TimeSerie
11
12
13
14

DATADIR = Path(__file__).parent / 'data'


15
16
17
18
19
20
21
def assert_group_equals(g1, g2):
    for (n1, s1), (n2, s2) in zip(sorted(g1.items()),
                                  sorted(g2.items())):
        assert n1 == n2
        assert s1.equals(s2)


22
23
24
25
def assert_df(expected, df):
    assert expected.strip() == df.to_string().strip()


26
27
28
29
30
31
32
33
34
35
36
37
38
def genserie(start, freq, repeat, initval=None, tz=None, name=None):
    if initval is None:
        values = range(repeat)
    else:
        values = initval * repeat
    return pd.Series(values,
                     name=name,
                     index=pd.date_range(start=start,
                                         freq=freq,
                                         periods=repeat,
                                         tz=tz))


39
def test_changeset(engine):
40
41
42
43
    # instantiate one time serie handler object
    tso = TimeSerie()

    index = pd.date_range(start=datetime(2017, 1, 1), freq='D', periods=3)
44
    data = [1., 2., 3.]
45
46
47

    with engine.connect() as cnx:
        with tso.newchangeset(cnx, 'babar'):
48
            tso.insert(cnx, pd.Series(data, index=index), 'ts_values')
49
            tso.insert(cnx, pd.Series(['a', 'b', 'c'], index=index), 'ts_othervalues')
50

51
52
53
54
    g = tso.get_group(engine, 'ts_values')
    g2 = tso.get_group(engine, 'ts_othervalues')
    assert_group_equals(g, g2)

55
56
57
    with pytest.raises(AssertionError):
        tso.insert(engine, pd.Series([2,3,4], index=index), 'ts_values')

58
59
60
61
62
    with engine.connect() as cnx:
        data.append(data.pop(0))
        with tso.newchangeset(cnx, 'celeste'):
            tso.insert(cnx, pd.Series(data, index=index), 'ts_values')
            # below should be a noop
63
            tso.insert(cnx, pd.Series(['a', 'b', 'c'], index=index), 'ts_othervalues')
64

65
66
67
    g = tso.get_group(engine, 'ts_values')
    assert ['ts_values'] == list(g.keys())

68
    assert_df("""
69
70
71
2017-01-01    2.0
2017-01-02    3.0
2017-01-03    1.0
72
""", tso.get(engine, 'ts_values'))
73

74
    assert_df("""
75
76
77
2017-01-01    a
2017-01-02    b
2017-01-03    c
78
""", tso.get(engine, 'ts_othervalues'))
79

80

81
82
def test_tstamp_roundtrip(engine):
    tso = TimeSerie()
83
84
    ts = genserie(datetime(2017, 10, 28, 23),
                  'H', 4, tz='UTC')
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
    ts.index = ts.index.tz_convert('Europe/Paris')

    assert_df("""
2017-10-29 01:00:00+02:00    0
2017-10-29 02:00:00+02:00    1
2017-10-29 02:00:00+01:00    2
2017-10-29 03:00:00+01:00    3
Freq: H
    """, ts)

    tso.insert(engine, ts, 'tztest', 'Babar')
    back = tso.get(engine, 'tztest')

    # though un localized we understand it's been normalized to utc
    assert_df("""
2017-10-28 23:00:00    0.0
2017-10-29 00:00:00    1.0
2017-10-29 01:00:00    2.0
2017-10-29 02:00:00    3.0
""", back)

    back.index = back.index.tz_localize('UTC')
    assert (ts.index == back.index).all()


110
def test_differential(engine):
111
112
    # instantiate one time serie handler object
    tso = TimeSerie()
113

114
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 10)
115
    tso.insert(engine, ts_begin, 'ts_test', 'test')
116

117
    assert_df("""
118
119
120
121
122
123
124
125
126
127
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    3.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
128
""", tso.get(engine, 'ts_test'))
129
130

    # we should detect the emission of a message
131
    tso.insert(engine, ts_begin, 'ts_test', 'babar')
132

133
    assert_df("""
134
135
136
137
138
139
140
141
142
143
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    3.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
144
""", tso.get(engine, 'ts_test'))
145
146
147
148

    ts_slight_variation = ts_begin.copy()
    ts_slight_variation.iloc[3] = 0
    ts_slight_variation.iloc[6] = 0
149
    tso.insert(engine, ts_slight_variation, 'ts_test', 'celeste')
150

151
    assert_df("""
152
153
154
155
156
157
158
159
160
161
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    0.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    0.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
162
""", tso.get(engine, 'ts_test'))
163

164
    ts_longer = genserie(datetime(2010, 1, 3), 'D', 15)
165
166
167
168
    ts_longer.iloc[1] = 2.48
    ts_longer.iloc[3] = 3.14
    ts_longer.iloc[5] = ts_begin.iloc[7]

169
    tso.insert(engine, ts_longer, 'ts_test', 'test')
170

171
    assert_df("""
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
2010-01-01     0.00
2010-01-02     1.00
2010-01-03     0.00
2010-01-04     2.48
2010-01-05     2.00
2010-01-06     3.14
2010-01-07     4.00
2010-01-08     7.00
2010-01-09     6.00
2010-01-10     7.00
2010-01-11     8.00
2010-01-12     9.00
2010-01-13    10.00
2010-01-14    11.00
2010-01-15    12.00
2010-01-16    13.00
2010-01-17    14.00
189
""", tso.get(engine, 'ts_test'))
190
191

    # start testing manual overrides
192
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 5, initval=[2])
193
    ts_begin.loc['2010-01-04'] = -1
194
    tso.insert(engine, ts_begin, 'ts_mixte', 'test')
195
196

    # -1 represents bogus upstream data
197
    assert_df("""
198
199
200
201
202
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
203
""", tso.get(engine, 'ts_mixte'))
204
205

    # refresh all the period + 1 extra data point
206
    ts_more = genserie(datetime(2010, 1, 2), 'D', 5, [2])
207
    ts_more.loc['2010-01-04'] = -1
208
    tso.insert(engine, ts_more, 'ts_mixte', 'test')
209

210
    assert_df("""
211
212
213
214
215
216
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
217
""", tso.get(engine, 'ts_mixte'))
218
219

    # just append an extra data point
220
221
    # with no intersection with the previous ts
    ts_one_more = genserie(datetime(2010, 1, 7), 'D', 1, [3])
222
    tso.insert(engine, ts_one_more, 'ts_mixte', 'test')
223

224
    assert_df("""
225
226
227
228
229
230
231
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
232
""", tso.get(engine, 'ts_mixte'))
233

234
    hist = pd.read_sql('select id, parent from timeserie.ts_test order by id',
235
                        engine)
236
    assert_df("""
237
238
239
240
   id  parent
0   1     NaN
1   2     1.0
2   3     2.0
241
""", hist)
242

243
    hist = pd.read_sql('select id, parent from timeserie.ts_mixte order by id',
244
                        engine)
245
    assert_df("""
246
247
248
249
   id  parent
0   1     NaN
1   2     1.0
2   3     2.0
250
""", hist)
251

252
    allts = pd.read_sql("select name, table_name from registry "
253
                        "where name in ('ts_test', 'ts_mixte')",
254
255
                        engine)

256
    assert_df("""
257
258
259
       name          table_name
0   ts_test   timeserie.ts_test
1  ts_mixte  timeserie.ts_mixte
260
""", allts)
261

262
    assert_df("""
263
264
265
266
267
268
269
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
270
271
""", tso.get(engine, 'ts_mixte',
             revision_date=datetime.now()))
272
273
274


def test_bad_import(engine):
275
276
277
    # instantiate one time serie handler object
    tso = TimeSerie()

278
279
280
281
282
    # the data were parsed as date by pd.read_json()
    df_result = pd.read_csv(DATADIR / 'test_data.csv')
    df_result['Gas Day'] = df_result['Gas Day'].apply(parser.parse, dayfirst=True, yearfirst=False)
    df_result.set_index('Gas Day', inplace=True)
    ts = df_result['SC']
283
284
285

    tso.insert(engine, ts, 'SND_SC', 'test')
    result = tso.get(engine, 'SND_SC')
286
    assert result.dtype == 'float64'
287
288
289

    # insertion of empty ts
    ts = pd.Series(name='truc', dtype='object')
290
291
    tso.insert(engine, ts, 'empty_ts', 'test')
    assert tso.get(engine, 'empty_ts') is None
292
293
294

    # nan in ts
    # all na
295
    ts = genserie(datetime(2010, 1, 10), 'D', 10, [np.nan], name='truc')
296
297
    tso.insert(engine, ts, 'test_nan', 'test')
    assert tso.get(engine, 'test_nan') is None
298
299
300
301
302

    # mixe na
    ts = pd.Series([np.nan] * 5 + [3] * 5,
                   index=pd.date_range(start=datetime(2010, 1, 10),
                                       freq='D', periods=10), name='truc')
303
304
    tso.insert(engine, ts, 'test_nan', 'test')
    result = tso.get(engine, 'test_nan')
305

306
307
    tso.insert(engine, ts, 'test_nan', 'test')
    result = tso.get(engine, 'test_nan')
308
    assert_df("""
309
310
311
312
313
2010-01-15    3.0
2010-01-16    3.0
2010-01-17    3.0
2010-01-18    3.0
2010-01-19    3.0
314
""", result)
315
316
317

    # get_ts with name not in database

318
    tso.get(engine, 'inexisting_name', 'test')
319
320
321


def test_revision_date(engine):
322
323
324
    # instantiate one time serie handler object
    tso = TimeSerie()

325
326
    idate1 = datetime(2015, 1, 1, 15, 43, 23)
    with tso.newchangeset(engine, 'test', _insertion_date=idate1):
327

328
        ts = genserie(datetime(2010, 1, 4), 'D', 4, [1], name='truc')
329
330
        tso.insert(engine, ts, 'ts_through_time')
        assert idate1 == tso.latest_insertion_date(engine, 'ts_through_time')
331

332
333
    idate2 = datetime(2015, 1, 2, 15, 43, 23)
    with tso.newchangeset(engine, 'test', _insertion_date=idate2):
334

335
        ts = genserie(datetime(2010, 1, 4), 'D', 4, [2], name='truc')
336
337
        tso.insert(engine, ts, 'ts_through_time')
        assert idate2 == tso.latest_insertion_date(engine, 'ts_through_time')
338

339
340
    idate3 = datetime(2015, 1, 3, 15, 43, 23)
    with tso.newchangeset(engine, 'test', _insertion_date=idate3):
341

342
        ts = genserie(datetime(2010, 1, 4), 'D', 4, [3], name='truc')
343
344
        tso.insert(engine, ts, 'ts_through_time')
        assert idate3 == tso.latest_insertion_date(engine, 'ts_through_time')
345

346
    ts = tso.get(engine, 'ts_through_time')
347

348
    assert_df("""
349
350
351
352
2010-01-04    3.0
2010-01-05    3.0
2010-01-06    3.0
2010-01-07    3.0
353
""", ts)
354

355
356
    ts = tso.get(engine, 'ts_through_time',
                 revision_date=datetime(2015, 1, 2, 18, 43, 23) )
357

358
    assert_df("""
359
360
361
362
2010-01-04    2.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    2.0
363
""", ts)
364

365
366
    ts = tso.get(engine, 'ts_through_time',
                 revision_date=datetime(2015, 1, 1, 18, 43, 23))
367

368
    assert_df("""
369
370
371
372
2010-01-04    1.0
2010-01-05    1.0
2010-01-06    1.0
2010-01-07    1.0
373
""", ts)
374

375
376
    ts = tso.get(engine, 'ts_through_time',
                 revision_date=datetime(2014, 1, 1, 18, 43, 23))
377
378
379

    assert ts is None

380
381
382

def test_snapshots(engine):
    tso = TimeSerie()
383
    tso._snapshot_interval = 4
384
385

    with engine.connect() as cnx:
386
        for tscount in range(1, 11):
387
            ts = genserie(datetime(2015, 1, 1), 'D', tscount, [1])
388
389
390
391
392
            diff = tso.insert(cnx, ts, 'growing', 'babar')
            assert diff.index[0] == diff.index[-1] == ts.index[-1]

    diff = tso.insert(engine, ts, 'growing', 'babar')
    assert diff is None
393

394
395
    df = pd.read_sql("select id from timeserie.growing where snapshot is not null",
                     engine)
396
    assert_df("""
397
398
   id
0   1
399
400
401
1   4
2   8
3  10
402
""", df)
403
404

    ts = tso.get(engine, 'growing')
405
    assert_df("""
406
407
408
409
410
411
412
413
414
415
2015-01-01    1.0
2015-01-02    1.0
2015-01-03    1.0
2015-01-04    1.0
2015-01-05    1.0
2015-01-06    1.0
2015-01-07    1.0
2015-01-08    1.0
2015-01-09    1.0
2015-01-10    1.0
416
""", ts)
417

418
    df = pd.read_sql("select id, diff, snapshot from timeserie.growing order by id", engine)
419
420
421
    for attr in ('diff', 'snapshot'):
        df[attr] = df[attr].apply(lambda x: 0 if x is None else len(x))

422
    assert_df("""
423
   id  diff  snapshot
Arnaud Campeas's avatar
Arnaud Campeas committed
424
425
426
427
428
429
430
431
432
433
0   1     0        32
1   2    32         0
2   3    32         0
3   4    32       125
4   5    32         0
5   6    32         0
6   7    32         0
7   8    32       249
8   9    32         0
9  10    32       311
434
""", df)
435
436
437
438
439

    table = tso._get_ts_table(engine, 'growing')
    snapid, snap = tso._find_snapshot(engine, table, ())
    assert snapid == 10
    assert (ts == snap).all()
440
441
442
443
444


def test_deletion(engine):
    tso = TimeSerie()

445
446
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 11)
    ts_begin.iloc[-1] = np.nan
447
448
    tso.insert(engine, ts_begin, 'ts_del', 'test')

449
    ts = tso._build_snapshot_upto(engine, tso._get_ts_table(engine, 'ts_del'))
450
    assert ts.iloc[-1] == 9.0
451

452
    ts_begin.iloc[0] = np.nan
453
    ts_begin.iloc[3] = np.nan
454
455
456

    tso.insert(engine, ts_begin, 'ts_del', 'test')

457
    assert_df("""
458
459
460
461
462
463
464
2010-01-02    1.0
2010-01-03    2.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
465
466
2010-01-10    9.0
""", tso.get(engine, 'ts_del'))
467

468
469
470
471
472
    ts2 = tso.get(engine, 'ts_del',
                 # force snapshot reconstruction feature
                 revision_date=datetime(2038, 1, 1))
    assert (tso.get(engine, 'ts_del') == ts2).all()

473
474
475
476
477
    ts_begin.iloc[0] = 42
    ts_begin.iloc[3] = 23

    tso.insert(engine, ts_begin, 'ts_del', 'test')

478
    assert_df("""
479
480
481
482
483
484
485
486
487
2010-01-01    42.0
2010-01-02     1.0
2010-01-03     2.0
2010-01-04    23.0
2010-01-05     4.0
2010-01-06     5.0
2010-01-07     6.0
2010-01-08     7.0
2010-01-09     8.0
488
489
2010-01-10     9.0
""", tso.get(engine, 'ts_del'))
490
491
492

    # now with string!

493
    ts_string = genserie(datetime(2010, 1, 1), 'D', 10, ['machin'])
494
495
496
497
498
499
    tso.insert(engine, ts_string, 'ts_string_del', 'test')

    ts_string[4] = None
    ts_string[5] = None

    tso.insert(engine, ts_string, 'ts_string_del', 'test')
500
    assert_df("""
501
502
503
504
505
506
507
508
2010-01-01    machin
2010-01-02    machin
2010-01-03    machin
2010-01-04    machin
2010-01-07    machin
2010-01-08    machin
2010-01-09    machin
2010-01-10    machin
509
""", tso.get(engine, 'ts_string_del'))
510
511
512
513
514

    ts_string[4] = 'truc'
    ts_string[6] = 'truc'

    tso.insert(engine, ts_string, 'ts_string_del', 'test')
515
    assert_df("""
516
517
518
519
520
521
522
523
2010-01-01    machin
2010-01-02    machin
2010-01-03    machin
2010-01-04    machin
2010-01-05      truc
2010-01-07      truc
2010-01-08    machin
2010-01-09    machin
524
525
2010-01-10    machin
""", tso.get(engine, 'ts_string_del'))
526
527
528

    # first insertion with only nan

529
    ts_begin = genserie(datetime(2010, 1, 1), 'D', 10, [np.nan])
530
531
532
    tso.insert(engine, ts_begin, 'ts_null', 'test')

    assert tso.get(engine, 'ts_null') is None