test_tsio.py 13.4 KB
Newer Older
1
2
3
4
5
6
7
# coding: utf-8
from pathlib import Path
from datetime import datetime
from dateutil import parser

import pandas as pd
import numpy as np
8
import pytest
9

10
from tshistory.tsio import TimeSerie
11
12
13
14

DATADIR = Path(__file__).parent / 'data'


15
16
17
18
19
20
21
def assert_group_equals(g1, g2):
    for (n1, s1), (n2, s2) in zip(sorted(g1.items()),
                                  sorted(g2.items())):
        assert n1 == n2
        assert s1.equals(s2)


22
23
24
25
def assert_df(expected, df):
    assert expected.strip() == df.to_string().strip()


26
def test_changeset(engine):
27
28
29
30
    # instantiate one time serie handler object
    tso = TimeSerie()

    index = pd.date_range(start=datetime(2017, 1, 1), freq='D', periods=3)
31
    data = [1., 2., 3.]
32
33
34

    with engine.connect() as cnx:
        with tso.newchangeset(cnx, 'babar'):
35
            tso.insert(cnx, pd.Series(data, index=index), 'ts_values')
36
            tso.insert(cnx, pd.Series(['a', 'b', 'c'], index=index), 'ts_othervalues')
37

38
39
40
41
    g = tso.get_group(engine, 'ts_values')
    g2 = tso.get_group(engine, 'ts_othervalues')
    assert_group_equals(g, g2)

42
43
44
    with pytest.raises(AssertionError):
        tso.insert(engine, pd.Series([2,3,4], index=index), 'ts_values')

45
46
47
48
49
    with engine.connect() as cnx:
        data.append(data.pop(0))
        with tso.newchangeset(cnx, 'celeste'):
            tso.insert(cnx, pd.Series(data, index=index), 'ts_values')
            # below should be a noop
50
            tso.insert(cnx, pd.Series(['a', 'b', 'c'], index=index), 'ts_othervalues')
51

52
53
54
    g = tso.get_group(engine, 'ts_values')
    assert ['ts_values'] == list(g.keys())

55
    assert_df("""
56
57
58
2017-01-01    2.0
2017-01-02    3.0
2017-01-03    1.0
59
""", tso.get(engine, 'ts_values'))
60

61
    assert_df("""
62
63
64
2017-01-01    a
2017-01-02    b
2017-01-03    c
65
""", tso.get(engine, 'ts_othervalues'))
66

67

68
def test_differential(engine):
69
70
    # instantiate one time serie handler object
    tso = TimeSerie()
71
72
73

    ts_begin = pd.Series(range(10))
    ts_begin.index = pd.date_range(start=datetime(2010, 1, 1), freq='D', periods=10)
74
    tso.insert(engine, ts_begin, 'ts_test', 'test')
75

76
    assert_df("""
77
78
79
80
81
82
83
84
85
86
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    3.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
87
""", tso.get(engine, 'ts_test'))
88
89

    # we should detect the emission of a message
90
    tso.insert(engine, ts_begin, 'ts_test', 'babar')
91

92
    assert_df("""
93
94
95
96
97
98
99
100
101
102
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    3.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
103
""", tso.get(engine, 'ts_test'))
104
105
106
107

    ts_slight_variation = ts_begin.copy()
    ts_slight_variation.iloc[3] = 0
    ts_slight_variation.iloc[6] = 0
108
    tso.insert(engine, ts_slight_variation, 'ts_test', 'celeste')
109

110
    assert_df("""
111
112
113
114
115
116
117
118
119
120
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    0.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    0.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
121
""", tso.get(engine, 'ts_test'))
122
123
124
125
126
127
128

    ts_longer = pd.Series(range(15))
    ts_longer.index = pd.date_range(start=datetime(2010, 1, 3), freq='D', periods=15)
    ts_longer.iloc[1] = 2.48
    ts_longer.iloc[3] = 3.14
    ts_longer.iloc[5] = ts_begin.iloc[7]

129
    tso.insert(engine, ts_longer, 'ts_test', 'test')
130

131
    assert_df("""
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
2010-01-01     0.00
2010-01-02     1.00
2010-01-03     0.00
2010-01-04     2.48
2010-01-05     2.00
2010-01-06     3.14
2010-01-07     4.00
2010-01-08     7.00
2010-01-09     6.00
2010-01-10     7.00
2010-01-11     8.00
2010-01-12     9.00
2010-01-13    10.00
2010-01-14    11.00
2010-01-15    12.00
2010-01-16    13.00
2010-01-17    14.00
149
""", tso.get(engine, 'ts_test'))
150
151
152
153
154

    # start testing manual overrides
    ts_begin = pd.Series([2] * 5)
    ts_begin.index = pd.date_range(start=datetime(2010, 1, 1), freq='D', periods=5)
    ts_begin.loc['2010-01-04'] = -1
155
    tso.insert(engine, ts_begin, 'ts_mixte', 'test')
156
157

    # -1 represents bogus upstream data
158
    assert_df("""
159
160
161
162
163
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
164
""", tso.get(engine, 'ts_mixte'))
165
166
167
168
169

    # refresh all the period + 1 extra data point
    ts_more = pd.Series([2] * 5)
    ts_more.index = pd.date_range(start=datetime(2010, 1, 2), freq='D', periods=5)
    ts_more.loc['2010-01-04'] = -1
170
    tso.insert(engine, ts_more, 'ts_mixte', 'test')
171

172
    assert_df("""
173
174
175
176
177
178
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
179
""", tso.get(engine, 'ts_mixte'))
180
181
182
183

    # just append an extra data point
    ts_one_more = pd.Series([3])  # with no intersection with the previous ts
    ts_one_more.index = pd.date_range(start=datetime(2010, 1, 7), freq='D', periods=1)
184
    tso.insert(engine, ts_one_more, 'ts_mixte', 'test')
185

186
    assert_df("""
187
188
189
190
191
192
193
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
194
""", tso.get(engine, 'ts_mixte'))
195

196
    hist = pd.read_sql('select id, parent from timeserie.ts_test order by id',
197
                        engine)
198
    assert_df("""
199
200
201
202
   id  parent
0   1     NaN
1   2     1.0
2   3     2.0
203
""", hist)
204

205
    hist = pd.read_sql('select id, parent from timeserie.ts_mixte order by id',
206
                        engine)
207
    assert_df("""
208
209
210
211
   id  parent
0   1     NaN
1   2     1.0
2   3     2.0
212
""", hist)
213

214
    allts = pd.read_sql("select name, table_name from registry "
215
                        "where name in ('ts_test', 'ts_mixte')",
216
217
                        engine)

218
    assert_df("""
219
220
221
       name          table_name
0   ts_test   timeserie.ts_test
1  ts_mixte  timeserie.ts_mixte
222
""", allts)
223

224
    assert_df("""
225
226
227
228
229
230
231
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
232
233
""", tso.get(engine, 'ts_mixte',
             revision_date=datetime.now()))
234
235
236


def test_bad_import(engine):
237
238
239
    # instantiate one time serie handler object
    tso = TimeSerie()

240
241
242
243
244
    # the data were parsed as date by pd.read_json()
    df_result = pd.read_csv(DATADIR / 'test_data.csv')
    df_result['Gas Day'] = df_result['Gas Day'].apply(parser.parse, dayfirst=True, yearfirst=False)
    df_result.set_index('Gas Day', inplace=True)
    ts = df_result['SC']
245
246
247

    tso.insert(engine, ts, 'SND_SC', 'test')
    result = tso.get(engine, 'SND_SC')
248
    assert result.dtype == 'float64'
249
250
251

    # insertion of empty ts
    ts = pd.Series(name='truc', dtype='object')
252
253
    tso.insert(engine, ts, 'empty_ts', 'test')
    assert tso.get(engine, 'empty_ts') is None
254
255
256
257
258
259

    # nan in ts
    # all na
    ts = pd.Series([np.nan] * 10,
                   index=pd.date_range(start=datetime(2010, 1, 10),
                                       freq='D', periods=10), name='truc')
260
261
    tso.insert(engine, ts, 'test_nan', 'test')
    assert tso.get(engine, 'test_nan') is None
262
263
264
265
266

    # mixe na
    ts = pd.Series([np.nan] * 5 + [3] * 5,
                   index=pd.date_range(start=datetime(2010, 1, 10),
                                       freq='D', periods=10), name='truc')
267
268
    tso.insert(engine, ts, 'test_nan', 'test')
    result = tso.get(engine, 'test_nan')
269

270
271
    tso.insert(engine, ts, 'test_nan', 'test')
    result = tso.get(engine, 'test_nan')
272
    assert_df("""
273
274
275
276
277
2010-01-15    3.0
2010-01-16    3.0
2010-01-17    3.0
2010-01-18    3.0
2010-01-19    3.0
278
""", result)
279
280
281

    # get_ts with name not in database

282
    tso.get(engine, 'inexisting_name', 'test')
283
284
285


def test_revision_date(engine):
286
287
288
    # instantiate one time serie handler object
    tso = TimeSerie()

289
290
    idate1 = datetime(2015, 1, 1, 15, 43, 23)
    with tso.newchangeset(engine, 'test', _insertion_date=idate1):
291
292
293
294

        ts = pd.Series([1] * 4,
                       index=pd.date_range(start=datetime(2010, 1, 4),
                                           freq='D', periods=4), name='truc')
295
296
        tso.insert(engine, ts, 'ts_through_time')
        assert idate1 == tso.latest_insertion_date(engine, 'ts_through_time')
297

298
299
    idate2 = datetime(2015, 1, 2, 15, 43, 23)
    with tso.newchangeset(engine, 'test', _insertion_date=idate2):
300
301
302
303

        ts = pd.Series([2] * 4,
                       index=pd.date_range(start=datetime(2010, 1, 4),
                                           freq='D', periods=4), name='truc')
304
305
        tso.insert(engine, ts, 'ts_through_time')
        assert idate2 == tso.latest_insertion_date(engine, 'ts_through_time')
306

307
308
    idate3 = datetime(2015, 1, 3, 15, 43, 23)
    with tso.newchangeset(engine, 'test', _insertion_date=idate3):
309
310
311
312

        ts = pd.Series([3] * 4,
                       index=pd.date_range(start=datetime(2010, 1, 4),
                                           freq='D', periods=4), name='truc')
313
314
        tso.insert(engine, ts, 'ts_through_time')
        assert idate3 == tso.latest_insertion_date(engine, 'ts_through_time')
315

316
    ts = tso.get(engine, 'ts_through_time')
317

318
    assert_df("""
319
320
321
322
2010-01-04    3.0
2010-01-05    3.0
2010-01-06    3.0
2010-01-07    3.0
323
""", ts)
324

325
326
    ts = tso.get(engine, 'ts_through_time',
                 revision_date=datetime(2015, 1, 2, 18, 43, 23) )
327

328
    assert_df("""
329
330
331
332
2010-01-04    2.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    2.0
333
""", ts)
334

335
336
    ts = tso.get(engine, 'ts_through_time',
                 revision_date=datetime(2015, 1, 1, 18, 43, 23))
337

338
    assert_df("""
339
340
341
342
2010-01-04    1.0
2010-01-05    1.0
2010-01-06    1.0
2010-01-07    1.0
343
""", ts)
344

345
346
    ts = tso.get(engine, 'ts_through_time',
                 revision_date=datetime(2014, 1, 1, 18, 43, 23))
347
348
349

    assert ts is None

350
351
352

def test_snapshots(engine):
    tso = TimeSerie()
353
    tso._snapshot_interval = 4
354
355

    with engine.connect() as cnx:
356
        for tscount in range(1, 11):
357
358
359
            ts = pd.Series([1] * tscount,
                           index=pd.date_range(datetime(2015, 1, 1),
                                               freq='D', periods=tscount))
360
361
362
363
364
            diff = tso.insert(cnx, ts, 'growing', 'babar')
            assert diff.index[0] == diff.index[-1] == ts.index[-1]

    diff = tso.insert(engine, ts, 'growing', 'babar')
    assert diff is None
365

366
367
    df = pd.read_sql("select id from timeserie.growing where snapshot is not null",
                     engine)
368
    assert_df("""
369
370
   id
0   1
371
372
373
1   4
2   8
3  10
374
""", df)
375
376

    ts = tso.get(engine, 'growing')
377
    assert_df("""
378
379
380
381
382
383
384
385
386
387
2015-01-01    1.0
2015-01-02    1.0
2015-01-03    1.0
2015-01-04    1.0
2015-01-05    1.0
2015-01-06    1.0
2015-01-07    1.0
2015-01-08    1.0
2015-01-09    1.0
2015-01-10    1.0
388
""", ts)
389

390
    df = pd.read_sql("select id, diff, snapshot from timeserie.growing order by id", engine)
391
392
393
    for attr in ('diff', 'snapshot'):
        df[attr] = df[attr].apply(lambda x: 0 if x is None else len(x))

394
    assert_df("""
395
   id  diff  snapshot
Arnaud Campeas's avatar
Arnaud Campeas committed
396
397
398
399
400
401
402
403
404
405
0   1     0        32
1   2    32         0
2   3    32         0
3   4    32       125
4   5    32         0
5   6    32         0
6   7    32         0
7   8    32       249
8   9    32         0
9  10    32       311
406
""", df)
407
408
409
410
411

    table = tso._get_ts_table(engine, 'growing')
    snapid, snap = tso._find_snapshot(engine, table, ())
    assert snapid == 10
    assert (ts == snap).all()
412
413
414
415
416
417
418
419
420
421
422
423
424
425


def test_deletion(engine):
    tso = TimeSerie()

    ts_begin = pd.Series(range(10))
    ts_begin.index = pd.date_range(start=datetime(2010, 1, 1), freq='D', periods=10)
    tso.insert(engine, ts_begin, 'ts_del', 'test')

    ts_begin.iloc[3] = np.nan
    ts_begin.iloc[0] = np.nan

    tso.insert(engine, ts_begin, 'ts_del', 'test')

426
    assert_df("""
427
428
429
430
431
432
433
2010-01-02    1.0
2010-01-03    2.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
434
435
2010-01-10    9.0
""", tso.get(engine, 'ts_del'))
436
437
438
439
440
441

    ts_begin.iloc[0] = 42
    ts_begin.iloc[3] = 23

    tso.insert(engine, ts_begin, 'ts_del', 'test')

442
    assert_df("""
443
444
445
446
447
448
449
450
451
2010-01-01    42.0
2010-01-02     1.0
2010-01-03     2.0
2010-01-04    23.0
2010-01-05     4.0
2010-01-06     5.0
2010-01-07     6.0
2010-01-08     7.0
2010-01-09     8.0
452
453
2010-01-10     9.0
""", tso.get(engine, 'ts_del'))
454
455
456
457
458
459
460
461
462
463
464

    # now with string!

    ts_string = pd.Series(['machin'] * 10)
    ts_string.index = pd.date_range(start=datetime(2010, 1, 1), freq='D', periods=10)
    tso.insert(engine, ts_string, 'ts_string_del', 'test')

    ts_string[4] = None
    ts_string[5] = None

    tso.insert(engine, ts_string, 'ts_string_del', 'test')
465
    assert_df("""
466
467
468
469
470
471
472
473
2010-01-01    machin
2010-01-02    machin
2010-01-03    machin
2010-01-04    machin
2010-01-07    machin
2010-01-08    machin
2010-01-09    machin
2010-01-10    machin
474
""", tso.get(engine, 'ts_string_del'))
475
476
477
478
479

    ts_string[4] = 'truc'
    ts_string[6] = 'truc'

    tso.insert(engine, ts_string, 'ts_string_del', 'test')
480
    assert_df("""
481
482
483
484
485
486
487
488
2010-01-01    machin
2010-01-02    machin
2010-01-03    machin
2010-01-04    machin
2010-01-05      truc
2010-01-07      truc
2010-01-08    machin
2010-01-09    machin
489
490
2010-01-10    machin
""", tso.get(engine, 'ts_string_del'))
491
492
493
494
495
496
497
498

    # first insertion with only nan

    ts_begin = pd.Series([np.nan] * 10)
    ts_begin.index = pd.date_range(start=datetime(2010, 1, 1), freq='D', periods=10)
    tso.insert(engine, ts_begin, 'ts_null', 'test')

    assert tso.get(engine, 'ts_null') is None