test_tsio.py 13.9 KB
Newer Older
1
2
3
4
5
6
7
# coding: utf-8
from pathlib import Path
from datetime import datetime
from dateutil import parser

import pandas as pd
import numpy as np
8
import pytest
9

10
from tshistory.tsio import TimeSerie
11
12
13
14

DATADIR = Path(__file__).parent / 'data'


15
16
17
18
19
20
21
def assert_group_equals(g1, g2):
    for (n1, s1), (n2, s2) in zip(sorted(g1.items()),
                                  sorted(g2.items())):
        assert n1 == n2
        assert s1.equals(s2)


22
def test_changeset(engine):
23
24
25
26
    # instantiate one time serie handler object
    tso = TimeSerie()

    index = pd.date_range(start=datetime(2017, 1, 1), freq='D', periods=3)
27
    data = [1., 2., 3.]
28
29
30

    with engine.connect() as cnx:
        with tso.newchangeset(cnx, 'babar'):
31
            tso.insert(cnx, pd.Series(data, index=index), 'ts_values')
32
            tso.insert(cnx, pd.Series(['a', 'b', 'c'], index=index), 'ts_othervalues')
33

34
35
36
37
    g = tso.get_group(engine, 'ts_values')
    g2 = tso.get_group(engine, 'ts_othervalues')
    assert_group_equals(g, g2)

38
39
40
    with pytest.raises(AssertionError):
        tso.insert(engine, pd.Series([2,3,4], index=index), 'ts_values')

41
42
43
44
45
    with engine.connect() as cnx:
        data.append(data.pop(0))
        with tso.newchangeset(cnx, 'celeste'):
            tso.insert(cnx, pd.Series(data, index=index), 'ts_values')
            # below should be a noop
46
            tso.insert(cnx, pd.Series(['a', 'b', 'c'], index=index), 'ts_othervalues')
47

48
49
50
    g = tso.get_group(engine, 'ts_values')
    assert ['ts_values'] == list(g.keys())

51
52
53
54
55
56
57
    assert """
2017-01-01    2.0
2017-01-02    3.0
2017-01-03    1.0
""".strip() == tso.get(engine, 'ts_values').to_string().strip()

    assert """
58
59
60
2017-01-01    a
2017-01-02    b
2017-01-03    c
61
62
""".strip() == tso.get(engine, 'ts_othervalues').to_string().strip()

63

64
def test_differential(engine):
65
66
    # instantiate one time serie handler object
    tso = TimeSerie()
67
68
69

    ts_begin = pd.Series(range(10))
    ts_begin.index = pd.date_range(start=datetime(2010, 1, 1), freq='D', periods=10)
70
    tso.insert(engine, ts_begin, 'ts_test', 'test')
71
72

    assert """
73
74
75
76
77
78
79
80
81
82
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    3.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
83
""".strip() == tso.get(engine, 'ts_test').to_string().strip()
84
85

    # we should detect the emission of a message
86
    tso.insert(engine, ts_begin, 'ts_test', 'babar')
87
88

    assert """
89
90
91
92
93
94
95
96
97
98
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    3.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
99
""".strip() == tso.get(engine, 'ts_test').to_string().strip()
100
101
102
103

    ts_slight_variation = ts_begin.copy()
    ts_slight_variation.iloc[3] = 0
    ts_slight_variation.iloc[6] = 0
104
    tso.insert(engine, ts_slight_variation, 'ts_test', 'celeste')
105
106

    assert """
107
108
109
110
111
112
113
114
115
116
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    0.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    0.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
117
""".strip() == tso.get(engine, 'ts_test').to_string().strip()
118
119
120
121
122
123
124

    ts_longer = pd.Series(range(15))
    ts_longer.index = pd.date_range(start=datetime(2010, 1, 3), freq='D', periods=15)
    ts_longer.iloc[1] = 2.48
    ts_longer.iloc[3] = 3.14
    ts_longer.iloc[5] = ts_begin.iloc[7]

125
    tso.insert(engine, ts_longer, 'ts_test', 'test')
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144

    assert """
2010-01-01     0.00
2010-01-02     1.00
2010-01-03     0.00
2010-01-04     2.48
2010-01-05     2.00
2010-01-06     3.14
2010-01-07     4.00
2010-01-08     7.00
2010-01-09     6.00
2010-01-10     7.00
2010-01-11     8.00
2010-01-12     9.00
2010-01-13    10.00
2010-01-14    11.00
2010-01-15    12.00
2010-01-16    13.00
2010-01-17    14.00
145
""".strip() == tso.get(engine, 'ts_test').to_string().strip()
146
147
148
149
150

    # start testing manual overrides
    ts_begin = pd.Series([2] * 5)
    ts_begin.index = pd.date_range(start=datetime(2010, 1, 1), freq='D', periods=5)
    ts_begin.loc['2010-01-04'] = -1
151
    tso.insert(engine, ts_begin, 'ts_mixte', 'test')
152
153
154

    # -1 represents bogus upstream data
    assert """
155
156
157
158
159
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
160
""".strip() == tso.get(engine, 'ts_mixte').to_string().strip()
161
162
163
164
165

    # refresh all the period + 1 extra data point
    ts_more = pd.Series([2] * 5)
    ts_more.index = pd.date_range(start=datetime(2010, 1, 2), freq='D', periods=5)
    ts_more.loc['2010-01-04'] = -1
166
    tso.insert(engine, ts_more, 'ts_mixte', 'test')
167
168
169
170
171
172
173
174

    assert """
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
175
""".strip() == tso.get(engine, 'ts_mixte').to_string().strip()
176
177
178
179

    # just append an extra data point
    ts_one_more = pd.Series([3])  # with no intersection with the previous ts
    ts_one_more.index = pd.date_range(start=datetime(2010, 1, 7), freq='D', periods=1)
180
    tso.insert(engine, ts_one_more, 'ts_mixte', 'test')
181
182
183
184
185
186
187
188
189

    assert """
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
190
""".strip() == tso.get(engine, 'ts_mixte').to_string().strip()
191

192
    hist = pd.read_sql('select id, parent from timeserie.ts_test order by id',
193
194
                        engine)
    assert """
195
196
197
198
199
200
   id  parent
0   1     NaN
1   2     1.0
2   3     2.0
""".strip() == hist.to_string().strip()

201
    hist = pd.read_sql('select id, parent from timeserie.ts_mixte order by id',
202
203
204
205
206
207
208
209
                        engine)
    assert """
   id  parent
0   1     NaN
1   2     1.0
2   3     2.0
""".strip() == hist.to_string().strip()

210
    allts = pd.read_sql("select name, table_name from registry "
211
                        "where name in ('ts_test', 'ts_mixte')",
212
213
214
                        engine)

    assert """
215
216
217
       name          table_name
0   ts_test   timeserie.ts_test
1  ts_mixte  timeserie.ts_mixte
218
219
220
221
222
223
224
225
226
227
""".strip() == allts.to_string().strip()

    assert """
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
228
229
""".strip() == tso.get(engine, 'ts_mixte',
                       revision_date=datetime.now()).to_string().strip()
230
231
232


def test_bad_import(engine):
233
234
235
    # instantiate one time serie handler object
    tso = TimeSerie()

236
237
238
239
240
    # the data were parsed as date by pd.read_json()
    df_result = pd.read_csv(DATADIR / 'test_data.csv')
    df_result['Gas Day'] = df_result['Gas Day'].apply(parser.parse, dayfirst=True, yearfirst=False)
    df_result.set_index('Gas Day', inplace=True)
    ts = df_result['SC']
241
242
243

    tso.insert(engine, ts, 'SND_SC', 'test')
    result = tso.get(engine, 'SND_SC')
244
    assert result.dtype == 'float64'
245
246
247

    # insertion of empty ts
    ts = pd.Series(name='truc', dtype='object')
248
249
    tso.insert(engine, ts, 'empty_ts', 'test')
    assert tso.get(engine, 'empty_ts') is None
250
251
252
253
254
255

    # nan in ts
    # all na
    ts = pd.Series([np.nan] * 10,
                   index=pd.date_range(start=datetime(2010, 1, 10),
                                       freq='D', periods=10), name='truc')
256
257
    tso.insert(engine, ts, 'test_nan', 'test')
    assert tso.get(engine, 'test_nan') is None
258
259
260
261
262

    # mixe na
    ts = pd.Series([np.nan] * 5 + [3] * 5,
                   index=pd.date_range(start=datetime(2010, 1, 10),
                                       freq='D', periods=10), name='truc')
263
264
    tso.insert(engine, ts, 'test_nan', 'test')
    result = tso.get(engine, 'test_nan')
265

266
267
    tso.insert(engine, ts, 'test_nan', 'test')
    result = tso.get(engine, 'test_nan')
268
269
270
271
272
273
274
275
276
277
    assert """
2010-01-15    3.0
2010-01-16    3.0
2010-01-17    3.0
2010-01-18    3.0
2010-01-19    3.0
""".strip() == result.to_string().strip()

    # get_ts with name not in database

278
    tso.get(engine, 'inexisting_name', 'test')
279
280
281


def test_revision_date(engine):
282
283
284
    # instantiate one time serie handler object
    tso = TimeSerie()

285
286
    idate1 = datetime(2015, 1, 1, 15, 43, 23)
    with tso.newchangeset(engine, 'test', _insertion_date=idate1):
287
288
289
290

        ts = pd.Series([1] * 4,
                       index=pd.date_range(start=datetime(2010, 1, 4),
                                           freq='D', periods=4), name='truc')
291
292
        tso.insert(engine, ts, 'ts_through_time')
        assert idate1 == tso.latest_insertion_date(engine, 'ts_through_time')
293

294
295
    idate2 = datetime(2015, 1, 2, 15, 43, 23)
    with tso.newchangeset(engine, 'test', _insertion_date=idate2):
296
297
298
299

        ts = pd.Series([2] * 4,
                       index=pd.date_range(start=datetime(2010, 1, 4),
                                           freq='D', periods=4), name='truc')
300
301
        tso.insert(engine, ts, 'ts_through_time')
        assert idate2 == tso.latest_insertion_date(engine, 'ts_through_time')
302

303
304
    idate3 = datetime(2015, 1, 3, 15, 43, 23)
    with tso.newchangeset(engine, 'test', _insertion_date=idate3):
305
306
307
308

        ts = pd.Series([3] * 4,
                       index=pd.date_range(start=datetime(2010, 1, 4),
                                           freq='D', periods=4), name='truc')
309
310
        tso.insert(engine, ts, 'ts_through_time')
        assert idate3 == tso.latest_insertion_date(engine, 'ts_through_time')
311

312
    ts = tso.get(engine, 'ts_through_time')
313
314

    assert """
315
316
317
318
2010-01-04    3.0
2010-01-05    3.0
2010-01-06    3.0
2010-01-07    3.0
319
320
""".strip() == ts.to_string().strip()

321
322
    ts = tso.get(engine, 'ts_through_time',
                 revision_date=datetime(2015, 1, 2, 18, 43, 23) )
323
324

    assert """
325
326
327
328
2010-01-04    2.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    2.0
329
330
""".strip() == ts.to_string().strip()

331
332
    ts = tso.get(engine, 'ts_through_time',
                 revision_date=datetime(2015, 1, 1, 18, 43, 23))
333
334

    assert """
335
336
337
338
2010-01-04    1.0
2010-01-05    1.0
2010-01-06    1.0
2010-01-07    1.0
339
340
""".strip() == ts.to_string().strip()

341
342
    ts = tso.get(engine, 'ts_through_time',
                 revision_date=datetime(2014, 1, 1, 18, 43, 23))
343
344
345

    assert ts is None

346
347
348

def test_snapshots(engine):
    tso = TimeSerie()
349
    tso._snapshot_interval = 4
350
351

    with engine.connect() as cnx:
352
        for tscount in range(1, 11):
353
354
355
            ts = pd.Series([1] * tscount,
                           index=pd.date_range(datetime(2015, 1, 1),
                                               freq='D', periods=tscount))
356
357
358
359
360
            diff = tso.insert(cnx, ts, 'growing', 'babar')
            assert diff.index[0] == diff.index[-1] == ts.index[-1]

    diff = tso.insert(engine, ts, 'growing', 'babar')
    assert diff is None
361

362
363
    df = pd.read_sql("select id from timeserie.growing where snapshot is not null",
                     engine)
364
365
366
    assert """
   id
0   1
367
368
369
1   4
2   8
3  10
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
""".strip() == df.to_string().strip()

    ts = tso.get(engine, 'growing')
    assert """
2015-01-01    1.0
2015-01-02    1.0
2015-01-03    1.0
2015-01-04    1.0
2015-01-05    1.0
2015-01-06    1.0
2015-01-07    1.0
2015-01-08    1.0
2015-01-09    1.0
2015-01-10    1.0
""".strip() == ts.to_string().strip()
385

386
    df = pd.read_sql("select id, diff, snapshot from timeserie.growing order by id", engine)
387
388
389
390
391
    for attr in ('diff', 'snapshot'):
        df[attr] = df[attr].apply(lambda x: 0 if x is None else len(x))

    assert """
   id  diff  snapshot
Arnaud Campeas's avatar
Arnaud Campeas committed
392
393
394
395
396
397
398
399
400
401
0   1     0        32
1   2    32         0
2   3    32         0
3   4    32       125
4   5    32         0
5   6    32         0
6   7    32         0
7   8    32       249
8   9    32         0
9  10    32       311
402
""".strip() == df.to_string().strip()
403
404
405
406
407

    table = tso._get_ts_table(engine, 'growing')
    snapid, snap = tso._find_snapshot(engine, table, ())
    assert snapid == 10
    assert (ts == snap).all()
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491


def test_deletion(engine):
    tso = TimeSerie()

    ts_begin = pd.Series(range(10))
    ts_begin.index = pd.date_range(start=datetime(2010, 1, 1), freq='D', periods=10)
    tso.insert(engine, ts_begin, 'ts_del', 'test')

    ts_begin.iloc[3] = np.nan
    ts_begin.iloc[0] = np.nan

    tso.insert(engine, ts_begin, 'ts_del', 'test')

    assert """
2010-01-02    1.0
2010-01-03    2.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0""".strip() == tso.get(engine, 'ts_del').to_string().strip()

    ts_begin.iloc[0] = 42
    ts_begin.iloc[3] = 23

    tso.insert(engine, ts_begin, 'ts_del', 'test')

    assert """
2010-01-01    42.0
2010-01-02     1.0
2010-01-03     2.0
2010-01-04    23.0
2010-01-05     4.0
2010-01-06     5.0
2010-01-07     6.0
2010-01-08     7.0
2010-01-09     8.0
2010-01-10     9.0""".strip() == tso.get(engine, 'ts_del').to_string().strip()

    # now with string!

    ts_string = pd.Series(['machin'] * 10)
    ts_string.index = pd.date_range(start=datetime(2010, 1, 1), freq='D', periods=10)
    tso.insert(engine, ts_string, 'ts_string_del', 'test')

    ts_string[4] = None
    ts_string[5] = None

    tso.insert(engine, ts_string, 'ts_string_del', 'test')
    assert"""
2010-01-01    machin
2010-01-02    machin
2010-01-03    machin
2010-01-04    machin
2010-01-07    machin
2010-01-08    machin
2010-01-09    machin
2010-01-10    machin
""".strip() == tso.get(engine, 'ts_string_del').to_string().strip()

    ts_string[4] = 'truc'
    ts_string[6] = 'truc'

    tso.insert(engine, ts_string, 'ts_string_del', 'test')
    assert """
2010-01-01    machin
2010-01-02    machin
2010-01-03    machin
2010-01-04    machin
2010-01-05      truc
2010-01-07      truc
2010-01-08    machin
2010-01-09    machin
2010-01-10    machin""".strip() == tso.get(engine, 'ts_string_del').to_string().strip()

    # first insertion with only nan

    ts_begin = pd.Series([np.nan] * 10)
    ts_begin.index = pd.date_range(start=datetime(2010, 1, 1), freq='D', periods=10)
    tso.insert(engine, ts_begin, 'ts_null', 'test')

    assert tso.get(engine, 'ts_null') is None