Statistiche sulla partecipazione al Bebras italiano 2017/18

In [1]:
from IPython.display import HTML, Markdown

HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<input type="button" value="Clicca per vedere/nascondere il codice Python" onclick="code_toggle()">''')
Out[1]:

Insegnanti, stima delle squadre e alunni

In [2]:
%matplotlib inline
import pandas as pd
import urllib.request
from IPython.display import display, Markdown
import matplotlib.pyplot as plt

pd.options.display.max_rows = None
plt.style.use('ggplot')

with open('secret.key') as k:
    key = k.readline().strip()

r = urllib.request.urlopen("https://bebras.it/api?key={}&view=teachers".format(key))
with open("teachers.json", "w") as tw:
     tw.writelines(r.read().decode('utf-8'))

teachers = pd.DataFrame(pd.read_json("teachers.json", convert_axes=True))[3:]

teachers.index = range(len(teachers))
teachers['confirm_time'] = pd.to_datetime(teachers['confirm_time'], unit='s')
filledt = teachers[(teachers['subscription'] > 0) & 
                   (teachers['confirm_time'] > pd.datetime(2017,9,18))]

teachers['school_code'] = teachers['school_code'].str.strip().str.upper()
filled = len(filledt)
expected = filledt.sum()
regteams = expected['teams_active']

today = pd.datetime.today()
s = """*{}:* **{:d}** insegnanti hanno confermato la partecipazione; 
ci sono **{:d}** squadre già registrate (~*{:d}* alunni).
Nuovi insegnanti iscritti dal 18/9: **{:d}**.
"""
display(Markdown(s.format(str(today)[:19], 
                          filled, regteams, regteams*4,
                         len(teachers) - 1235)))

if today < pd.datetime(2017,11,5):
    isotoday = today.isoformat()[:10]
    with open("stats-" + isotoday + ".txt", "w") as stat:
        stat.write("{:d} {:d} {:d}\n".format(filled, regteams, regteams*4))

2017-12-16 08:14:04: 1131 insegnanti hanno confermato la partecipazione; ci sono 13890 squadre già registrate (~55560 alunni). Nuovi insegnanti iscritti dal 18/9: 928.

In [3]:
institutes = teachers[(teachers['school_code'].str.strip() != "") 
                      & (teachers['subscription'] > 0) 
                      & (teachers['confirm_time'] > pd.datetime(2017,9,18))].groupby('school_code')['id'].count()

print("Totale istituti con codice meccanografico: {}; numero medio insegnanti {:.2f}".format(len(institutes), institutes.mean()))
Totale istituti con codice meccanografico: 811; numero medio insegnanti 1.39
In [4]:
import os
data = []
for path, dirs, files in os.walk("."):
    for f in files:
        if path == '.' and f.startswith("stats-"):
            d = [int(x) for x in f.split('.')[0].split('-')[1:4]]
            with open(f,"r") as df:
                nn = [int(x) for x in df.readline().strip().split(" ")]
                d = pd.datetime(2017, 11, 13) - pd.datetime.fromtimestamp(os.stat(f).st_mtime)
                data.append((d, nn))
data = pd.DataFrame.from_items(data, orient="index", 
                               columns=["insegnanti","squadre","alunni"]).sort_index(ascending=False)
In [5]:
olddata = []
for path, dirs, files in os.walk("old"):
    for f in files:
        if f.startswith("stats-"):
            d = [int(x) for x in f.split('.')[0].split('-')[1:4]]
            with open(path + "/" + f,"r") as df:
                nn = [int(x) for x in df.readline().strip().split(" ")]
                olddata.append((pd.datetime(2016,11,6) - pd.datetime(*d), nn))
olddata = pd.DataFrame.from_items(olddata, orient="index", 
                                  columns=["insegnanti","squadre","alunni"]).sort_index(ascending=False)
In [6]:
fig, ax = plt.subplots(1,2)
fig.set_size_inches(11,5)

for i, t in enumerate(['squadre', 'insegnanti']):
    data[t].plot(ax=ax[i], legend=True)
    olddata[t].plot(ax=ax[i], legend=True, style='--', label=t + ' 2016')
    delta = (data[t].max()-olddata[t].max())/olddata[t].max()
    ax[i].text(.7*data[t].count(), .9*data[t].max(), '{:+.1f}%'.format(delta*100), color='tomato')

plt.show()

La popolazione studentesca nazionale

Dati ISTAT popolazione studentesca 2014 (fonte: http://dati.istat.it)

In [7]:
istat = pd.DataFrame.from_items([
                      ("PIEMONTE",              (191399, 117997, 168439)),
                      ("VALLE D'AOSTA",         (  5981,   3691,   5309)),
                      ("LIGURIA",               ( 61566,  39213,  60184)),
                      ("LOMBARDIA",             (468662, 283007, 381619)),
                      ("TRENTINO-ALTO ADIGE",   ( 27028,  16890,  21836)),
                      ("VENETO",                (232694, 142401, 204262)),
                      ("FRIULI-VENEZIA GIULIA", ( 51830,  32143,  46949)),
                      ("EMILIA-ROMAGNA",        (198417, 118460, 176968)),
                      ("TOSCANA",               (161001,  98203, 152886)),
                      ("UMBRIA",                ( 39181,  23488,  36946)),
                      ("MARCHE",                ( 67996,  42095,  70602)),
                      ("LAZIO",                 (268133, 161573, 249145)),
                      ("ABRUZZO",               ( 57146,  35828,  58578)),
                      ("MOLISE",                ( 12595,   8354,  14990)),
                      ("CAMPANIA",              (317346, 204223, 326644)),
                      ("PUGLIA",                (198662, 130675, 213545)),
                      ("BASILICATA",            (25237,  17097,   30214)),
                      ("CALABRIA",              (93277,  59624,  101208)),
                      ("SICILIA",               (254023, 164520, 252730)),
                      ("SARDEGNA",              (67379,  44105,   74003)),
                      ("ESTERO",       (pd.np.NaN, pd.np.NaN, pd.np.NaN))],
                      orient = "index",
                      columns = ('E','M','S'))
istat['totale'] = istat['E'] + istat['M'] + istat['S']
display(istat)
E M S totale
PIEMONTE 191399.0 117997.0 168439.0 477835.0
VALLE D'AOSTA 5981.0 3691.0 5309.0 14981.0
LIGURIA 61566.0 39213.0 60184.0 160963.0
LOMBARDIA 468662.0 283007.0 381619.0 1133288.0
TRENTINO-ALTO ADIGE 27028.0 16890.0 21836.0 65754.0
VENETO 232694.0 142401.0 204262.0 579357.0
FRIULI-VENEZIA GIULIA 51830.0 32143.0 46949.0 130922.0
EMILIA-ROMAGNA 198417.0 118460.0 176968.0 493845.0
TOSCANA 161001.0 98203.0 152886.0 412090.0
UMBRIA 39181.0 23488.0 36946.0 99615.0
MARCHE 67996.0 42095.0 70602.0 180693.0
LAZIO 268133.0 161573.0 249145.0 678851.0
ABRUZZO 57146.0 35828.0 58578.0 151552.0
MOLISE 12595.0 8354.0 14990.0 35939.0
CAMPANIA 317346.0 204223.0 326644.0 848213.0
PUGLIA 198662.0 130675.0 213545.0 542882.0
BASILICATA 25237.0 17097.0 30214.0 72548.0
CALABRIA 93277.0 59624.0 101208.0 254109.0
SICILIA 254023.0 164520.0 252730.0 671273.0
SARDEGNA 67379.0 44105.0 74003.0 185487.0
ESTERO NaN NaN NaN NaN

Analisi della distribuzione geografica delle squadre

In [8]:
miur = pd.read_csv('bebras_school_list.zip', low_memory=False)
In [9]:
def norm_region(r):
    """Normalize the name of a region. It also corrects wrong names."""
    r = r.strip().upper()
    if r == 'FVG' or r.startswith('FRIULI'):
       return 'FRIULI-VENEZIA GIULIA'
    if r.startswith('EMILIA'):
       return 'EMILIA-ROMAGNA'
    if r.startswith('TRENTINO'):
       return 'TRENTINO-ALTO ADIGE'
    if r.startswith('LOMB'):
       return 'LOMBARDIA'
    elif r == 'ALBANIA' or r == 'BAVIERA' or r == 'SIERRA' or r == 'DDDD':
       return 'ESTERO'
    else:
       return r

def infer_school_type(k):
    knorm = k['school_kind'].strip().upper()
    cnorm = k['school_code'].strip().upper()
    if cnorm and miur[miur['i_code'] == cnorm]['i_type'].count() > 0:
        knorm = str(miur[miur['i_code'] == cnorm]['i_type'].iloc[0])
    if 'PRIMARIA' in knorm or 'INFANZIA' in knorm or 'ELEMENTARE' in knorm:
        return 'E'
    if 'PRIMO GRADO' in knorm or ('MEDIA' in knorm and (not 'SUP' in knorm))\
    or '1°' in knorm or ' I GRADO' in knorm or knorm == 'IC':
        return 'M'
    if 'COMPRENSIVO' in knorm:
        return 'EM'
    if 'SECONDO GRADO' in knorm or '2°' in knorm  or 'II GRADO' in knorm \
    or 'LICEO' in knorm or 'ITI' in knorm or 'PROF' in knorm or 'IST TEC' in knorm \
    or 'TECNICO' in knorm or 'MAGISTRALE' in knorm or 'SUPERIORE' in knorm:
        return 'S'
    if knorm == 'STATALE' or 'C.D.38':
        return 'EMS'
    else:
        return knorm
   

stat = pd.DataFrame()
stat['regione'] = filledt['school_region'].map(norm_region)
stat['tipo'] = filledt[['school_kind','school_code']].apply(infer_school_type, axis=1)
stat['squadre attese'] = filledt['teams_active']

expected = stat.groupby(['regione', 'tipo']).aggregate('sum')
for (reg, tipo), row in expected.iterrows():
    if len(tipo) > 1:
       for t in tipo:
           try:
            expected.loc[(reg, t), 'squadre attese'] += row[0] / len(tipo)
           except:
            expected.loc[(reg, t), 'squadre attese'] = row[0] / len(tipo)
            try:
                expected.loc[(reg, t), 'popolazione'] = istat.loc[reg, t]
            except:
                print(":{}:{}:NOT FOUND".format(reg, t))
                expected.loc[(reg, t), 'popolazione'] = pd.np.NaN
    else:
        try:
            expected.loc[(reg, tipo), 'popolazione'] = istat.loc[reg, tipo]
        except:
            print("_{}_{}_NOT FOUND".format(reg, tipo))
            expected.loc[(reg, tipo), 'popolazione'] = pd.np.NaN
expected = expected[expected.index.isin(['E','M','S'], level=1)].sort_index()
expected['alunni attesi'] = expected['squadre attese'] * 4
expected['copertura (alunni ogni mille)'] = 1000 * expected['alunni attesi'] / expected['popolazione']
display(expected)
squadre attese popolazione alunni attesi copertura (alunni ogni mille)
regione tipo
ABRUZZO E 132.500000 57146.0 530.000000 9.274490
M 71.500000 35828.0 286.000000 7.982583
S 43.000000 58578.0 172.000000 2.936256
BASILICATA E 41.500000 25237.0 166.000000 6.577644
M 48.500000 17097.0 194.000000 11.347020
S 10.000000 30214.0 40.000000 1.323890
CALABRIA E 95.000000 93277.0 380.000000 4.073887
M 55.000000 59624.0 220.000000 3.689789
S 95.000000 101208.0 380.000000 3.754644
CAMPANIA E 766.166667 317346.0 3064.666667 9.657178
M 551.166667 204223.0 2204.666667 10.795389
S 333.666667 326644.0 1334.666667 4.085998
EMILIA-ROMAGNA E 123.000000 198417.0 492.000000 2.479626
M 260.000000 118460.0 1040.000000 8.779335
S 268.000000 176968.0 1072.000000 6.057592
ESTERO E 0.333333 NaN 1.333333 NaN
M 0.333333 NaN 1.333333 NaN
S 0.333333 NaN 1.333333 NaN
FRIULI-VENEZIA GIULIA E 50.000000 51830.0 200.000000 3.858769
M 104.000000 32143.0 416.000000 12.942165
S 190.000000 46949.0 760.000000 16.187778
LAZIO E 385.000000 268133.0 1540.000000 5.743418
M 286.000000 161573.0 1144.000000 7.080391
S 360.000000 249145.0 1440.000000 5.779767
LIGURIA E 81.500000 61566.0 326.000000 5.295130
M 137.500000 39213.0 550.000000 14.025961
S 619.000000 60184.0 2476.000000 41.140502
LOMBARDIA E 1180.833333 468662.0 4723.333333 10.078336
M 1112.833333 283007.0 4451.333333 15.728704
S 665.333333 381619.0 2661.333333 6.973797
MARCHE E 113.500000 67996.0 454.000000 6.676863
M 95.500000 42095.0 382.000000 9.074712
S 142.000000 70602.0 568.000000 8.045098
MOLISE E 38.500000 12595.0 154.000000 12.227074
M 63.500000 8354.0 254.000000 30.404597
S 9.000000 14990.0 36.000000 2.401601
PIEMONTE E 301.166667 191399.0 1204.666667 6.294007
M 432.166667 117997.0 1728.666667 14.650090
S 206.666667 168439.0 826.666667 4.907810
PUGLIA E 504.500000 198662.0 2018.000000 10.157957
M 709.500000 130675.0 2838.000000 21.718003
S 60.000000 213545.0 240.000000 1.123885
SARDEGNA E 81.500000 67379.0 326.000000 4.838303
M 137.500000 44105.0 550.000000 12.470241
S 26.000000 74003.0 104.000000 1.405348
SICILIA E 327.000000 254023.0 1308.000000 5.149140
M 194.000000 164520.0 776.000000 4.716752
S 134.000000 252730.0 536.000000 2.120840
TOSCANA E 142.500000 161001.0 570.000000 3.540351
M 244.500000 98203.0 978.000000 9.958963
S 49.000000 152886.0 196.000000 1.282001
TRENTINO-ALTO ADIGE E 46.500000 27028.0 186.000000 6.881752
M 122.500000 16890.0 490.000000 29.011249
S 29.000000 21836.0 116.000000 5.312328
UMBRIA E 70.166667 39181.0 280.666667 7.163336
M 46.166667 23488.0 184.666667 7.862171
S 17.666667 36946.0 70.666667 1.912701
VALLE D'AOSTA E 9.000000 5981.0 36.000000 6.019060
M 47.000000 3691.0 188.000000 50.934706
VENETO E 466.500000 232694.0 1866.000000 8.019115
M 379.500000 142401.0 1518.000000 10.660037
S 577.000000 204262.0 2308.000000 11.299214
In [10]:
tot = expected[['squadre attese','alunni attesi', 'popolazione']].groupby(level='tipo').sum()
tot['copertura (alunni ogni mille)'] = 1000 * tot['alunni attesi'] / tot['popolazione']
display(tot)
squadre attese alunni attesi popolazione copertura (alunni ogni mille)
tipo
E 4956.666667 19826.666667 2799553.0 7.082083
M 5098.666667 20394.666667 1743587.0 11.696960
S 3834.666667 15338.666667 2641748.0 5.806257
In [11]:
glob = tot.sum()
print("""squadre attese: {}\t alunni attesi: {}
popolazione: {}\t copertura (alunni ogni mille) {:0.1f}""".format(int(glob["squadre attese"]), 
                                                                  int(glob["alunni attesi"]), 
                                                                  int(glob["popolazione"]), 
                                                                  1000 * glob["alunni attesi"] / glob["popolazione"]))
squadre attese: 13890	 alunni attesi: 55560
popolazione: 7184888	 copertura (alunni ogni mille) 7.7
In [12]:
exp_reg = expected[['squadre attese','alunni attesi', 'popolazione']].groupby(level='regione').sum()
exp_reg['copertura (alunni ogni mille)'] = 1000 * exp_reg['alunni attesi'] / exp_reg['popolazione']
display(exp_reg)
squadre attese alunni attesi popolazione copertura (alunni ogni mille)
regione
ABRUZZO 247.0 988.0 151552.0 6.519215
BASILICATA 100.0 400.0 72548.0 5.513591
CALABRIA 245.0 980.0 254109.0 3.856613
CAMPANIA 1651.0 6604.0 848213.0 7.785780
EMILIA-ROMAGNA 651.0 2604.0 493845.0 5.272910
ESTERO 1.0 4.0 NaN NaN
FRIULI-VENEZIA GIULIA 344.0 1376.0 130922.0 10.510075
LAZIO 1031.0 4124.0 678851.0 6.074971
LIGURIA 838.0 3352.0 160963.0 20.824662
LOMBARDIA 2959.0 11836.0 1133288.0 10.443947
MARCHE 351.0 1404.0 180693.0 7.770085
MOLISE 111.0 444.0 35939.0 12.354267
PIEMONTE 940.0 3760.0 477835.0 7.868825
PUGLIA 1274.0 5096.0 542882.0 9.386939
SARDEGNA 245.0 980.0 185487.0 5.283389
SICILIA 655.0 2620.0 671273.0 3.903032
TOSCANA 436.0 1744.0 412090.0 4.232085
TRENTINO-ALTO ADIGE 198.0 792.0 65754.0 12.044895
UMBRIA 134.0 536.0 99615.0 5.380716
VALLE D'AOSTA 56.0 224.0 9672.0 23.159636
VENETO 1423.0 5692.0 579357.0 9.824685

Cartografia ISTAT 2011 (fonte: http://www.istat.it/it/archivio/24613), convertita con il comando:

ogr2ogr -f GeoJSON -s_srs reg2011_g.prj -t_srs EPSG:4326 it.json reg2011_g.shp

(fonte: https://gist.github.com/riccardoscalco/6029355)

In [13]:
%%capture _
!conda install -y -c conda-forge geopandas
In [14]:
import geopandas as gpd
%matplotlib inline

it = gpd.read_file("it.json")

TYPES = ('totale', 'primaria', 'secondaria primo grado', 'secondaria secondo grado')


def get_data_with_default(geo, i, t, data, j, label='squadre attese'):
    try:
        geo.loc[i, label + ' ' + t] = data.loc[j, label]
    except:
        geo.loc[i, label + ' ' + t] = 0
    finally:
        return geo.loc[i, label + ' ' + t]

for i, r in it.iterrows():
    for cname in istat.index:
        if r['NOME_REG'][0:5] == cname[0:5]:
            it.loc[i, 'NOME_REG'] = cname
            get_data_with_default(it, i, TYPES[0], exp_reg, cname)
            get_data_with_default(it, i, TYPES[1], expected, (cname, 'E'))
            get_data_with_default(it, i, TYPES[2], expected, (cname, 'M'))
            get_data_with_default(it, i, TYPES[3], expected, (cname, 'S'))
                
            it.loc[i, 'popolazione ' + TYPES[0]] = istat.loc[cname, 'totale']
            it.loc[i, 'popolazione ' + TYPES[1]] = istat.loc[cname, 'E']
            it.loc[i, 'popolazione ' + TYPES[2]] = istat.loc[cname, 'M']
            it.loc[i, 'popolazione ' + TYPES[3]] = istat.loc[cname, 'S']
            break

for t in TYPES:
    it['alunni attesi ' + t] = it['squadre attese ' + t] * 4
    it['copertura ' + t] = 1000 * it['alunni attesi ' + t] / it['popolazione ' + t]

fig, ax = plt.subplots(2,2)
fig.set_size_inches(15,11)
for i, t in enumerate(TYPES):
    r = i // 2
    c = i % 2
    ax[r][c].set_aspect("equal")
    ax[r][c].set_axis_off()
    ax[r][c].set_title("Alunni attesi ogni mille ({})".format(t))
    it.plot(ax=ax[r][c], column='copertura ' + t, cmap='YlOrRd', scheme='quantiles', legend=True)
    
fig.savefig('italia.pdf')    
plt.show()    

Il Bebras nel mondo (dati 2016)

In [15]:
w = gpd.read_file("world.json")
w = w.set_index("name")

with open("wbebras.json", "r") as t:
    wbebras = pd.DataFrame(pd.read_json(t, convert_axes=True, orient='index'))

wbebras['copertura'] = 1000 * wbebras["bebras"] / wbebras["oecd"]    
    
for i in wbebras.index:
    try:
        w.loc[i, "bebras"] = wbebras.loc[i, "bebras"]
        w.loc[i, "oecd"]   = wbebras.loc[i, "oecd"]
        w.loc[i, "copertura"]   = wbebras.loc[i, "copertura"]
    except:
        print(i)

plt.figure(figsize=(20,20))
ax = plt.subplot(212)
ax.set_aspect("equal")
ax.set_axis_off()
ax.set_title("Partecipanti 2016 ogni 1000 studenti (dati OECD 2015)")       
w.dropna().plot(ax=ax,column='copertura', cmap='Blues', scheme='quantiles', legend=True)


ax = plt.subplot(211)
ax.set_aspect("equal")
ax.set_axis_off()
ax.set_title("Partecipanti Bebras 2016")       
p = w.dropna(subset=["bebras"]).plot(ax=ax,column='bebras', cmap='YlOrRd', scheme='quantiles', legend=True)

Numeri assoluti

In [16]:
display(wbebras.sort_values("bebras",ascending=False)[["bebras","oecd","copertura"]])
bebras oecd copertura
France 474901 10075812.0 47.132777
Germany 290802 10351237.0 28.093454
Ukraine 98766 NaN NaN
United Kingdom 81658 48504360.0 1.683519
Czech Republic 71792 NaN NaN
Belarus 70753 NaN NaN
Slovakia 62981 727693.0 86.548861
Taiwan 60744 NaN NaN
United States of America 51782 48504360.0 1.067574
Republic of Serbia 35554 NaN NaN
Lithuania 33006 NaN NaN
Slovenia 29083 256122.0 113.551354
South Africa 28543 11848282.0 2.409041
Italy 28368 7461117.0 3.802112
Russia 27298 14774916.0 1.847591
Netherlands 24476 NaN NaN
Austria 21191 1041322.0 20.350093
Turkey 19951 NaN NaN
Macedonia 18126 NaN NaN
Hungary 18046 1310996.0 13.765107
Australia 16925 4460447.0 3.794463
Poland 15862 5085090.0 3.119315
Switzerland 12665 NaN NaN
Vietnam 12000 NaN NaN
Canada 11732 4950935.0 2.369653
Romania 11222 NaN NaN
Sweden 9396 NaN NaN
Pakistan 8488 NaN NaN
Latvia 7510 250298.0 30.004235
Finland 5827 896483.0 6.499844
Croatia 5624 NaN NaN
Azerbaijan 5153 NaN NaN
Estonia 4095 159432.0 25.684932
Japan 4080 14212385.0 0.287074
Kazakhstan 2995 NaN NaN
Iran 2967 NaN NaN
Belgium 2023 1935904.0 1.044990
Iceland 1780 NaN NaN
Indonesia 1533 NaN NaN
Ireland 1129 861872.0 1.309939
Cyprus 713 NaN NaN
Spain 671 6206261.0 0.108117
Malaysia 600 NaN NaN
Bulgaria 431 NaN NaN
New Zealand 315 849060.0 0.370999
Israel 250 1575180.0 0.158712
Egypt 0 NaN NaN
South Korea 0 NaN NaN
Singapore 0 NaN NaN
Bosnia and Herzegovina 0 NaN NaN
In [17]:
print("In totale nel mondo {} partecipanti".format(wbebras['bebras'].sum()))
In totale nel mondo 1693807 partecipanti

Analisi delle gare

In [18]:
CATS = ('kilo', 'mega', 'giga', 'tera', 'peta')
In [19]:
for i, k in enumerate(CATS):
    r = urllib.request.urlopen("https://bebras.it/api?key={}&view=exams&test={}&examdata=0".format(key,50+i))
    with open("overview-{}.json".format(k), "w") as tw:
        tw.writelines(r.read().decode('utf-8'))
In [20]:
import json

overview = []
for k in CATS:
    with open("overview-{}.json".format(k), "r") as t:
        j = json.load(t)
        overview += j['exams']
In [21]:
dfov = pd.DataFrame(overview)
gare = pd.DataFrame()
gare['categoria'] = dfov['category'].str.lower().astype('category', categories=CATS, ordered=True)
gare['insegnante'] = dfov['teacher_id'].astype('int64')
gare['login'] = dfov['login']
gare['status'] = dfov['exam_valid_score']
gare['risultato'] = dfov['score']
gare['data'] = pd.to_datetime(dfov['time'])
In [22]:
fid = filledt.set_index('id')
fid['regione'] = fid['school_region'].map(norm_region)
gare = gare.join(fid[['regione']],on='insegnante')
In [23]:
done = gare[gare['status'] == 1]

Insegnanti partecipanti

In [24]:
len(done.groupby(['insegnante']))
Out[24]:
970

Insegnanti per regione che hanno partecipato

In [25]:
display(done.groupby(['regione'])['insegnante'].nunique())
regione
ABRUZZO                   20
BASILICATA                10
CALABRIA                  27
CAMPANIA                 144
EMILIA-ROMAGNA            40
FRIULI-VENEZIA GIULIA     20
LAZIO                     84
LIGURIA                   21
LOMBARDIA                205
MARCHE                    12
MOLISE                     8
PIEMONTE                  74
PUGLIA                    89
SARDEGNA                  16
SICILIA                   37
TOSCANA                   41
TRENTINO-ALTO ADIGE        8
UMBRIA                    16
VALLE D'AOSTA              7
VENETO                    84
Name: insegnante, dtype: int64

Insegnanti per categoria

In [26]:
display(done.groupby(['categoria'])['insegnante'].nunique())
categoria
kilo    429
mega    345
giga    254
tera    143
peta    120
Name: insegnante, dtype: int64

Squadre per categoria

In [27]:
done.groupby(['regione', 'categoria']).count()
Out[27]:
insegnante login status risultato data
regione categoria
ABRUZZO kilo 96.0 96.0 96.0 96.0 96.0
mega 46.0 46.0 46.0 46.0 46.0
giga 21.0 21.0 21.0 21.0 21.0
tera 22.0 22.0 22.0 22.0 22.0
peta 13.0 13.0 13.0 13.0 13.0
BASILICATA kilo 5.0 5.0 5.0 5.0 5.0
mega 30.0 30.0 30.0 30.0 30.0
giga 31.0 31.0 31.0 31.0 31.0
tera 10.0 10.0 10.0 10.0 10.0
peta NaN NaN NaN NaN NaN
CALABRIA kilo 55.0 55.0 55.0 55.0 55.0
mega 37.0 37.0 37.0 37.0 37.0
giga 26.0 26.0 26.0 26.0 26.0
tera 67.0 67.0 67.0 67.0 67.0
peta 12.0 12.0 12.0 12.0 12.0
CAMPANIA kilo 561.0 561.0 561.0 561.0 561.0
mega 434.0 434.0 434.0 434.0 434.0
giga 171.0 171.0 171.0 171.0 171.0
tera 124.0 124.0 124.0 124.0 124.0
peta 163.0 163.0 163.0 163.0 163.0
EMILIA-ROMAGNA kilo 77.0 77.0 77.0 77.0 77.0
mega 193.0 193.0 193.0 193.0 193.0
giga 88.0 88.0 88.0 88.0 88.0
tera 160.0 160.0 160.0 160.0 160.0
peta 91.0 91.0 91.0 91.0 91.0
FRIULI-VENEZIA GIULIA kilo 3.0 3.0 3.0 3.0 3.0
mega 100.0 100.0 100.0 100.0 100.0
giga 31.0 31.0 31.0 31.0 31.0
tera 112.0 112.0 112.0 112.0 112.0
peta 49.0 49.0 49.0 49.0 49.0
LAZIO kilo 401.0 401.0 401.0 401.0 401.0
mega 123.0 123.0 123.0 123.0 123.0
giga 85.0 85.0 85.0 85.0 85.0
tera 173.0 173.0 173.0 173.0 173.0
peta 153.0 153.0 153.0 153.0 153.0
LIGURIA kilo 69.0 69.0 69.0 69.0 69.0
mega 85.0 85.0 85.0 85.0 85.0
giga 42.0 42.0 42.0 42.0 42.0
tera 229.0 229.0 229.0 229.0 229.0
peta 193.0 193.0 193.0 193.0 193.0
LOMBARDIA kilo 845.0 845.0 845.0 845.0 845.0
mega 919.0 919.0 919.0 919.0 919.0
giga 400.0 400.0 400.0 400.0 400.0
tera 357.0 357.0 357.0 357.0 357.0
peta 219.0 219.0 219.0 219.0 219.0
MARCHE kilo 18.0 18.0 18.0 18.0 18.0
mega 124.0 124.0 124.0 124.0 124.0
giga 45.0 45.0 45.0 45.0 45.0
tera 59.0 59.0 59.0 59.0 59.0
peta 49.0 49.0 49.0 49.0 49.0
MOLISE kilo 15.0 15.0 15.0 15.0 15.0
mega 55.0 55.0 55.0 55.0 55.0
giga 29.0 29.0 29.0 29.0 29.0
tera NaN NaN NaN NaN NaN
peta 9.0 9.0 9.0 9.0 9.0
PIEMONTE kilo 217.0 217.0 217.0 217.0 217.0
mega 285.0 285.0 285.0 285.0 285.0
giga 158.0 158.0 158.0 158.0 158.0
tera 92.0 92.0 92.0 92.0 92.0
peta 82.0 82.0 82.0 82.0 82.0
PUGLIA kilo 383.0 383.0 383.0 383.0 383.0
mega 427.0 427.0 427.0 427.0 427.0
giga 199.0 199.0 199.0 199.0 199.0
tera 43.0 43.0 43.0 43.0 43.0
peta 11.0 11.0 11.0 11.0 11.0
SARDEGNA kilo 15.0 15.0 15.0 15.0 15.0
mega 96.0 96.0 96.0 96.0 96.0
giga 53.0 53.0 53.0 53.0 53.0
tera 7.0 7.0 7.0 7.0 7.0
peta 14.0 14.0 14.0 14.0 14.0
SICILIA kilo 97.0 97.0 97.0 97.0 97.0
mega 121.0 121.0 121.0 121.0 121.0
giga 71.0 71.0 71.0 71.0 71.0
tera 63.0 63.0 63.0 63.0 63.0
peta 54.0 54.0 54.0 54.0 54.0
TOSCANA kilo 94.0 94.0 94.0 94.0 94.0
mega 193.0 193.0 193.0 193.0 193.0
giga 70.0 70.0 70.0 70.0 70.0
tera 19.0 19.0 19.0 19.0 19.0
peta 17.0 17.0 17.0 17.0 17.0
TRENTINO-ALTO ADIGE kilo 6.0 6.0 6.0 6.0 6.0
mega 92.0 92.0 92.0 92.0 92.0
giga 55.0 55.0 55.0 55.0 55.0
tera 17.0 17.0 17.0 17.0 17.0
peta 11.0 11.0 11.0 11.0 11.0
UMBRIA kilo 43.0 43.0 43.0 43.0 43.0
mega 40.0 40.0 40.0 40.0 40.0
giga 9.0 9.0 9.0 9.0 9.0
tera 8.0 8.0 8.0 8.0 8.0
peta 1.0 1.0 1.0 1.0 1.0
VALLE D'AOSTA kilo 9.0 9.0 9.0 9.0 9.0
mega 32.0 32.0 32.0 32.0 32.0
giga 15.0 15.0 15.0 15.0 15.0
tera NaN NaN NaN NaN NaN
peta NaN NaN NaN NaN NaN
VENETO kilo 315.0 315.0 315.0 315.0 315.0
mega 301.0 301.0 301.0 301.0 301.0
giga 173.0 173.0 173.0 173.0 173.0
tera 335.0 335.0 335.0 335.0 335.0
peta 207.0 207.0 207.0 207.0 207.0
In [ ]: