Première version
This commit is contained in:
96069
scripts/finess-clean.csv
Normal file
96069
scripts/finess-clean.csv
Normal file
File diff suppressed because it is too large
Load Diff
162
scripts/finess-clean.py
Normal file
162
scripts/finess-clean.py
Normal file
@ -0,0 +1,162 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# ---
|
||||
# jupyter:
|
||||
# jupytext:
|
||||
# formats: ipynb,py:hydrogen
|
||||
# text_representation:
|
||||
# extension: .py
|
||||
# format_name: hydrogen
|
||||
# format_version: '1.3'
|
||||
# jupytext_version: 1.14.1
|
||||
# kernelspec:
|
||||
# display_name: Python 3 (ipykernel)
|
||||
# language: python
|
||||
# name: python3
|
||||
# ---
|
||||
|
||||
# %% [markdown]
|
||||
# # Production d'un csv utilisable de la base FINESS
|
||||
#
|
||||
# En l'état, l'export CSV de la [base FINESS][finess] n'est pas vraiment satisfaisant et utilisable.
|
||||
#
|
||||
# - Le fichier n'est pas réellement un CSV.
|
||||
# - Il est bizarrement découpé en deux sections qui correspondent au XML.
|
||||
# - Les colonnes n'ont pas de nom.
|
||||
# - Le fichier est encodé au format windows.
|
||||
#
|
||||
# [finess]: https://www.data.gouv.fr/en/datasets/finess-extraction-du-fichier-des-etablissements/
|
||||
|
||||
# %% gradient={"editing": false, "id": "4facc182", "kernelId": ""}
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import requests
|
||||
|
||||
# %% gradient={"editing": false, "id": "3f7b5d32", "kernelId": ""}
|
||||
dataset_api = "https://www.data.gouv.fr/api/1/datasets/finess-extraction-du-fichier-des-etablissements/"

# %% gradient={"editing": false, "id": "58d641d4", "kernelId": ""}
# Fetch the dataset description from the data.gouv.fr API. The timeout keeps
# the notebook from hanging on a stalled connection, and raise_for_status()
# reports HTTP errors here rather than as a JSON/KeyError further down.
response = requests.get(dataset_api, timeout=30)
response.raise_for_status()
resources = response.json()['resources']

# Keep the 'main' resources whose title mentions the geolocated export and use
# the first one.
geoloc_candidates = [
    r for r in resources
    if r['type'] == 'main' and 'géolocalisés' in r['title']
]
if not geoloc_candidates:
    # Same exception type as the bare `[0]` lookup, with an explanation.
    raise IndexError(
        "no 'main' resource with 'géolocalisés' in its title was found at "
        f"{dataset_api}"
    )
resource_geoloc = geoloc_candidates[0]
|
||||
|
||||
# %% gradient={"editing": false, "id": "13dd939b", "kernelId": ""}
|
||||
# Column names, in file order, applied to the downloaded export when it is
# read (the export does not name its columns).
headers = [
    'section', 'nofinesset', 'nofinessej', 'rs', 'rslongue', 'complrs',
    'compldistrib', 'numvoie', 'typvoie', 'voie', 'compvoie', 'lieuditbp',
    'commune', 'departement', 'libdepartement', 'ligneacheminement',
    'telephone', 'telecopie', 'categetab', 'libcategetab', 'categagretab',
    'libcategagretab', 'siret', 'codeape', 'codemft', 'libmft', 'codesph',
    'libsph', 'dateouv', 'dateautor', 'maj', 'numuai',
]
|
||||
|
||||
# %% gradient={"editing": false, "id": "b68dac89", "kernelId": ""}
|
||||
# Names given to the leading columns of the geolocation rows.
geoloc_names = ['nofinesset', 'coordxet', 'coordyet', 'sourcecoordet', 'datemaj']
|
||||
|
||||
# %% gradient={"editing": false, "id": "4492d3dd", "kernelId": ""}
|
||||
# Read every field as text. The first line of the file is skipped and the
# column names come from `headers`.
raw_with_section = pd.read_csv(
    resource_geoloc['url'],
    sep=";", encoding="utf-8", header=None, skiprows=1,
    dtype='str',
    names=headers,
)

# Keep the per-row section marker aside before dropping its column, so the two
# sections can be separated by value instead of by row position.
# NOTE(review): assumes the first column really is a section marker that is
# constant within each section - confirm against the downloaded file.
section = raw_with_section['section']
raw_df = raw_with_section.drop(columns=['section'])

raw_df

# %% gradient={"editing": false, "id": "2efc14bc", "kernelId": ""}
# Rows carrying the same marker as the first row of the file are the
# structures. Splitting the frame at exactly half its length only works when
# both sections have the same number of rows.
# isin(head(1)) is used instead of `== section.iloc[0]` so that an empty frame
# yields an empty mask rather than an IndexError.
is_structure = section.isin(section.head(1))
structures = raw_df[is_structure]

structures

# %% gradient={"editing": false, "id": "283be3bb", "kernelId": ""}
# All remaining rows are geolocation records: only their leading columns are
# kept, and those columns are renamed positionally to `geoloc_names`.
geoloc_columns = raw_df.columns[:len(geoloc_names)]
geolocalisations = (raw_df
    .loc[~is_structure, geoloc_columns]
    .rename(columns=dict(zip(geoloc_columns, geoloc_names)))
)

geolocalisations
|
||||
|
||||
# %% gradient={"editing": false, "id": "b54e527e", "kernelId": ""}
|
||||
# Left join: every structure is kept, with the geolocation columns filled in
# where a row with the same nofinesset exists.
clean_df = pd.merge(
    structures,
    geolocalisations,
    how="left",
    on="nofinesset",
)

clean_df

# %%
# One random record, transposed so that each column shows on its own line.
clean_df.sample().T

# %%
clean_df["siret"]
|
||||
|
||||
# %% [markdown] gradient={"editing": false, "id": "82306369-229c-418f-9138-d753e1b71ce4", "kernelId": ""}
|
||||
# ## Vérification de la qualité des données
|
||||
|
||||
# %% gradient={"editing": false, "id": "64975e82-5f97-4bb4-b1d3-8aed85fa37cd", "kernelId": "", "source_hidden": false} jupyter={"outputs_hidden": false}
|
||||
# Identifiers that appear in both the structures and the geolocations.
structure_ids = structures.nofinesset.values
geoloc_ids = geolocalisations.nofinesset.values
intersection = pd.Series(np.intersect1d(structure_ids, geoloc_ids))

intersection.shape

# %% gradient={"editing": false, "id": "07e3c1cb-7032-4d83-833c-0979d2592f3c", "kernelId": "", "source_hidden": false} jupyter={"outputs_hidden": false}
# Structures whose identifier has no geolocation row.
structure_has_geoloc = structures.nofinesset.isin(intersection)
only_structures = structures.loc[~structure_has_geoloc]

only_structures

# %% gradient={"editing": false, "id": "cfb13e95-b622-4d89-be56-61397dc4370e", "kernelId": "", "source_hidden": false} jupyter={"outputs_hidden": false}
# Geolocation rows whose identifier matches no structure.
geoloc_has_structure = geolocalisations.nofinesset.isin(intersection)
only_geolocalisations = geolocalisations.loc[~geoloc_has_structure]

only_geolocalisations

# %% gradient={"editing": false, "id": "92cd9e34-74c8-454c-96d8-3c628e7b94bd", "kernelId": "", "source_hidden": false} jupyter={"outputs_hidden": false}
geolocalisations_missing = []
|
||||
|
||||
# %% [markdown] gradient={"editing": false, "id": "ff24d2da-6b7e-49ca-8ac9-cc1e90d32235", "kernelId": ""}
|
||||
# ## Export final
|
||||
|
||||
# %% gradient={"editing": false, "id": "8f6f3c73-4c14-4e82-ac63-cdf9ab8e4b21", "kernelId": "", "source_hidden": false} jupyter={"outputs_hidden": false}
|
||||
# Write the merged table to the working directory. pandas' defaults are kept,
# so the file starts with an index column and has a header row;
# finess-sisa.py reads it back positionally with its own list of names, whose
# first entry ('section') lines up with that index column.
clean_df.to_csv('finess-clean.csv', encoding='utf-8')
|
||||
|
||||
# %%
|
104
scripts/finess-sisa.py
Normal file
104
scripts/finess-sisa.py
Normal file
@ -0,0 +1,104 @@
|
||||
# import pandas with shortcut 'pd'
|
||||
import pandas as pd
|
||||
import os
|
||||
from pyproj import Transformer, transform
|
||||
|
||||
# Coordinate transformer from EPSG:2154 to EPSG:4326.
# NOTE(review): always_xy is left at its default, so the order of the returned
# pair follows the target CRS definition - confirm that whatever consumes the
# converted coordinates expects that order.
transformer = Transformer.from_crs(2154, 4326)
|
||||
|
||||
# Column names applied positionally to finess-clean.csv when it is read below.
headers = [
    'section', 'nofinesset', 'nofinessej', 'rs', 'rslongue', 'complrs',
    'compldistrib', 'numvoie', 'typvoie', 'voie', 'compvoie', 'lieuditbp',
    'commune', 'departement', 'libdepartement', 'ligneacheminement',
    'telephone', 'telecopie', 'categetab', 'libcategetab', 'categagretab',
    'libcategagretab', 'siret', 'codeape', 'codemft', 'libmft', 'codesph',
    'libsph', 'dateouv', 'dateautor', 'maj', 'numuai',
    'coordxet', 'coordyet', 'sourcecoordet', 'datemaj',
]
|
||||
|
||||
# Load the cleaned export with every field as text. header=0 tells pandas that
# the first line of the file is a header to be replaced by `headers`; without
# it that line is ingested as an ordinary data record.
data = pd.read_csv(
    './finess-clean.csv',
    sep=",",
    dtype='str',
    names=headers,
    header=0,
)
|
||||
|
||||
# Columns removed before the export. The columns left in place are:
# nofinesset, rs, rslongue, departement, ligneacheminement, telephone,
# categetab, siret, coordxet, coordyet.
header_drop = [
    'section', 'nofinessej', 'complrs', 'compldistrib', 'numvoie', 'typvoie',
    'voie', 'compvoie', 'lieuditbp', 'commune', 'libdepartement', 'telecopie',
    'libcategetab', 'categagretab', 'libcategagretab', 'codeape', 'codemft',
    'libmft', 'codesph', 'libsph', 'dateouv', 'dateautor', 'maj', 'numuai',
    'sourcecoordet', 'datemaj',
]
|
||||
|
||||
# Keep only the rows whose category code is "603" or "620" (codes are text
# because the file is read with dtype='str').
data = data[data['categetab'].isin(['603', '620'])]

# Remove the unused columns. The result is rebound instead of using
# inplace=True: an in-place drop on a filtered frame is the chained-assignment
# pattern that pandas reports with SettingWithCopyWarning.
data = data.drop(columns=header_drop)
|
||||
|
||||
def convertCoord(row):
    """Return a copy of *row* with its coordinates passed through `transformer`.

    The two values returned by ``transformer.transform`` are stored back into
    ``coordxet`` and ``coordyet`` in that order. The coordinates are converted
    to float first because the CSV is loaded as text. The input row itself is
    not modified, so callers must use the returned row.
    """
    converted = row.copy()
    converted['coordxet'], converted['coordyet'] = transformer.transform(
        float(row['coordxet']), float(row['coordyet'])
    )
    return converted


# Keep the frame returned by apply(): when the result is discarded, the
# conversion only reaches `data` if pandas happens to hand the function views
# on the underlying rows, which is not guaranteed.
data = data.apply(convertCoord, axis=1)

data.to_json('../static/data.json', orient='values')  # https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_json.html?highlight=to_json#pandas.DataFrame.to_json
|
27
scripts/import-from-directus/index.js
Normal file
27
scripts/import-from-directus/index.js
Normal file
@ -0,0 +1,27 @@
|
||||
import { Directus } from '@directus/sdk';
|
||||
import fs from 'fs';
|
||||
|
||||
const directus_url = "https://formulaire.p4pillon.org"
const directus = new Directus(directus_url);

/**
 * Fetch the items of the MSP_INFO collection, sorted by nofinesset.
 *
 * `limit: -1` asks Directus for every item; when no limit is given the API
 * applies its default page size and the result is silently truncated.
 *
 * @returns {Promise<object[]>} the `data` array of the response, or an empty
 *   array when the response carries no data.
 */
async function getMspInfo() {
  const response = await directus.items('MSP_INFO').readByQuery({
    sort: ['nofinesset'],
    limit: -1,
  });

  // Guard against a null/undefined payload so callers can always iterate.
  return response.data ?? []
}
|
||||
|
||||
const mspInfo = await getMspInfo();

// Index the records by nofinesset. Each value is a fixed-order array:
// [nom, prenom_leader, nom_leader, avec_sante,
//  accord_conventionnel_interprofessionnel].
// The fetched records are only read here; they are not modified.
const data = {}
for (const msp of mspInfo) {
  data[msp.nofinesset] = [
    msp.nom,
    msp.prenom_leader,
    msp.nom_leader,
    msp.avec_sante,
    msp.accord_conventionnel_interprofessionnel,
  ];
}

// The output path is relative to the current working directory.
fs.writeFileSync('../../static/data_p4pillon.json', JSON.stringify(data, null, 2), 'utf-8');
|
178
scripts/import-from-directus/package-lock.json
generated
Normal file
178
scripts/import-from-directus/package-lock.json
generated
Normal file
@ -0,0 +1,178 @@
|
||||
{
|
||||
"name": "directus-to-markdown",
|
||||
"version": "0.0.0",
|
||||
"lockfileVersion": 2,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "directus-to-markdown",
|
||||
"version": "0.0.0",
|
||||
"dependencies": {
|
||||
"@directus/sdk": "^10.3.1"
|
||||
}
|
||||
},
|
||||
"../../themes/hugo-theme-lowtech/node_modules/@resilien/directus-to-markdown": {
|
||||
"extraneous": true
|
||||
},
|
||||
"../../themes/hugo-theme-lowtech/node_modules/url-slug": {
|
||||
"extraneous": true
|
||||
},
|
||||
"node_modules/@directus/sdk": {
|
||||
"version": "10.3.1",
|
||||
"resolved": "https://registry.npmjs.org/@directus/sdk/-/sdk-10.3.1.tgz",
|
||||
"integrity": "sha512-+FUs1kQ27dmrHbAxO+FmCmmAHZrzyyZn+cXZMCtixkeBD8KYBFM7sUKtesQskSmsp5wUksrq2L9Cm+Z93G/ONg==",
|
||||
"dependencies": {
|
||||
"axios": "^0.27.2"
|
||||
}
|
||||
},
|
||||
"node_modules/asynckit": {
|
||||
"version": "0.4.0",
|
||||
"resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
|
||||
"integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q=="
|
||||
},
|
||||
"node_modules/axios": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/axios/-/axios-0.27.2.tgz",
|
||||
"integrity": "sha512-t+yRIyySRTp/wua5xEr+z1q60QmLq8ABsS5O9Me1AsE5dfKqgnCFzwiCZZ/cGNd1lq4/7akDWMxdhVlucjmnOQ==",
|
||||
"dependencies": {
|
||||
"follow-redirects": "^1.14.9",
|
||||
"form-data": "^4.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/combined-stream": {
|
||||
"version": "1.0.8",
|
||||
"resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz",
|
||||
"integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==",
|
||||
"dependencies": {
|
||||
"delayed-stream": "~1.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 0.8"
|
||||
}
|
||||
},
|
||||
"node_modules/delayed-stream": {
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz",
|
||||
"integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==",
|
||||
"engines": {
|
||||
"node": ">=0.4.0"
|
||||
}
|
||||
},
|
||||
"node_modules/follow-redirects": {
|
||||
"version": "1.15.2",
|
||||
"resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.2.tgz",
|
||||
"integrity": "sha512-VQLG33o04KaQ8uYi2tVNbdrWp1QWxNNea+nmIB4EVM28v0hmP17z7aG1+wAkNzVq4KeXTq3221ye5qTJP91JwA==",
|
||||
"funding": [
|
||||
{
|
||||
"type": "individual",
|
||||
"url": "https://github.com/sponsors/RubenVerborgh"
|
||||
}
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=4.0"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"debug": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/form-data": {
|
||||
"version": "4.0.0",
|
||||
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz",
|
||||
"integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==",
|
||||
"dependencies": {
|
||||
"asynckit": "^0.4.0",
|
||||
"combined-stream": "^1.0.8",
|
||||
"mime-types": "^2.1.12"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 6"
|
||||
}
|
||||
},
|
||||
"node_modules/mime-db": {
|
||||
"version": "1.52.0",
|
||||
"resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz",
|
||||
"integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==",
|
||||
"engines": {
|
||||
"node": ">= 0.6"
|
||||
}
|
||||
},
|
||||
"node_modules/mime-types": {
|
||||
"version": "2.1.35",
|
||||
"resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz",
|
||||
"integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==",
|
||||
"dependencies": {
|
||||
"mime-db": "1.52.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 0.6"
|
||||
}
|
||||
}
|
||||
},
|
||||
"dependencies": {
|
||||
"@directus/sdk": {
|
||||
"version": "10.3.1",
|
||||
"resolved": "https://registry.npmjs.org/@directus/sdk/-/sdk-10.3.1.tgz",
|
||||
"integrity": "sha512-+FUs1kQ27dmrHbAxO+FmCmmAHZrzyyZn+cXZMCtixkeBD8KYBFM7sUKtesQskSmsp5wUksrq2L9Cm+Z93G/ONg==",
|
||||
"requires": {
|
||||
"axios": "^0.27.2"
|
||||
}
|
||||
},
|
||||
"asynckit": {
|
||||
"version": "0.4.0",
|
||||
"resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
|
||||
"integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q=="
|
||||
},
|
||||
"axios": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/axios/-/axios-0.27.2.tgz",
|
||||
"integrity": "sha512-t+yRIyySRTp/wua5xEr+z1q60QmLq8ABsS5O9Me1AsE5dfKqgnCFzwiCZZ/cGNd1lq4/7akDWMxdhVlucjmnOQ==",
|
||||
"requires": {
|
||||
"follow-redirects": "^1.14.9",
|
||||
"form-data": "^4.0.0"
|
||||
}
|
||||
},
|
||||
"combined-stream": {
|
||||
"version": "1.0.8",
|
||||
"resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz",
|
||||
"integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==",
|
||||
"requires": {
|
||||
"delayed-stream": "~1.0.0"
|
||||
}
|
||||
},
|
||||
"delayed-stream": {
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz",
|
||||
"integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ=="
|
||||
},
|
||||
"follow-redirects": {
|
||||
"version": "1.15.2",
|
||||
"resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.2.tgz",
|
||||
"integrity": "sha512-VQLG33o04KaQ8uYi2tVNbdrWp1QWxNNea+nmIB4EVM28v0hmP17z7aG1+wAkNzVq4KeXTq3221ye5qTJP91JwA=="
|
||||
},
|
||||
"form-data": {
|
||||
"version": "4.0.0",
|
||||
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz",
|
||||
"integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==",
|
||||
"requires": {
|
||||
"asynckit": "^0.4.0",
|
||||
"combined-stream": "^1.0.8",
|
||||
"mime-types": "^2.1.12"
|
||||
}
|
||||
},
|
||||
"mime-db": {
|
||||
"version": "1.52.0",
|
||||
"resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz",
|
||||
"integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg=="
|
||||
},
|
||||
"mime-types": {
|
||||
"version": "2.1.35",
|
||||
"resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz",
|
||||
"integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==",
|
||||
"requires": {
|
||||
"mime-db": "1.52.0"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
10
scripts/import-from-directus/package.json
Normal file
10
scripts/import-from-directus/package.json
Normal file
@ -0,0 +1,10 @@
|
||||
{
|
||||
"name": "annuaire-p4pillon",
|
||||
"version": "0.0.0",
|
||||
"description": "Import Directus.io to gohugo.io",
|
||||
"main": "index.js",
|
||||
"type": "module",
|
||||
"dependencies": {
|
||||
"@directus/sdk": "^10.3.1"
|
||||
}
|
||||
}
|
3
scripts/requirements.txt
Normal file
3
scripts/requirements.txt
Normal file
@ -0,0 +1,3 @@
|
||||
pandas==1.5.0
|
||||
requests==2.28.1
|
||||
pyproj==3.4.0
|
Reference in New Issue
Block a user