From 31e32f91c08b1da950de33033bfa8970af6235e5 Mon Sep 17 00:00:00 2001
From: Guilian Celin--Davanture
Date: Sun, 10 Dec 2023 21:33:34 +0100
Subject: [PATCH] big commit

---
 enrich.js    |  80 ++++++++++++++++++
 insee.js     | 229 +++++++++++++++++++++++++++++++++++++++++++++++++++
 osm.js       | 155 ++++++++++++++++++++++++++++++++++
 package.json |   9 ++
 4 files changed, 473 insertions(+)
 create mode 100644 enrich.js
 create mode 100644 insee.js
 create mode 100644 osm.js
 create mode 100644 package.json

diff --git a/enrich.js b/enrich.js
new file mode 100644
index 0000000..1f1f4e8
--- /dev/null
+++ b/enrich.js
@@ -0,0 +1,80 @@
+/**
+ * Merge per-zone statistics into the features of a GeoJSON collection.
+ *
+ * Features are indexed by `properties.code`. A feature whose code matches an
+ * entry of `data` gets its `properties` replaced by that entry; any other
+ * feature is kept as is.
+ *
+ * @param {object} geo - GeoJSON object; only `geo.features` is read.
+ * @param {Array<object|null>} data - Records exposing a `code`; `null` entries are skipped.
+ * @param {boolean} [correction_arrondissement=false] - When true, rewrite each
+ *   feature code to its first two characters plus its last one before matching.
+ * @returns {Promise<object>} Shallow copy of `geo` whose `features` is the merged list.
+ */
+async function merge(geo, data, correction_arrondissement = false) {
+    const features = {};
+    for( const f of geo.features ) {
+        if( correction_arrondissement ) {
+            // Arrondissements carry codes like "91001" in this dataset, which
+            // is wrong: arrondissement codes look like "911".
+            const code = f.properties.code.substring(0, 2) + f.properties.code.at(-1);
+            // Work on a copy so the caller's GeoJSON is not modified in place.
+            features[code] = { ...f, properties: { ...f.properties, code } };
+        }
+        else {
+            features[f.properties.code] = f;
+        }
+    }
+    console.log(`Got ${Object.keys(features).length} features`);
+
+    // Statistics indexed by code; this only covers a subset of the zones
+    // present in `features`.
+    const impots_dataset = {};
+    for( const node of data ) {
+        if( node !== null ) {
+            impots_dataset[node.code] = node;
+        }
+    }
+
+    const results = [];
+    for( const [code, feature] of Object.entries(features) ) {
+        const node = impots_dataset[code];
+
+        if( node ) {
+            // `node` already carries "code" and "nom", so it replaces the
+            // feature's properties wholesale.
+            results.push({
+                ...feature,
+                properties: node,
+            });
+        }
+        else {
+            results.push(feature);
+        }
+    }
+
+    return {
+        ...geo,
+        features: results,
+    };
+}
+
+
+// Enrich the three GeoJSON layers with the statistics files and write the results.
+(async () => {
+    const dep_geo = await Bun.file("./departements.geojson").json();
+    const arr_geo = await Bun.file("./arrondissements.geojson").json();
+    const com_geo = await Bun.file("./communes.geojson").json();
+
+    const departements = await Bun.file("./full-data_departements.json").json();
+    const arrondissements = await Bun.file("./full-data_arrondissements.json").json();
+    const communes = await Bun.file("./full-data_communes.json").json();
+
+    const deps = await merge(dep_geo, departements);
+    const arrs = await merge(arr_geo, arrondissements, true);
+    const coms = await merge(com_geo, communes);
+
+    await Bun.write(`./_DEPARTEMENTS.geojson`, JSON.stringify(deps, null, 4));
+    await Bun.write(`./_ARRONDISSEMENTS.geojson`, JSON.stringify(arrs, null, 4));
+    await Bun.write(`./_COMMUNES.geojson`, JSON.stringify(coms, null, 4));
+})();
diff --git a/insee.js b/insee.js
new file mode 100644
index 0000000..1525435
--- /dev/null
+++ b/insee.js
@@ -0,0 +1,229 @@
+// Geographic level prefixes used in the request path (`${granularity}-${code}`).
+const precision = {
+    commune: 'COM',
+    departement: 'DEP',
+    arrondissement: 'ARR',
+};
+
+// FIXME(security): these bearer tokens are committed in clear text. Revoke them
+// and load the list from the environment or an untracked file instead.
+const API_KEYS = [
+    "7f113b8e-b1ca-36c0-ae0b-4f038f941241", // original app
+    "aaaf836f-0cc9-3b6e-9f43-11c7c277ba38", // app 2
+    "5511ea5b-442d-37f3-bb09-11f6af8a3e79", // app 3
+    "3ce290d1-2bfa-38d9-b6a1-b4170765039d", // app 4
+    "e5f49c24-6a4f-328e-9113-bcd06ff033eb", // app 5
+    "5f252c10-64c8-3956-9467-2e1d0c0cc942", // app 6
+    "dfebb8ea-427d-3b62-a62f-5e4b220e094d", // app 7
+    "0781ac0b-fd37-3f10-b43a-7797eca237eb", // app 8
+    "5f47c3ea-4311-311b-95e1-c2ed6c813839", // app 9
+    "bde457a2-0de3-3c59-9190-df850c7facfd", // app 10
+    "ceec0915-199c-304f-ae4d-bce9edd694cc", // app 11
+    "80074dcf-3ded-3bd7-abb7-a7de94e4fb6a", // app 12
+    "5de51d31-2580-3c14-8e6d-5e46f5d9da98", // app 13
+    "22e87dc0-73e8-309c-9a41-e886808fd652", // app 14
+    "da8be43f-baff-399d-a94d-574e0d7fee62", // app 15
+    "92146137-957e-32c2-b309-8a8e656c4228", // app 16
+    "3d79910f-b0d6-30af-a26f-65a6ff7d90b3", // app 17
+    "af6eb70c-8a9c-351b-a7de-b39dcc92cb98", // app 18
+    "c07501c9-1f03-362a-9e87-aaa5f071a839", // app 19
+    "c8602bc1-e794-3d18-8d1a-c5ace7049fdb", // app 20
+    "7295921f-0bf4-31d5-824e-1021084adcbf", // app 21
+    "1fb4f0b7-e667-35ca-b905-1038e7321d95", // app 22
+    "98e0c55b-bd34-3503-9c62-41a49aeffd91", // app 23
+    "0fdd5b94-3501-3dd8-8947-0c14539f18ee", // app 24
+    "c274217f-9052-3bff-9efd-c4a5ade8e86e", // app 25
+    "3c8be149-a666-32ea-9194-73abbfcb290f", // app 26
+    "673d5d64-add0-39c3-894d-76eff7d33d79", // app 27
+    "eede4152-5fcf-3d21-aa60-5c5b5da74dc6", // app 28
+    "ac0d96fa-f4be-335b-9b5f-04db1360b84b", // app 29
+    "ad785a7b-8138-316d-842e-255058188620", // app 30
+];
+
+// limit imposed by INSEE
+const MAX_RPM = 30;
+// 60 seconds divided by RPM => wait time (in seconds) per request (for one api key)
+const WAIT_TIME_FOR_SINGLE = 60 / MAX_RPM;
+
+// wait time between each loop iteration, in milliseconds,
+// so as to maximize the number of api calls
+// by switching to the next api key
+const WAIT_TIME_MILLIS = ((WAIT_TIME_FOR_SINGLE / API_KEYS.length) * 1000);
+
+// add little margin of error to be sure
+const WAIT_TIME = WAIT_TIME_MILLIS + ( WAIT_TIME_MILLIS * 0.1 );
+
+
+console.log('='.repeat(80));
+console.log(`Got ${API_KEYS.length} API keys :`);
+console.log(`Going to wait ${WAIT_TIME}ms between each call.`);
+console.log('='.repeat(80));
+console.log();
+
+// Number of requests issued so far; selects the next API key round-robin.
+let CALL_COUNTER = 0;
+
+/**
+ * Query the INSEE "donnees-locales" API for one zone and extract its name and
+ * the absolute value of its `PIMPOT` measure.
+ *
+ * Each call uses the next key of `API_KEYS`, in round-robin order.
+ *
+ * @param {string} code - Zone code appended to the granularity in the URL.
+ * @param {string} [granularity='COM'] - One of the `precision` values.
+ * @returns {Promise<{code: string, nom: string, impots: number}|null|undefined>}
+ *   The record on success; `null` when the API answers that no zone matches;
+ *   `undefined` for any other failure (unparsable body, unnamed zone, no
+ *   `PIMPOT` cell).
+ */
+async function request_data(code, granularity='COM'){
+    const api_n = CALL_COUNTER % API_KEYS.length;
+    const api_key = `Bearer ${API_KEYS[api_n]}`;
+
+    CALL_COUNTER += 1;
+
+    const res = await fetch(`https://api.insee.fr/donnees-locales/V0.1/donnees/geo-INDICS_FILO_DISP_DET@GEO2023FILO2020_BV/${granularity}-${code}.ALL`, {
+        headers: {
+            'Accept': 'application/json',
+            "Authorization": api_key,
+        }
+    });
+
+    let text;
+    let data;
+    try {
+        text = await res.text();
+        data = JSON.parse(text);
+    }
+    catch(e){
+        // `text` is still undefined when reading the body itself failed, so
+        // it must not be dereferenced unconditionally here.
+        if( text?.trim() === 'Aucune zone ne correspond à la requête' ) {
+            console.log(`[\x1b[31m${granularity}-${code}\x1b[0m] ... \x1b[0m`);
+            return null;
+        }
+
+        console.error(`[WARNING] Error parsing data ! for ${code} at granularity "${granularity}"`);
+        return undefined;
+    }
+
+    const commune = {
+        code: code,
+    };
+
+    commune.nom = data.Zone?.Millesime?.Nccenr;
+
+    if( commune.nom === undefined ) {
+        console.error(`[WARNING] Unnamed zone ! for ${code} at granularity "${granularity}"`);
+        return undefined;
+    }
+
+    // First cell holding the `PIMPOT` measure, if any.
+    const cellule_impots = data.Cellule
+        ? data.Cellule.find( cell => cell && cell.Mesure["@code"] === 'PIMPOT' )
+        : undefined;
+
+    if( !cellule_impots ) {
+        // Either there is no Cellule data at all, or none of the cells is a
+        // `PIMPOT` one (the latter used to throw on `cellules_impots[0]`).
+        console.log(`[\x1b[31m${granularity}-${code}\x1b[0m] ${commune.nom} \x1b[0m`);
+        return undefined;
+    }
+
+    commune.impots = Math.abs( Number(cellule_impots.Valeur) );
+
+    console.log(`[\x1b[1;32m${granularity}-${code}\x1b[0m\x1b[1m] ${commune.nom} : \x1b[1;33m${commune.impots}\x1b[0m`);
+    return commune;
+}
+
+/**
+ * Resolve after `ms` milliseconds.
+ *
+ * @param {number} ms - Delay in milliseconds.
+ * @returns {Promise<void>}
+ */
+function sleep(ms) {
+    return new Promise((resolve) => setTimeout(resolve, ms));
+}
+
+
+// Scrape every department numbered 01 to 98, then its arrondissements 1 to 9.
+(async () => {
+    // NOTE(review): `com_data` is never reset, so each per-department file
+    // written below also contains the entries of all previous departments.
+    // Confirm this is intended.
+    const com_data = [];
+    const dep_data = [];
+
+    for( let dep = 1; dep < 99; dep++ ) {
+        const code_departement = String(dep).padStart(2, '0');
+        const departement = await request_data(code_departement, precision.departement);
+
+        if( departement ) {
+            dep_data.push(departement);
+
+            let empty_in_a_row = 0;
+
+            // --------
+            // Communes
+            // --------
+            //
+            //for( let com = 1; com < 999; com++ ) {
+            //    const code_commune = code_departement + String(com).padStart(3, '0');
+            //    const commune = await request_data(code_commune, precision.commune);
+
+            //    if( commune ) {
+            //        empty_in_a_row = 0;
+            //        com_data.push(commune);
+            //    }
+            //    else if ( commune === null ) {
+            //        empty_in_a_row += 1;
+
+            //        if(empty_in_a_row > 100) {
+            //            console.log(`Got 100 empty data in a row : assuming no more data for dep.`)
+            //            break;
+            //        }
+            //    }
+            //
+            //    await sleep(WAIT_TIME);
+            //}
+
+            // ---------------
+            // Arrondissements
+            // ---------------
+            //
+            for( let com = 1; com < 10; com++ ) {
+                const code_commune = code_departement + String(com);
+                const commune = await request_data(code_commune, precision.arrondissement);
+
+                if( commune ) {
+                    empty_in_a_row = 0;
+                    com_data.push(commune);
+                }
+                else if ( commune === null ) {
+                    empty_in_a_row += 1;
+
+                    // This loop runs at most 9 times, so the threshold is never reached here.
+                    if(empty_in_a_row > 100) {
+                        console.log(`Got 100 empty data in a row : assuming no more data for dep.`);
+                        break;
+                    }
+                }
+
+                await sleep(WAIT_TIME);
+            }
+        }
+
+        //console.log(`\n==> Saving communes for dep ${code_departement}...`)
+        //await Bun.write(`./communes-${code_departement}.json`, JSON.stringify( com_data, null, 4 ));
+        //console.log('==> Done.\n')
+        console.log(`\n==> Saving arrondissements for dep ${code_departement}...`);
+        await Bun.write(`./arrondissements-${code_departement}.json`, JSON.stringify( com_data, null, 4 ));
+        console.log('==> Done.\n');
+        await sleep(WAIT_TIME);
+    }
+
+    //await Bun.write('./departements.json', JSON.stringify( dep_data, null, 4 ));
+
+    console.log('Done !');
+})();
diff --git a/osm.js b/osm.js
new file mode 100644
index 0000000..8fd64d3
--- /dev/null
+++ b/osm.js
@@ -0,0 +1,155 @@
+import pLimit from 'p-limit';
+
+
+const overpass_endpoint = 'https://overpass-api.de/api/interpreter';
+//const overpass_endpoint = "https://maps.mail.ru/osm/tools/overpass/api/interpreter";
+//const overpass_endpoint = "https://overpass.kumi.systems/api/interpreter";
+
+/**
+ * Ask the Overpass API for per-`highway`-value statistics (way count and summed
+ * length) of the ways inside the area whose `ref:INSEE` equals `insee_code`.
+ *
+ * @param {string|number} insee_code - INSEE code of the area.
+ * @returns {Promise<Array<{type: string, count: number, len: number}>|undefined>}
+ *   One entry per returned element; `undefined` on a non-2xx response or when
+ *   the request or the parsing throws.
+ */
+async function get_road_length(insee_code) {
+    console.log(`Getting data for INSEE:${insee_code}`);
+    try {
+        // NOTE(review): the trailing `out body;` may re-emit elements already
+        // printed inside the loop - confirm against the Overpass QL `for` docs.
+        const res = await fetch(
+            overpass_endpoint,
+            {
+                method: 'POST',
+                headers: {
+                    Pragma: 'no-cache',
+                    'Cache-Control': 'no-cache',
+                },
+                referrer: overpass_endpoint,
+                body: 'data=' + encodeURIComponent(`
+                    [out:json];
+                    area["ref:INSEE"="${insee_code}"]->.searchArea;
+                    way(area.searchArea);
+                    for(t["highway"])
+                    {
+                        make stat "highway"=_.val, number=count(ways),length=sum(length());
+                        out;
+                    };
+                    out body;
+                `),
+            },
+        );
+
+        if (!res.ok) {
+            console.error(`Error ${res.status}`);
+            return undefined;
+        }
+
+        const json = await res.json();
+
+        return json.elements.map(road => ({
+            type: road.tags.highway,
+            count: Number(road.tags.number),
+            // Exposed as `len` rather than `length`: the original author
+            // suspected `length` was being hidden by console.log.
+            len: Number(road.tags.length),
+        }));
+    } catch (error) {
+        console.error(error);
+        return undefined;
+    }
+}
+
+// `highway` values counted as actual roads; see https://wiki.openstreetmap.org/wiki/Key:highway .
+const actual_roads_types = new Set([
+    'motorway',
+    'trunk',
+    'busway',
+    'living_street',
+    'residential',
+    'primary',
+    'secondary',
+    'tertiary',
+    'unclassified',
+    'road',
+
+    'motorway_link',
+    'trunk_link',
+    'primary_link',
+    'secondary_link',
+    'tertiary_link',
+]);
+
+/**
+ * Attach road statistics to a zone record.
+ *
+ * @param {{code: string, nom: string, impots: number}} node - Zone record.
+ * @returns {Promise<object|undefined>} `node` extended with `road_length`
+ *   (summed `len` of the types in `actual_roads_types`), `ratio`
+ *   (`road_length / 1000 / node.impots`), `total_highways` (summed `len` of
+ *   every type) and `roads_data`; `undefined` when the road lookup failed.
+ */
+async function get_data(node) {
+    const roads = await get_road_length(node.code);
+
+    if (roads === undefined) {
+        console.log(`[\u001B[31m${node.code}\u001B[0m] ${node.nom} \u001B[0m`);
+        return undefined;
+    }
+
+    const total_len = (list) => list.reduce((acc, r) => acc + r.len, 0);
+    const sum = total_len(roads);
+    const filtered_sum = total_len(roads.filter(r => actual_roads_types.has(r.type)));
+
+    // Km of road per tax point.
+    const ratio = (filtered_sum / 1000) / node.impots;
+
+    console.log(`[\u001B[1;32m${node.code}\u001B[0m\u001B[1m] ${node.nom} : \u001B[1;33m${Math.round(filtered_sum / 1000)}km\u001B[0m of road for \u001B[1;33m${node.impots}%\u001B[0m impôts => \u001B[1;35m${ratio}\u001B[0m.`);
+
+    return {
+        ...node,
+
+        road_length: filtered_sum,
+        ratio,
+
+        total_highways: sum,
+
+        roads_data: roads,
+    };
+}
+
+/**
+ * Run `get_data` on every record of a JSON file and save the results.
+ *
+ * At most 6 lookups run concurrently. The results are sorted in place by
+ * numeric `code` and written to `./full-data_${file}`.
+ *
+ * @param {string} file - Path of a JSON file containing an array of zone records.
+ * @returns {Promise<Array<object|undefined>>} The sorted results.
+ */
+async function get_data_for_each(file) {
+    const json = await Bun.file(file).json();
+
+    const limit = pLimit(6);
+    const result = await Promise.all( json.map( el => limit(() => get_data(el)) ) );
+
+    // `sort` works in place, so `result` itself ends up ordered. Failed
+    // lookups are `undefined`: they are moved to the end and serialized as
+    // `null` by JSON.stringify.
+    result.sort( (a,b) => Number(a.code) - Number(b.code) );
+
+    console.log('\n==> Saving...');
+    await Bun.write(`./full-data_${file}`, JSON.stringify(result, null, 4));
+    console.log('==> Done.\n');
+
+    return result;
+}
+
+// Process the three input files one after the other.
+(async () => {
+    await get_data_for_each('departements.json');
+    await get_data_for_each('arrondissements.json');
+    await get_data_for_each('communes.json');
+})();
diff --git a/package.json b/package.json
new file mode 100644
index 0000000..e63b743
--- /dev/null
+++ b/package.json
@@ -0,0 +1,9 @@
+{
+  "name": "projet-bd",
+  "devDependencies": {
+    "bun-types": "latest"
+  },
+  "dependencies": {
+    "p-limit": "^5.0.0"
+  }
+}