// depyth/index.ts

import path from 'node:path';
import express from 'express';
import {Liquid} from 'liquidjs';
import {RateLimit} from 'async-sema';
import fileUpload from 'express-fileupload';
import Graph from 'graphology';
import forceAtlas2 from 'graphology-layout-forceatlas2';
import circular from 'graphology-layout/circular';
import {assignLayout} from 'graphology-layout/utils';
import {singleSource, singleSourceLength} from 'graphology-shortest-path/unweighted';
import Pep440Version, {Pep440VersionRule} from './pep440version.ts';
import {version} from 'bun';
// Liquid template engine; templates are looked up in the `views/` directory
// that sits next to this file.
const liquidEngine = new Liquid({
  root: path.resolve(__dirname, 'views/'),
  jsTruthy: true,
});

const app = express();

// Request body parsers, registered in this order: JSON, URL-encoded forms,
// then multipart file uploads.
app.use(express.json());
app.use(express.urlencoded());
app.use(
  fileUpload({
    limits: {fileSize: 50 * 1024 * 1024},
  }),
);

// Render `.liquid` views through the engine created above.
app.engine('liquid', liquidEngine.express());
app.set('views', [path.join(__dirname, 'views')]);
app.set('view engine', 'liquid');

// Static assets are served from the relative `static` directory.
app.use('/static', express.static('static'));

// Landing page.
app.get('/', (_request, res) => {
  res.render('index');
});
/**
 * Ambient signature of the `RateLimit` factory: it is a plain function that
 * returns an async "wait for my turn" function, not a class.
 *
 * NOTE(review): this declaration duplicates the name imported from
 * `async-sema` at the top of the file; it only exists for type information and
 * should be dropped once the package's own typings are picked up — confirm.
 */
declare function RateLimit(rps: number): () => Promise<void>;

/**
 * Shared limiter for outgoing requests: `await rateLimit()` resolves when the
 * caller is allowed to proceed (created with a budget of 5 calls per time unit).
 */
const rateLimit: () => Promise<void> = RateLimit(5);
/**
 * Parses one dependency line (a `requires_dist` entry or a requirements.txt
 * line) into a package name and a version rule.
 *
 * Only the part before the first `;` is looked at; the environment marker that
 * may follow is ignored. Surrounding whitespace is stripped, so indented lines
 * and lines with trailing blanks parse like their trimmed form.
 *
 * @param depstring Raw dependency line.
 * @returns `{name, rule}`, where `rule` matches any version when the line does
 *          not pin one; `undefined` when the line is not a package specifier
 *          (blank line, comment, pip option such as `-r other.txt`).
 */
function extractInfoFromPythonDepString(depstring: string) {
  const namever = (depstring.split(';')[0] ?? '').trim();

  // Lines starting with '-' are pip options (-r, -e, --index-url, ...), not
  // package names, even though the name pattern below would accept them.
  if (namever.startsWith('-')) {
    return undefined;
  }

  // https://stackoverflow.com/questions/24470567/what-are-the-requirements-for-naming-python-modules
  const match = namever.match(/^(?<name>[a-zA-Z_\-][\w\-]*)((?<operator>.=)(?<version>.+))?/);
  if (match === null || match.groups === undefined) {
    return undefined;
  }

  let rule;
  if (match.groups.version) {
    // Trim again: whitespace may sit between the operator and the version.
    const version = new Pep440Version(match.groups.version.trim());
    rule = new Pep440VersionRule(version, match.groups.operator);
  } else {
    rule = Pep440VersionRule.getRuleMatchingAnyVersion();
  }

  return {
    name: match.groups.name,
    rule: rule,
  };
}
/**
 * Fetches the JSON document PyPI publishes for a package.
 *
 * Waits on the shared `rateLimit` limiter before issuing the request.
 *
 * @param packagename Package name; it is URL-encoded before being placed in
 *                    the request path.
 * @returns The parsed JSON body of the response.
 * @throws Error when PyPI answers with a non-2xx status (e.g. unknown
 *         package), instead of handing the error body back as package data.
 */
async function getRawPackageInfo(packagename: string) {
  await rateLimit();
  const pypires = await fetch(`https://pypi.org/pypi/${encodeURIComponent(packagename)}/json`);
  if (!pypires.ok) {
    throw new Error(`PyPI lookup for "${packagename}" failed with HTTP ${pypires.status}`);
  }
  const json = await pypires.json();
  return json;
}
/**
 * One downloadable file of a release, i.e. one element of the arrays stored
 * under the `releases` key of a package's JSON document.
 */
interface PipReleaseFile {
  /** Kind of distribution file (presumably values such as `bdist_wheel` — verify against PyPI). */
  packagetype: string;
  /** File size; used in this file as the download weight of a package. */
  size: number;
  /** Upload timestamp, kept as the string found in the JSON document. */
  upload_time_iso_8601: string;
  /** Any other field of the file entry is accepted untyped. */
  [key: string]: any;
}
/**
 * Builds the dependency graph of a PyPI package, or extends `fromGraph` with it.
 *
 * Each crawled package becomes a node keyed by its name and carrying `label`,
 * `size`, `summary`, `version`, `weight` (size of the biggest file of the
 * selected release) and `path` (slash-separated chain of package names that
 * led to it). A directed edge links every package to each of its dependencies.
 * Once the crawl is over, nodes reachable from `packagename` receive a `color`
 * and an `actual_depth`, every edge is styled, and a layout is assigned to the
 * whole graph (circular seed followed by 50 ForceAtlas2 iterations).
 *
 * Dependencies are read from the `info.requires_dist` list of the package
 * being crawled. Entries whose environment marker selects an `extra` are
 * optional dependencies and are skipped; other markers are not evaluated.
 *
 * @param packagename Root package to crawl.
 * @param rule Version rule the root package's release has to satisfy.
 * @param fromGraph Graph to add to; it is mutated in place.
 * @returns The same graph instance that was passed as `fromGraph`.
 * @throws Error when a package has no release satisfying its rule, or when
 *         none of the matching releases has any file. Errors raised by
 *         `getRawPackageInfo` propagate as well.
 */
async function getFullPackageInfo(packagename: string, rule: Pep440VersionRule = Pep440VersionRule.getRuleMatchingAnyVersion(), fromGraph = new Graph()) {
  const dependencyGraph = fromGraph;
  let longestDependencyChain = 0;

  const recurseDepsPackageInfo = async (packagename: string, rule: Pep440VersionRule = Pep440VersionRule.getRuleMatchingAnyVersion(), descPath: string = "") => {
    console.log(`[Recurse] Going to scan package ${packagename}`);
    // Path of package names from the root down to (and including) this package.
    descPath = descPath === "" ? packagename : `${descPath}/${packagename}`;

    if( dependencyGraph.hasNode(packagename) ) {
      // Already crawled: only keep the shorter of the known path and the current one.
      const paramPathParts: string[] = descPath.split('/');
      const graphPathParts: string[] = dependencyGraph.getNodeAttribute(packagename, 'path').split('/');
      if( paramPathParts.length < graphPathParts.length ) {
        dependencyGraph.setNodeAttribute(packagename, 'path', descPath);
        console.log(`[Recurse] Found better way of getting to ${packagename}: ${descPath}`);
      }
      const shortestDist = Math.min( paramPathParts.length, graphPathParts.length );
      if( longestDependencyChain < shortestDist ) {
        longestDependencyChain = shortestDist;
      }
      console.log(`[Recurse] Already scanned ${packagename} ; skipping.`);
      return;
    }

    console.log(`[Recurse] Scanning package ${packagename}`);
    const json = await getRawPackageInfo(packagename);

    // ==================================================
    // 1. get biggest file of highest possible version
    // TODO: don't choose biggest file, choose in a smart way (like asking the user's OS and arch)
    const rawReleases = (json.releases ?? {}) as {[key: string]: PipReleaseFile[]};
    const releases: {[key: string]: PipReleaseFile[]} = {};
    for( const [rawVersion, data] of Object.entries(rawReleases) ) {
      try {
        // Re-key by the normalised release string so later lookups agree.
        releases[new Pep440Version(rawVersion).releaseString()] = data;
      }
      catch {
        // Unparseable version strings are left out rather than aborting the crawl.
        console.log(`Failed to parse version: ${rawVersion}`);
      }
    }
    const versions = Object.keys(releases).map( v => {
      console.log(`[Recurse] Parsing version ${v} for ${packagename}`);
      return new Pep440Version(v);
    });
    const highestVersions = rule.getSortedMatchingVersions(versions);
    if( highestVersions.length === 0 ) {
      throw new Error(`No release of ${packagename} satisfies the requested version rule`);
    }

    // First matching version that actually ships at least one file.
    let highestVersion: undefined | Pep440Version = undefined;
    let highestRelease: undefined | PipReleaseFile[] = undefined;
    for( const candidate of highestVersions ) {
      const files = releases[candidate.releaseString()];
      if( files !== undefined && files.length !== 0 ) {
        highestVersion = candidate;
        highestRelease = files;
        break;
      }
    }
    if( highestVersion === undefined || highestRelease === undefined ) {
      throw new Error(`No matching release of ${packagename} has any downloadable file`);
    }
    // Strict '>' keeps the first of equally sized files and leaves the array unsorted.
    const biggestFile = highestRelease.reduce( (biggest, file) => file.size > biggest.size ? file : biggest );
    console.log(`[Recurse] ${packagename} has highest version ${highestVersion.releaseString()}`);

    // ==================================================
    // 2. add to the graph (before recursing, so dependency cycles terminate)
    dependencyGraph.addNode(packagename, {
      label: packagename,
      size: 10,
      summary: json.info?.summary,
      version: highestVersion.releaseString(),
      weight: biggestFile.size,
      path: descPath
    });

    // ==================================================
    // 3. recurse into this package's own dependencies
    const dependenciesList: string[] = json.info?.requires_dist ?? [];
    console.log(`[Recurse] ${packagename} has deps:\n - ${dependenciesList.join('\n - ')}`);
    for( const depString of dependenciesList ) {
      console.log(`[Recurse] ${packagename} depends on: ${depString}`);
      // A marker of the form `extra == "..."` means the dependency is only
      // needed when that extra is requested, which this crawl never does.
      const marker = depString.split(';').slice(1).join(';');
      if( /\bextra\s*==/.test(marker) ) continue;
      const info = extractInfoFromPythonDepString(depString);
      if( info === undefined ) continue;
      await recurseDepsPackageInfo(info.name, info.rule, descPath);
      if( dependencyGraph.hasEdge(packagename, info.name) === false ) {
        dependencyGraph.addEdge(packagename, info.name);
      }
    }
  }

  await recurseDepsPackageInfo(packagename, rule);
  console.log(`Longest dependency chain is ${longestDependencyChain}`);

  const depthColors = [
    "#D14D41",
    "#DA702C",
    "#D0A215",
    "#879A39",
    "#3AA99F",
    "#4385BE",
    "#8B7EC8",
    "#CE5D97",
  ]

  // Colour every node reachable from the root by its distance to the root.
  // The palette wraps around so deep graphs never get an undefined colour.
  const lengthmap = singleSourceLength(dependencyGraph, packagename);
  const longestPathLength = Math.max(...Object.values(lengthmap))
  for( const [node, dist] of Object.entries(lengthmap) ) {
    dependencyGraph.setNodeAttribute(node, 'color', depthColors[(dist + 1) % depthColors.length] )
    dependencyGraph.setNodeAttribute(node, 'actual_depth', dist )
  }

  dependencyGraph.forEachEdge((edge, _attrs, _source, _target, sourceAttrs, targetAttrs) => {
    const sourceDepth = sourceAttrs.actual_depth;
    const targetDepth = targetAttrs.actual_depth;
    if( sourceDepth >= targetDepth ) {
      // Edge that does not lead deeper: drawn thin and grey.
      dependencyGraph.setEdgeAttribute( edge, "color", "#B7B5AC")
      dependencyGraph.setEdgeAttribute( edge, "size", 1)
      dependencyGraph.setEdgeAttribute( edge, "weight", 1)
    }
    else {
      // Edge leading deeper: coloured by source depth, thicker near the root.
      dependencyGraph.setEdgeAttribute( edge, "color", depthColors[ sourceDepth % depthColors.length ])
      dependencyGraph.setEdgeAttribute( edge, "size", 3 + 3 * (longestPathLength - sourceDepth))
      dependencyGraph.setEdgeAttribute( edge, "weight", 3 + 3 * (longestPathLength - sourceDepth))
    }
    dependencyGraph.setEdgeAttribute( edge, "type", "arrow");
  });

  // The root is drawn bigger than the other nodes.
  dependencyGraph.setNodeAttribute(packagename, 'size', 20)

  // Layout: start from a circle, then refine with ForceAtlas2.
  circular.assign(dependencyGraph, {scale: 100});
  const positions = forceAtlas2(dependencyGraph, {
    iterations: 50,
    settings: {
      edgeWeightInfluence: 1,
      ...forceAtlas2.inferSettings(dependencyGraph),
    }
  });
  assignLayout(dependencyGraph, positions);
  return dependencyGraph;
}
// HTML page for a single package; the view receives the name taken from the URL.
app.get('/graph/:packagename', (req, res) => {
  const {packagename} = req.params;
  res.render('graph', {packagename});
});
// JSON API: dependency graph of one package plus its total download weight.
// Responds with `{weight, weightUnit, graph}`; `weight` is already scaled to
// `weightUnit`. On failure responds with HTTP 500 and `{error}`.
app.get('/api/graph/:packagename', async (req, res) => {
  const packagename = req.params.packagename;
  try {
    const depGraph = await getFullPackageInfo(packagename);
    const serializedDepGraph = depGraph.export();

    let totalDownloadSize = 0;
    depGraph.forEachNode((_node, attrs) => {
      totalDownloadSize += attrs.weight;
    });

    const units = [
      'b',
      'kb',
      'Mb',
      'Gb'
    ];
    let currentUnit = 0;
    // Stop at the last unit so the index can never run past the array.
    while( totalDownloadSize > 1000 && currentUnit < units.length - 1 ) {
      totalDownloadSize = totalDownloadSize / 1000;
      currentUnit++;
    }

    res.json({
      weight: totalDownloadSize,
      weightUnit: units[currentUnit],
      graph: serializedDepGraph
    });
  }
  catch (error: unknown) {
    // Without this, a rejected crawl leaves the promise unhandled inside the
    // async handler instead of producing a response.
    const reason = error instanceof Error ? error.message : String(error);
    console.error(`Failed to build graph for ${packagename}: ${reason}`);
    res.status(500).json({error: reason});
  }
});
// Builds one combined dependency graph from an uploaded requirements file and
// renders the `fullgraph` view with it. The file is read from the multipart
// field named "requirementsFile". Responds with HTTP 400 when the upload is
// missing or duplicated, and HTTP 500 when the graph cannot be built.
app.post('/api/upload-requirements', async (req, res) => {
  const file = req.files?.requirementsFile;
  if( file === undefined || file === null ) {
    res.status(400);
    res.send("No file uploaded");
    return;
  }
  if( Array.isArray(file) ) {
    res.status(400);
    res.send("Don't send multiple files.");
    return;
  }

  // Decode the upload and parse it line by line.
  const content = file.data.toString('utf8');
  const infos = content.replaceAll('\r\n', '\n').split('\n').map(extractInfoFromPythonDepString);

  let dependencyGraph = new Graph();
  try {
    for( const info of infos ) {
      if( info === undefined ) {
        // Blank lines, comments and anything else that is not a package specifier.
        console.log(`Got an undefined info !`)
        continue;
      }
      dependencyGraph = await getFullPackageInfo(info.name, info.rule, dependencyGraph);
    }
  }
  catch (error: unknown) {
    // Without this, a rejected crawl leaves the promise unhandled inside the
    // async handler. The reason is logged only, never echoed into the response.
    const reason = error instanceof Error ? error.message : String(error);
    console.error(`Failed to build graph from uploaded requirements: ${reason}`);
    res.status(500);
    res.send("Could not build the dependency graph.");
    return;
  }
  const serializedDepGraph = dependencyGraph.export();

  // Formats a raw weight as a rounded number followed by its unit.
  const getHumanWeight = (weight: number) => {
    const units = [
      'b',
      'kb',
      'Mb',
      'Gb'
    ]
    let currentUnit = 0;
    // Stop at the last unit so the index can never run past the array.
    while( weight > 1000 && currentUnit < units.length - 1 ) {
      weight = weight / 1000;
      currentUnit++;
    }
    return `${Math.round(weight)}${units[currentUnit]}`
  }

  // Single pass over the nodes: count them, sum the weights, collect the table rows.
  let nbNodes = 0;
  let totalDownloadSize = 0;
  const packageByWeight: {name: string, weight: number, humanWeight: string}[] = [];
  dependencyGraph.forEachNode((node, attrs) => {
    nbNodes++;
    totalDownloadSize += attrs.weight;
    packageByWeight.push({
      name: node,
      weight: attrs.weight,
      humanWeight: getHumanWeight(attrs.weight),
    })
  });
  // Heaviest packages first.
  packageByWeight.sort( (a, b) => b.weight - a.weight);

  const humanDownloadSize = getHumanWeight(totalDownloadSize);
  res.render('fullgraph', {
    nb_deps: nbNodes,
    totalWeight: {
      raw: totalDownloadSize,
      value: humanDownloadSize,
    },
    graph: serializedDepGraph,
    packageByWeight: packageByWeight
  });
})
/**
* Get info about a package
*/ /*
async function getFullPackageInfo(packagename: string, versionRequirement={operator: "", version: ""}, seenBefore: Set<string> = new Set([])) {
const packagejson = await getRawPackageInfo(packagename);
const name = packagejson.info.name;
const summary = packagejson.info.summary;
const dependenciesList = packagejson.info.requires_dist ?? [];
const builtDepsList: Map<string, {[key: string]: string]}>
for( const depString of dependenciesList ) {
const dep = extractInfoFromPythonDepString(depString);
if( seenBefore.has(dep.name) ) {
continue;
}
const json = await getRawPackageInfo(dep.name, dep.version)
}
}*/
/*
app.post('/api/dependency-tree', async (request, res) => {
const packagename = request.body.package;
console.log("Requested package: ", packagename);
const json = await getRawPackageInfo(packagename);
const name = json.info.name;
const summary = json.info.summary;
let deps = json.info.requires_dist ? json.info.requires_dist.map(e => extractInfoFromPythonDepString(e)) : [];
// avoid duplicates (sometimes packages list themselves as a dependency to create "dependency groups" (?))
deps = deps.filter( dep => dep.name !== packagename );
const versions = Object.entries(json.releases).filter( ([k, v]) => v.length > 0 );
// sort by size, preferring wheels
const versionsSorted = versions.map( ([versionNumber, filesArray]) => {
const wheels = filesArray.filter( file => file.packagetype === 'bdist_wheel' );
if( wheels.length > 0 ) {
// sort by size
const biggestWheel = wheels.sort( (a, b) => b.size - a.size )[0];
return [versionNumber, biggestWheel];
}
else {
const biggestFile = filesArray.sort( (a, b) => b.size - a.size )[0];
return [versionNumber, biggestFile];
}
});
//const sorted = filesArray.sort( (a, b) => b.upload_time_iso8601 - a.upload_time_iso8601 );
const [latestVersionNumber, latestVersion] = versions[0];
const sortedFiles = latestVersion.sort((a, b) => b.size - a.size);
const size = sortedFiles[0].size;
res.render('dependency', {
name,
summary,
weight: size,
dependencies: deps,
// DEBUG
has_parent: request.body.isfirst !== "true",
});
});*/
// Start the HTTP server.
const listenPort = 8080;
app.listen(listenPort);