chmod +x sort-photorec.sh
./sort-photorec.sh /tmp/photorec-disk
php dedoublonne.php /tmp --no-dry
<?php
// ---------------------------------------------------------------------------
// Command-line handling.
//
// Usage: php dedoublonne.php <dir> [--no-dry]
//
//   <dir>     Directory to scan recursively for duplicate files (must be the
//             first argument).
//   --no-dry  Actually delete duplicates. Without it the script only reports
//             what it would delete.
//
// Errors go to STDERR with a non-zero exit status: exit() with a string
// argument prints to stdout and terminates with status 0, which makes a
// failure look like a success to calling scripts.
// ---------------------------------------------------------------------------
if (!isset($argv[1])) {
    fwrite(STDERR, 'Dir argument is needed'.PHP_EOL);
    exit(1);
}

// Dry-run is the default; the flag may appear anywhere on the command line.
$testMode = !in_array('--no-dry', $argv, true);

$dirPath = $argv[1];
if (!is_dir($dirPath)) {
    fwrite(STDERR, 'The dir does not exist'.PHP_EOL);
    exit(1);
}

// Drop trailing slashes so paths can be built as "<dir>/<name>".
// The filesystem root would become an empty string (which scandir() rejects),
// so it is kept as "/".
$trimmedDirPath = rtrim($dirPath, '/');
$dirPath = $trimmedDirPath === '' ? '/' : $trimmedDirPath;
/**
 * Recursively lists the files found below a directory.
 *
 * Symbolic links are never followed or reported. A link to a directory makes
 * the same file reachable through two paths (and may form a cycle), and a link
 * to a file hashes identically to its target; in both cases a duplicate
 * remover working on the result could delete the only real copy of the data.
 *
 * Directories that cannot be read and files whose size cannot be determined
 * are skipped (PHP still emits its usual warning for them).
 *
 * @param string $dirPath Directory to scan, without a trailing slash.
 *
 * @return array[] List of entries, each an array with the keys
 *                 'path' (string), 'name' (string) and 'size' (int, bytes).
 *                 Files of sub-directories come before the files of the
 *                 directory that contains them.
 */
function findFiles($dirPath)
{
    $entries = scandir($dirPath);
    if ($entries === false) {
        // Unreadable directory: nothing to report for this branch.
        return array();
    }

    $files = array();
    foreach ($entries as $currentFileName) {
        if ($currentFileName === '.' || $currentFileName === '..') {
            continue;
        }

        $currentFilePath = sprintf('%s/%s', $dirPath, $currentFileName);

        // Must be tested before is_dir()/filesize(), which both follow links.
        if (is_link($currentFilePath)) {
            continue;
        }

        if (is_dir($currentFilePath)) {
            $files = array_merge(findFiles($currentFilePath), $files);
            continue;
        }

        $currentFileSize = filesize($currentFilePath);
        if ($currentFileSize === false) {
            // Size unknown (file vanished, permission problem, ...): a false
            // size must not be used as a grouping key by callers.
            continue;
        }

        $files[] = [
            'path' => $currentFilePath,
            'name' => $currentFileName,
            'size' => $currentFileSize,
        ];
    }

    return $files;
}
// ---------------------------------------------------------------------------
// Duplicate detection and removal.
//
// Two files are duplicates when they have the same size and the same MD5
// digest. The first file seen for a given content is kept; later ones are
// deleted (or only reported when $testMode is true).
// ---------------------------------------------------------------------------
$files = findFiles($dirPath);

// Process files in ascending size order. usort() expects the callback to
// return an integer (<0, 0, >0); returning the boolean result of ">=" is
// deprecated since PHP 8.0 and never reports "less than".
usort($files, function ($fileA, $fileB) {
    if ($fileA['size'] === $fileB['size']) {
        return 0;
    }

    return $fileA['size'] < $fileB['size'] ? -1 : 1;
});

// Files kept so far, grouped by size. Each kept entry gets a 'hash' key the
// first time it has to be compared, so every file is hashed at most once.
$filePoolBySize = array();
$nbDeletedFiles = 0;

foreach ($files as $currentFile) {
    $currentSize = $currentFile['size'];
    if (!is_int($currentSize)) {
        // Unknown size: the file cannot be grouped reliably, so it is kept.
        continue;
    }

    $currentHash = null;
    $isDuplicate = false;

    // Hashing is only needed when another file of the same size was kept.
    if (!empty($filePoolBySize[$currentSize])) {
        $currentHash = hash_file('md5', $currentFile['path']);
        if ($currentHash === false) {
            // Unreadable file: without a digest it must never be treated as
            // a duplicate (false === false would otherwise match).
            fwrite(STDERR, sprintf('Unable to read "%s", file kept'.PHP_EOL, $currentFile['path']));
            continue;
        }

        foreach ($filePoolBySize[$currentSize] as &$keptFile) {
            if (!array_key_exists('hash', $keptFile)) {
                // May be false when the kept file is unreadable; a false
                // digest never equals the string digest of the current file.
                $keptFile['hash'] = hash_file('md5', $keptFile['path']);
            }
            if ($keptFile['hash'] === $currentHash) {
                $isDuplicate = true;
                break;
            }
        }
        unset($keptFile); // break the reference left by the by-reference loop
    }

    if (!$isDuplicate) {
        if ($currentHash !== null) {
            $currentFile['hash'] = $currentHash;
        }
        $filePoolBySize[$currentSize][] = $currentFile;
        continue;
    }

    // Only count the file as removed when the deletion really happened
    // (in dry-run mode nothing is deleted and the file is just reported).
    if ($testMode === false && !unlink($currentFile['path'])) {
        fwrite(STDERR, sprintf('Unable to delete "%s"'.PHP_EOL, $currentFile['path']));
        continue;
    }

    $nbDeletedFiles++;
    echo sprintf('Suppression doublon "%s"'.PHP_EOL, $currentFile['path']);
}

// Summary: total files scanned, duplicates removed, files remaining.
echo sprintf('Nb fichiers : %s'.PHP_EOL, count($files));
echo sprintf('Nb doublons supprimés : %s'.PHP_EOL, $nbDeletedFiles);
echo sprintf('Nb fichiers restants : %s'.PHP_EOL, count($files) - $nbDeletedFiles);