From 850faffc29706efc36703363f9baba88b649e331 Mon Sep 17 00:00:00 2001
From: Thayol
Date: Wed, 5 Jan 2022 01:17:59 +0100
Subject: [PATCH 1/3] Add PowerShell scripts

---
Review notes (this area is ignored when the patch is applied):
 * Every script now checks that a log file argument was actually given
   before calling Test-Path. Test-Path rejects a null or empty -Path with
   a binding error, so the "CANNOT FIND LOG FILE" / Exit 1 branch was
   never reached when the script was started without arguments.
 * print_summary.ps1 no longer carries the copied $output block; the
   script writes no file, so announcing "Outputting IDs to ..." was
   misleading.
 * The null checks are written as "$null -ne <value>".
 * Each script gained a two-line purpose/usage header.
 * Blob ids on the "index" lines were dropped for the edited files. The
   commit ids on the mbox separator lines are those of the original
   commits; regenerate the series to refresh them.

 .gitattributes                     |  2 ++
 scripts/extract_failed_ids.ps1     | 23 +++++++++++++++++++++++
 scripts/extract_successful_ids.ps1 | 23 +++++++++++++++++++++++
 scripts/print_summary.ps1          | 24 ++++++++++++++++++++++++
 4 files changed, 72 insertions(+)
 create mode 100644 .gitattributes
 create mode 100644 scripts/extract_failed_ids.ps1
 create mode 100644 scripts/extract_successful_ids.ps1
 create mode 100644 scripts/print_summary.ps1

diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..c16e947
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,2 @@
+# Declare files that will always have CRLF line endings on checkout.
+*.ps1 text eol=crlf
\ No newline at end of file
diff --git a/scripts/extract_failed_ids.ps1 b/scripts/extract_failed_ids.ps1
new file mode 100644
--- /dev/null
+++ b/scripts/extract_failed_ids.ps1
@@ -0,0 +1,23 @@
+# Appends one token (presumably the submission ID) from every log line that matches a known failure message to an output file.
+# Usage: extract_failed_ids.ps1 <log file> [<output file>]    Default output file: ./failed.txt
+if (-not [string]::IsNullOrEmpty($args[0]) -and (Test-Path -Path $args[0] -PathType Leaf)) {
+    $file=$args[0]
+}
+else {
+    Write-Host "CANNOT FIND LOG FILE"
+    Exit 1
+}
+
+if ($null -ne $args[1]) {
+    $output=$args[1]
+    Write-Host "Outputting IDs to $output"
+}
+else {
+    $output="./failed.txt"
+}
+
+Select-String -Path $file -Pattern "Could not download submission" | ForEach-Object { -split $_.Line | Select-Object -Skip 11 | Select-Object -First 1 } | foreach { $_.substring(0,$_.Length-1) } >> $output
+Select-String -Path $file -Pattern "Failed to download resource" | ForEach-Object { -split $_.Line | Select-Object -Skip 14 | Select-Object -First 1 } >> $output
+Select-String -Path $file -Pattern "failed to download submission" | ForEach-Object { -split $_.Line | Select-Object -Skip 13 | Select-Object -First 1 } | foreach { $_.substring(0,$_.Length-1) } >> $output
+Select-String -Path $file -Pattern "Failed to write file" | ForEach-Object { -split $_.Line | Select-Object -Skip 12 | Select-Object -First 1 } | foreach { $_.substring(0,$_.Length-1) } >> $output
+Select-String -Path $file -Pattern "skipped due to disabled module" | ForEach-Object { -split $_.Line | Select-Object -Skip 8 | Select-Object -First 1 } >> $output
diff --git a/scripts/extract_successful_ids.ps1 b/scripts/extract_successful_ids.ps1
new file mode 100644
--- /dev/null
+++ b/scripts/extract_successful_ids.ps1
@@ -0,0 +1,23 @@
+# Appends one token (presumably the submission ID) from every log line that matches a known success message to an output file.
+# Usage: extract_successful_ids.ps1 <log file> [<output file>]    Default output file: ./successful.txt
+if (-not [string]::IsNullOrEmpty($args[0]) -and (Test-Path -Path $args[0] -PathType Leaf)) {
+    $file=$args[0]
+}
+else {
+    Write-Host "CANNOT FIND LOG FILE"
+    Exit 1
+}
+
+if ($null -ne $args[1]) {
+    $output=$args[1]
+    Write-Host "Outputting IDs to $output"
+}
+else {
+    $output="./successful.txt"
+}
+
+Select-String -Path $file -Pattern "Downloaded submission" | ForEach-Object { -split $_.Line | Select-Object -Last 3 | Select-Object -SkipLast 2 } >> $output
+Select-String -Path $file -Pattern "Resource hash" | ForEach-Object { -split $_.Line | Select-Object -Last 3 | Select-Object -SkipLast 2 } >> $output
+Select-String -Path $file -Pattern "Download filter" | ForEach-Object { -split $_.Line | Select-Object -Last 3 | Select-Object -SkipLast 2 } >> $output
+Select-String -Path $file -Pattern "already exists, continuing" | ForEach-Object { -split $_.Line | Select-Object -Last 4 | Select-Object -SkipLast 3 } >> $output
+Select-String -Path $file -Pattern "Hard link made" | ForEach-Object { -split $_.Line | Select-Object -Last 1 } >> $output
diff --git a/scripts/print_summary.ps1 b/scripts/print_summary.ps1
new file mode 100644
--- /dev/null
+++ b/scripts/print_summary.ps1
@@ -0,0 +1,24 @@
+# Prints how many matches of each known log message a log file contains.
+# Usage: print_summary.ps1 <log file>
+if (-not [string]::IsNullOrEmpty($args[0]) -and (Test-Path -Path $args[0] -PathType Leaf)) {
+    $file=$args[0]
+}
+else {
+    Write-Host "CANNOT FIND LOG FILE"
+    Exit 1
+}
+
+Write-Host -NoNewline "Downloaded submissions: "
+Write-Host (Select-String -Path $file -Pattern "Downloaded submission" -AllMatches).Matches.Count
+Write-Host -NoNewline "Failed downloads: "
+Write-Host (Select-String -Path $file -Pattern "failed to download submission" -AllMatches).Matches.Count
+Write-Host -NoNewline "Files already downloaded: "
+Write-Host (Select-String -Path $file -Pattern "already exists, continuing" -AllMatches).Matches.Count
+Write-Host -NoNewline "Hard linked submissions: "
+Write-Host (Select-String -Path $file -Pattern "Hard link made" -AllMatches).Matches.Count
+Write-Host -NoNewline "Excluded submissions: "
+Write-Host (Select-String -Path $file -Pattern "in exclusion list" -AllMatches).Matches.Count
+Write-Host -NoNewline "Files with existing hash skipped: "
+Write-Host (Select-String -Path $file -Pattern "downloaded elsewhere" -AllMatches).Matches.Count
+Write-Host -NoNewline "Submissions from excluded subreddits: "
+Write-Host (Select-String -Path $file -Pattern "in skip list" -AllMatches).Matches.Count

From ac3a8e913df84019b0d6dcd7403d5f9a4e946832 Mon Sep 17 00:00:00 2001
From: Thayol
Date: Wed, 5 Jan 2022 13:13:45 +0100
Subject: [PATCH 2/3] Fix wrong offset

---
Review notes (ignored when the patch is applied):
 * The "Download filter" lines now take the fourth token from the end of
   the log line instead of the third.
 * The hunk position follows the two header lines added in patch 1.

 scripts/extract_successful_ids.ps1 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/extract_successful_ids.ps1 b/scripts/extract_successful_ids.ps1
--- a/scripts/extract_successful_ids.ps1
+++ b/scripts/extract_successful_ids.ps1
@@ -18,6 +18,6 @@ else {
 
 Select-String -Path $file -Pattern "Downloaded submission" | ForEach-Object { -split $_.Line | Select-Object -Last 3 | Select-Object -SkipLast 2 } >> $output
 Select-String -Path $file -Pattern "Resource hash" | ForEach-Object { -split $_.Line | Select-Object -Last 3 | Select-Object -SkipLast 2 } >> $output
-Select-String -Path $file -Pattern "Download filter" | ForEach-Object { -split $_.Line | Select-Object -Last 3 | Select-Object -SkipLast 2 } >> $output
+Select-String -Path $file -Pattern "Download filter" | ForEach-Object { -split $_.Line | Select-Object -Last 4 | Select-Object -SkipLast 3 } >> $output
 Select-String -Path $file -Pattern "already exists, continuing" | ForEach-Object { -split $_.Line | Select-Object -Last 4 | Select-Object -SkipLast 3 } >> $output
 Select-String -Path $file -Pattern "Hard link made" | ForEach-Object { -split $_.Line | Select-Object -Last 1 } >> $output

From 3811ec37fb121675a3d5c3007ab96c9c44794144 Mon Sep 17 00:00:00 2001
From: Thayol
Date: Thu, 6 Jan 2022 12:16:44 +0100
Subject: [PATCH 3/3] Fix offset and remove substring

---
Review notes (ignored when the patch is applied):
 * The "Failed to write file" lines now skip 13 tokens instead of 12 and
   no longer drop the last character of the selected token.
 * The hunk position follows the two header lines added in patch 1.

 scripts/extract_failed_ids.ps1 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/extract_failed_ids.ps1 b/scripts/extract_failed_ids.ps1
--- a/scripts/extract_failed_ids.ps1
+++ b/scripts/extract_failed_ids.ps1
@@ -19,5 +19,5 @@ else {
 Select-String -Path $file -Pattern "Could not download submission" | ForEach-Object { -split $_.Line | Select-Object -Skip 11 | Select-Object -First 1 } | foreach { $_.substring(0,$_.Length-1) } >> $output
 Select-String -Path $file -Pattern "Failed to download resource" | ForEach-Object { -split $_.Line | Select-Object -Skip 14 | Select-Object -First 1 } >> $output
 Select-String -Path $file -Pattern "failed to download submission" | ForEach-Object { -split $_.Line | Select-Object -Skip 13 | Select-Object -First 1 } | foreach { $_.substring(0,$_.Length-1) } >> $output
-Select-String -Path $file -Pattern "Failed to write file" | ForEach-Object { -split $_.Line | Select-Object -Skip 12 | Select-Object -First 1 } | foreach { $_.substring(0,$_.Length-1) } >> $output
+Select-String -Path $file -Pattern "Failed to write file" | ForEach-Object { -split $_.Line | Select-Object -Skip 13 | Select-Object -First 1 } >> $output
 Select-String -Path $file -Pattern "skipped due to disabled module" | ForEach-Object { -split $_.Line | Select-Object -Skip 8 | Select-Object -First 1 } >> $output