-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit fd65833
Showing
12 changed files
with
599 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
## WordList Cleaner v.0.2.3 | ||
|
||
Removes words containing non-printable characters, filters words by length, removes duplicates, sorts, and counts words. | ||
|
||
 | ||
|
||
The order of the option keys does not matter. | ||
|
||
05.09.2015 - First commit. | ||
|
||
06.09.2015 - Added automatic processing of all files in a directory by extension. | ||
|
||
07.09.2015 - Added lines calculator. Fixed some errors. | ||
|
||
12.09.2015 - All algorithms have been rewritten to consume less memory. The usage menu and option keys have been changed too. | ||
|
||
Examples: | ||
``` | ||
wordlistcleaner.exe -min 8 -max 10 -src Source.dic -new New.dic remove trim | ||
wordlistcleaner.exe -src Source.dic -new New.dic trim | ||
wordlistcleaner.exe -a -ext txt duplicate | ||
wordlistcleaner.exe -a sort | ||
wordlistcleaner.exe -a calculate | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,153 @@ | ||
package main | ||
|
||
import ( | ||
"errors" | ||
"fmt" | ||
"os" | ||
"path/filepath" | ||
"strconv" | ||
"time" | ||
|
||
o "./operations" | ||
s "./service" | ||
) | ||
|
||
// Do job | ||
func DoJob(remove, trim, duplicate, sorting, calculate bool, min, max int, src_file, new_file string) error { | ||
|
||
// Check operations | ||
if !remove && !trim && !duplicate && !sorting && !calculate { | ||
return errors.New("Not specified operations.") | ||
} | ||
|
||
// Cleaning | ||
if (remove || trim) && (!duplicate && !sorting && !calculate) { | ||
if err := o.DoClean(remove, trim, min, max, src_file, new_file); err != nil { | ||
return err | ||
} | ||
return nil | ||
} | ||
|
||
//Duplicate search | ||
if duplicate && (!remove && !trim && !sorting && !calculate) { | ||
if err := o.DoDuplicate(src_file, new_file); err != nil { | ||
return err | ||
} | ||
return nil | ||
} | ||
|
||
// Sorting | ||
if sorting && (!remove && !trim && !duplicate && !calculate) { | ||
if err := o.DoSorting(src_file, new_file); err != nil { | ||
return err | ||
} | ||
return nil | ||
} | ||
|
||
// calculate | ||
if calculate && (!remove && !trim && !duplicate && !sorting) { | ||
if err := o.DoCalculate(src_file); err != nil { | ||
return err | ||
} | ||
return nil | ||
} | ||
|
||
return errors.New("Incorrect combination of operations.") | ||
|
||
} | ||
|
||
// SplitFileName splits file into the part before its extension and the
// extension itself, as determined by filepath.Ext (the extension includes
// the leading dot and is empty when the last path element has no dot).
func SplitFileName(file string) (string, string) {
	ext := filepath.Ext(file)
	return file[:len(file)-len(ext)], ext
}
|
||
func main() { | ||
|
||
// variables | ||
var remove bool = false | ||
var trim bool = false | ||
var duplicate bool = false | ||
var sorting bool = false | ||
var calculate bool = false | ||
var min int = 8 | ||
var max int = 63 | ||
var src_file string = "Dict.dic" | ||
var new_file string = "Dict_cleaned.dic" | ||
var auto bool = false | ||
var file_ext = ".dic" | ||
var version = "0.2.3" | ||
|
||
// args | ||
for k, arg := range os.Args { | ||
switch arg { | ||
case "-h": | ||
s.Usage() | ||
return | ||
case "-v": | ||
fmt.Println(version) | ||
return | ||
case "remove": | ||
remove = true | ||
case "trim": | ||
trim = true | ||
case "duplicate": | ||
duplicate = true | ||
case "sort": | ||
sorting = true | ||
case "calculate": | ||
calculate = true | ||
case "-min": | ||
err := s.CheckArgs(len(os.Args), k) | ||
s.CheckError(err) | ||
i, err := strconv.Atoi(os.Args[k+1]) | ||
s.CheckError(err) | ||
min = i | ||
case "-max": | ||
err := s.CheckArgs(len(os.Args), k) | ||
s.CheckError(err) | ||
i, err := strconv.Atoi(os.Args[k+1]) | ||
s.CheckError(err) | ||
max = i | ||
case "-src": | ||
err := s.CheckArgs(len(os.Args), k) | ||
s.CheckError(err) | ||
src_file = os.Args[k+1] | ||
case "-new": | ||
err := s.CheckArgs(len(os.Args), k) | ||
s.CheckError(err) | ||
new_file = os.Args[k+1] | ||
case "-a": | ||
auto = true | ||
case "-ext": | ||
err := s.CheckArgs(len(os.Args), k) | ||
s.CheckError(err) | ||
file_ext = "." + os.Args[k+1] | ||
} | ||
} | ||
|
||
// start time | ||
start := time.Now() | ||
|
||
if auto { | ||
files_list, err := s.SearchFilesInDir(file_ext, "./") | ||
s.CheckError(err) | ||
fmt.Println() | ||
fmt.Println(len(files_list), "files found.") | ||
fmt.Println() | ||
for _, src_file := range files_list { | ||
name, ext := SplitFileName(src_file) | ||
new_file := name + "_cleaned" + ext | ||
err = DoJob(remove, trim, duplicate, sorting, calculate, min, max, src_file, new_file) | ||
s.CheckError(err) | ||
} | ||
} else { | ||
err := DoJob(remove, trim, duplicate, sorting, calculate, min, max, src_file, new_file) | ||
s.CheckError(err) | ||
} | ||
|
||
// elapsed time | ||
elapsed := time.Since(start) | ||
fmt.Println("\nElapsed time: ", elapsed) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
package operations | ||
|
||
import ( | ||
"fmt" | ||
|
||
s "../service" | ||
) | ||
|
||
// Calculate lines in source file | ||
func DoCalculate(src_file string) error { | ||
|
||
total, err := s.CalculateLines(src_file) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
fmt.Printf("|%-40s|%20d|\n", src_file, total) | ||
|
||
return nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
package operations | ||
|
||
import ( | ||
"bufio" | ||
"fmt" | ||
"os" | ||
|
||
s "../service" | ||
) | ||
|
||
// Search duplicates in source file and write uniq to new file | ||
func DoDuplicate(src_file, new_file string) error { | ||
|
||
m := map[string]bool{} | ||
counter := 0 | ||
percent := 0 | ||
added := 0 | ||
total, err := s.CalculateLines(src_file) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
in, err := os.Open(src_file) | ||
if err != nil { | ||
return err | ||
} | ||
defer in.Close() | ||
|
||
out, err := os.Create(new_file) | ||
if err != nil { | ||
return err | ||
} | ||
defer out.Close() | ||
|
||
scanner := bufio.NewScanner(in) | ||
writer := bufio.NewWriter(out) | ||
|
||
fmt.Printf("\n%s processing: ", src_file) | ||
|
||
for scanner.Scan() { | ||
line := scanner.Text() | ||
|
||
if _, seen := m[line]; !seen { | ||
fmt.Fprintln(writer, line) | ||
m[line] = true | ||
added++ | ||
} | ||
|
||
counter++ | ||
if counter == 100000 { | ||
percent += counter | ||
fmt.Printf("..%d%%", (percent * 100 / total)) | ||
counter = 0 | ||
} | ||
} | ||
|
||
if err := writer.Flush(); err != nil { | ||
return err | ||
} | ||
|
||
fmt.Println() | ||
fmt.Println("Duplicate search result") | ||
fmt.Printf("|%-20s|%20d|\n", "Total", total) | ||
fmt.Printf("|%-20s|%20d|\n", "Removed", (total - added)) | ||
fmt.Printf("|%-20s|%20d|\n", "Result", added) | ||
|
||
return scanner.Err() | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
package operations | ||
|
||
import ( | ||
"bufio" | ||
"fmt" | ||
"os" | ||
|
||
s "../service" | ||
) | ||
|
||
// IsPrint reports whether text consists solely of printable ASCII characters,
// i.e. code points 32 (space) through 126 (tilde). An empty string is
// reported as printable.
func IsPrint(text string) bool {
	const (
		firstPrintable = ' ' // 32
		lastPrintable  = '~' // 126
	)
	// Checking bytes gives the same answer as checking runes: every byte of
	// a multi-byte (or invalid) UTF-8 sequence is >= 0x80 and so is rejected.
	for i := 0; i < len(text); i++ {
		if c := text[i]; c < firstPrintable || c > lastPrintable {
			return false
		}
	}
	return true
}
|
||
// IsSize reports whether line is at least min and at most max characters
// long. Length is measured in Unicode code points, not bytes.
func IsSize(min, max int, line string) bool {
	// Count code points by ranging over the string. This yields the same
	// number as len([]rune(line)) but without allocating and copying a rune
	// slice (the original did so twice per call).
	n := 0
	for range line {
		n++
	}
	return n >= min && n <= max
}
|
||
func DoClean(remove, trim bool, min, max int, src_file, new_file string) error { | ||
|
||
counter := 0 | ||
percent := 0 | ||
added := 0 | ||
total, err := s.CalculateLines(src_file) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
in, err := os.Open(src_file) | ||
if err != nil { | ||
return err | ||
} | ||
defer in.Close() | ||
|
||
out, err := os.Create(new_file) | ||
if err != nil { | ||
return err | ||
} | ||
defer out.Close() | ||
|
||
scanner := bufio.NewScanner(in) | ||
writer := bufio.NewWriter(out) | ||
|
||
fmt.Printf("\n%s processing: ", src_file) | ||
|
||
for scanner.Scan() { | ||
line := scanner.Text() | ||
|
||
if remove && trim { | ||
if IsPrint(line) && IsSize(min, max, line) { | ||
fmt.Fprintln(writer, line) | ||
added++ | ||
} | ||
} | ||
|
||
if remove && !trim { | ||
if IsPrint(line) { | ||
fmt.Fprintln(writer, line) | ||
added++ | ||
} | ||
} | ||
|
||
if !remove && trim { | ||
if IsSize(min, max, line) { | ||
fmt.Fprintln(writer, line) | ||
added++ | ||
} | ||
} | ||
|
||
counter++ | ||
if counter == 100000 { | ||
percent += counter | ||
fmt.Printf("..%d%%", (percent * 100 / total)) | ||
counter = 0 | ||
} | ||
} | ||
|
||
if err := writer.Flush(); err != nil { | ||
return err | ||
} | ||
|
||
fmt.Println() | ||
fmt.Println("Cleaning result") | ||
fmt.Printf("|%-20s|%20d|\n", "Total", total) | ||
fmt.Printf("|%-20s|%20d|\n", "Removed", (total - added)) | ||
fmt.Printf("|%-20s|%20d|\n", "Result", added) | ||
|
||
return scanner.Err() | ||
} |
Oops, something went wrong.