Skip to content

Commit

Permalink
first commit of version 0.2.3
Browse files Browse the repository at this point in the history
  • Loading branch information
gkiryaziev committed Sep 12, 2015
0 parents commit fd65833
Show file tree
Hide file tree
Showing 12 changed files with 599 additions and 0 deletions.
24 changes: 24 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
## WordList Cleaner v.0.2.3

Removes words containing non-printable characters, filters words by length, removes duplicates, sorts, and counts lines.

![Alt text](/screenshot.jpg?raw=true "Usage")

The order of the option keys does not matter.

05.09.2015 - First commit.

06.09.2015 - Added automatic processing of all files in a directory by extension.

07.09.2015 - Added lines calculator. Fixed some errors.

12.09.2015 - All algorithms have been rewritten to consume less memory. The usage menu and option keys have been changed too.

Examples:
```
wordlistcleaner.exe -min 8 -max 10 -src Source.dic -new New.dic remove trim
wordlistcleaner.exe -src Source.dic -new New.dic trim
wordlistcleaner.exe -a -ext txt duplicate
wordlistcleaner.exe -a sort
wordlistcleaner.exe -a calculate
```
153 changes: 153 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
package main

import (
"errors"
"fmt"
"os"
"path/filepath"
"strconv"
"time"

o "./operations"
s "./service"
)

// DoJob runs exactly one operation group on src_file.
//
// remove and trim form a single "cleaning" group and may be combined with each
// other; duplicate, sorting and calculate must each be requested alone.
// It returns an error when no operation is selected, when operations from
// different groups are mixed, or when the selected operation itself fails.
func DoJob(remove, trim, duplicate, sorting, calculate bool, min, max int, src_file, new_file string) error {

	cleaning := remove || trim

	switch {
	case !cleaning && !duplicate && !sorting && !calculate:
		return errors.New("Not specified operations.")

	case cleaning && !(duplicate || sorting || calculate):
		return o.DoClean(remove, trim, min, max, src_file, new_file)

	case duplicate && !(cleaning || sorting || calculate):
		return o.DoDuplicate(src_file, new_file)

	case sorting && !(cleaning || duplicate || calculate):
		// DoSorting is declared outside this view; the explicit nil keeps the
		// result independent of its exact return type.
		if err := o.DoSorting(src_file, new_file); err != nil {
			return err
		}
		return nil

	case calculate && !(cleaning || duplicate || sorting):
		return o.DoCalculate(src_file)
	}

	return errors.New("Incorrect combination of operations.")
}

// SplitFileName splits file into the part before its extension and the
// extension itself (including the leading dot, as reported by filepath.Ext).
// The extension is empty when file has none.
func SplitFileName(file string) (string, string) {
	suffix := filepath.Ext(file)
	cut := len(file) - len(suffix)
	return file[:cut], suffix
}

// main parses the command line, runs the selected operation and prints the
// elapsed time.
//
// Operations: remove, trim, duplicate, sort, calculate.
// Options:    -min N, -max N, -src FILE, -new FILE, -a, -ext EXT, -h, -v.
//
// Without -a the operation is applied to the single -src file. With -a it is
// applied to every file returned by s.SearchFilesInDir for the current
// directory and the chosen extension, writing each result next to its source
// as "<name>_cleaned<ext>".
func main() {

	const version = "0.2.3"

	// Defaults; each may be overridden by an option below.
	var remove, trim, duplicate, sorting, calculate, auto bool
	minLen := 8
	maxLen := 63
	srcFile := "Dict.dic"
	newFile := "Dict_cleaned.dic"
	fileExt := ".dic"

	// Walk the arguments by index, starting after the program name, so that
	// neither the executable's own name nor the value that follows an option
	// (e.g. the "sort" in "-src sort") is mistaken for an operation keyword.
	args := os.Args
	for k := 1; k < len(args); k++ {
		switch args[k] {
		case "-h":
			s.Usage()
			return
		case "-v":
			fmt.Println(version)
			return
		case "remove":
			remove = true
		case "trim":
			trim = true
		case "duplicate":
			duplicate = true
		case "sort":
			sorting = true
		case "calculate":
			calculate = true
		case "-a":
			auto = true
		case "-min":
			err := s.CheckArgs(len(args), k)
			s.CheckError(err)
			n, err := strconv.Atoi(args[k+1])
			s.CheckError(err)
			minLen = n
			k++ // value consumed
		case "-max":
			err := s.CheckArgs(len(args), k)
			s.CheckError(err)
			n, err := strconv.Atoi(args[k+1])
			s.CheckError(err)
			maxLen = n
			k++ // value consumed
		case "-src":
			err := s.CheckArgs(len(args), k)
			s.CheckError(err)
			srcFile = args[k+1]
			k++ // value consumed
		case "-new":
			err := s.CheckArgs(len(args), k)
			s.CheckError(err)
			newFile = args[k+1]
			k++ // value consumed
		case "-ext":
			err := s.CheckArgs(len(args), k)
			s.CheckError(err)
			fileExt = "." + args[k+1]
			k++ // value consumed
		}
	}

	// start time
	start := time.Now()

	if auto {
		files, err := s.SearchFilesInDir(fileExt, "./")
		s.CheckError(err)
		fmt.Println()
		fmt.Println(len(files), "files found.")
		fmt.Println()
		for _, file := range files {
			name, ext := SplitFileName(file)
			err = DoJob(remove, trim, duplicate, sorting, calculate, minLen, maxLen, file, name+"_cleaned"+ext)
			s.CheckError(err)
		}
	} else {
		err := DoJob(remove, trim, duplicate, sorting, calculate, minLen, maxLen, srcFile, newFile)
		s.CheckError(err)
	}

	// elapsed time
	fmt.Println("\nElapsed time: ", time.Since(start))
}
20 changes: 20 additions & 0 deletions operations/calculate.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package operations

import (
"fmt"

s "../service"
)

// DoCalculate obtains the line count of src_file from s.CalculateLines and
// prints one table row with the file name and that count to stdout.
// On failure nothing is printed and the error is returned unchanged.
func DoCalculate(src_file string) error {
	lines, err := s.CalculateLines(src_file)
	if err == nil {
		fmt.Printf("|%-40s|%20d|\n", src_file, lines)
	}
	return err
}
68 changes: 68 additions & 0 deletions operations/duplicate_search.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
package operations

import (
"bufio"
"fmt"
"os"

s "../service"
)

// DoDuplicate copies src_file to new_file line by line, keeping only the
// first occurrence of every line, and prints a summary table to stdout.
//
// The total line count is taken up front from s.CalculateLines; it drives the
// progress indicator (one tick per 100000 lines read) and the "Removed"
// figure. Every distinct line is kept in memory until the function returns.
//
// It returns the first error met while counting, opening, writing, reading or
// closing the files. The summary is printed only when all of that succeeded.
func DoDuplicate(src_file, new_file string) error {

	const progressStep = 100000

	total, err := s.CalculateLines(src_file)
	if err != nil {
		return err
	}

	in, err := os.Open(src_file)
	if err != nil {
		return err
	}
	defer in.Close()

	out, err := os.Create(new_file)
	if err != nil {
		return err
	}
	// Safety net for early returns only; the success path closes out
	// explicitly below so that a failed close is reported to the caller.
	defer out.Close()

	scanner := bufio.NewScanner(in)
	writer := bufio.NewWriter(out)

	// Set of lines already written; struct{} values carry no payload.
	seen := map[string]struct{}{}
	processed := 0
	unique := 0

	fmt.Printf("\n%s processing: ", src_file)

	for scanner.Scan() {
		line := scanner.Text()

		if _, dup := seen[line]; !dup {
			seen[line] = struct{}{}
			// bufio.Writer remembers the first write error and returns it
			// from Flush, so the per-call results need no checking here.
			writer.WriteString(line)
			writer.WriteByte('\n')
			unique++
		}

		processed++
		if processed%progressStep == 0 {
			// 64-bit arithmetic: processed*100 overflows a 32-bit int once
			// the input passes roughly 21 million lines.
			fmt.Printf("..%d%%", int64(processed)*100/int64(total))
		}
	}

	if err := writer.Flush(); err != nil {
		return err
	}
	// Check the read side before reporting: a failed scan means the counts
	// below would describe only part of the input.
	if err := scanner.Err(); err != nil {
		return err
	}
	if err := out.Close(); err != nil {
		return err
	}

	fmt.Println()
	fmt.Println("Duplicate search result")
	fmt.Printf("|%-20s|%20d|\n", "Total", total)
	fmt.Printf("|%-20s|%20d|\n", "Removed", (total - unique))
	fmt.Printf("|%-20s|%20d|\n", "Result", unique)

	return nil
}
97 changes: 97 additions & 0 deletions operations/remove_trim.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
package operations

import (
"bufio"
"fmt"
"os"

s "../service"
)

// IsPrint reports whether text consists solely of printable ASCII, i.e. every
// character lies in the range 32 (space) through 126 (~). The empty string
// is considered printable.
func IsPrint(text string) bool {
	// A byte-wise scan is sufficient: any character above 126 is either the
	// single byte 127 or is encoded with bytes >= 128, all of which fail here.
	for i := 0; i < len(text); i++ {
		if c := text[i]; !(c >= 32 && c <= 126) {
			return false
		}
	}
	return true
}

// IsSize reports whether the length of line, counted in runes rather than
// bytes, lies within the inclusive range [min, max].
func IsSize(min, max int, line string) bool {
	runes := 0
	for range line {
		runes++
	}
	return runes >= min && runes <= max
}

// DoClean copies src_file to new_file line by line, keeping only the lines
// that pass the enabled filters, and prints a summary table to stdout.
//
//   - remove: keep a line only if IsPrint accepts it.
//   - trim:   keep a line only if IsSize(min, max, line) accepts it.
//
// With both flags set a line must pass both filters. With neither flag set no
// line is written and new_file ends up empty.
//
// The total line count is taken up front from s.CalculateLines; it drives the
// progress indicator (one tick per 100000 lines read) and the "Removed"
// figure.
//
// It returns the first error met while counting, opening, writing, reading or
// closing the files. The summary is printed only when all of that succeeded.
func DoClean(remove, trim bool, min, max int, src_file, new_file string) error {

	const progressStep = 100000

	total, err := s.CalculateLines(src_file)
	if err != nil {
		return err
	}

	in, err := os.Open(src_file)
	if err != nil {
		return err
	}
	defer in.Close()

	out, err := os.Create(new_file)
	if err != nil {
		return err
	}
	// Safety net for early returns only; the success path closes out
	// explicitly below so that a failed close is reported to the caller.
	defer out.Close()

	scanner := bufio.NewScanner(in)
	writer := bufio.NewWriter(out)

	processed := 0
	kept := 0

	fmt.Printf("\n%s processing: ", src_file)

	for scanner.Scan() {
		line := scanner.Text()

		// A disabled filter accepts everything, but at least one filter must
		// be enabled for a line to be written at all.
		keep := (remove || trim) &&
			(!remove || IsPrint(line)) &&
			(!trim || IsSize(min, max, line))

		if keep {
			// bufio.Writer remembers the first write error and returns it
			// from Flush, so the per-call results need no checking here.
			writer.WriteString(line)
			writer.WriteByte('\n')
			kept++
		}

		processed++
		if processed%progressStep == 0 {
			// 64-bit arithmetic: processed*100 overflows a 32-bit int once
			// the input passes roughly 21 million lines.
			fmt.Printf("..%d%%", int64(processed)*100/int64(total))
		}
	}

	if err := writer.Flush(); err != nil {
		return err
	}
	// Check the read side before reporting: a failed scan means the counts
	// below would describe only part of the input.
	if err := scanner.Err(); err != nil {
		return err
	}
	if err := out.Close(); err != nil {
		return err
	}

	fmt.Println()
	fmt.Println("Cleaning result")
	fmt.Printf("|%-20s|%20d|\n", "Total", total)
	fmt.Printf("|%-20s|%20d|\n", "Removed", (total - kept))
	fmt.Printf("|%-20s|%20d|\n", "Result", kept)

	return nil
}
Loading

0 comments on commit fd65833

Please sign in to comment.