0

I have the following data

Final_DF$Pswd
 [1] "Password"       "abc1"           "football"       "Pr?hStMz3xuJ"   "Strawberries"   "JTA9SsTLjU"    
 [7] "Jazzed!"        "D!gU4"          "12345!"         "mQf0J?ad2"      "password1"      "CATS?"         
[13] "F!!FtBm"        "!!!!"           "HBpqA?xya9SIi"  "Os4Ft%D"        "fryj0t9KS"      "Password"      
[19] "shadow"         "gpbvYZxYLBq7P"  "blackberries"   "Abc1!"          "p0$YpH4"        "SLOTH"         
[25] "Qwertyuiop"     "SqbmZZ!abHj"    "?Lnp6X6TNz"     "boatstatecat"   "shearer9"       "1B!aKnQadm"    
[31] "JTA9SsTLjU"     "DOGS"           "12345!"         "pgJz8!Hdde"     "qwerty"         "1q2w3e4r5t"    
[37] "flightrockcups" "ashley"         "Htkv5TDS51"     "C8cFMWH?a$S"    "boat"           "password!"     
[43] "aircraft"       "Se3PKKeg?dU"    "abc1"           "Bug!5$r"        "123"            "football"      
[49] "password1"      "Strawberries" 

I have written the following set of searches using grepl:

# Count passwords by the exact combination of character classes they contain
# (lowercase "abc", uppercase "ABC", digits, punctuation).
#
# Each has_* flag is a logical vector with one entry per password. Absence of
# a class is expressed as the negation of its presence flag, so "contains" and
# "does not contain" can never disagree for the same password and the fifteen
# categories below are mutually exclusive. Every category requires at least
# one class to be present, so passwords for which grepl() returns FALSE on all
# four tests (e.g. NA or "") are counted nowhere.
pswd <- Final_DF$Pswd
has_lower <- grepl("[[:lower:]]", pswd)
has_upper <- grepl("[[:upper:]]", pswd)
has_digit <- grepl("[[:digit:]]", pswd)
has_punct <- grepl("[[:punct:]]", pswd)

# Exactly one class ----
# Only abc: no ABC, no digits, no punct
only.abc <- sum(has_lower & !has_upper & !has_digit & !has_punct)
# Only ABC: no abc, no digits, no punct
only.ABC <- sum(has_upper & !has_lower & !has_digit & !has_punct)
# Only digits
only.digits <- sum(has_digit & !has_lower & !has_upper & !has_punct)
# Only punct
only.punct <- sum(has_punct & !has_lower & !has_upper & !has_digit)

# Lowercase combined with other classes ----
# abc and ABC only
contains.abc.and.ABC <- sum(has_lower & has_upper & !has_digit & !has_punct)
# abc and digits only
contains.abc.and.digits <- sum(has_lower & has_digit & !has_upper & !has_punct)
# abc and punct only
contains.abc.and.punct <- sum(has_lower & has_punct & !has_upper & !has_digit)
# abc, digits and punct (no ABC)
contains.abc.digit.and.punct <- sum(
  has_lower & has_digit & has_punct & !has_upper
)
# abc, ABC and digits (no punct)
contains.abc.ABC.and.digit <- sum(
  has_lower & has_upper & has_digit & !has_punct
)
# abc, ABC and punct (no digits)
contains.abc.ABC.and.punct <- sum(
  has_lower & has_upper & has_punct & !has_digit
)
# abc, ABC, digits and punct
contains.abc.ABC.digit.and.punct <- sum(
  has_lower & has_upper & has_digit & has_punct
)

# Remaining combinations without lowercase ----
# ABC and digits only
contains.ABC.and.digit <- sum(has_upper & has_digit & !has_lower & !has_punct)
# ABC and punct only
contains.ABC.and.punct <- sum(has_upper & has_punct & !has_lower & !has_digit)
# ABC, digits and punct (no abc)
contains.ABC.digit.and.punct <- sum(
  has_upper & has_digit & has_punct & !has_lower
)
# Digits and punct only
contains.digit.and.punct <- sum(has_digit & has_punct & !has_lower & !has_upper)

Using those terms, I would now like to create a table showing a frequency count and percentage for each term, something like this (image: table showing outputs of searches).

I could then possibly order the table by counts in descending order, with highest at the top

Any ideas how to proceed please?

Marty
  • 75
  • 1
  • 6

2 Answers2

1

Put all your counts, along with their names, in one data frame.

# Build a summary table with one row per search: `Content` holds the label
# and `Count` the corresponding total. The `....` is a placeholder for the
# remaining names/values and must be filled in before this code will run.
result <- data.frame(Content = c('only.abc', 'only.ABC', 'only.digits', ....), 
                     Count = c(only.abc, only.ABC, only.digits, ....))

You can then use prop.table to compute the percentages.

# Each count as a percentage of the sum of all counts.
result[["Percent"]] <- result[["Count"]] / sum(result[["Count"]]) * 100
Ronak Shah
  • 377,200
  • 20
  • 156
  • 213
0
# Collect the fifteen category counts into a one-column data frame. rbind()
# labels each row with the name of the variable it came from, so the row
# names identify the category.
newdata <- as.data.frame(rbind(
  only.abc, only.ABC, only.digits, only.punct, contains.abc.and.ABC,
  contains.abc.and.digits, contains.abc.and.punct, contains.abc.digit.and.punct,
  contains.abc.ABC.and.digit, contains.abc.ABC.and.punct,
  contains.abc.ABC.digit.and.punct, contains.ABC.and.digit,
  contains.ABC.and.punct, contains.ABC.digit.and.punct,
  contains.digit.and.punct
))
colnames(newdata) <- "Count"
# Percentage of all passwords. The password vector is a column of Final_DF,
# so it is referenced as Final_DF$Pswd rather than as a bare `Pswd`.
newdata$Percent <- (newdata$Count / length(Final_DF$Pswd)) * 100
# Show the table with the highest count first (base R, no extra packages);
# rows with equal counts keep their original relative order.
newdata[order(-newdata$Count), ]

gives

                                 Count Percent
contains.abc.ABC.digit.and.punct    13      26
only.abc                            10      20
contains.abc.and.digits              6      12
contains.abc.and.ABC                 5      10
contains.abc.ABC.and.digit           5      10
contains.abc.ABC.and.punct           3       6
only.ABC                             2       4
contains.digit.and.punct             2       4
only.digits                          1       2
only.punct                           1       2
contains.abc.and.punct               1       2
contains.ABC.and.punct               1       2
contains.abc.digit.and.punct         0       0
contains.ABC.and.digit               0       0
contains.ABC.digit.and.punct         0       0

Although out of scope for this question - the original code is rather unpleasant to read, something the tidyverse may help improve.

Donald Seinen
  • 4,179
  • 5
  • 15
  • 40