NAME
Data::Mining::Apriori - Perl extension implementing the Apriori algorithm for data mining.
SYNOPSIS
use strict;
use warnings;
use Data::Mining::Apriori;
# TRANSACTION 103:CEREAL 101:MILK 102:BREAD
# 1101 1 1 0
# 1102 1 0 1
# 1103 1 1 1
# 1104 1 1 1
# 1105 0 1 1
# 1106 1 1 1
# 1107 1 1 1
# 1108 1 0 1
# 1109 1 1 1
# 1110 1 1 1
my $apriori = new Data::Mining::Apriori;
$apriori->{minSupport}=1.55; # The minimum support; the default value is 1 (percent)
$apriori->{minConfidence}=1.55; # The minimum confidence(percent, optional)
$apriori->{minLift}=1; # The minimum lift(optional)
$apriori->{minLeverage}=0; # The minimum leverage(optional)
$apriori->{minConviction}=0; # The minimum conviction(optional)
$apriori->{minCoverage}=0; # The minimum coverage(percent, optional)
$apriori->{output}=1;
# The output type (1 - Export to a tab-delimited text file; 2 - Export to an Excel file with chart) (optional)
$apriori->{messages}=1; # A boolean value to display the messages (optional)
$apriori->{itemsKeyDescription}{'101'}='MILK'; # Hash table to add items by key and description
$apriori->{itemsKeyDescription}{102}='BREAD';
$apriori->{itemsKeyDescription}{'103'}='CEREAL';
my@items=(103,101);
$apriori->insert_key_items_transaction(\@items); # Insert key items per transaction
$apriori->insert_key_items_transaction([103,102]);
$apriori->insert_key_items_transaction([103,101,102]);
$apriori->insert_key_items_transaction([103,101,102]);
$apriori->insert_key_items_transaction([101,102]);
$apriori->insert_key_items_transaction([103,101,102]);
$apriori->insert_key_items_transaction([103,101,102]);
$apriori->insert_key_items_transaction([103,102]);
$apriori->insert_key_items_transaction([103,101,102]);
$apriori->insert_key_items_transaction([103,101,102]);
# or from a data file
# $apriori->input_data_file("datafile.txt",",");
# Insert items per line (one transaction per line); accepts the path to the data file and the item separator as arguments
# file contents
103,101
103,102
103,101,102
103,101,102
101,102
103,101,102
103,101,102
103,102
103,101,102
103,101,102
print "\n${\$apriori->quantity_possible_rules}"; # Show the quantity of possible rules
$apriori->{limitRules}=10; # The limit of rules
$apriori->generate_rules;
# Generate association rules until no set of items meets the minimum support, confidence, lift, leverage, conviction or coverage, or until the limit of rules is reached
print "\n@{$apriori->{frequentItemset}}\n"; # Show frequent items
#output messages
12
3 items, 12 possible rules
Large itemset size 2, 3 items
Processing...
Frequent itemset: { 103, 102, 101 }, 3 items
Exporting to excel "output_large_itemset_size_2.xlsx"...
Large itemset size 3, 3 items
Processing...
Frequent itemset: { 103, 101, 102 }, 3 items
Exporting to excel "output_large_itemset_size_3.xlsx"...
103, 101, 102
#output file "output_itemset_size_2.txt"
Rules Support % Confidence % Lift Leverage Conviction Coverage %
R1 70,00 77,78 1,11 7,00 0,10 90,00
R2 80,00 88,89 1,11 8,00 0,09 90,00
R3 70,00 87,50 1,25 14,00 0,08 80,00
R4 70,00 87,50 1,25 14,00 0,08 80,00
R5 80,00 88,89 1,11 8,00 0,09 90,00
R6 70,00 77,78 1,11 7,00 0,10 90,00
Rule R1: { 103 } => { 101 }
Support: 70,00 %
Confidence: 77,78 %
Lift: 1,11
Leverage: 7,00
Conviction: 0,10
Coverage: 90,00 %
Items:
103 CEREAL
101 MILK
to be continued...
#output file "output_itemset_size_3.txt"
Rules Support % Confidence % Lift Leverage Conviction Coverage %
R7 60,00 66,67 1,11 6,00 0,12 90,00
R8 60,00 85,71 1,43 18,00 0,07 70,00
R9 60,00 75,00 1,25 12,00 0,09 80,00
R10 60,00 75,00 1,25 12,00 0,09 80,00
Rule R7: { 103 } => { 101, 102 }
Support: 60,00 %
Confidence: 66,67 %
Lift: 1,11
Leverage: 6,00
Conviction: 0,12
Coverage: 90,00 %
Items:
103 CEREAL
101 MILK
102 BREAD
Rule R8: { 101, 103 } => { 102 }
Support: 60,00 %
Confidence: 85,71 %
Lift: 1,43
Leverage: 18,00
Conviction: 0,07
Coverage: 70,00 %
Items:
101 MILK
103 CEREAL
102 BREAD
to be continued...
# or from a database
# CREATE TABLE dimension_product(
# product_key INTEGER NOT NULL PRIMARY KEY,
# product_alternate_key INTEGER NOT NULL,
# product_name TEXT NOT NULL,
# price REAL NOT NULL
# -- ...
# );
# INSERT INTO dimension_product VALUES(1,101,'MILK',10.00);
# INSERT INTO dimension_product VALUES(2,102,'BREAD',10.00);
# INSERT INTO dimension_product VALUES(3,103,'CEREAL',10.00);
# -- ...
# CREATE TABLE fact_sales(
# sales_order_number INTEGER NOT NULL,
# sales_order_line_number INTEGER NOT NULL,
# product_key INTEGER NOT NULL,
# quantity INTEGER NOT NULL,
# -- ...
# PRIMARY KEY(sales_order_number, sales_order_line_number),
# FOREIGN KEY(product_key) REFERENCES dimension_product(product_key)
# );
# INSERT INTO fact_sales VALUES(1101,1,3,1);
# INSERT INTO fact_sales VALUES(1101,2,1,1);
# INSERT INTO fact_sales VALUES(1102,1,3,1);
# INSERT INTO fact_sales VALUES(1102,2,2,1);
# INSERT INTO fact_sales VALUES(1103,1,1,1);
# INSERT INTO fact_sales VALUES(1103,2,2,1);
# INSERT INTO fact_sales VALUES(1103,3,3,1);
# INSERT INTO fact_sales VALUES(1104,1,1,1);
# INSERT INTO fact_sales VALUES(1104,2,2,1);
# INSERT INTO fact_sales VALUES(1104,3,3,1);
# INSERT INTO fact_sales VALUES(1105,1,1,1);
# INSERT INTO fact_sales VALUES(1105,2,2,1);
# INSERT INTO fact_sales VALUES(1106,1,1,1);
# INSERT INTO fact_sales VALUES(1106,2,2,1);
# INSERT INTO fact_sales VALUES(1106,3,3,1);
# INSERT INTO fact_sales VALUES(1107,1,1,1);
# INSERT INTO fact_sales VALUES(1107,2,2,1);
# INSERT INTO fact_sales VALUES(1107,3,3,1);
# INSERT INTO fact_sales VALUES(1108,1,3,1);
# INSERT INTO fact_sales VALUES(1108,2,2,1);
# INSERT INTO fact_sales VALUES(1109,1,1,1);
# INSERT INTO fact_sales VALUES(1109,2,2,1);
# INSERT INTO fact_sales VALUES(1109,3,3,1);
# INSERT INTO fact_sales VALUES(1110,1,1,1);
# INSERT INTO fact_sales VALUES(1110,2,2,1);
# INSERT INTO fact_sales VALUES(1110,3,3,1);
# -- ...
use DBD::SQLite;
use Data::Mining::Apriori;
my $apriori = new Data::Mining::Apriori;
$apriori->{minSupport}=1.55;
$apriori->{minConfidence}=1.55;
$apriori->{minLift}=1;
$apriori->{minLeverage}=0;
$apriori->{minConviction}=0;
$apriori->{minCoverage}=0;
$apriori->{output}=1;
$apriori->{messages}=1;
my $db = DBI->connect('dbi:SQLite:dbname=DW.db','','');
my$sql = qq~
SELECT DISTINCT(fs.sales_order_number)
FROM dimension_product dp
JOIN fact_sales fs ON
dp.product_key = fs.product_key
-- WHERE ...
~;
my$query = $db->prepare($sql);
$query->execute;
my$transactions=$query->fetchall_arrayref;
foreach my$transaction(@$transactions){
$sql = qq~
SELECT dp.product_alternate_key, dp.product_name
FROM dimension_product dp
JOIN fact_sales fs ON
dp.product_key = fs.product_key
WHERE fs.sales_order_number = $$transaction[0];
-- AND ...
~;
$query = $db->prepare($sql);
$query->execute;
my@items;
while(my($key,$description)=$query->fetchrow){
$apriori->{itemsKeyDescription}{$key}=$description;
push@items,$key;
}
$apriori->insert_key_items_transaction(\@items);
}
print "\n${\$apriori->quantity_possible_rules}";
$apriori->{limitRules}=10;
$apriori->generate_rules;
print "\n@{$apriori->{frequentItemset}}\n";
DESCRIPTION
This module implements the apriori algorithm of data mining.
ATTRIBUTES
totalTransactions
The total number of transactions.
minSupport
The minimum support.(percent)
minConfidence
The minimum confidence.(percent, optional)
minLift
The minimum lift.(optional)
minLeverage
The minimum leverage.(optional)
minConviction
The minimum conviction.(optional)
minCoverage
The minimum coverage.(percent, optional)
limitRules
The limit of rules.(optional)
output
The output type:(optional)
1 - Text file delimited by tab;
2 - Excel file with chart.
messages
A boolean value to display the messages. (optional)
itemsKeyDescription
Hash table to add items by key and description.
itemsKeyTransactions
Reference to array, to add the transactions of each item per key.
frequentItemset
Frequent itemset.
associationRules
A data structure to store the name of the rule, key items, implication, support, confidence, lift, leverage, conviction and coverage.
$self->{associationRules} = {
'1' => {
'R1' => {
'items' => [
'103',
'101'
],
'rule' => [
'{ 103 } => { 101 }',
'70,00',
'77,78',
'1,11',
'7,00',
'0,10',
'90,00'
]
}
},
# to be continued...
METHODS
new
Creates a new instance of Data::Mining::Apriori.
insert_key_items_transaction(\@items)
Insert key items per transaction. Accepts the following arguments:
An array reference to key items.
input_data_file("datafile.txt",",")
Insert items per line(transaction). Accepts the following arguments:
Data file;
Item separator.
quantity_possible_rules
Returns the quantity of possible rules.
generate_rules
Generate association rules until no set of items meets the minimum support, confidence, lift, leverage, conviction or coverage, or until the limit of rules is reached.
association_rules
Generate association rules by size of large itemsets.
AUTHOR
Alex Graciano, <agraciano@cpan.org>
COPYRIGHT AND LICENSE
Copyright (C) 2015-2016 by Alex Graciano
This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself, either Perl version 5.12.4 or, at your option, any later version of Perl 5 you may have available.