서버리스 기반 콘텐츠 추천 서비스 만들기
이상현, Vingle
추천 시스템을 만드는 방법 중 사용자의 평가를 분석하여 추천하는 협업 필터링(Collaborative Filtering)은 가장 잘 알려진 방법입니다. 이 중 사용자-아이템 방식은 Amazon SageMaker나 Amazon Personalize 등으로 쉽게 구축 가능한 반면, 아이템-아이템 추천 방식은 Jaccard Score 계산을 해야 합니다. 본 세션에서는 Amazon S3와 Kinesis Firehose를 통해 수집된 데이터를 Amazon Athena 및 Aurora 등을 이용해 서버리스 기반 ML 알고리즘을 적용하여 콘텐츠 추천을 하는 방법을 살펴봅니다.
36. WITH
-- Distinct (user_id, interest) pairs taken from user_actions, limited to rows whose
-- concatenated year/month/day value sorts strictly after the YYYYMMDD string for
-- "now minus 30 days".
-- NOTE(review): presumably year/month/day are zero-padded string partition columns,
-- so the string comparison behaves like a date comparison -- confirm against the DDL.
interest_reads AS (
    SELECT DISTINCT
        user_id,
        content_id AS interest
    FROM user_actions
    WHERE (year || month || day) > date_format(CURRENT_TIMESTAMP - INTERVAL '30' DAY, '%Y%m%d')
),
-- For every ordered pair of interests (a, b): how many interest_reads rows share the
-- same user_id, i.e. the size of the intersection of their user sets.
-- The self-join also produces the a = b pairs.
ab_inner_reads_count AS (
    SELECT
        lhs.interest AS a,
        rhs.interest AS b,
        COUNT(*) AS count
    FROM interest_reads AS lhs
    INNER JOIN interest_reads AS rhs
        ON rhs.user_id = lhs.user_id
    GROUP BY
        lhs.interest,
        rhs.interest
),
-- Number of interest_reads rows for each interest.
reads_count AS (
    SELECT
        interest,
        COUNT(*) AS count
    FROM interest_reads
    GROUP BY interest
),
-- Jaccard score for each (a, b) pair:
--     shared / (count_a + count_b - shared)
-- The numerator is cast to DOUBLE on purpose: count() yields BIGINT, and
-- BIGINT / BIGINT is integer division in Athena (Presto), which would truncate
-- every score to 0 (or 1 when both sets are identical).
-- The denominator cannot be zero: every interest joined here has at least one row
-- in reads_count, and shared never exceeds either individual count.
similarity AS (
    SELECT
        pair_counts.a AS a,
        pair_counts.b AS b,
        CAST(pair_counts.count AS DOUBLE)
            / (a_reads.count + b_reads.count - pair_counts.count) AS score
    FROM ab_inner_reads_count AS pair_counts
    INNER JOIN reads_count AS a_reads
        ON a_reads.interest = pair_counts.a
    INNER JOIN reads_count AS b_reads
        ON b_reads.interest = pair_counts.b
)
-- Final result set: one row per (a, b) pair with its score.
SELECT
    a,
    b,
    score
FROM similarity
[user_id, interest]
[A, count]
[A, B, count]
[A, B, count]
JOIN [A, count]
JOIN [B, count]
[A, B, score]
44. LOAD DATA FROM S3
-- Target table for the bulk load below, keyed by interest.
-- NOTE(review): `others` presumably holds the serialized list of similar interests
-- for the row's interest -- confirm against the file that is loaded.
CREATE TABLE `interest_similarity` (
    `interest` VARCHAR(50) NOT NULL,
    `others` TEXT COLLATE utf8mb4_bin NOT NULL,
    `created_at` DATETIME NOT NULL,  -- set to the load time by the LOAD DATA statement
    PRIMARY KEY (`interest`)
);  -- terminator added: without it the next statement is parsed as part of this one
-- Bulk-load a comma-separated, double-quote-enclosed file from S3 into
-- interest_similarity. REPLACE overwrites existing rows that collide on the key,
-- the first line of the file is skipped (presumably a header row), and created_at
-- is stamped with the load time.
-- NOTE(review): this URI uses bucket 'athana-result' and key 'athena_output.csv',
-- while the TypeScript loader in this file reads bucket "athena-result", key
-- "/athena-output.csv" -- confirm which spelling is the real one.
LOAD DATA FROM S3 's3://athana-result/athena_output.csv'
    REPLACE
    INTO TABLE interest_similarity
    CHARACTER SET 'utf8mb4'
    COLUMNS
        TERMINATED BY ','
        ENCLOSED BY '"'
    IGNORE 1 LINES
    (@interest, @others)
    SET
        interest = @interest,
        others = @others,
        created_at = CURRENT_TIMESTAMP;
45.
46. import * as AWS from "aws-sdk";
import * as csvParser from "csv-parser";
import * as es from "event-stream";
// AWS SDK clients created once at module load and shared by streamFromS3.
const s3 = new AWS.S3();
// Low-level DynamoDB service client (AWS.DynamoDB, not the DocumentClient).
const dynamodb = new AWS.DynamoDB();
/**
 * Streams the CSV object from S3, parses it row by row, and writes each row to
 * the "ItemSimilarity" DynamoDB table.
 *
 * The returned promise settles only after the whole stream has been processed:
 * it resolves once every row has been written and rejects on the first S3,
 * parsing, or DynamoDB error.
 *
 * NOTE(review): the key starts with "/" — S3 keys normally have no leading
 * slash, and the LOAD DATA statement in this file uses a different bucket/key
 * spelling. Confirm the real object location.
 */
export async function streamFromS3(): Promise<void> {
  return new Promise<void>((resolve, reject) => {
    s3.getObject({
      Bucket: "athena-result",
      Key: "/athena-output.csv"
    })
      .createReadStream()
      // pipe() does not forward errors, so each stage reports its own.
      .on("error", reject)
      .pipe(csvParser({
        escape: "\"",
        separator: ",",
      }))
      .on("error", reject)
      // es.map (callback style) instead of es.mapSync(async ...): the stream only
      // ends after every pending write has called back, and failures surface as
      // an "error" event instead of an unhandled promise rejection.
      .pipe(es.map((record: { [key: string]: string }, done: (err?: Error | null) => void) => {
        dynamodb.putItem({
          TableName: "ItemSimilarity",
          // The low-level client requires AttributeValue maps, not plain strings.
          Item: AWS.DynamoDB.Converter.marshall(record),
        }).promise().then(() => done(), done);
      }))
      .on("error", reject)
      .on("end", () => resolve());
  });
}