initial commit

This commit is contained in:
Youwen Wu 2025-01-07 20:13:13 -08:00
commit d025c735fa
Signed by: youwen5
GPG key ID: 865658ED1FE61EC3
12 changed files with 46418 additions and 0 deletions

1
.envrc Normal file
View file

@ -0,0 +1 @@
use flake

6
.gitignore vendored Normal file
View file

@ -0,0 +1,6 @@
model-out
.direnv
# Added by cargo
/target

43
Cargo.lock generated Normal file
View file

@ -0,0 +1,43 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4
[[package]]
name = "cc"
version = "1.2.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a012a0df96dd6d06ba9a1b29d6402d1a5d77c6befd2566afdc26e10603dc93d7"
dependencies = [
"shlex",
]
[[package]]
name = "cfasttext-sys"
version = "0.7.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "982185af4edba23861639c25e46b36e077d2d60e553c20d1341c9fbf17fdb369"
dependencies = [
"cc",
]
[[package]]
name = "fasttext"
version = "0.7.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd26f3978ff7b22e594af9026912da644237fd7d360889d0c5a6ac8ec4f940c8"
dependencies = [
"cfasttext-sys",
]
[[package]]
name = "fasttext-classifier"
version = "0.1.0"
dependencies = [
"fasttext",
]
[[package]]
name = "shlex"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"

7
Cargo.toml Normal file
View file

@ -0,0 +1,7 @@
[package]
name = "fasttext-classifier"
version = "0.1.0"
edition = "2021"
[dependencies]
fasttext = "0.7.8"

15404
data/cooking.stackexchange.id Normal file

File diff suppressed because it is too large Load diff

15404
data/cooking.stackexchange.txt Normal file

File diff suppressed because it is too large Load diff

12404
data/cooking.train Normal file

File diff suppressed because it is too large Load diff

3000
data/cooking.valid Normal file

File diff suppressed because it is too large Load diff

17
data/readme.txt Normal file
View file

@ -0,0 +1,17 @@
The data in this archive is derived from the user-contributed content on the
Cooking Stack Exchange website (https://cooking.stackexchange.com/), used under
CC-BY-SA 3.0 (http://creativecommons.org/licenses/by-sa/3.0/).
The original data dump can be downloaded from:
https://archive.org/download/stackexchange/cooking.stackexchange.com.7z
and details about the dump can be obtained from:
https://archive.org/details/stackexchange
We distribute two files, under CC-BY-SA 3.0:
- cooking.stackexchange.txt, which contains all question titles and
their associated tags (one question per line, tags are prefixed by
the string "__label__");
- cooking.stackexchange.id, which contains the corresponding row IDs
from the original data dump.

43
flake.lock Normal file
View file

@ -0,0 +1,43 @@
{
"nodes": {
"crane": {
"locked": {
"lastModified": 1736101677,
"narHash": "sha256-iKOPq86AOWCohuzxwFy/MtC8PcSVGnrxBOvxpjpzrAY=",
"owner": "ipetkov",
"repo": "crane",
"rev": "61ba163d85e5adeddc7b3a69bb174034965965b2",
"type": "github"
},
"original": {
"owner": "ipetkov",
"repo": "crane",
"type": "github"
}
},
"nixpkgs": {
"locked": {
"lastModified": 1736012469,
"narHash": "sha256-/qlNWm/IEVVH7GfgAIyP6EsVZI6zjAx1cV5zNyrs+rI=",
"owner": "nixos",
"repo": "nixpkgs",
"rev": "8f3e1f807051e32d8c95cd12b9b421623850a34d",
"type": "github"
},
"original": {
"owner": "nixos",
"ref": "nixos-unstable",
"repo": "nixpkgs",
"type": "github"
}
},
"root": {
"inputs": {
"crane": "crane",
"nixpkgs": "nixpkgs"
}
}
},
"root": "root",
"version": 7
}

59
flake.nix Normal file
View file

@ -0,0 +1,59 @@
# Flake for the fastText experiments crate: exposes one crane-built package and
# one development shell for each supported system.
{
  description = "Fasttext experiments";

  inputs.nixpkgs.url = "github:nixos/nixpkgs/nixos-unstable";
  inputs.crane.url = "github:ipetkov/crane";

  outputs =
    {
      self,
      nixpkgs,
      crane,
    }:
    let
      supportedSystems = [
        "x86_64-linux"
        "aarch64-linux"
        "aarch64-darwin"
        "x86_64-darwin"
      ];

      # Calls `mkOutput` once per supported system, handing it the system name,
      # the nixpkgs set imported for that system, and the matching crane
      # library. The results are collected into an attrset keyed by system.
      perSystem =
        mkOutput:
        nixpkgs.lib.genAttrs supportedSystems (
          system:
          let
            pkgs = import nixpkgs { inherit system; };
          in
          mkOutput {
            inherit system pkgs;
            craneLib = crane.mkLib pkgs;
          }
        );
    in
    {
      # The crate itself, built from the Cargo sources in this directory.
      packages = perSystem (
        { pkgs, craneLib, ... }:
        {
          default = craneLib.buildPackage {
            src = craneLib.cleanCargoSource ./.;
            buildInputs = [ pkgs.fasttext ];
          };
        }
      );

      # Development shell: inherits the package's build inputs and adds
      # linting and editor tooling on top.
      devShells = perSystem (
        {
          system,
          pkgs,
          craneLib,
        }:
        {
          default = craneLib.devShell {
            inputsFrom = [ self.packages.${system}.default ];
            packages = [
              pkgs.clippy
              pkgs.rust-analyzer
            ];
          };
        }
      );
    };
}

30
src/main.rs Normal file
View file

@ -0,0 +1,30 @@
use std::path::Path;
use fasttext::{Args, FastText, LossName, ModelName};
/// Trains a supervised fastText model on `../data/cooking.train` and saves it
/// to `../model-out/out.bin`.
///
/// Both paths are relative, so they are resolved against the directory the
/// program is run from. Training uses a learning rate of `1.0` and `25` epochs.
///
/// # Errors
///
/// Returns the error message produced by the `fasttext` crate when the input
/// path is rejected, training fails, or the model cannot be written. Failures
/// are propagated to the caller rather than panicking, so the declared
/// `Result` return type covers every fallible step.
fn train_cooking_model() -> Result<(), String> {
    let mut args = Args::new();
    args.set_input("../data/cooking.train")?;
    args.set_model(ModelName::SUP);
    args.set_lr(1.0);
    args.set_epoch(25);
    // Explicit loss selection is left disabled; the loss is whatever
    // `Args::new()` configures.
    //args.set_loss(LossName::SOFTMAX);

    let mut ft_model = FastText::new();
    ft_model.train(&args)?;
    ft_model.save_model("../model-out/out.bin")
}
/// Loads the fastText model stored at `filename` and classifies one fixed
/// sample question, returning the predictions.
///
/// `predict` is called with `3` and `0.2` as its second and third arguments
/// (the number of labels requested and the probability threshold in the
/// `fasttext` crate API).
///
/// # Panics
///
/// Panics if `filename` is not valid UTF-8, if the model cannot be loaded, or
/// if prediction fails. A load failure is reported immediately instead of
/// being ignored, so prediction never runs against a model that was not
/// loaded.
fn test_cooking_model(filename: &Path) -> Vec<fasttext::Prediction> {
    let model_path = filename
        .to_str()
        .expect("model path must be valid UTF-8");

    let mut model = FastText::new();
    model
        .load_model(model_path)
        .unwrap_or_else(|err| panic!("failed to load model from {model_path}: {err}"));

    model
        .predict("Safe temperatures to bake cookies at?", 3, 0.2)
        .expect("prediction failed")
}
/// Entry point: runs the sample prediction against the saved model at
/// `../model-out/out.bin` and prints the result in its `Debug` form.
fn main() {
    // Training is disabled here; re-enable this call to rebuild the model
    // before predicting.
    //train_cooking_model().unwrap();
    let model_path = Path::new("../model-out/out.bin");
    let predictions = test_cooking_model(model_path);
    println!("{predictions:?}");
}