initial commit
This commit is contained in:
commit
d025c735fa
12 changed files with 46418 additions and 0 deletions
1
.envrc
Normal file
1
.envrc
Normal file
|
@ -0,0 +1 @@
|
|||
use flake
|
6
.gitignore
vendored
Normal file
6
.gitignore
vendored
Normal file
|
@ -0,0 +1,6 @@
|
|||
model-out
|
||||
.direnv
|
||||
|
||||
# Added by cargo
|
||||
|
||||
/target
|
43
Cargo.lock
generated
Normal file
43
Cargo.lock
generated
Normal file
|
@ -0,0 +1,43 @@
|
|||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
version = 4
|
||||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.2.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a012a0df96dd6d06ba9a1b29d6402d1a5d77c6befd2566afdc26e10603dc93d7"
|
||||
dependencies = [
|
||||
"shlex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cfasttext-sys"
|
||||
version = "0.7.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "982185af4edba23861639c25e46b36e077d2d60e553c20d1341c9fbf17fdb369"
|
||||
dependencies = [
|
||||
"cc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fasttext"
|
||||
version = "0.7.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fd26f3978ff7b22e594af9026912da644237fd7d360889d0c5a6ac8ec4f940c8"
|
||||
dependencies = [
|
||||
"cfasttext-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fasttext-classifier"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"fasttext",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "shlex"
|
||||
version = "1.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
|
7
Cargo.toml
Normal file
7
Cargo.toml
Normal file
|
@ -0,0 +1,7 @@
|
|||
[package]
|
||||
name = "fasttext-classifier"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
fasttext = "0.7.8"
|
15404
data/cooking.stackexchange.id
Normal file
15404
data/cooking.stackexchange.id
Normal file
File diff suppressed because it is too large
Load diff
15404
data/cooking.stackexchange.txt
Normal file
15404
data/cooking.stackexchange.txt
Normal file
File diff suppressed because it is too large
Load diff
12404
data/cooking.train
Normal file
12404
data/cooking.train
Normal file
File diff suppressed because it is too large
Load diff
3000
data/cooking.valid
Normal file
3000
data/cooking.valid
Normal file
File diff suppressed because it is too large
Load diff
17
data/readme.txt
Normal file
17
data/readme.txt
Normal file
|
@ -0,0 +1,17 @@
|
|||
The data in this archive is derived from the user-contributed content on the
|
||||
Cooking Stack Exchange website (https://cooking.stackexchange.com/), used under
|
||||
CC-BY-SA 3.0 (http://creativecommons.org/licenses/by-sa/3.0/).
|
||||
|
||||
The original data dump can be downloaded from:
|
||||
https://archive.org/download/stackexchange/cooking.stackexchange.com.7z
|
||||
and details about the dump obtained from:
|
||||
https://archive.org/details/stackexchange
|
||||
|
||||
We distribute two files, under CC-BY-SA 3.0:
|
||||
|
||||
- cooking.stackexchange.txt, which contains all question titles and
|
||||
their associated tags (one question per line, tags are prefixed by
|
||||
the string "__label__") ;
|
||||
|
||||
- cooking.stackexchange.id, which contains the corresponding row IDs,
|
||||
from the original data dump.
|
43
flake.lock
Normal file
43
flake.lock
Normal file
|
@ -0,0 +1,43 @@
|
|||
{
|
||||
"nodes": {
|
||||
"crane": {
|
||||
"locked": {
|
||||
"lastModified": 1736101677,
|
||||
"narHash": "sha256-iKOPq86AOWCohuzxwFy/MtC8PcSVGnrxBOvxpjpzrAY=",
|
||||
"owner": "ipetkov",
|
||||
"repo": "crane",
|
||||
"rev": "61ba163d85e5adeddc7b3a69bb174034965965b2",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "ipetkov",
|
||||
"repo": "crane",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"nixpkgs": {
|
||||
"locked": {
|
||||
"lastModified": 1736012469,
|
||||
"narHash": "sha256-/qlNWm/IEVVH7GfgAIyP6EsVZI6zjAx1cV5zNyrs+rI=",
|
||||
"owner": "nixos",
|
||||
"repo": "nixpkgs",
|
||||
"rev": "8f3e1f807051e32d8c95cd12b9b421623850a34d",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "nixos",
|
||||
"ref": "nixos-unstable",
|
||||
"repo": "nixpkgs",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"root": {
|
||||
"inputs": {
|
||||
"crane": "crane",
|
||||
"nixpkgs": "nixpkgs"
|
||||
}
|
||||
}
|
||||
},
|
||||
"root": "root",
|
||||
"version": 7
|
||||
}
|
59
flake.nix
Normal file
59
flake.nix
Normal file
|
@ -0,0 +1,59 @@
|
|||
{
  description = "Fasttext experiments";

  inputs = {
    nixpkgs.url = "github:nixos/nixpkgs/nixos-unstable";
    crane.url = "github:ipetkov/crane";
  };

  outputs =
    {
      self,
      nixpkgs,
      crane,
    }:
    let
      # Platforms for which packages and dev shells are generated.
      supportedSystems = [
        "x86_64-linux"
        "aarch64-linux"
        "aarch64-darwin"
        "x86_64-darwin"
      ];

      # Builds an attribute set keyed by system. Each callback receives the
      # system string together with the nixpkgs instance and crane library
      # instantiated for that system, so the setup is written only once.
      eachSystem =
        mkOutputs:
        nixpkgs.lib.genAttrs supportedSystems (
          system:
          let
            pkgs = import nixpkgs { inherit system; };
          in
          mkOutputs {
            inherit system pkgs;
            craneLib = crane.mkLib pkgs;
          }
        );
    in
    {
      # The crate itself, built with crane from the cargo-relevant sources.
      packages = eachSystem (
        { pkgs, craneLib, ... }:
        {
          default = craneLib.buildPackage {
            src = craneLib.cleanCargoSource ./.;

            buildInputs = [
              pkgs.fasttext
            ];
          };
        }
      );

      # Development shell: inherits the package's build inputs and adds
      # linting / editor tooling.
      devShells = eachSystem (
        {
          system,
          pkgs,
          craneLib,
        }:
        {
          default = craneLib.devShell {
            inputsFrom = [ self.packages.${system}.default ];

            packages = [
              pkgs.clippy
              pkgs.rust-analyzer
            ];
          };
        }
      );
    };
}
|
30
src/main.rs
Normal file
30
src/main.rs
Normal file
|
@ -0,0 +1,30 @@
|
|||
use std::path::Path;
|
||||
|
||||
use fasttext::{Args, FastText, LossName, ModelName};
|
||||
|
||||
fn train_cooking_model() -> Result<(), String> {
|
||||
let mut args = Args::new();
|
||||
args.set_input("../data/cooking.train").unwrap();
|
||||
args.set_model(ModelName::SUP);
|
||||
args.set_lr(1.0);
|
||||
args.set_epoch(25);
|
||||
//args.set_loss(LossName::SOFTMAX);
|
||||
let mut ft_model = FastText::new();
|
||||
ft_model.train(&args).unwrap();
|
||||
|
||||
ft_model.save_model("../model-out/out.bin")
|
||||
}
|
||||
|
||||
fn test_cooking_model(filename: &Path) -> Vec<fasttext::Prediction> {
|
||||
let mut text = FastText::new();
|
||||
|
||||
let _ = text.load_model(filename.to_str().unwrap());
|
||||
text.predict("Safe temperatures to bake cookies at?", 3, 0.2)
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
fn main() {
|
||||
//train_cooking_model().unwrap();
|
||||
let result = test_cooking_model(Path::new("../model-out/out.bin"));
|
||||
println!("{:?}", result);
|
||||
}
|
Loading…
Reference in a new issue